ia64.c revision 117395
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3   Contributed by James E. Wilson <wilson@cygnus.com> and
4   		  David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GNU CC.
7
8GNU CC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU CC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU CC; see the file COPYING.  If not, write to
20the Free Software Foundation, 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA.  */
22
23#include "config.h"
24#include "system.h"
25#include "rtl.h"
26#include "tree.h"
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "output.h"
33#include "insn-attr.h"
34#include "flags.h"
35#include "recog.h"
36#include "expr.h"
37#include "optabs.h"
38#include "except.h"
39#include "function.h"
40#include "ggc.h"
41#include "basic-block.h"
42#include "toplev.h"
43#include "sched-int.h"
44#include "timevar.h"
45#include "target.h"
46#include "target-def.h"
47#include "tm_p.h"
48#include "langhooks.h"
49
50/* This is used for communication between ASM_OUTPUT_LABEL and
51   ASM_OUTPUT_LABELREF.  */
52int ia64_asm_output_label = 0;
53
54/* Define the information needed to generate branch and scc insns.  This is
55   stored from the compare operation.  */
56struct rtx_def * ia64_compare_op0;
57struct rtx_def * ia64_compare_op1;
58
59/* Register names for ia64_expand_prologue.  */
60static const char * const ia64_reg_numbers[96] =
61{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70  "r104","r105","r106","r107","r108","r109","r110","r111",
71  "r112","r113","r114","r115","r116","r117","r118","r119",
72  "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74/* ??? These strings could be shared with REGISTER_NAMES.  */
75static const char * const ia64_input_reg_names[8] =
76{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
77
78/* ??? These strings could be shared with REGISTER_NAMES.  */
79static const char * const ia64_local_reg_names[80] =
80{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91/* ??? These strings could be shared with REGISTER_NAMES.  */
92static const char * const ia64_output_reg_names[8] =
93{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95/* String used with the -mfixed-range= option.  */
96const char *ia64_fixed_range_string;
97
98/* Determines whether we use adds, addl, or movl to generate our
99   TLS immediate offsets.  */
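/* A 14-bit offset can be formed with adds, a 22-bit offset with addl, and a
   full 64-bit offset needs movl; the -mtls-size= option selects 14, 22 or 64.  */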
100int ia64_tls_size = 22;
101
102/* String used with the -mtls-size= option.  */
103const char *ia64_tls_size_string;
104
105/* Determines whether we run our final scheduling pass or not.  We always
106   avoid the normal second scheduling pass.  */
107static int ia64_flag_schedule_insns2;
108
109/* Variables which are this size or smaller are put in the sdata/sbss
110   sections.  */
111
112unsigned int ia64_section_threshold;
113
114/* Structure to be filled in by ia64_compute_frame_size with register
115   save masks and offsets for the current function.  */
116
117struct ia64_frame_info
118{
119  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
120				   the caller's scratch area.  */
121  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
122  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
123  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
124  HARD_REG_SET mask;		/* mask of saved registers.  */
125  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
126				   registers or long-term scratches.  */
127  int n_spilled;		/* number of spilled registers.  */
128  int reg_fp;			/* register for fp.  */
129  int reg_save_b0;		/* save register for b0.  */
130  int reg_save_pr;		/* save register for prs.  */
131  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
132  int reg_save_ar_unat;		/* save register for ar.unat.  */
133  int reg_save_ar_lc;		/* save register for ar.lc.  */
134  int reg_save_gp;		/* save register for gp.  */
135  int n_input_regs;		/* number of input registers used.  */
136  int n_local_regs;		/* number of local registers used.  */
137  int n_output_regs;		/* number of output registers used.  */
138  int n_rotate_regs;		/* number of rotating registers used.  */
139
140  char need_regstk;		/* true if a .regstk directive needed.  */
141  char initialized;		/* true if the data is finalized.  */
142};
143
144/* Current frame information calculated by ia64_compute_frame_size.  */
145static struct ia64_frame_info current_frame_info;
146
147static rtx gen_tls_get_addr PARAMS ((void));
148static rtx gen_thread_pointer PARAMS ((void));
149static int find_gr_spill PARAMS ((int));
150static int next_scratch_gr_reg PARAMS ((void));
151static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
152static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
153static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
154static void finish_spill_pointers PARAMS ((void));
155static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
156static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
157static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
158static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
159static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
160static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
161
162static enum machine_mode hfa_element_mode PARAMS ((tree, int));
163static void fix_range PARAMS ((const char *));
164static struct machine_function * ia64_init_machine_status PARAMS ((void));
165static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
166static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
167static void emit_predicate_relation_info PARAMS ((void));
168static bool ia64_in_small_data_p PARAMS ((tree));
169static void ia64_encode_section_info PARAMS ((tree, int));
170static const char *ia64_strip_name_encoding PARAMS ((const char *));
171static void process_epilogue PARAMS ((void));
172static int process_set PARAMS ((FILE *, rtx));
173
174static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
175					     tree, rtx));
176static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
177					     tree, rtx));
178static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
179						 tree, rtx));
180static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
181						  tree, rtx));
182static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
183static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
184static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
185static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
186static void ia64_output_function_end_prologue PARAMS ((FILE *));
187
188static int ia64_issue_rate PARAMS ((void));
189static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
190static void ia64_sched_init PARAMS ((FILE *, int, int));
191static void ia64_sched_finish PARAMS ((FILE *, int));
192static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
193						int *, int, int));
194static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
195static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
196static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
197
198static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
199					  HOST_WIDE_INT, tree));
200
201static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
202					     unsigned HOST_WIDE_INT));
203static void ia64_rwreloc_select_section PARAMS ((tree, int,
204					         unsigned HOST_WIDE_INT))
205     ATTRIBUTE_UNUSED;
206static void ia64_rwreloc_unique_section PARAMS ((tree, int))
207     ATTRIBUTE_UNUSED;
208static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
209					             unsigned HOST_WIDE_INT))
210     ATTRIBUTE_UNUSED;
211static unsigned int ia64_rwreloc_section_type_flags
212     PARAMS ((tree, const char *, int))
213     ATTRIBUTE_UNUSED;
214
215static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
216     ATTRIBUTE_UNUSED;
217
218/* Table of valid machine attributes.  */
219static const struct attribute_spec ia64_attribute_table[] =
220{
221  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
222  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
223  { NULL,              0, 0, false, false, false, NULL }
224};
225
226/* Initialize the GCC target structure.  */
227#undef TARGET_ATTRIBUTE_TABLE
228#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
229
230#undef TARGET_INIT_BUILTINS
231#define TARGET_INIT_BUILTINS ia64_init_builtins
232
233#undef TARGET_EXPAND_BUILTIN
234#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
235
236#undef TARGET_ASM_BYTE_OP
237#define TARGET_ASM_BYTE_OP "\tdata1\t"
238#undef TARGET_ASM_ALIGNED_HI_OP
239#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
240#undef TARGET_ASM_ALIGNED_SI_OP
241#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
242#undef TARGET_ASM_ALIGNED_DI_OP
243#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
244#undef TARGET_ASM_UNALIGNED_HI_OP
245#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
246#undef TARGET_ASM_UNALIGNED_SI_OP
247#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
248#undef TARGET_ASM_UNALIGNED_DI_OP
249#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
250#undef TARGET_ASM_INTEGER
251#define TARGET_ASM_INTEGER ia64_assemble_integer
252
253#undef TARGET_ASM_FUNCTION_PROLOGUE
254#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
255#undef TARGET_ASM_FUNCTION_END_PROLOGUE
256#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
257#undef TARGET_ASM_FUNCTION_EPILOGUE
258#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
259
260#undef TARGET_IN_SMALL_DATA_P
261#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
262#undef TARGET_ENCODE_SECTION_INFO
263#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
264#undef TARGET_STRIP_NAME_ENCODING
265#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
266
267#undef TARGET_SCHED_ADJUST_COST
268#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
269#undef TARGET_SCHED_ISSUE_RATE
270#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
271#undef TARGET_SCHED_VARIABLE_ISSUE
272#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
273#undef TARGET_SCHED_INIT
274#define TARGET_SCHED_INIT ia64_sched_init
275#undef TARGET_SCHED_FINISH
276#define TARGET_SCHED_FINISH ia64_sched_finish
277#undef TARGET_SCHED_REORDER
278#define TARGET_SCHED_REORDER ia64_sched_reorder
279#undef TARGET_SCHED_REORDER2
280#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
281
282#ifdef HAVE_AS_TLS
283#undef TARGET_HAVE_TLS
284#define TARGET_HAVE_TLS true
285#endif
286
287#undef TARGET_ASM_OUTPUT_MI_THUNK
288#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
289#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
290#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
291
292struct gcc_target targetm = TARGET_INITIALIZER;
293
294/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
295
296int
297call_operand (op, mode)
298     rtx op;
299     enum machine_mode mode;
300{
301  if (mode != GET_MODE (op) && mode != VOIDmode)
302    return 0;
303
304  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
305	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
306}
307
308/* Return 1 if OP refers to a symbol in the sdata section.  */
309
310int
311sdata_symbolic_operand (op, mode)
312     rtx op;
313     enum machine_mode mode ATTRIBUTE_UNUSED;
314{
315  switch (GET_CODE (op))
316    {
317    case CONST:
318      if (GET_CODE (XEXP (op, 0)) != PLUS
319	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
320	break;
321      op = XEXP (XEXP (op, 0), 0);
322      /* FALLTHRU */
323
324    case SYMBOL_REF:
325      if (CONSTANT_POOL_ADDRESS_P (op))
326	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
327      else
328	{
329	  const char *str = XSTR (op, 0);
330          return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
331	}
332
333    default:
334      break;
335    }
336
337  return 0;
338}
339
340/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
341
342int
343got_symbolic_operand (op, mode)
344     rtx op;
345     enum machine_mode mode ATTRIBUTE_UNUSED;
346{
347  switch (GET_CODE (op))
348    {
349    case CONST:
350      op = XEXP (op, 0);
351      if (GET_CODE (op) != PLUS)
352	return 0;
353      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
354	return 0;
355      op = XEXP (op, 1);
356      if (GET_CODE (op) != CONST_INT)
357	return 0;
358
359	return 1;
360
361      /* Ok if we're not using GOT entries at all.  */
362      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
363	return 1;
364
365      /* "Ok" while emitting rtl, since otherwise we won't be provided
366	 with the entire offset during emission, which makes it very
367	 hard to split the offset into high and low parts.  */
368      if (rtx_equal_function_value_matters)
369	return 1;
370
371      /* Force the low 14 bits of the constant to zero so that we do not
372	 use up so many GOT entries.  */
373      return (INTVAL (op) & 0x3fff) == 0;
374
375    case SYMBOL_REF:
376    case LABEL_REF:
377      return 1;
378
379    default:
380      break;
381    }
382  return 0;
383}
384
385/* Return 1 if OP refers to a symbol.  */
386
387int
388symbolic_operand (op, mode)
389     rtx op;
390     enum machine_mode mode ATTRIBUTE_UNUSED;
391{
392  switch (GET_CODE (op))
393    {
394    case CONST:
395    case SYMBOL_REF:
396    case LABEL_REF:
397      return 1;
398
399    default:
400      break;
401    }
402  return 0;
403}
404
405/* Return tls_model if OP refers to a TLS symbol.  */
406
407int
408tls_symbolic_operand (op, mode)
409     rtx op;
410     enum machine_mode mode ATTRIBUTE_UNUSED;
411{
412  const char *str;
413
414  if (GET_CODE (op) != SYMBOL_REF)
415    return 0;
416  str = XSTR (op, 0);
417  if (str[0] != ENCODE_SECTION_INFO_CHAR)
418    return 0;
419  switch (str[1])
420    {
421    case 'G':
422      return TLS_MODEL_GLOBAL_DYNAMIC;
423    case 'L':
424      return TLS_MODEL_LOCAL_DYNAMIC;
425    case 'i':
426      return TLS_MODEL_INITIAL_EXEC;
427    case 'l':
428      return TLS_MODEL_LOCAL_EXEC;
429    }
430  return 0;
431}
432
433
434/* Return 1 if OP refers to a function.  */
435
436int
437function_operand (op, mode)
438     rtx op;
439     enum machine_mode mode ATTRIBUTE_UNUSED;
440{
441  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
442    return 1;
443  else
444    return 0;
445}
446
447/* Return 1 if OP is setjmp or a similar function.  */
448
449/* ??? This is an unsatisfying solution.  Should rethink.  */
450
451int
452setjmp_operand (op, mode)
453     rtx op;
454     enum machine_mode mode ATTRIBUTE_UNUSED;
455{
456  const char *name;
457  int retval = 0;
458
459  if (GET_CODE (op) != SYMBOL_REF)
460    return 0;
461
462  name = XSTR (op, 0);
463
464  /* The following code is borrowed from special_function_p in calls.c.  */
465
466  /* Disregard prefix _, __ or __x.  */
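  /* For example, "_setjmp" and "__sigsetjmp" are then matched below as well.  */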
467  if (name[0] == '_')
468    {
469      if (name[1] == '_' && name[2] == 'x')
470	name += 3;
471      else if (name[1] == '_')
472	name += 2;
473      else
474	name += 1;
475    }
476
477  if (name[0] == 's')
478    {
479      retval
480	= ((name[1] == 'e'
481	    && (! strcmp (name, "setjmp")
482		|| ! strcmp (name, "setjmp_syscall")))
483	   || (name[1] == 'i'
484	       && ! strcmp (name, "sigsetjmp"))
485	   || (name[1] == 'a'
486	       && ! strcmp (name, "savectx")));
487    }
488  else if ((name[0] == 'q' && name[1] == 's'
489	    && ! strcmp (name, "qsetjmp"))
490	   || (name[0] == 'v' && name[1] == 'f'
491	       && ! strcmp (name, "vfork")))
492    retval = 1;
493
494  return retval;
495}
496
497/* Return 1 if OP is a general operand, but exclude symbolic operands
498   when generating PIC.  */
499
500/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
501   from PREDICATE_CODES.  */
502
503int
504move_operand (op, mode)
505     rtx op;
506     enum machine_mode mode;
507{
508  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
509    return 0;
510
511  return general_operand (op, mode);
512}
513
514/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
515
516int
517gr_register_operand (op, mode)
518     rtx op;
519     enum machine_mode mode;
520{
521  if (! register_operand (op, mode))
522    return 0;
523  if (GET_CODE (op) == SUBREG)
524    op = SUBREG_REG (op);
525  if (GET_CODE (op) == REG)
526    {
527      unsigned int regno = REGNO (op);
528      if (regno < FIRST_PSEUDO_REGISTER)
529	return GENERAL_REGNO_P (regno);
530    }
531  return 1;
532}
533
534/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
535
536int
537fr_register_operand (op, mode)
538     rtx op;
539     enum machine_mode mode;
540{
541  if (! register_operand (op, mode))
542    return 0;
543  if (GET_CODE (op) == SUBREG)
544    op = SUBREG_REG (op);
545  if (GET_CODE (op) == REG)
546    {
547      unsigned int regno = REGNO (op);
548      if (regno < FIRST_PSEUDO_REGISTER)
549	return FR_REGNO_P (regno);
550    }
551  return 1;
552}
553
554/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
555
556int
557grfr_register_operand (op, mode)
558     rtx op;
559     enum machine_mode mode;
560{
561  if (! register_operand (op, mode))
562    return 0;
563  if (GET_CODE (op) == SUBREG)
564    op = SUBREG_REG (op);
565  if (GET_CODE (op) == REG)
566    {
567      unsigned int regno = REGNO (op);
568      if (regno < FIRST_PSEUDO_REGISTER)
569	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
570    }
571  return 1;
572}
573
574/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
575
576int
577gr_nonimmediate_operand (op, mode)
578     rtx op;
579     enum machine_mode mode;
580{
581  if (! nonimmediate_operand (op, mode))
582    return 0;
583  if (GET_CODE (op) == SUBREG)
584    op = SUBREG_REG (op);
585  if (GET_CODE (op) == REG)
586    {
587      unsigned int regno = REGNO (op);
588      if (regno < FIRST_PSEUDO_REGISTER)
589	return GENERAL_REGNO_P (regno);
590    }
591  return 1;
592}
593
594/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
595
596int
597fr_nonimmediate_operand (op, mode)
598     rtx op;
599     enum machine_mode mode;
600{
601  if (! nonimmediate_operand (op, mode))
602    return 0;
603  if (GET_CODE (op) == SUBREG)
604    op = SUBREG_REG (op);
605  if (GET_CODE (op) == REG)
606    {
607      unsigned int regno = REGNO (op);
608      if (regno < FIRST_PSEUDO_REGISTER)
609	return FR_REGNO_P (regno);
610    }
611  return 1;
612}
613
614/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
615
616int
617grfr_nonimmediate_operand (op, mode)
618     rtx op;
619     enum machine_mode mode;
620{
621  if (! nonimmediate_operand (op, mode))
622    return 0;
623  if (GET_CODE (op) == SUBREG)
624    op = SUBREG_REG (op);
625  if (GET_CODE (op) == REG)
626    {
627      unsigned int regno = REGNO (op);
628      if (regno < FIRST_PSEUDO_REGISTER)
629	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
630    }
631  return 1;
632}
633
634/* Return 1 if OP is a GR register operand, or the constant zero.  */
635
636int
637gr_reg_or_0_operand (op, mode)
638     rtx op;
639     enum machine_mode mode;
640{
641  return (op == const0_rtx || gr_register_operand (op, mode));
642}
643
644/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
645
646int
647gr_reg_or_5bit_operand (op, mode)
648     rtx op;
649     enum machine_mode mode;
650{
651  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
652	  || GET_CODE (op) == CONSTANT_P_RTX
653	  || gr_register_operand (op, mode));
654}
655
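/* The CONST_OK_FOR_<letter> tests used below correspond to the ia64 constraint
   letters: M is a 6 bit immediate, K an 8 bit immediate, L an 8 bit adjusted
   immediate, I a 14 bit immediate, and J a 22 bit immediate.  */
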
656/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
657
658int
659gr_reg_or_6bit_operand (op, mode)
660     rtx op;
661     enum machine_mode mode;
662{
663  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
664	  || GET_CODE (op) == CONSTANT_P_RTX
665	  || gr_register_operand (op, mode));
666}
667
668/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
669
670int
671gr_reg_or_8bit_operand (op, mode)
672     rtx op;
673     enum machine_mode mode;
674{
675  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
676	  || GET_CODE (op) == CONSTANT_P_RTX
677	  || gr_register_operand (op, mode));
678}
679
680/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
681
682int
683grfr_reg_or_8bit_operand (op, mode)
684     rtx op;
685     enum machine_mode mode;
686{
687  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
688	  || GET_CODE (op) == CONSTANT_P_RTX
689	  || grfr_register_operand (op, mode));
690}
691
692/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
693   operand.  */
694
695int
696gr_reg_or_8bit_adjusted_operand (op, mode)
697     rtx op;
698     enum machine_mode mode;
699{
700  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
701	  || GET_CODE (op) == CONSTANT_P_RTX
702	  || gr_register_operand (op, mode));
703}
704
705/* Return 1 if OP is a register operand, or is valid for both an 8 bit
706   immediate and an 8 bit adjusted immediate operand.  This is necessary
707   because when we emit a compare, we don't know what the condition will be,
708   so the immediate must be valid for both GT and LT comparisons.  */
709
710int
711gr_reg_or_8bit_and_adjusted_operand (op, mode)
712     rtx op;
713     enum machine_mode mode;
714{
715  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
716	   && CONST_OK_FOR_L (INTVAL (op)))
717	  || GET_CODE (op) == CONSTANT_P_RTX
718	  || gr_register_operand (op, mode));
719}
720
721/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
722
723int
724gr_reg_or_14bit_operand (op, mode)
725     rtx op;
726     enum machine_mode mode;
727{
728  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
729	  || GET_CODE (op) == CONSTANT_P_RTX
730	  || gr_register_operand (op, mode));
731}
732
733/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
734
735int
736gr_reg_or_22bit_operand (op, mode)
737     rtx op;
738     enum machine_mode mode;
739{
740  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
741	  || GET_CODE (op) == CONSTANT_P_RTX
742	  || gr_register_operand (op, mode));
743}
744
745/* Return 1 if OP is a 6 bit immediate operand.  */
746
747int
748shift_count_operand (op, mode)
749     rtx op;
750     enum machine_mode mode ATTRIBUTE_UNUSED;
751{
752  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
753	  || GET_CODE (op) == CONSTANT_P_RTX);
754}
755
756/* Return 1 if OP is a 5 bit immediate operand.  */
757
758int
759shift_32bit_count_operand (op, mode)
760     rtx op;
761     enum machine_mode mode ATTRIBUTE_UNUSED;
762{
763  return ((GET_CODE (op) == CONST_INT
764	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
765	  || GET_CODE (op) == CONSTANT_P_RTX);
766}
767
768/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
769
770int
771shladd_operand (op, mode)
772     rtx op;
773     enum machine_mode mode ATTRIBUTE_UNUSED;
774{
775  return (GET_CODE (op) == CONST_INT
776	  && (INTVAL (op) == 2 || INTVAL (op) == 4
777	      || INTVAL (op) == 8 || INTVAL (op) == 16));
778}
779
780/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
781
782int
783fetchadd_operand (op, mode)
784     rtx op;
785     enum machine_mode mode ATTRIBUTE_UNUSED;
786{
787  return (GET_CODE (op) == CONST_INT
788          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
789              INTVAL (op) == -4  || INTVAL (op) == -1 ||
790              INTVAL (op) == 1   || INTVAL (op) == 4  ||
791              INTVAL (op) == 8   || INTVAL (op) == 16));
792}
793
794/* Return 1 if OP is a floating-point constant zero, one, or a register.  */
795
796int
797fr_reg_or_fp01_operand (op, mode)
798     rtx op;
799     enum machine_mode mode;
800{
801  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
802	  || fr_register_operand (op, mode));
803}
804
805/* Like nonimmediate_operand, but don't allow MEMs that try to use a
806   POST_MODIFY with a REG as displacement.  */
807
808int
809destination_operand (op, mode)
810     rtx op;
811     enum machine_mode mode;
812{
813  if (! nonimmediate_operand (op, mode))
814    return 0;
815  if (GET_CODE (op) == MEM
816      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
817      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
818    return 0;
819  return 1;
820}
821
822/* Like memory_operand, but don't allow post-increments.  */
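/* GET_RTX_CLASS 'a' covers the autoincrement codes (PRE_INC, POST_INC,
   PRE_DEC, POST_DEC, PRE_MODIFY, POST_MODIFY), so any auto-modified
   address is rejected here.  */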
823
824int
825not_postinc_memory_operand (op, mode)
826     rtx op;
827     enum machine_mode mode;
828{
829  return (memory_operand (op, mode)
830	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
831}
832
833/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
834   signed immediate operand.  */
835
836int
837normal_comparison_operator (op, mode)
838    register rtx op;
839    enum machine_mode mode;
840{
841  enum rtx_code code = GET_CODE (op);
842  return ((mode == VOIDmode || GET_MODE (op) == mode)
843	  && (code == EQ || code == NE
844	      || code == GT || code == LE || code == GTU || code == LEU));
845}
846
847/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
848   signed immediate operand.  */
849
850int
851adjusted_comparison_operator (op, mode)
852    register rtx op;
853    enum machine_mode mode;
854{
855  enum rtx_code code = GET_CODE (op);
856  return ((mode == VOIDmode || GET_MODE (op) == mode)
857	  && (code == LT || code == GE || code == LTU || code == GEU));
858}
859
860/* Return 1 if this is a signed inequality operator.  */
861
862int
863signed_inequality_operator (op, mode)
864    register rtx op;
865    enum machine_mode mode;
866{
867  enum rtx_code code = GET_CODE (op);
868  return ((mode == VOIDmode || GET_MODE (op) == mode)
869	  && (code == GE || code == GT
870	      || code == LE || code == LT));
871}
872
873/* Return 1 if this operator is valid for predication.  */
874
875int
876predicate_operator (op, mode)
877    register rtx op;
878    enum machine_mode mode;
879{
880  enum rtx_code code = GET_CODE (op);
881  return ((GET_MODE (op) == mode || mode == VOIDmode)
882	  && (code == EQ || code == NE));
883}
884
885/* Return 1 if this operator can be used in a conditional operation.  */
886
887int
888condop_operator (op, mode)
889    register rtx op;
890    enum machine_mode mode;
891{
892  enum rtx_code code = GET_CODE (op);
893  return ((GET_MODE (op) == mode || mode == VOIDmode)
894	  && (code == PLUS || code == MINUS || code == AND
895	      || code == IOR || code == XOR));
896}
897
898/* Return 1 if this is the ar.lc register.  */
899
900int
901ar_lc_reg_operand (op, mode)
902     register rtx op;
903     enum machine_mode mode;
904{
905  return (GET_MODE (op) == DImode
906	  && (mode == DImode || mode == VOIDmode)
907	  && GET_CODE (op) == REG
908	  && REGNO (op) == AR_LC_REGNUM);
909}
910
911/* Return 1 if this is the ar.ccv register.  */
912
913int
914ar_ccv_reg_operand (op, mode)
915     register rtx op;
916     enum machine_mode mode;
917{
918  return ((GET_MODE (op) == mode || mode == VOIDmode)
919	  && GET_CODE (op) == REG
920	  && REGNO (op) == AR_CCV_REGNUM);
921}
922
923/* Return 1 if this is the ar.pfs register.  */
924
925int
926ar_pfs_reg_operand (op, mode)
927     register rtx op;
928     enum machine_mode mode;
929{
930  return ((GET_MODE (op) == mode || mode == VOIDmode)
931	  && GET_CODE (op) == REG
932	  && REGNO (op) == AR_PFS_REGNUM);
933}
934
935/* Like general_operand, but don't allow (mem (addressof)).  */
936
937int
938general_tfmode_operand (op, mode)
939     rtx op;
940     enum machine_mode mode;
941{
942  if (! general_operand (op, mode))
943    return 0;
944  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
945    return 0;
946  return 1;
947}
948
949/* Similarly.  */
950
951int
952destination_tfmode_operand (op, mode)
953     rtx op;
954     enum machine_mode mode;
955{
956  if (! destination_operand (op, mode))
957    return 0;
958  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
959    return 0;
960  return 1;
961}
962
963/* Similarly.  */
964
965int
966tfreg_or_fp01_operand (op, mode)
967     rtx op;
968     enum machine_mode mode;
969{
970  if (GET_CODE (op) == SUBREG)
971    return 0;
972  return fr_reg_or_fp01_operand (op, mode);
973}
974
975/* Return 1 if OP is valid as a base register in a reg + offset address.  */
976
977int
978basereg_operand (op, mode)
979     rtx op;
980     enum machine_mode mode;
981{
982  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
983     checks from pa.c basereg_operand as well?  Seems to be OK without them
984     in test runs.  */
985
986  return (register_operand (op, mode) &&
987	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
988}
989
990/* Return 1 if the operands of a move are ok.  */
991
992int
993ia64_move_ok (dst, src)
994     rtx dst, src;
995{
996  /* If we're under init_recog_no_volatile, we'll not be able to use
997     memory_operand.  So check the code directly and don't worry about
998     the validity of the underlying address, which should have been
999     checked elsewhere anyway.  */
1000  if (GET_CODE (dst) != MEM)
1001    return 1;
1002  if (GET_CODE (src) == MEM)
1003    return 0;
1004  if (register_operand (src, VOIDmode))
1005    return 1;
1006
1007  /* Otherwise, this must be a constant, and it must be 0, 0.0 or 1.0.  */
1008  if (INTEGRAL_MODE_P (GET_MODE (dst)))
1009    return src == const0_rtx;
1010  else
1011    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1012}
1013
1014/* Return 0 if we are doing C++ code.  This optimization fails with
1015   C++ because of GNAT c++/6685.  */
1016
1017int
1018addp4_optimize_ok (op1, op2)
1019     rtx op1, op2;
1020{
1021
1022  if (!strcmp (lang_hooks.name, "GNU C++"))
1023    return 0;
1024
1025  return (basereg_operand (op1, GET_MODE(op1)) !=
1026	  basereg_operand (op2, GET_MODE(op2)));
1027}
1028
1029/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1030   Return the length of the field, or <= 0 on failure.  */
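/* For illustration: ROP = 0x0ff0 with RSHIFT = 4 leaves op = 0xff, and
   exact_log2 (0xff + 1) = 8, the width of the deposit field.  A mask with a
   hole, such as 0xf0f with RSHIFT = 0, fails because 0xf10 is not a power
   of two.  */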
1031
1032int
1033ia64_depz_field_mask (rop, rshift)
1034     rtx rop, rshift;
1035{
1036  unsigned HOST_WIDE_INT op = INTVAL (rop);
1037  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1038
1039  /* Get rid of the zero bits we're shifting in.  */
1040  op >>= shift;
1041
1042  /* We must now have a solid block of 1's at bit 0.  */
1043  return exact_log2 (op + 1);
1044}
1045
1046/* Expand a symbolic constant load.  */
1047/* ??? Should generalize this, so that we can also support 32 bit pointers.  */
1048
1049void
1050ia64_expand_load_address (dest, src, scratch)
1051      rtx dest, src, scratch;
1052{
1053  rtx temp;
1054
1055  /* The destination could be a MEM during initial rtl generation,
1056     which isn't a valid destination for the PIC load address patterns.  */
1057  if (! register_operand (dest, DImode))
1058    if (! scratch || ! register_operand (scratch, DImode))
1059      temp = gen_reg_rtx (DImode);
1060    else
1061      temp = scratch;
1062  else
1063    temp = dest;
1064
1065  if (tls_symbolic_operand (src, Pmode))
1066    abort ();
1067
1068  if (TARGET_AUTO_PIC)
1069    emit_insn (gen_load_gprel64 (temp, src));
1070  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
1071    emit_insn (gen_load_fptr (temp, src));
1072  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
1073           && sdata_symbolic_operand (src, VOIDmode))
1074    emit_insn (gen_load_gprel (temp, src));
1075  else if (GET_CODE (src) == CONST
1076	   && GET_CODE (XEXP (src, 0)) == PLUS
1077	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1078	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1079    {
1080      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1081      rtx sym = XEXP (XEXP (src, 0), 0);
1082      HOST_WIDE_INT ofs, hi, lo;
1083
1084      /* Split the offset into a sign extended 14-bit low part
1085	 and a complementary high part.  */
1086      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1087      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1088      hi = ofs - lo;
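      /* For example, OFS = 0x12345 gives LO = -0x1cbb and HI = 0x14000;
	 LO fits the signed 14-bit range [-0x2000, 0x1fff] and HI has its
	 low 14 bits clear.  */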
1089
1090      if (! scratch)
1091	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
1092
1093      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
1094				  scratch));
1095      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
1096    }
1097  else
1098    {
1099      rtx insn;
1100      if (! scratch)
1101	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1102
1103      insn = emit_insn (gen_load_symptr (temp, src, scratch));
1104#ifdef POINTERS_EXTEND_UNSIGNED
1105      if (GET_MODE (temp) != GET_MODE (src))
1106	src = convert_memory_address (GET_MODE (temp), src);
1107#endif
1108      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
1109    }
1110
1111  if (temp != dest)
1112    {
1113      if (GET_MODE (dest) != GET_MODE (temp))
1114	temp = convert_to_mode (GET_MODE (dest), temp, 0);
1115      emit_move_insn (dest, temp);
1116    }
1117}
1118
1119static GTY(()) rtx gen_tls_tga;
1120static rtx
1121gen_tls_get_addr ()
1122{
1123  if (!gen_tls_tga)
1124    {
1125      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1126     }
1127  return gen_tls_tga;
1128}
1129
1130static GTY(()) rtx thread_pointer_rtx;
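/* Return a REG rtx for r13, the register that the ia64 software conventions
   reserve as the thread pointer.  */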
1131static rtx
1132gen_thread_pointer ()
1133{
1134  if (!thread_pointer_rtx)
1135    {
1136      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1137      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1138    }
1139  return thread_pointer_rtx;
1140}
1141
1142rtx
1143ia64_expand_move (op0, op1)
1144     rtx op0, op1;
1145{
1146  enum machine_mode mode = GET_MODE (op0);
1147
1148  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1149    op1 = force_reg (mode, op1);
1150
1151  if (mode == Pmode || mode == ptr_mode)
1152    {
1153      enum tls_model tls_kind;
1154      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1155	{
1156	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1157
1158	  switch (tls_kind)
1159	    {
1160	    case TLS_MODEL_GLOBAL_DYNAMIC:
1161	      start_sequence ();
1162
1163	      tga_op1 = gen_reg_rtx (Pmode);
1164	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1165	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1166	      RTX_UNCHANGING_P (tga_op1) = 1;
1167
1168	      tga_op2 = gen_reg_rtx (Pmode);
1169	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1170	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1171	      RTX_UNCHANGING_P (tga_op2) = 1;
1172
1173	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1174						 LCT_CONST, Pmode, 2, tga_op1,
1175						 Pmode, tga_op2, Pmode);
1176
1177	      insns = get_insns ();
1178	      end_sequence ();
1179
1180	      emit_libcall_block (insns, op0, tga_ret, op1);
1181	      return NULL_RTX;
1182
1183	    case TLS_MODEL_LOCAL_DYNAMIC:
1184	      /* ??? This isn't the completely proper way to do local-dynamic.
1185		 If the call to __tls_get_addr is used only by a single symbol,
1186		 then we should (somehow) move the dtprel to the second arg
1187		 to avoid the extra add.  */
1188	      start_sequence ();
1189
1190	      tga_op1 = gen_reg_rtx (Pmode);
1191	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1192	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1193	      RTX_UNCHANGING_P (tga_op1) = 1;
1194
1195	      tga_op2 = const0_rtx;
1196
1197	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1198						 LCT_CONST, Pmode, 2, tga_op1,
1199						 Pmode, tga_op2, Pmode);
1200
1201	      insns = get_insns ();
1202	      end_sequence ();
1203
1204	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1205					UNSPEC_LD_BASE);
1206	      tmp = gen_reg_rtx (Pmode);
1207	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1208
1209	      if (register_operand (op0, Pmode))
1210		tga_ret = op0;
1211	      else
1212		tga_ret = gen_reg_rtx (Pmode);
1213	      if (TARGET_TLS64)
1214		{
1215		  emit_insn (gen_load_dtprel (tga_ret, op1));
1216		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1217		}
1218	      else
1219		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1220	      if (tga_ret == op0)
1221		return NULL_RTX;
1222	      op1 = tga_ret;
1223	      break;
1224
1225	    case TLS_MODEL_INITIAL_EXEC:
1226	      tmp = gen_reg_rtx (Pmode);
1227	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
1228	      tmp = gen_rtx_MEM (Pmode, tmp);
1229	      RTX_UNCHANGING_P (tmp) = 1;
1230	      tmp = force_reg (Pmode, tmp);
1231
1232	      if (register_operand (op0, Pmode))
1233		op1 = op0;
1234	      else
1235		op1 = gen_reg_rtx (Pmode);
1236	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1237	      if (op1 == op0)
1238		return NULL_RTX;
1239	      break;
1240
1241	    case TLS_MODEL_LOCAL_EXEC:
1242	      if (register_operand (op0, Pmode))
1243		tmp = op0;
1244	      else
1245		tmp = gen_reg_rtx (Pmode);
1246	      if (TARGET_TLS64)
1247		{
1248		  emit_insn (gen_load_tprel (tmp, op1));
1249		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1250		}
1251	      else
1252		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1253	      if (tmp == op0)
1254		return NULL_RTX;
1255	      op1 = tmp;
1256	      break;
1257
1258	    default:
1259	      abort ();
1260	    }
1261	}
1262      else if (!TARGET_NO_PIC &&
1263	       (symbolic_operand (op1, Pmode) ||
1264		symbolic_operand (op1, ptr_mode)))
1265	{
1266	  /* Before optimization starts, delay committing to any particular
1267	     type of PIC address load.  If this function gets deferred, we
1268	     may acquire information that changes the value of the
1269	     sdata_symbolic_operand predicate.
1270
1271	     But don't delay for function pointers.  Loading a function address
1272	     actually loads the address of the descriptor not the function.
1273	     If we represent these as SYMBOL_REFs, then they get cse'd with
1274	     calls, and we end up with calls to the descriptor address instead
1275	     of calls to the function address.  Functions are not candidates
1276	     for sdata anyways.
1277
1278	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
1279	     notes.  Don't delay for pool addresses on general principles;
1280	     they'll never become non-local behind our back.  */
1281
1282	  if (rtx_equal_function_value_matters
1283	      && GET_CODE (op1) != LABEL_REF
1284	      && ! (GET_CODE (op1) == SYMBOL_REF
1285		    && (SYMBOL_REF_FLAG (op1)
1286			|| CONSTANT_POOL_ADDRESS_P (op1)
1287			|| STRING_POOL_ADDRESS_P (op1))))
1288	    if (GET_MODE (op1) == DImode)
1289	      emit_insn (gen_movdi_symbolic (op0, op1));
1290	    else
1291	      emit_insn (gen_movsi_symbolic (op0, op1));
1292	  else
1293	    ia64_expand_load_address (op0, op1, NULL_RTX);
1294	  return NULL_RTX;
1295	}
1296    }
1297
1298  return op1;
1299}
1300
1301/* Split a post-reload TImode reference into two DImode components.  */
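/* For example, (reg:TI r14) splits into (reg:DI r14) and (reg:DI r15), while
   a TImode MEM splits into the low word at the original address and a second
   word addressed through SCRATCH, which is set to the base address plus 8.  */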
1302
1303rtx
1304ia64_split_timode (out, in, scratch)
1305     rtx out[2];
1306     rtx in, scratch;
1307{
1308  switch (GET_CODE (in))
1309    {
1310    case REG:
1311      out[0] = gen_rtx_REG (DImode, REGNO (in));
1312      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1313      return NULL_RTX;
1314
1315    case MEM:
1316      {
1317	rtx base = XEXP (in, 0);
1318
1319	switch (GET_CODE (base))
1320	  {
1321	  case REG:
1322	    out[0] = adjust_address (in, DImode, 0);
1323	    break;
1324	  case POST_MODIFY:
1325	    base = XEXP (base, 0);
1326	    out[0] = adjust_address (in, DImode, 0);
1327	    break;
1328
1329	  /* Since we're changing the mode, we need to change to POST_MODIFY
1330	     as well to preserve the size of the increment.  Either that or
1331	     do the update in two steps, but we've already got this scratch
1332	     register handy so let's use it.  */
1333	  case POST_INC:
1334	    base = XEXP (base, 0);
1335	    out[0]
1336	      = change_address (in, DImode,
1337				gen_rtx_POST_MODIFY
1338				(Pmode, base, plus_constant (base, 16)));
1339	    break;
1340	  case POST_DEC:
1341	    base = XEXP (base, 0);
1342	    out[0]
1343	      = change_address (in, DImode,
1344				gen_rtx_POST_MODIFY
1345				(Pmode, base, plus_constant (base, -16)));
1346	    break;
1347	  default:
1348	    abort ();
1349	  }
1350
1351	if (scratch == NULL_RTX)
1352	  abort ();
1353	out[1] = change_address (in, DImode, scratch);
1354	return gen_adddi3 (scratch, base, GEN_INT (8));
1355      }
1356
1357    case CONST_INT:
1358    case CONST_DOUBLE:
1359      split_double (in, &out[0], &out[1]);
1360      return NULL_RTX;
1361
1362    default:
1363      abort ();
1364    }
1365}
1366
1367/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
1368   through memory plus an extra GR scratch register.  Except that you can
1369   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1370   SECONDARY_RELOAD_CLASS, but not both.
1371
1372   We got into problems in the first place by allowing a construct like
1373   (subreg:TF (reg:TI)), which we got from a union containing a long double.
1374   This solution attempts to prevent this situation from occurring.  When
1375   we see something like the above, we spill the inner register to memory.  */
1376
1377rtx
1378spill_tfmode_operand (in, force)
1379     rtx in;
1380     int force;
1381{
1382  if (GET_CODE (in) == SUBREG
1383      && GET_MODE (SUBREG_REG (in)) == TImode
1384      && GET_CODE (SUBREG_REG (in)) == REG)
1385    {
1386      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
1387      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1388    }
1389  else if (force && GET_CODE (in) == REG)
1390    {
1391      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
1392      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1393    }
1394  else if (GET_CODE (in) == MEM
1395	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1396    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1397  else
1398    return in;
1399}
1400
1401/* Emit comparison instruction if necessary, returning the expression
1402   that holds the compare result in the proper mode.  */
1403
1404rtx
1405ia64_expand_compare (code, mode)
1406     enum rtx_code code;
1407     enum machine_mode mode;
1408{
1409  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1410  rtx cmp;
1411
1412  /* If we have a BImode input, then we already have a compare result, and
1413     do not need to emit another comparison.  */
1414  if (GET_MODE (op0) == BImode)
1415    {
1416      if ((code == NE || code == EQ) && op1 == const0_rtx)
1417	cmp = op0;
1418      else
1419	abort ();
1420    }
1421  else
1422    {
1423      cmp = gen_reg_rtx (BImode);
1424      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1425			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1426      code = NE;
1427    }
1428
1429  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1430}
1431
1432/* Emit the appropriate sequence for a call.  */
1433void
1434ia64_expand_call (retval, addr, nextarg, sibcall_p)
1435     rtx retval;
1436     rtx addr;
1437     rtx nextarg ATTRIBUTE_UNUSED;
1438     int sibcall_p;
1439{
1440  rtx insn, b0;
1441
1442  addr = XEXP (addr, 0);
1443  b0 = gen_rtx_REG (DImode, R_BR (0));
1444
1445  /* ??? Should do this for functions known to bind local too.  */
1446  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1447    {
1448      if (sibcall_p)
1449	insn = gen_sibcall_nogp (addr);
1450      else if (! retval)
1451	insn = gen_call_nogp (addr, b0);
1452      else
1453	insn = gen_call_value_nogp (retval, addr, b0);
1454      insn = emit_call_insn (insn);
1455    }
1456  else
1457    {
1458      if (sibcall_p)
1459	insn = gen_sibcall_gp (addr);
1460      else if (! retval)
1461	insn = gen_call_gp (addr, b0);
1462      else
1463	insn = gen_call_value_gp (retval, addr, b0);
1464      insn = emit_call_insn (insn);
1465
1466      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1467    }
1468
1469  if (sibcall_p)
1470    {
1471      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1472      use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
1473	       gen_rtx_REG (DImode, AR_PFS_REGNUM));
1474    }
1475}
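
/* Reload the global pointer (pic_offset_table_rtx), either from the general
   register in which it was saved or from its spill slot in the current frame,
   as recorded by ia64_compute_frame_size.  */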
1476void
1477ia64_reload_gp ()
1478{
1479  rtx tmp;
1480
1481  if (current_frame_info.reg_save_gp)
1482    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1483  else
1484    {
1485      HOST_WIDE_INT offset;
1486
1487      offset = (current_frame_info.spill_cfa_off
1488	        + current_frame_info.spill_size);
1489      if (frame_pointer_needed)
1490        {
1491          tmp = hard_frame_pointer_rtx;
1492          offset = -offset;
1493        }
1494      else
1495        {
1496          tmp = stack_pointer_rtx;
1497          offset = current_frame_info.total_size - offset;
1498        }
1499
1500      if (CONST_OK_FOR_I (offset))
1501        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1502			       tmp, GEN_INT (offset)));
1503      else
1504        {
1505          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1506          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1507			         pic_offset_table_rtx, tmp));
1508        }
1509
1510      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1511    }
1512
1513  emit_move_insn (pic_offset_table_rtx, tmp);
1514}
1515
1516void
1517ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1518		 noreturn_p, sibcall_p)
1519     rtx retval, addr, retaddr, scratch_r, scratch_b;
1520     int noreturn_p, sibcall_p;
1521{
1522  rtx insn;
1523  bool is_desc = false;
1524
1525  /* If we find we're calling through a register, then we're actually
1526     calling through a descriptor, so load up the values.  */
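  /* An ia64 function descriptor is a pair of 8-byte words: the code entry
     point followed by the gp value for the callee.  The POST_INC below reads
     the first word and leaves ADDR pointing at the second.  */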
1527  if (REG_P (addr))
1528    {
1529      rtx tmp;
1530      bool addr_dead_p;
1531
1532      /* ??? We are currently constrained to *not* use peep2, because
1533	 we can legitimately change the global lifetime of the GP
1534	 (in the form of killing where previously live).  This is
1535	 because a call through a descriptor doesn't use the previous
1536	 value of the GP, while a direct call does, and we do not
1537	 commit to either form until the split here.
1538
1539	 That said, this means that we lack precise life info for
1540	 whether ADDR is dead after this call.  This is not terribly
1541	 important, since we can fix things up essentially for free
1542	 with the POST_DEC below, but it's nice to not use it when we
1543	 can immediately tell it's not necessary.  */
1544      addr_dead_p = ((noreturn_p || sibcall_p
1545		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1546					    REGNO (addr)))
1547		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1548
1549      /* Load the code address into scratch_b.  */
1550      tmp = gen_rtx_POST_INC (Pmode, addr);
1551      tmp = gen_rtx_MEM (Pmode, tmp);
1552      emit_move_insn (scratch_r, tmp);
1553      emit_move_insn (scratch_b, scratch_r);
1554
1555      /* Load the GP address.  If ADDR is not dead here, then we must
1556	 revert the change made above via the POST_INCREMENT.  */
1557      if (!addr_dead_p)
1558	tmp = gen_rtx_POST_DEC (Pmode, addr);
1559      else
1560	tmp = addr;
1561      tmp = gen_rtx_MEM (Pmode, tmp);
1562      emit_move_insn (pic_offset_table_rtx, tmp);
1563
1564      is_desc = true;
1565      addr = scratch_b;
1566    }
1567
1568  if (sibcall_p)
1569    insn = gen_sibcall_nogp (addr);
1570  else if (retval)
1571    insn = gen_call_value_nogp (retval, addr, retaddr);
1572  else
1573    insn = gen_call_nogp (addr, retaddr);
1574  emit_call_insn (insn);
1575
1576  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1577    ia64_reload_gp ();
1578}
1579
1580/* Begin the assembly file.  */
1581
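/* Emit a .pred.safe_across_calls directive listing the ranges of predicate
   registers that are preserved across calls; with the usual ia64 call-used
   register set this comes out as "p1-p5,p16-p63".  */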
1582void
1583emit_safe_across_calls (f)
1584     FILE *f;
1585{
1586  unsigned int rs, re;
1587  int out_state;
1588
1589  rs = 1;
1590  out_state = 0;
1591  while (1)
1592    {
1593      while (rs < 64 && call_used_regs[PR_REG (rs)])
1594	rs++;
1595      if (rs >= 64)
1596	break;
1597      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1598	continue;
1599      if (out_state == 0)
1600	{
1601	  fputs ("\t.pred.safe_across_calls ", f);
1602	  out_state = 1;
1603	}
1604      else
1605	fputc (',', f);
1606      if (re == rs + 1)
1607	fprintf (f, "p%u", rs);
1608      else
1609	fprintf (f, "p%u-p%u", rs, re - 1);
1610      rs = re + 1;
1611    }
1612  if (out_state)
1613    fputc ('\n', f);
1614}
1615
1616/* Helper function for ia64_compute_frame_size: find an appropriate general
1617   register to spill some special register to.  SPECIAL_SPILL_MASK contains
1618   bits in GR0 to GR31 that have already been allocated by this routine.
1619   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1620
1621static int
1622find_gr_spill (try_locals)
1623     int try_locals;
1624{
1625  int regno;
1626
1627  /* If this is a leaf function, first try an otherwise unused
1628     call-clobbered register.  */
1629  if (current_function_is_leaf)
1630    {
1631      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1632	if (! regs_ever_live[regno]
1633	    && call_used_regs[regno]
1634	    && ! fixed_regs[regno]
1635	    && ! global_regs[regno]
1636	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1637	  {
1638	    current_frame_info.gr_used_mask |= 1 << regno;
1639	    return regno;
1640	  }
1641    }
1642
1643  if (try_locals)
1644    {
1645      regno = current_frame_info.n_local_regs;
1646      /* If there is a frame pointer, then we can't use loc79, because
1647	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
1648	 reg_name switching code in ia64_expand_prologue.  */
1649      if (regno < (80 - frame_pointer_needed))
1650	{
1651	  current_frame_info.n_local_regs = regno + 1;
1652	  return LOC_REG (0) + regno;
1653	}
1654    }
1655
1656  /* Failed to find a general register to spill to.  Must use stack.  */
1657  return 0;
1658}
1659
1660/* In order to make for nice schedules, we try to allocate every temporary
1661   to a different register.  We must of course stay away from call-saved,
1662   fixed, and global registers.  We must also stay away from registers
1663   allocated in current_frame_info.gr_used_mask, since those include regs
1664   used all through the prologue.
1665
1666   Any register allocated here must be used immediately.  The idea is to
1667   aid scheduling, not to solve data flow problems.  */
1668
1669static int last_scratch_gr_reg;
1670
1671static int
1672next_scratch_gr_reg ()
1673{
1674  int i, regno;
1675
1676  for (i = 0; i < 32; ++i)
1677    {
1678      regno = (last_scratch_gr_reg + i + 1) & 31;
1679      if (call_used_regs[regno]
1680	  && ! fixed_regs[regno]
1681	  && ! global_regs[regno]
1682	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1683	{
1684	  last_scratch_gr_reg = regno;
1685	  return regno;
1686	}
1687    }
1688
1689  /* There must be _something_ available.  */
1690  abort ();
1691}
1692
1693/* Helper function for ia64_compute_frame_size, called through
1694   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
1695
1696static void
1697mark_reg_gr_used_mask (reg, data)
1698     rtx reg;
1699     void *data ATTRIBUTE_UNUSED;
1700{
1701  unsigned int regno = REGNO (reg);
1702  if (regno < 32)
1703    {
1704      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1705      for (i = 0; i < n; ++i)
1706	current_frame_info.gr_used_mask |= 1 << (regno + i);
1707    }
1708}
1709
1710/* Returns the number of bytes offset between the frame pointer and the stack
1711   pointer for the current function.  SIZE is the number of bytes of space
1712   needed for local variables.  */
1713
1714static void
1715ia64_compute_frame_size (size)
1716     HOST_WIDE_INT size;
1717{
1718  HOST_WIDE_INT total_size;
1719  HOST_WIDE_INT spill_size = 0;
1720  HOST_WIDE_INT extra_spill_size = 0;
1721  HOST_WIDE_INT pretend_args_size;
1722  HARD_REG_SET mask;
1723  int n_spilled = 0;
1724  int spilled_gr_p = 0;
1725  int spilled_fr_p = 0;
1726  unsigned int regno;
1727  int i;
1728
1729  if (current_frame_info.initialized)
1730    return;
1731
1732  memset (&current_frame_info, 0, sizeof current_frame_info);
1733  CLEAR_HARD_REG_SET (mask);
1734
1735  /* Don't allocate scratches to the return register.  */
1736  diddle_return_value (mark_reg_gr_used_mask, NULL);
1737
1738  /* Don't allocate scratches to the EH scratch registers.  */
1739  if (cfun->machine->ia64_eh_epilogue_sp)
1740    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1741  if (cfun->machine->ia64_eh_epilogue_bsp)
1742    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1743
1744  /* Find the size of the register stack frame.  We have only 80 local
1745     registers, because we reserve 8 for the inputs and 8 for the
1746     outputs.  */
1747
1748  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1749     since we'll be adjusting that down later.  */
1750  regno = LOC_REG (78) + ! frame_pointer_needed;
1751  for (; regno >= LOC_REG (0); regno--)
1752    if (regs_ever_live[regno])
1753      break;
1754  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1755
1756  /* For functions marked with the syscall_linkage attribute, we must mark
1757     all eight input registers as in use, so that locals aren't visible to
1758     the caller.  */
1759
1760  if (cfun->machine->n_varargs > 0
1761      || lookup_attribute ("syscall_linkage",
1762			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1763    current_frame_info.n_input_regs = 8;
1764  else
1765    {
1766      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1767	if (regs_ever_live[regno])
1768	  break;
1769      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1770    }
1771
1772  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1773    if (regs_ever_live[regno])
1774      break;
1775  i = regno - OUT_REG (0) + 1;
1776
1777  /* When -p profiling, we need one output register for the mcount argument.
1778     Likewise for -a profiling for the bb_init_func argument.  For -ax
1779     profiling, we need two output registers for the two bb_init_trace_func
1780     arguments.  */
1781  if (current_function_profile)
1782    i = MAX (i, 1);
1783  current_frame_info.n_output_regs = i;
1784
1785  /* ??? No rotating register support yet.  */
1786  current_frame_info.n_rotate_regs = 0;
1787
1788  /* Discover which registers need spilling, and how much room that
1789     will take.  Begin with floating point and general registers,
1790     which will always wind up on the stack.  */
1791
1792  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1793    if (regs_ever_live[regno] && ! call_used_regs[regno])
1794      {
1795	SET_HARD_REG_BIT (mask, regno);
1796	spill_size += 16;
1797	n_spilled += 1;
1798	spilled_fr_p = 1;
1799      }
1800
1801  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1802    if (regs_ever_live[regno] && ! call_used_regs[regno])
1803      {
1804	SET_HARD_REG_BIT (mask, regno);
1805	spill_size += 8;
1806	n_spilled += 1;
1807	spilled_gr_p = 1;
1808      }
1809
1810  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1811    if (regs_ever_live[regno] && ! call_used_regs[regno])
1812      {
1813	SET_HARD_REG_BIT (mask, regno);
1814	spill_size += 8;
1815	n_spilled += 1;
1816      }
1817
1818  /* Now come all special registers that might get saved in other
1819     general registers.  */
1820
1821  if (frame_pointer_needed)
1822    {
1823      current_frame_info.reg_fp = find_gr_spill (1);
1824      /* If we did not get a register, then we take LOC79.  This is guaranteed
1825	 to be free, even if regs_ever_live is already set, because this is
1826	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
1827	 as we don't count loc79 above.  */
1828      if (current_frame_info.reg_fp == 0)
1829	{
1830	  current_frame_info.reg_fp = LOC_REG (79);
1831	  current_frame_info.n_local_regs++;
1832	}
1833    }
1834
1835  if (! current_function_is_leaf)
1836    {
1837      /* Emit a save of BR0 if we call other functions.  Do this even
1838	 if this function doesn't return, as EH depends on this to be
1839	 able to unwind the stack.  */
1840      SET_HARD_REG_BIT (mask, BR_REG (0));
1841
1842      current_frame_info.reg_save_b0 = find_gr_spill (1);
1843      if (current_frame_info.reg_save_b0 == 0)
1844	{
1845	  spill_size += 8;
1846	  n_spilled += 1;
1847	}
1848
1849      /* Similarly for ar.pfs.  */
1850      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1851      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1852      if (current_frame_info.reg_save_ar_pfs == 0)
1853	{
1854	  extra_spill_size += 8;
1855	  n_spilled += 1;
1856	}
1857
1858      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
1859	 registers are clobbered, so we fall back to the stack.  */
1860      current_frame_info.reg_save_gp
1861	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1862      if (current_frame_info.reg_save_gp == 0)
1863	{
1864	  SET_HARD_REG_BIT (mask, GR_REG (1));
1865	  spill_size += 8;
1866	  n_spilled += 1;
1867	}
1868    }
1869  else
1870    {
1871      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1872	{
1873	  SET_HARD_REG_BIT (mask, BR_REG (0));
1874	  spill_size += 8;
1875	  n_spilled += 1;
1876	}
1877
1878      if (regs_ever_live[AR_PFS_REGNUM])
1879	{
1880	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1881	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1882	  if (current_frame_info.reg_save_ar_pfs == 0)
1883	    {
1884	      extra_spill_size += 8;
1885	      n_spilled += 1;
1886	    }
1887	}
1888    }
1889
1890  /* Unwind descriptor hackery: things are most efficient if we allocate
1891     consecutive GR save registers for RP, PFS, FP in that order. However,
1892     it is absolutely critical that FP get the only hard register that's
1893     guaranteed to be free, so we allocated it first.  If all three did
1894     happen to be allocated hard regs, and are consecutive, rearrange them
1895     into the preferred order now.  */
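  /* For example, if find_gr_spill handed out r35 for FP, r36 for B0, and
     r37 for ar.pfs (illustrative register numbers), the swap below leaves
     B0 in r35, ar.pfs in r36, and FP in r37.  */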
1896  if (current_frame_info.reg_fp != 0
1897      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1898      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1899    {
1900      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1901      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1902      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1903    }
1904
1905  /* See if we need to store the predicate register block.  */
1906  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1907    if (regs_ever_live[regno] && ! call_used_regs[regno])
1908      break;
1909  if (regno <= PR_REG (63))
1910    {
1911      SET_HARD_REG_BIT (mask, PR_REG (0));
1912      current_frame_info.reg_save_pr = find_gr_spill (1);
1913      if (current_frame_info.reg_save_pr == 0)
1914	{
1915	  extra_spill_size += 8;
1916	  n_spilled += 1;
1917	}
1918
1919      /* ??? Mark them all as used so that register renaming and such
1920	 are free to use them.  */
1921      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1922	regs_ever_live[regno] = 1;
1923    }
1924
1925  /* If we're forced to use st8.spill, we're forced to save and restore
1926     ar.unat as well.  The check for existing liveness allows inline asm
1927     to touch ar.unat.  */
1928  if (spilled_gr_p || cfun->machine->n_varargs
1929      || regs_ever_live[AR_UNAT_REGNUM])
1930    {
1931      regs_ever_live[AR_UNAT_REGNUM] = 1;
1932      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1933      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1934      if (current_frame_info.reg_save_ar_unat == 0)
1935	{
1936	  extra_spill_size += 8;
1937	  n_spilled += 1;
1938	}
1939    }
1940
1941  if (regs_ever_live[AR_LC_REGNUM])
1942    {
1943      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1944      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1945      if (current_frame_info.reg_save_ar_lc == 0)
1946	{
1947	  extra_spill_size += 8;
1948	  n_spilled += 1;
1949	}
1950    }
1951
1952  /* If we have an odd number of words of pretend arguments written to
1953     the stack, then the FR save area will be unaligned.  We round the
1954     size of this area up to keep things 16 byte aligned.  */
1955  if (spilled_fr_p)
1956    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1957  else
1958    pretend_args_size = current_function_pretend_args_size;
1959
1960  total_size = (spill_size + extra_spill_size + size + pretend_args_size
1961		+ current_function_outgoing_args_size);
1962  total_size = IA64_STACK_ALIGN (total_size);
1963
1964  /* We always use the 16-byte scratch area provided by the caller, but
1965     if we are a leaf function, there's no one to which we need to provide
1966     a scratch area.  */
1967  if (current_function_is_leaf)
1968    total_size = MAX (0, total_size - 16);
1969
1970  current_frame_info.total_size = total_size;
1971  current_frame_info.spill_cfa_off = pretend_args_size - 16;
1972  current_frame_info.spill_size = spill_size;
1973  current_frame_info.extra_spill_size = extra_spill_size;
1974  COPY_HARD_REG_SET (current_frame_info.mask, mask);
1975  current_frame_info.n_spilled = n_spilled;
1976  current_frame_info.initialized = reload_completed;
1977}
1978
1979/* Compute the initial difference between the specified pair of registers.  */
1980
1981HOST_WIDE_INT
1982ia64_initial_elimination_offset (from, to)
1983     int from, to;
1984{
1985  HOST_WIDE_INT offset;
1986
1987  ia64_compute_frame_size (get_frame_size ());
1988  switch (from)
1989    {
1990    case FRAME_POINTER_REGNUM:
1991      if (to == HARD_FRAME_POINTER_REGNUM)
1992	{
1993	  if (current_function_is_leaf)
1994	    offset = -current_frame_info.total_size;
1995	  else
1996	    offset = -(current_frame_info.total_size
1997		       - current_function_outgoing_args_size - 16);
1998	}
1999      else if (to == STACK_POINTER_REGNUM)
2000	{
2001	  if (current_function_is_leaf)
2002	    offset = 0;
2003	  else
2004	    offset = 16 + current_function_outgoing_args_size;
2005	}
2006      else
2007	abort ();
2008      break;
2009
2010    case ARG_POINTER_REGNUM:
2011      /* Arguments start above the 16 byte save area, unless stdarg,
2012	 in which case we store through the 16 byte save area.  */
2013      if (to == HARD_FRAME_POINTER_REGNUM)
2014	offset = 16 - current_function_pretend_args_size;
2015      else if (to == STACK_POINTER_REGNUM)
2016	offset = (current_frame_info.total_size
2017		  + 16 - current_function_pretend_args_size);
2018      else
2019	abort ();
2020      break;
2021
2022    case RETURN_ADDRESS_POINTER_REGNUM:
2023      offset = 0;
2024      break;
2025
2026    default:
2027      abort ();
2028    }
2029
2030  return offset;
2031}
2032
2033/* If there are more than a trivial number of register spills, we use
2034   two interleaved iterators so that we can get two memory references
2035   per insn group.
2036
2037   In order to simplify things in the prologue and epilogue expanders,
2038   we use helper functions to fix up the memory references after the
2039   fact, converting them to POST_MODIFY addressing with suitable offsets.
2040   The following data structure tracks the state of the two iterators
2041   while insns are being emitted.  */
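/* As a rough illustration: with two iterators, consecutive spills
   alternate between two base registers, so their memory instructions can
   fall into the same insn group; each POST_MODIFY fixup then advances an
   iterator past both the slot it just used and the slot handled by the
   other iterator.  */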
2042
2043struct spill_fill_data
2044{
2045  rtx init_after;		/* point at which to emit initializations */
2046  rtx init_reg[2];		/* initial base register */
2047  rtx iter_reg[2];		/* the iterator registers */
2048  rtx *prev_addr[2];		/* address of last memory use */
2049  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2050  HOST_WIDE_INT prev_off[2];	/* last offset */
2051  int n_iter;			/* number of iterators in use */
2052  int next_iter;		/* next iterator to use */
2053  unsigned int save_gr_used_mask;
2054};
2055
2056static struct spill_fill_data spill_fill_data;
2057
2058static void
2059setup_spill_pointers (n_spills, init_reg, cfa_off)
2060     int n_spills;
2061     rtx init_reg;
2062     HOST_WIDE_INT cfa_off;
2063{
2064  int i;
2065
2066  spill_fill_data.init_after = get_last_insn ();
2067  spill_fill_data.init_reg[0] = init_reg;
2068  spill_fill_data.init_reg[1] = init_reg;
2069  spill_fill_data.prev_addr[0] = NULL;
2070  spill_fill_data.prev_addr[1] = NULL;
2071  spill_fill_data.prev_insn[0] = NULL;
2072  spill_fill_data.prev_insn[1] = NULL;
2073  spill_fill_data.prev_off[0] = cfa_off;
2074  spill_fill_data.prev_off[1] = cfa_off;
2075  spill_fill_data.next_iter = 0;
2076  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2077
2078  spill_fill_data.n_iter = 1 + (n_spills > 2);
2079  for (i = 0; i < spill_fill_data.n_iter; ++i)
2080    {
2081      int regno = next_scratch_gr_reg ();
2082      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2083      current_frame_info.gr_used_mask |= 1 << regno;
2084    }
2085}
2086
2087static void
2088finish_spill_pointers ()
2089{
2090  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2091}
2092
2093static rtx
2094spill_restore_mem (reg, cfa_off)
2095     rtx reg;
2096     HOST_WIDE_INT cfa_off;
2097{
2098  int iter = spill_fill_data.next_iter;
2099  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2100  rtx disp_rtx = GEN_INT (disp);
2101  rtx mem;
2102
2103  if (spill_fill_data.prev_addr[iter])
2104    {
2105      if (CONST_OK_FOR_N (disp))
2106	{
2107	  *spill_fill_data.prev_addr[iter]
2108	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2109				   gen_rtx_PLUS (DImode,
2110						 spill_fill_data.iter_reg[iter],
2111						 disp_rtx));
2112	  REG_NOTES (spill_fill_data.prev_insn[iter])
2113	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2114				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2115	}
2116      else
2117	{
2118	  /* ??? Could use register post_modify for loads.  */
2119	  if (! CONST_OK_FOR_I (disp))
2120	    {
2121	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2122	      emit_move_insn (tmp, disp_rtx);
2123	      disp_rtx = tmp;
2124	    }
2125	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2126				 spill_fill_data.iter_reg[iter], disp_rtx));
2127	}
2128    }
2129  /* Micro-optimization: if we've created a frame pointer, it's at
2130     CFA 0, which may allow the real iterator to be initialized lower,
2131     slightly increasing parallelism.  Also, if there are few saves
2132     it may eliminate the iterator entirely.  */
2133  else if (disp == 0
2134	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2135	   && frame_pointer_needed)
2136    {
2137      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2138      set_mem_alias_set (mem, get_varargs_alias_set ());
2139      return mem;
2140    }
2141  else
2142    {
2143      rtx seq, insn;
2144
2145      if (disp == 0)
2146	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2147			 spill_fill_data.init_reg[iter]);
2148      else
2149	{
2150	  start_sequence ();
2151
2152	  if (! CONST_OK_FOR_I (disp))
2153	    {
2154	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2155	      emit_move_insn (tmp, disp_rtx);
2156	      disp_rtx = tmp;
2157	    }
2158
2159	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2160				 spill_fill_data.init_reg[iter],
2161				 disp_rtx));
2162
2163	  seq = get_insns ();
2164	  end_sequence ();
2165	}
2166
2167      /* Be careful in case this is the first insn in the function.  */
2168      if (spill_fill_data.init_after)
2169	insn = emit_insn_after (seq, spill_fill_data.init_after);
2170      else
2171	{
2172	  rtx first = get_insns ();
2173	  if (first)
2174	    insn = emit_insn_before (seq, first);
2175	  else
2176	    insn = emit_insn (seq);
2177	}
2178      spill_fill_data.init_after = insn;
2179
2180      /* If DISP is 0, we may or may not have a further adjustment
2181	 afterward.  If we do, then the load/store insn may be modified
2182	 to be a post-modify.  If we don't, then this copy may be
2183	 eliminated by copyprop_hardreg_forward, which makes this
2184	 insn garbage, which runs afoul of the sanity check in
2185	 propagate_one_insn.  So mark this insn as legal to delete.  */
2186      if (disp == 0)
2187	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2188					     REG_NOTES (insn));
2189    }
2190
2191  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2192
2193  /* ??? Not all of the spills are for varargs, but some of them are.
2194     The rest of the spills belong in an alias set of their own.  But
2195     it doesn't actually hurt to include them here.  */
2196  set_mem_alias_set (mem, get_varargs_alias_set ());
2197
2198  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2199  spill_fill_data.prev_off[iter] = cfa_off;
2200
2201  if (++iter >= spill_fill_data.n_iter)
2202    iter = 0;
2203  spill_fill_data.next_iter = iter;
2204
2205  return mem;
2206}
2207
2208static void
2209do_spill (move_fn, reg, cfa_off, frame_reg)
2210     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2211     rtx reg, frame_reg;
2212     HOST_WIDE_INT cfa_off;
2213{
2214  int iter = spill_fill_data.next_iter;
2215  rtx mem, insn;
2216
2217  mem = spill_restore_mem (reg, cfa_off);
2218  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2219  spill_fill_data.prev_insn[iter] = insn;
2220
2221  if (frame_reg)
2222    {
2223      rtx base;
2224      HOST_WIDE_INT off;
2225
2226      RTX_FRAME_RELATED_P (insn) = 1;
2227
2228      /* Don't even pretend that the unwind code can intuit its way
2229	 through a pair of interleaved post_modify iterators.  Just
2230	 provide the correct answer.  */
2231
2232      if (frame_pointer_needed)
2233	{
2234	  base = hard_frame_pointer_rtx;
2235	  off = - cfa_off;
2236	}
2237      else
2238	{
2239	  base = stack_pointer_rtx;
2240	  off = current_frame_info.total_size - cfa_off;
2241	}
2242
2243      REG_NOTES (insn)
2244	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2245		gen_rtx_SET (VOIDmode,
2246			     gen_rtx_MEM (GET_MODE (reg),
2247					  plus_constant (base, off)),
2248			     frame_reg),
2249		REG_NOTES (insn));
2250    }
2251}
2252
2253static void
2254do_restore (move_fn, reg, cfa_off)
2255     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2256     rtx reg;
2257     HOST_WIDE_INT cfa_off;
2258{
2259  int iter = spill_fill_data.next_iter;
2260  rtx insn;
2261
2262  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2263				GEN_INT (cfa_off)));
2264  spill_fill_data.prev_insn[iter] = insn;
2265}
2266
2267/* Wrapper functions that discard the CONST_INT spill offset.  These
2268   exist so that we can give gr_spill/gr_fill the offset they need and
2269   use a consistent function interface.  */
2270
2271static rtx
2272gen_movdi_x (dest, src, offset)
2273     rtx dest, src;
2274     rtx offset ATTRIBUTE_UNUSED;
2275{
2276  return gen_movdi (dest, src);
2277}
2278
2279static rtx
2280gen_fr_spill_x (dest, src, offset)
2281     rtx dest, src;
2282     rtx offset ATTRIBUTE_UNUSED;
2283{
2284  return gen_fr_spill (dest, src);
2285}
2286
2287static rtx
2288gen_fr_restore_x (dest, src, offset)
2289     rtx dest, src;
2290     rtx offset ATTRIBUTE_UNUSED;
2291{
2292  return gen_fr_restore (dest, src);
2293}
2294
2295/* Called after register allocation to add any instructions needed for the
2296   prologue.  Using a prologue insn is favored compared to putting all of the
2297   instructions in output_function_prologue(), since it allows the scheduler
2298   to intermix instructions with the saves of the call-saved registers.  In
2299   some cases, it might be necessary to emit a barrier instruction as the last
2300   insn to prevent such scheduling.
2301
2302   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2303   so that the debug info generation code can handle them properly.
2304
2305   The register save area is laid out like so:
2306   cfa+16
2307	[ varargs spill area ]
2308	[ fr register spill area ]
2309	[ br register spill area ]
2310	[ ar register spill area ]
2311	[ pr register spill area ]
2312	[ gr register spill area ] */
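/* The expander below walks CFA_OFF downward from
   spill_cfa_off + spill_size + extra_spill_size to spill_cfa_off,
   subtracting 8 for each PR/AR/GR/BR slot and 16 for each FR slot; the
   abort () calls check that this matches what ia64_compute_frame_size
   reserved.  */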
2313
2314/* ??? We get inefficient code when the frame size is larger than can fit in
2315   an adds instruction.  */
2316
2317void
2318ia64_expand_prologue ()
2319{
2320  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2321  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2322  rtx reg, alt_reg;
2323
2324  ia64_compute_frame_size (get_frame_size ());
2325  last_scratch_gr_reg = 15;
2326
2327  /* If there is no epilogue, then we don't need some prologue insns.
2328     We need to avoid emitting the dead prologue insns, because flow
2329     will complain about them.  */
2330  if (optimize)
2331    {
2332      edge e;
2333
2334      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2335	if ((e->flags & EDGE_FAKE) == 0
2336	    && (e->flags & EDGE_FALLTHRU) != 0)
2337	  break;
2338      epilogue_p = (e != NULL);
2339    }
2340  else
2341    epilogue_p = 1;
2342
2343  /* Set the local, input, and output register names.  We need to do this
2344     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2345     half.  If we use in/loc/out register names, then we get assembler errors
2346     in crtn.S because there is no alloc insn or regstk directive in there.  */
2347  if (! TARGET_REG_NAMES)
2348    {
2349      int inputs = current_frame_info.n_input_regs;
2350      int locals = current_frame_info.n_local_regs;
2351      int outputs = current_frame_info.n_output_regs;
2352
2353      for (i = 0; i < inputs; i++)
2354	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2355      for (i = 0; i < locals; i++)
2356	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2357      for (i = 0; i < outputs; i++)
2358	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2359    }
2360
2361  /* Set the frame pointer register name.  The regnum is logically loc79,
2362     but of course we'll not have allocated that many locals.  Rather than
2363     worrying about renumbering the existing rtxs, we adjust the name.  */
2364  /* ??? This code means that we can never use one local register when
2365     there is a frame pointer.  loc79 gets wasted in this case, as it is
2366     renamed to a register that will never be used.  See also the try_locals
2367     code in find_gr_spill.  */
2368  if (current_frame_info.reg_fp)
2369    {
2370      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2371      reg_names[HARD_FRAME_POINTER_REGNUM]
2372	= reg_names[current_frame_info.reg_fp];
2373      reg_names[current_frame_info.reg_fp] = tmp;
2374    }
2375
2376  /* Fix up the return address placeholder.  */
2377  /* ??? We can fail if __builtin_return_address is used, and we didn't
2378     allocate a register in which to save b0.  I can't think of a way to
2379     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2380     then be sure that I got the right one.  Further, reload doesn't seem
2381     to care if an eliminable register isn't used, and "eliminates" it
2382     anyway.  */
2383  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2384      && current_frame_info.reg_save_b0 != 0)
2385    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2386
2387  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2388  if (current_frame_info.n_local_regs == 0
2389      && current_frame_info.n_output_regs == 0
2390      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2391      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2392    {
2393      /* If there is no alloc, but there are input registers used, then we
2394	 need a .regstk directive.  */
2395      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2396      ar_pfs_save_reg = NULL_RTX;
2397    }
2398  else
2399    {
2400      current_frame_info.need_regstk = 0;
2401
2402      if (current_frame_info.reg_save_ar_pfs)
2403	regno = current_frame_info.reg_save_ar_pfs;
2404      else
2405	regno = next_scratch_gr_reg ();
2406      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2407
2408      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2409				   GEN_INT (current_frame_info.n_input_regs),
2410				   GEN_INT (current_frame_info.n_local_regs),
2411				   GEN_INT (current_frame_info.n_output_regs),
2412				   GEN_INT (current_frame_info.n_rotate_regs)));
2413      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2414    }
2415
2416  /* Set up frame pointer, stack pointer, and spill iterators.  */
2417
2418  n_varargs = cfun->machine->n_varargs;
2419  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2420			stack_pointer_rtx, 0);
2421
2422  if (frame_pointer_needed)
2423    {
2424      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2425      RTX_FRAME_RELATED_P (insn) = 1;
2426    }
2427
2428  if (current_frame_info.total_size != 0)
2429    {
2430      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2431      rtx offset;
2432
2433      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2434	offset = frame_size_rtx;
2435      else
2436	{
2437	  regno = next_scratch_gr_reg ();
2438 	  offset = gen_rtx_REG (DImode, regno);
2439	  emit_move_insn (offset, frame_size_rtx);
2440	}
2441
2442      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2443				    stack_pointer_rtx, offset));
2444
2445      if (! frame_pointer_needed)
2446	{
2447	  RTX_FRAME_RELATED_P (insn) = 1;
2448	  if (GET_CODE (offset) != CONST_INT)
2449	    {
2450	      REG_NOTES (insn)
2451		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2452			gen_rtx_SET (VOIDmode,
2453				     stack_pointer_rtx,
2454				     gen_rtx_PLUS (DImode,
2455						   stack_pointer_rtx,
2456						   frame_size_rtx)),
2457			REG_NOTES (insn));
2458	    }
2459	}
2460
2461      /* ??? At this point we must generate a magic insn that appears to
2462	 modify the stack pointer, the frame pointer, and all spill
2463	 iterators.  This would allow the most scheduling freedom.  For
2464	 now, just hard stop.  */
2465      emit_insn (gen_blockage ());
2466    }
2467
2468  /* Must copy out ar.unat before doing any integer spills.  */
2469  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2470    {
2471      if (current_frame_info.reg_save_ar_unat)
2472	ar_unat_save_reg
2473	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2474      else
2475	{
2476	  alt_regno = next_scratch_gr_reg ();
2477	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2478	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2479	}
2480
2481      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2482      insn = emit_move_insn (ar_unat_save_reg, reg);
2483      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2484
2485      /* Even if we're not going to generate an epilogue, we still
2486	 need to save the register so that EH works.  */
2487      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2488	emit_insn (gen_prologue_use (ar_unat_save_reg));
2489    }
2490  else
2491    ar_unat_save_reg = NULL_RTX;
2492
2493  /* Spill all varargs registers.  Do this before spilling any GR registers,
2494     since we want the UNAT bits for the GR registers to override the UNAT
2495     bits from varargs, which we don't care about.  */
2496
2497  cfa_off = -16;
2498  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2499    {
2500      reg = gen_rtx_REG (DImode, regno);
2501      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2502    }
2503
2504  /* Locate the bottom of the register save area.  */
2505  cfa_off = (current_frame_info.spill_cfa_off
2506	     + current_frame_info.spill_size
2507	     + current_frame_info.extra_spill_size);
2508
2509  /* Save the predicate register block either in a register or in memory.  */
2510  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2511    {
2512      reg = gen_rtx_REG (DImode, PR_REG (0));
2513      if (current_frame_info.reg_save_pr != 0)
2514	{
2515	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2516	  insn = emit_move_insn (alt_reg, reg);
2517
2518	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2519	     64 hard registers.  */
2520	  RTX_FRAME_RELATED_P (insn) = 1;
2521	  REG_NOTES (insn)
2522	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2523			gen_rtx_SET (VOIDmode, alt_reg, reg),
2524			REG_NOTES (insn));
2525
2526	  /* Even if we're not going to generate an epilogue, we still
2527	     need to save the register so that EH works.  */
2528	  if (! epilogue_p)
2529	    emit_insn (gen_prologue_use (alt_reg));
2530	}
2531      else
2532	{
2533	  alt_regno = next_scratch_gr_reg ();
2534	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2535	  insn = emit_move_insn (alt_reg, reg);
2536	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2537	  cfa_off -= 8;
2538	}
2539    }
2540
2541  /* Handle AR regs in numerical order.  All of them get special handling.  */
2542  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2543      && current_frame_info.reg_save_ar_unat == 0)
2544    {
2545      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2546      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2547      cfa_off -= 8;
2548    }
2549
2550  /* The alloc insn already copied ar.pfs into a general register.  The
2551     only thing we have to do now is copy that register to a stack slot
2552     if we'd not allocated a local register for the job.  */
2553  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2554      && current_frame_info.reg_save_ar_pfs == 0)
2555    {
2556      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2557      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2558      cfa_off -= 8;
2559    }
2560
2561  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2562    {
2563      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2564      if (current_frame_info.reg_save_ar_lc != 0)
2565	{
2566	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2567	  insn = emit_move_insn (alt_reg, reg);
2568	  RTX_FRAME_RELATED_P (insn) = 1;
2569
2570	  /* Even if we're not going to generate an epilogue, we still
2571	     need to save the register so that EH works.  */
2572	  if (! epilogue_p)
2573	    emit_insn (gen_prologue_use (alt_reg));
2574	}
2575      else
2576	{
2577	  alt_regno = next_scratch_gr_reg ();
2578	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2579	  emit_move_insn (alt_reg, reg);
2580	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2581	  cfa_off -= 8;
2582	}
2583    }
2584
2585  if (current_frame_info.reg_save_gp)
2586    {
2587      insn = emit_move_insn (gen_rtx_REG (DImode,
2588					  current_frame_info.reg_save_gp),
2589			     pic_offset_table_rtx);
2590      /* We don't know for sure yet if this is actually needed, since
2591	 we've not split the PIC call patterns.  If all of the calls
2592	 are indirect, and not followed by any uses of the gp, then
2593	 this save is dead.  Allow it to go away.  */
2594      REG_NOTES (insn)
2595	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2596    }
2597
2598  /* We should now be at the base of the gr/br/fr spill area.  */
2599  if (cfa_off != (current_frame_info.spill_cfa_off
2600		  + current_frame_info.spill_size))
2601    abort ();
2602
2603  /* Spill all general registers.  */
2604  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2605    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2606      {
2607	reg = gen_rtx_REG (DImode, regno);
2608	do_spill (gen_gr_spill, reg, cfa_off, reg);
2609	cfa_off -= 8;
2610      }
2611
2612  /* Handle BR0 specially -- it may be getting stored permanently in
2613     some GR register.  */
2614  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2615    {
2616      reg = gen_rtx_REG (DImode, BR_REG (0));
2617      if (current_frame_info.reg_save_b0 != 0)
2618	{
2619	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2620	  insn = emit_move_insn (alt_reg, reg);
2621	  RTX_FRAME_RELATED_P (insn) = 1;
2622
2623	  /* Even if we're not going to generate an epilogue, we still
2624	     need to save the register so that EH works.  */
2625	  if (! epilogue_p)
2626	    emit_insn (gen_prologue_use (alt_reg));
2627	}
2628      else
2629	{
2630	  alt_regno = next_scratch_gr_reg ();
2631	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2632	  emit_move_insn (alt_reg, reg);
2633	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2634	  cfa_off -= 8;
2635	}
2636    }
2637
2638  /* Spill the rest of the BR registers.  */
2639  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2640    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2641      {
2642	alt_regno = next_scratch_gr_reg ();
2643	alt_reg = gen_rtx_REG (DImode, alt_regno);
2644	reg = gen_rtx_REG (DImode, regno);
2645	emit_move_insn (alt_reg, reg);
2646	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2647	cfa_off -= 8;
2648      }
2649
2650  /* Align the frame and spill all FR registers.  */
2651  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2652    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2653      {
2654        if (cfa_off & 15)
2655	  abort ();
2656	reg = gen_rtx_REG (TFmode, regno);
2657	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2658	cfa_off -= 16;
2659      }
2660
2661  if (cfa_off != current_frame_info.spill_cfa_off)
2662    abort ();
2663
2664  finish_spill_pointers ();
2665}
2666
2667/* Called after register allocation to add any instructions needed for the
2668   epilogue.  Using an epilogue insn is favored compared to putting all of the
2669   instructions in output_function_epilogue(), since it allows the scheduler
2670   to intermix instructions with the restores of the call-saved registers.  In
2671   some cases, it might be necessary to emit a barrier instruction as the last
2672   insn to prevent such scheduling.  */
2673
2674void
2675ia64_expand_epilogue (sibcall_p)
2676     int sibcall_p;
2677{
2678  rtx insn, reg, alt_reg, ar_unat_save_reg;
2679  int regno, alt_regno, cfa_off;
2680
2681  ia64_compute_frame_size (get_frame_size ());
2682
2683  /* If there is a frame pointer, then we use it instead of the stack
2684     pointer, so that the stack pointer does not need to be valid when
2685     the epilogue starts.  See EXIT_IGNORE_STACK.  */
2686  if (frame_pointer_needed)
2687    setup_spill_pointers (current_frame_info.n_spilled,
2688			  hard_frame_pointer_rtx, 0);
2689  else
2690    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2691			  current_frame_info.total_size);
2692
2693  if (current_frame_info.total_size != 0)
2694    {
2695      /* ??? At this point we must generate a magic insn that appears to
2696         modify the spill iterators and the frame pointer.  This would
2697	 allow the most scheduling freedom.  For now, just hard stop.  */
2698      emit_insn (gen_blockage ());
2699    }
2700
2701  /* Locate the bottom of the register save area.  */
2702  cfa_off = (current_frame_info.spill_cfa_off
2703	     + current_frame_info.spill_size
2704	     + current_frame_info.extra_spill_size);
2705
2706  /* Restore the predicate registers.  */
2707  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2708    {
2709      if (current_frame_info.reg_save_pr != 0)
2710	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2711      else
2712	{
2713	  alt_regno = next_scratch_gr_reg ();
2714	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2715	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2716	  cfa_off -= 8;
2717	}
2718      reg = gen_rtx_REG (DImode, PR_REG (0));
2719      emit_move_insn (reg, alt_reg);
2720    }
2721
2722  /* Restore the application registers.  */
2723
2724  /* Load the saved unat from the stack, but do not restore it until
2725     after the GRs have been restored.  */
2726  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2727    {
2728      if (current_frame_info.reg_save_ar_unat != 0)
2729        ar_unat_save_reg
2730	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2731      else
2732	{
2733	  alt_regno = next_scratch_gr_reg ();
2734	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2735	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2736	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2737	  cfa_off -= 8;
2738	}
2739    }
2740  else
2741    ar_unat_save_reg = NULL_RTX;
2742
2743  if (current_frame_info.reg_save_ar_pfs != 0)
2744    {
2745      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2746      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2747      emit_move_insn (reg, alt_reg);
2748    }
2749  else if (! current_function_is_leaf)
2750    {
2751      alt_regno = next_scratch_gr_reg ();
2752      alt_reg = gen_rtx_REG (DImode, alt_regno);
2753      do_restore (gen_movdi_x, alt_reg, cfa_off);
2754      cfa_off -= 8;
2755      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2756      emit_move_insn (reg, alt_reg);
2757    }
2758
2759  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2760    {
2761      if (current_frame_info.reg_save_ar_lc != 0)
2762	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2763      else
2764	{
2765	  alt_regno = next_scratch_gr_reg ();
2766	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2767	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2768	  cfa_off -= 8;
2769	}
2770      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2771      emit_move_insn (reg, alt_reg);
2772    }
2773
2774  /* We should now be at the base of the gr/br/fr spill area.  */
2775  if (cfa_off != (current_frame_info.spill_cfa_off
2776		  + current_frame_info.spill_size))
2777    abort ();
2778
2779  /* The GP may be stored on the stack in the prologue, but it's
2780     never restored in the epilogue.  Skip the stack slot.  */
2781  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2782    cfa_off -= 8;
2783
2784  /* Restore all general registers.  */
2785  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2786    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2787      {
2788	reg = gen_rtx_REG (DImode, regno);
2789	do_restore (gen_gr_restore, reg, cfa_off);
2790	cfa_off -= 8;
2791      }
2792
2793  /* Restore the branch registers.  Handle B0 specially, as it may
2794     have gotten stored in some GR register.  */
2795  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2796    {
2797      if (current_frame_info.reg_save_b0 != 0)
2798	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2799      else
2800	{
2801	  alt_regno = next_scratch_gr_reg ();
2802	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2803	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2804	  cfa_off -= 8;
2805	}
2806      reg = gen_rtx_REG (DImode, BR_REG (0));
2807      emit_move_insn (reg, alt_reg);
2808    }
2809
2810  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2811    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2812      {
2813	alt_regno = next_scratch_gr_reg ();
2814	alt_reg = gen_rtx_REG (DImode, alt_regno);
2815	do_restore (gen_movdi_x, alt_reg, cfa_off);
2816	cfa_off -= 8;
2817	reg = gen_rtx_REG (DImode, regno);
2818	emit_move_insn (reg, alt_reg);
2819      }
2820
2821  /* Restore floating point registers.  */
2822  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2823    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2824      {
2825        if (cfa_off & 15)
2826	  abort ();
2827	reg = gen_rtx_REG (TFmode, regno);
2828	do_restore (gen_fr_restore_x, reg, cfa_off);
2829	cfa_off -= 16;
2830      }
2831
2832  /* Restore ar.unat for real.  */
2833  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2834    {
2835      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2836      emit_move_insn (reg, ar_unat_save_reg);
2837    }
2838
2839  if (cfa_off != current_frame_info.spill_cfa_off)
2840    abort ();
2841
2842  finish_spill_pointers ();
2843
2844  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2845    {
2846      /* ??? At this point we must generate a magic insn that appears to
2847         modify the spill iterators, the stack pointer, and the frame
2848	 pointer.  This would allow the most scheduling freedom.  For now,
2849	 just hard stop.  */
2850      emit_insn (gen_blockage ());
2851    }
2852
2853  if (cfun->machine->ia64_eh_epilogue_sp)
2854    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2855  else if (frame_pointer_needed)
2856    {
2857      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2858      RTX_FRAME_RELATED_P (insn) = 1;
2859    }
2860  else if (current_frame_info.total_size)
2861    {
2862      rtx offset, frame_size_rtx;
2863
2864      frame_size_rtx = GEN_INT (current_frame_info.total_size);
2865      if (CONST_OK_FOR_I (current_frame_info.total_size))
2866	offset = frame_size_rtx;
2867      else
2868	{
2869	  regno = next_scratch_gr_reg ();
2870	  offset = gen_rtx_REG (DImode, regno);
2871	  emit_move_insn (offset, frame_size_rtx);
2872	}
2873
2874      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2875				    offset));
2876
2877      RTX_FRAME_RELATED_P (insn) = 1;
2878      if (GET_CODE (offset) != CONST_INT)
2879	{
2880	  REG_NOTES (insn)
2881	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2882			gen_rtx_SET (VOIDmode,
2883				     stack_pointer_rtx,
2884				     gen_rtx_PLUS (DImode,
2885						   stack_pointer_rtx,
2886						   frame_size_rtx)),
2887			REG_NOTES (insn));
2888	}
2889    }
2890
2891  if (cfun->machine->ia64_eh_epilogue_bsp)
2892    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2893
2894  if (! sibcall_p)
2895    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2896  else
2897    {
2898      int fp = GR_REG (2);
2899      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is
2900	 the first available call-clobbered register.  If there was a frame
2901	 pointer register, we may have swapped the names of r2 and
2902	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
2903	 string "r2" when emitting the register name for the assembler.  */
2904      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2905	fp = HARD_FRAME_POINTER_REGNUM;
2906
2907      /* We must emit an alloc to force the input registers to become output
2908	 registers.  Otherwise, if the callee tries to pass its parameters
2909	 through to another call without an intervening alloc, then these
2910	 values get lost.  */
2911      /* ??? We don't need to preserve all input registers.  We only need to
2912	 preserve those input registers used as arguments to the sibling call.
2913	 It is unclear how to compute that number here.  */
2914      if (current_frame_info.n_input_regs != 0)
2915	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2916			      GEN_INT (0), GEN_INT (0),
2917			      GEN_INT (current_frame_info.n_input_regs),
2918			      GEN_INT (0)));
2919    }
2920}
2921
2922/* Return 1 if br.ret can do all the work required to return from a
2923   function.  */
2924
2925int
2926ia64_direct_return ()
2927{
2928  if (reload_completed && ! frame_pointer_needed)
2929    {
2930      ia64_compute_frame_size (get_frame_size ());
2931
2932      return (current_frame_info.total_size == 0
2933	      && current_frame_info.n_spilled == 0
2934	      && current_frame_info.reg_save_b0 == 0
2935	      && current_frame_info.reg_save_pr == 0
2936	      && current_frame_info.reg_save_ar_pfs == 0
2937	      && current_frame_info.reg_save_ar_unat == 0
2938	      && current_frame_info.reg_save_ar_lc == 0);
2939    }
2940  return 0;
2941}
2942
2943int
2944ia64_hard_regno_rename_ok (from, to)
2945     int from;
2946     int to;
2947{
2948  /* Don't clobber any of the registers we reserved for the prologue.  */
2949  if (to == current_frame_info.reg_fp
2950      || to == current_frame_info.reg_save_b0
2951      || to == current_frame_info.reg_save_pr
2952      || to == current_frame_info.reg_save_ar_pfs
2953      || to == current_frame_info.reg_save_ar_unat
2954      || to == current_frame_info.reg_save_ar_lc)
2955    return 0;
2956
2957  if (from == current_frame_info.reg_fp
2958      || from == current_frame_info.reg_save_b0
2959      || from == current_frame_info.reg_save_pr
2960      || from == current_frame_info.reg_save_ar_pfs
2961      || from == current_frame_info.reg_save_ar_unat
2962      || from == current_frame_info.reg_save_ar_lc)
2963    return 0;
2964
2965  /* Don't use output registers outside the register frame.  */
2966  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2967    return 0;
2968
2969  /* Retain even/oddness on predicate register pairs.  */
2970  if (PR_REGNO_P (from) && PR_REGNO_P (to))
2971    return (from & 1) == (to & 1);
2972
2973  return 1;
2974}
2975
2976/* Target hook for assembling integer objects.  Handle word-sized
2977   aligned objects and detect the cases when @fptr is needed.  */
2978
2979static bool
2980ia64_assemble_integer (x, size, aligned_p)
2981     rtx x;
2982     unsigned int size;
2983     int aligned_p;
2984{
2985  if (size == (TARGET_ILP32 ? 4 : 8)
2986      && aligned_p
2987      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2988      && GET_CODE (x) == SYMBOL_REF
2989      && SYMBOL_REF_FLAG (x))
2990    {
2991      if (TARGET_ILP32)
2992	fputs ("\tdata4\t@fptr(", asm_out_file);
2993      else
2994	fputs ("\tdata8\t@fptr(", asm_out_file);
2995      output_addr_const (asm_out_file, x);
2996      fputs (")\n", asm_out_file);
2997      return true;
2998    }
2999  return default_assemble_integer (x, size, aligned_p);
3000}
3001
3002/* Emit the function prologue.  */
3003
3004static void
3005ia64_output_function_prologue (file, size)
3006     FILE *file;
3007     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3008{
3009  int mask, grsave, grsave_prev;
3010
3011  if (current_frame_info.need_regstk)
3012    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3013	     current_frame_info.n_input_regs,
3014	     current_frame_info.n_local_regs,
3015	     current_frame_info.n_output_regs,
3016	     current_frame_info.n_rotate_regs);
3017
3018  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3019    return;
3020
3021  /* Emit the .prologue directive.  */
3022
3023  mask = 0;
3024  grsave = grsave_prev = 0;
3025  if (current_frame_info.reg_save_b0 != 0)
3026    {
3027      mask |= 8;
3028      grsave = grsave_prev = current_frame_info.reg_save_b0;
3029    }
3030  if (current_frame_info.reg_save_ar_pfs != 0
3031      && (grsave_prev == 0
3032	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3033    {
3034      mask |= 4;
3035      if (grsave_prev == 0)
3036	grsave = current_frame_info.reg_save_ar_pfs;
3037      grsave_prev = current_frame_info.reg_save_ar_pfs;
3038    }
3039  if (current_frame_info.reg_fp != 0
3040      && (grsave_prev == 0
3041	  || current_frame_info.reg_fp == grsave_prev + 1))
3042    {
3043      mask |= 2;
3044      if (grsave_prev == 0)
3045	grsave = HARD_FRAME_POINTER_REGNUM;
3046      grsave_prev = current_frame_info.reg_fp;
3047    }
3048  if (current_frame_info.reg_save_pr != 0
3049      && (grsave_prev == 0
3050	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3051    {
3052      mask |= 1;
3053      if (grsave_prev == 0)
3054	grsave = current_frame_info.reg_save_pr;
3055    }
3056
3057  if (mask)
3058    fprintf (file, "\t.prologue %d, %d\n", mask,
3059	     ia64_dbx_register_number (grsave));
3060  else
3061    fputs ("\t.prologue\n", file);
3062
3063  /* Emit a .spill directive, if necessary, to relocate the base of
3064     the register spill area.  */
3065  if (current_frame_info.spill_cfa_off != -16)
3066    fprintf (file, "\t.spill %ld\n",
3067	     (long) (current_frame_info.spill_cfa_off
3068		     + current_frame_info.spill_size));
3069}
3070
3071/* Emit the .body directive at the scheduled end of the prologue.  */
3072
3073static void
3074ia64_output_function_end_prologue (file)
3075     FILE *file;
3076{
3077  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3078    return;
3079
3080  fputs ("\t.body\n", file);
3081}
3082
3083/* Emit the function epilogue.  */
3084
3085static void
3086ia64_output_function_epilogue (file, size)
3087     FILE *file ATTRIBUTE_UNUSED;
3088     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3089{
3090  int i;
3091
3092  /* Reset from the function's potential modifications.  */
3093  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
3094
3095  if (current_frame_info.reg_fp)
3096    {
3097      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3098      reg_names[HARD_FRAME_POINTER_REGNUM]
3099	= reg_names[current_frame_info.reg_fp];
3100      reg_names[current_frame_info.reg_fp] = tmp;
3101    }
3102  if (! TARGET_REG_NAMES)
3103    {
3104      for (i = 0; i < current_frame_info.n_input_regs; i++)
3105	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3106      for (i = 0; i < current_frame_info.n_local_regs; i++)
3107	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3108      for (i = 0; i < current_frame_info.n_output_regs; i++)
3109	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3110    }
3111
3112  current_frame_info.initialized = 0;
3113}
3114
3115int
3116ia64_dbx_register_number (regno)
3117     int regno;
3118{
3119  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3120     from its home at loc79 to something inside the register frame.  We
3121     must perform the same renumbering here for the debug info.  */
3122  if (current_frame_info.reg_fp)
3123    {
3124      if (regno == HARD_FRAME_POINTER_REGNUM)
3125	regno = current_frame_info.reg_fp;
3126      else if (regno == current_frame_info.reg_fp)
3127	regno = HARD_FRAME_POINTER_REGNUM;
3128    }
3129
3130  if (IN_REGNO_P (regno))
3131    return 32 + regno - IN_REG (0);
3132  else if (LOC_REGNO_P (regno))
3133    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3134  else if (OUT_REGNO_P (regno))
3135    return (32 + current_frame_info.n_input_regs
3136	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3137  else
3138    return regno;
3139}
3140
3141void
3142ia64_initialize_trampoline (addr, fnaddr, static_chain)
3143     rtx addr, fnaddr, static_chain;
3144{
3145  rtx addr_reg, eight = GEN_INT (8);
3146
3147  /* Load up our iterator.  */
3148  addr_reg = gen_reg_rtx (Pmode);
3149  emit_move_insn (addr_reg, addr);
3150
3151  /* The first two words are the fake descriptor:
3152     __ia64_trampoline, ADDR+16.  */
3153  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3154		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3155  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3156
3157  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3158		  copy_to_reg (plus_constant (addr, 16)));
3159  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3160
3161  /* The third word is the target descriptor.  */
3162  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3163  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3164
3165  /* The fourth word is the static chain.  */
3166  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3167}
3168
3169/* Do any needed setup for a variadic function.  CUM has not been updated
3170   for the last named argument which has type TYPE and mode MODE.
3171
3172   We generate the actual spill instructions during prologue generation.  */
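/* Illustrative example (assuming MAX_ARGUMENT_SLOTS of 8 and
   UNITS_PER_WORD of 8): a stdarg function whose named arguments occupy
   two slots reaches this point with cum.words == 2 after the advance
   below, so n_varargs becomes 6 and *pretend_size becomes 48 bytes.  */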
3173
3174void
3175ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3176     CUMULATIVE_ARGS cum;
3177     int             int_mode;
3178     tree            type;
3179     int *           pretend_size;
3180     int	     second_time ATTRIBUTE_UNUSED;
3181{
3182  /* Skip the current argument.  */
3183  ia64_function_arg_advance (&cum, int_mode, type, 1);
3184
3185  if (cum.words < MAX_ARGUMENT_SLOTS)
3186    {
3187      int n = MAX_ARGUMENT_SLOTS - cum.words;
3188      *pretend_size = n * UNITS_PER_WORD;
3189      cfun->machine->n_varargs = n;
3190    }
3191}
3192
3193/* Check whether TYPE is a homogeneous floating point aggregate.  If
3194   it is, return the mode of the floating point type that appears
3195   in all leaves.  If it is not, return VOIDmode.
3196
3197   An aggregate is a homogeneous floating point aggregate if all
3198   fields/elements in it have the same floating point type (e.g.,
3199   SFmode).  128-bit quad-precision floats are excluded.  */
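/* For example, struct { double x, y; } is an HFA with element mode
   DFmode, while struct { double x; float y; } is not, because its leaves
   have two different floating point modes.  */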
3200
3201static enum machine_mode
3202hfa_element_mode (type, nested)
3203     tree type;
3204     int nested;
3205{
3206  enum machine_mode element_mode = VOIDmode;
3207  enum machine_mode mode;
3208  enum tree_code code = TREE_CODE (type);
3209  int know_element_mode = 0;
3210  tree t;
3211
3212  switch (code)
3213    {
3214    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3215    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3216    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3217    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3218    case FUNCTION_TYPE:
3219      return VOIDmode;
3220
3221      /* Fortran complex types are supposed to be HFAs, so we need to handle
3222	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3223	 types though.  */
3224    case COMPLEX_TYPE:
3225      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3226	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3227	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3228			      * BITS_PER_UNIT, MODE_FLOAT, 0);
3229      else
3230	return VOIDmode;
3231
3232    case REAL_TYPE:
3233      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3234	 mode if this is contained within an aggregate.  */
3235      if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3236	return TYPE_MODE (type);
3237      else
3238	return VOIDmode;
3239
3240    case ARRAY_TYPE:
3241      return hfa_element_mode (TREE_TYPE (type), 1);
3242
3243    case RECORD_TYPE:
3244    case UNION_TYPE:
3245    case QUAL_UNION_TYPE:
3246      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3247	{
3248	  if (TREE_CODE (t) != FIELD_DECL)
3249	    continue;
3250
3251	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3252	  if (know_element_mode)
3253	    {
3254	      if (mode != element_mode)
3255		return VOIDmode;
3256	    }
3257	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3258	    return VOIDmode;
3259	  else
3260	    {
3261	      know_element_mode = 1;
3262	      element_mode = mode;
3263	    }
3264	}
3265      return element_mode;
3266
3267    default:
3268      /* If we reach here, we probably have some front-end specific type
3269	 that the backend doesn't know about.  This can happen via the
3270	 aggregate_value_p call in init_function_start.  All we can do is
3271	 ignore unknown tree types.  */
3272      return VOIDmode;
3273    }
3274
3275  return VOIDmode;
3276}
3277
3278/* Return rtx for register where argument is passed, or zero if it is passed
3279   on the stack.  */
3280
3281/* ??? 128-bit quad-precision floats are always passed in general
3282   registers.  */
3283
3284rtx
3285ia64_function_arg (cum, mode, type, named, incoming)
3286     CUMULATIVE_ARGS *cum;
3287     enum machine_mode mode;
3288     tree type;
3289     int named;
3290     int incoming;
3291{
3292  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3293  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3294		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3295	       / UNITS_PER_WORD);
3296  int offset = 0;
3297  enum machine_mode hfa_mode = VOIDmode;
3298
3299  /* Integer and float arguments larger than 8 bytes start at the next even
3300     boundary.  Aggregates larger than 8 bytes start at the next even boundary
3301     if the aggregate has 16 byte alignment.  Net effect is that types with
3302     alignment greater than 8 start at the next even boundary.  */
3303  /* ??? The ABI does not specify how to handle aggregates with alignment from
3304     9 to 15 bytes, or greater than 16.   We handle them all as if they had
3305     16 byte alignment.  Such aggregates can occur only if gcc extensions are
3306     used.  */
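  /* Example: with cum->words == 3 and a 16-byte aligned aggregate,
     (cum->words & 1) is set, so OFFSET becomes 1 and the argument starts
     in slot 4, the next even-numbered slot, rather than slot 3.  */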
3307  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3308       : (words > 1))
3309      && (cum->words & 1))
3310    offset = 1;
3311
3312  /* If all argument slots are used, then it must go on the stack.  */
3313  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3314    return 0;
3315
3316  /* Check for and handle homogeneous FP aggregates.  */
3317  if (type)
3318    hfa_mode = hfa_element_mode (type, 0);
3319
3320  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3321     and unprototyped hfas are passed specially.  */
3322  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3323    {
3324      rtx loc[16];
3325      int i = 0;
3326      int fp_regs = cum->fp_regs;
3327      int int_regs = cum->words + offset;
3328      int hfa_size = GET_MODE_SIZE (hfa_mode);
3329      int byte_size;
3330      int args_byte_size;
3331
3332      /* If prototyped, pass it in FR regs then GR regs.
3333	 If not prototyped, pass it in both FR and GR regs.
3334
3335	 If this is an SFmode aggregate, then it is possible to run out of
3336	 FR regs while GR regs are still left.  In that case, we pass the
3337	 remaining part in the GR regs.  */
3338
3339      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3340	 of the argument, the last FP register, or the last argument slot.  */
3341
3342      byte_size = ((mode == BLKmode)
3343		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3344      args_byte_size = int_regs * UNITS_PER_WORD;
3345      offset = 0;
3346      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3347	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3348	{
3349	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3350				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3351							      + fp_regs)),
3352				      GEN_INT (offset));
3353	  offset += hfa_size;
3354	  args_byte_size += hfa_size;
3355	  fp_regs++;
3356	}
3357
3358      /* If no prototype, then the whole thing must go in GR regs.  */
3359      if (! cum->prototype)
3360	offset = 0;
3361      /* If this is an SFmode aggregate, then we might have some left over
3362	 that needs to go in GR regs.  */
3363      else if (byte_size != offset)
3364	int_regs += offset / UNITS_PER_WORD;
3365
3366      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3367
3368      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3369	{
3370	  enum machine_mode gr_mode = DImode;
3371
3372	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3373	     then this goes in a GR reg left adjusted/little endian, right
3374	     adjusted/big endian.  */
3375	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3376	     always right adjusted/little endian.  */
3377	  if (offset & 0x4)
3378	    gr_mode = SImode;
3379	  /* If we have an even 4 byte hunk because the aggregate is a
3380	     multiple of 4 bytes in size, then this goes in a GR reg right
3381	     adjusted/little endian.  */
3382	  else if (byte_size - offset == 4)
3383	    gr_mode = SImode;
3384	  /* Complex floats need to have float mode.  */
3385	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3386	    gr_mode = hfa_mode;
3387
3388	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3389				      gen_rtx_REG (gr_mode, (basereg
3390							     + int_regs)),
3391				      GEN_INT (offset));
3392	  offset += GET_MODE_SIZE (gr_mode);
3393	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3394		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3395	}
3396
3397      /* If we ended up using just one location, return that one loc.  */
3398      if (i == 1)
3399	return XEXP (loc[0], 0);
3400      else
3401	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3402    }
3403
3404  /* Integral and aggregates go in general registers.  If we have run out of
3405     FR registers, then FP values must also go in general registers.  This can
3406     happen when we have a SFmode HFA.  */
3407  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3408          || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3409    {
3410      int byte_size = ((mode == BLKmode)
3411                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
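      /* Small aggregates are wrapped in a one-element PARALLEL on
	 big-endian (HP-UX) targets so that they are justified in the
	 register according to their memory layout; presumably this gives
	 the left-justified placement the big-endian ABI expects, rather
	 than the right-justified placement a bare REG would get.  */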
3412      if (BYTES_BIG_ENDIAN
3413	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3414	&& byte_size < UNITS_PER_WORD
3415	&& byte_size > 0)
3416	{
3417	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3418					  gen_rtx_REG (DImode,
3419						       (basereg + cum->words
3420							+ offset)),
3421					  const0_rtx);
3422	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3423	}
3424      else
3425	return gen_rtx_REG (mode, basereg + cum->words + offset);
3426
3427    }
3428
3429  /* If there is a prototype, then FP values go in a FR register when
3430     named, and in a GR register when unnamed.  */
3431  else if (cum->prototype)
3432    {
3433      if (! named)
3434	return gen_rtx_REG (mode, basereg + cum->words + offset);
3435      else
3436	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3437    }
3438  /* If there is no prototype, then FP values go in both FR and GR
3439     registers.  */
3440  else
3441    {
3442      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3443				      gen_rtx_REG (mode, (FR_ARG_FIRST
3444							  + cum->fp_regs)),
3445				      const0_rtx);
3446      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3447				      gen_rtx_REG (mode,
3448						   (basereg + cum->words
3449						    + offset)),
3450				      const0_rtx);
3451
3452      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3453    }
3454}
3455
3456/* Return number of words, at the beginning of the argument, that must be
3457   put in registers.  0 if the argument is entirely in registers or entirely
3458   in memory.  */
3459
3460int
3461ia64_function_arg_partial_nregs (cum, mode, type, named)
3462     CUMULATIVE_ARGS *cum;
3463     enum machine_mode mode;
3464     tree type;
3465     int named ATTRIBUTE_UNUSED;
3466{
3467  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3468		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3469	       / UNITS_PER_WORD);
3470  int offset = 0;
3471
3472  /* Arguments with alignment larger than 8 bytes start at the next even
3473     boundary.  */
3474  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3475       : (words > 1))
3476      && (cum->words & 1))
3477    offset = 1;
3478
3479  /* If all argument slots are used, then it must go on the stack.  */
3480  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3481    return 0;
3482
3483  /* It doesn't matter whether the argument goes in FR or GR regs.  If
3484     it fits within the 8 argument slots, then it goes entirely in
3485     registers.  If it extends past the last argument slot, then the rest
3486     goes on the stack.  */
3487
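  /* For example, a 48 byte aggregate (six words) arriving when four slots
     are already in use does not fit entirely: the first four words go in
     registers, this function returns 4, and the remaining two words are
     passed on the stack.  */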
3488  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3489    return 0;
3490
3491  return MAX_ARGUMENT_SLOTS - cum->words - offset;
3492}
3493
3494/* Update CUM to point after this argument.  This is patterned after
3495   ia64_function_arg.  */
3496
3497void
3498ia64_function_arg_advance (cum, mode, type, named)
3499     CUMULATIVE_ARGS *cum;
3500     enum machine_mode mode;
3501     tree type;
3502     int named;
3503{
3504  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3505		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3506	       / UNITS_PER_WORD);
3507  int offset = 0;
3508  enum machine_mode hfa_mode = VOIDmode;
3509
3510  /* If all arg slots are already full, then there is nothing to do.  */
3511  if (cum->words >= MAX_ARGUMENT_SLOTS)
3512    return;
3513
3514  /* Arguments with alignment larger than 8 bytes start at the next even
3515     boundary.  */
3516  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3517       : (words > 1))
3518      && (cum->words & 1))
3519    offset = 1;
3520
3521  cum->words += words + offset;
3522
3523  /* Check for and handle homogeneous FP aggregates.  */
3524  if (type)
3525    hfa_mode = hfa_element_mode (type, 0);
3526
3527  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3528     and unprototyped hfas are passed specially.  */
3529  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3530    {
3531      int fp_regs = cum->fp_regs;
3532      /* This is the original value of cum->words + offset.  */
3533      int int_regs = cum->words - words;
3534      int hfa_size = GET_MODE_SIZE (hfa_mode);
3535      int byte_size;
3536      int args_byte_size;
3537
3538      /* If prototyped, pass it in FR regs then GR regs.
3539	 If not prototyped, pass it in both FR and GR regs.
3540
3541	 If this is an SFmode aggregate, then it is possible to run out of
3542	 FR regs while GR regs are still left.  In that case, we pass the
3543	 remaining part in the GR regs.  */
3544
3545      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3546	 of the argument, the last FP register, or the last argument slot.  */
3547
3548      byte_size = ((mode == BLKmode)
3549		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3550      args_byte_size = int_regs * UNITS_PER_WORD;
3551      offset = 0;
3552      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3553	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3554	{
3555	  offset += hfa_size;
3556	  args_byte_size += hfa_size;
3557	  fp_regs++;
3558	}
3559
3560      cum->fp_regs = fp_regs;
3561    }
3562
3563  /* Integral and aggregates go in general registers.  If we have run out of
3564     FR registers, then FP values must also go in general registers.  This can
3565     happen when we have a SFmode HFA.  */
3566  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3567    cum->int_regs = cum->words;
3568
3569  /* If there is a prototype, then FP values go in a FR register when
3570     named, and in a GR register when unnamed.  */
3571  else if (cum->prototype)
3572    {
3573      if (! named)
3574	cum->int_regs = cum->words;
3575      else
3576	/* ??? Complex types should not reach here.  */
3577	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3578    }
3579  /* If there is no prototype, then FP values go in both FR and GR
3580     registers.  */
3581  else
3582    {
3583      /* ??? Complex types should not reach here.  */
3584      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3585      cum->int_regs = cum->words;
3586    }
3587}
3588
3589/* Variable sized types are passed by reference.  */
3590/* ??? At present this is a GCC extension to the IA-64 ABI.  */
3591
3592int
3593ia64_function_arg_pass_by_reference (cum, mode, type, named)
3594     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3595     enum machine_mode mode ATTRIBUTE_UNUSED;
3596     tree type;
3597     int named ATTRIBUTE_UNUSED;
3598{
3599  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3600}
3601
3602
3603/* Implement va_arg.  */
3604
3605rtx
3606ia64_va_arg (valist, type)
3607     tree valist, type;
3608{
3609  tree t;
3610
3611  /* Variable sized types are passed by reference.  */
3612  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3613    {
3614      rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3615      return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3616    }
3617
3618  /* Arguments with alignment larger than 8 bytes start at the next even
3619     boundary.  */
3620  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3621    {
3622      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3623		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3624      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3625		 build_int_2 (-2 * UNITS_PER_WORD, -1));
3626      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3627      TREE_SIDE_EFFECTS (t) = 1;
3628      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3629    }
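  /* With UNITS_PER_WORD equal to 8, the statements above round the va_list
     pointer up to the next 16 byte boundary, i.e.
     valist = (valist + 15) & -16.  */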
3630
3631  return std_expand_builtin_va_arg (valist, type);
3632}
3633
3634/* Return 1 if the function's return value is returned in memory.  Return 0 if it is
3635   in a register.  */
3636
3637int
3638ia64_return_in_memory (valtype)
3639     tree valtype;
3640{
3641  enum machine_mode mode;
3642  enum machine_mode hfa_mode;
3643  HOST_WIDE_INT byte_size;
3644
3645  mode = TYPE_MODE (valtype);
3646  byte_size = GET_MODE_SIZE (mode);
3647  if (mode == BLKmode)
3648    {
3649      byte_size = int_size_in_bytes (valtype);
3650      if (byte_size < 0)
3651	return 1;
3652    }
3653
3654  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3655
3656  hfa_mode = hfa_element_mode (valtype, 0);
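  /* For example, a structure of eight doubles (64 bytes, element size 8)
     is returned in f8 through f15, while nine doubles exceed the eight
     available registers and are returned in memory.  */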
3657  if (hfa_mode != VOIDmode)
3658    {
3659      int hfa_size = GET_MODE_SIZE (hfa_mode);
3660
3661      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3662	return 1;
3663      else
3664	return 0;
3665    }
3666  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3667    return 1;
3668  else
3669    return 0;
3670}
3671
3672/* Return rtx for register that holds the function return value.  */
3673
3674rtx
3675ia64_function_value (valtype, func)
3676     tree valtype;
3677     tree func ATTRIBUTE_UNUSED;
3678{
3679  enum machine_mode mode;
3680  enum machine_mode hfa_mode;
3681
3682  mode = TYPE_MODE (valtype);
3683  hfa_mode = hfa_element_mode (valtype, 0);
3684
3685  if (hfa_mode != VOIDmode)
3686    {
3687      rtx loc[8];
3688      int i;
3689      int hfa_size;
3690      int byte_size;
3691      int offset;
3692
3693      hfa_size = GET_MODE_SIZE (hfa_mode);
3694      byte_size = ((mode == BLKmode)
3695		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3696      offset = 0;
3697      for (i = 0; offset < byte_size; i++)
3698	{
3699	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3700				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3701				      GEN_INT (offset));
3702	  offset += hfa_size;
3703	}
3704
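      /* e.g. a homogeneous aggregate of four SFmode floats comes back as a
	 PARALLEL of four registers, f8 through f11, at byte offsets 0, 4,
	 8 and 12.  */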
3705      if (i == 1)
3706	return XEXP (loc[0], 0);
3707      else
3708	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3709    }
3710  else if (FLOAT_TYPE_P (valtype) &&
3711           ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3712    return gen_rtx_REG (mode, FR_ARG_FIRST);
3713  else
3714    {
3715      if (BYTES_BIG_ENDIAN
3716	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3717	{
3718	  rtx loc[8];
3719	  int offset;
3720	  int bytesize;
3721	  int i;
3722
3723	  offset = 0;
3724	  bytesize = int_size_in_bytes (valtype);
3725	  for (i = 0; offset < bytesize; i++)
3726	    {
3727	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3728					  gen_rtx_REG (DImode,
3729						       GR_RET_FIRST + i),
3730					  GEN_INT (offset));
3731	      offset += UNITS_PER_WORD;
3732	    }
3733	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3734	}
3735      else
3736	return gen_rtx_REG (mode, GR_RET_FIRST);
3737    }
3738}
3739
3740/* Print a memory address as an operand to reference that memory location.  */
3741
3742/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
3743   also call this from ia64_print_operand for memory addresses.  */
3744
3745void
3746ia64_print_operand_address (stream, address)
3747     FILE * stream ATTRIBUTE_UNUSED;
3748     rtx    address ATTRIBUTE_UNUSED;
3749{
3750}
3751
3752/* Print an operand to an assembler instruction.
3753   C	Swap and print a comparison operator.
3754   D	Print an FP comparison operator.
3755   E    Print 32 - constant, for SImode shifts as extract.
3756   e    Print 64 - constant, for DImode rotates.
3757   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3758        a floating point register emitted normally.
3759   I	Invert a predicate register by adding 1.
3760   J    Select the proper predicate register for a condition.
3761   j    Select the inverse predicate register for a condition.
3762   O	Append .acq for volatile load.
3763   P	Postincrement of a MEM.
3764   Q	Append .rel for volatile store.
3765   S	Shift amount for shladd instruction.
3766   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3767	for Intel assembler.
3768   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3769	for Intel assembler.
3770   r	Print register name, or constant 0 as r0.  HP compatibility for
3771	Linux kernel.  */
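/* For instance, %P on a POST_INC memory operand of DImode prints ", 8",
   and %T prints the CONST_INT -1 as 0xffffffff when the Intel assembler
   is being targeted.  */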
3772void
3773ia64_print_operand (file, x, code)
3774     FILE * file;
3775     rtx    x;
3776     int    code;
3777{
3778  const char *str;
3779
3780  switch (code)
3781    {
3782    case 0:
3783      /* Handled below.  */
3784      break;
3785
3786    case 'C':
3787      {
3788	enum rtx_code c = swap_condition (GET_CODE (x));
3789	fputs (GET_RTX_NAME (c), file);
3790	return;
3791      }
3792
3793    case 'D':
3794      switch (GET_CODE (x))
3795	{
3796	case NE:
3797	  str = "neq";
3798	  break;
3799	case UNORDERED:
3800	  str = "unord";
3801	  break;
3802	case ORDERED:
3803	  str = "ord";
3804	  break;
3805	default:
3806	  str = GET_RTX_NAME (GET_CODE (x));
3807	  break;
3808	}
3809      fputs (str, file);
3810      return;
3811
3812    case 'E':
3813      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3814      return;
3815
3816    case 'e':
3817      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3818      return;
3819
3820    case 'F':
3821      if (x == CONST0_RTX (GET_MODE (x)))
3822	str = reg_names [FR_REG (0)];
3823      else if (x == CONST1_RTX (GET_MODE (x)))
3824	str = reg_names [FR_REG (1)];
3825      else if (GET_CODE (x) == REG)
3826	str = reg_names [REGNO (x)];
3827      else
3828	abort ();
3829      fputs (str, file);
3830      return;
3831
3832    case 'I':
3833      fputs (reg_names [REGNO (x) + 1], file);
3834      return;
3835
3836    case 'J':
3837    case 'j':
3838      {
3839	unsigned int regno = REGNO (XEXP (x, 0));
3840	if (GET_CODE (x) == EQ)
3841	  regno += 1;
3842	if (code == 'j')
3843	  regno ^= 1;
3844        fputs (reg_names [regno], file);
3845      }
3846      return;
3847
3848    case 'O':
3849      if (MEM_VOLATILE_P (x))
3850	fputs(".acq", file);
3851      return;
3852
3853    case 'P':
3854      {
3855	HOST_WIDE_INT value;
3856
3857	switch (GET_CODE (XEXP (x, 0)))
3858	  {
3859	  default:
3860	    return;
3861
3862	  case POST_MODIFY:
3863	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3864	    if (GET_CODE (x) == CONST_INT)
3865	      value = INTVAL (x);
3866	    else if (GET_CODE (x) == REG)
3867	      {
3868		fprintf (file, ", %s", reg_names[REGNO (x)]);
3869		return;
3870	      }
3871	    else
3872	      abort ();
3873	    break;
3874
3875	  case POST_INC:
3876	    value = GET_MODE_SIZE (GET_MODE (x));
3877	    break;
3878
3879	  case POST_DEC:
3880	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3881	    break;
3882	  }
3883
3884	putc (',', file);
3885	putc (' ', file);
3886	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3887	return;
3888      }
3889
3890    case 'Q':
3891      if (MEM_VOLATILE_P (x))
3892	fputs(".rel", file);
3893      return;
3894
3895    case 'S':
3896      fprintf (file, "%d", exact_log2 (INTVAL (x)));
3897      return;
3898
3899    case 'T':
3900      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3901	{
3902	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3903	  return;
3904	}
3905      break;
3906
3907    case 'U':
3908      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3909	{
3910	  const char *prefix = "0x";
3911	  if (INTVAL (x) & 0x80000000)
3912	    {
3913	      fprintf (file, "0xffffffff");
3914	      prefix = "";
3915	    }
3916	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3917	  return;
3918	}
3919      break;
3920
3921    case 'r':
3922      /* If this operand is the constant zero, write it as register zero.
3923	 Any register, zero, or CONST_INT value is OK here.  */
3924      if (GET_CODE (x) == REG)
3925	fputs (reg_names[REGNO (x)], file);
3926      else if (x == CONST0_RTX (GET_MODE (x)))
3927	fputs ("r0", file);
3928      else if (GET_CODE (x) == CONST_INT)
3929	output_addr_const (file, x);
3930      else
3931	output_operand_lossage ("invalid %%r value");
3932      return;
3933
3934    case '+':
3935      {
3936	const char *which;
3937
3938	/* For conditional branches, returns or calls, substitute
3939	   sptk, dptk, dpnt, or spnt for %s.  */
3940	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3941	if (x)
3942	  {
3943	    int pred_val = INTVAL (XEXP (x, 0));
3944
3945	    /* Guess the top and bottom 2% statically predicted.  */
3946	    if (pred_val < REG_BR_PROB_BASE / 50)
3947	      which = ".spnt";
3948	    else if (pred_val < REG_BR_PROB_BASE / 2)
3949	      which = ".dpnt";
3950	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3951	      which = ".dptk";
3952	    else
3953	      which = ".sptk";
3954	  }
3955	else if (GET_CODE (current_output_insn) == CALL_INSN)
3956	  which = ".sptk";
3957	else
3958	  which = ".dptk";
3959
3960	fputs (which, file);
3961	return;
3962      }
3963
3964    case ',':
3965      x = current_insn_predicate;
3966      if (x)
3967	{
3968	  unsigned int regno = REGNO (XEXP (x, 0));
3969	  if (GET_CODE (x) == EQ)
3970	    regno += 1;
3971          fprintf (file, "(%s) ", reg_names [regno]);
3972	}
3973      return;
3974
3975    default:
3976      output_operand_lossage ("ia64_print_operand: unknown code");
3977      return;
3978    }
3979
3980  switch (GET_CODE (x))
3981    {
3982      /* This happens for the spill/restore instructions.  */
3983    case POST_INC:
3984    case POST_DEC:
3985    case POST_MODIFY:
3986      x = XEXP (x, 0);
3987      /* ... fall through ...  */
3988
3989    case REG:
3990      fputs (reg_names [REGNO (x)], file);
3991      break;
3992
3993    case MEM:
3994      {
3995	rtx addr = XEXP (x, 0);
3996	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3997	  addr = XEXP (addr, 0);
3998	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3999	break;
4000      }
4001
4002    default:
4003      output_addr_const (file, x);
4004      break;
4005    }
4006
4007  return;
4008}
4009
4010/* Calculate the cost of moving data from a register in class FROM to
4011   one in class TO, using MODE.  */
4012
4013int
4014ia64_register_move_cost (mode, from, to)
4015     enum machine_mode mode;
4016     enum reg_class from, to;
4017{
4018  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4019  if (to == ADDL_REGS)
4020    to = GR_REGS;
4021  if (from == ADDL_REGS)
4022    from = GR_REGS;
4023
4024  /* All costs are symmetric, so reduce cases by putting the
4025     lower number class as the destination.  */
4026  if (from < to)
4027    {
4028      enum reg_class tmp = to;
4029      to = from, from = tmp;
4030    }
4031
4032  /* Moving from FR<->GR in TFmode must be more expensive than 2,
4033     so that we get secondary memory reloads.  Between FR_REGS,
4034     we have to make this at least as expensive as MEMORY_MOVE_COST
4035     to avoid spectacularly poor register class preferencing.  */
4036  if (mode == TFmode)
4037    {
4038      if (to != GR_REGS || from != GR_REGS)
4039        return MEMORY_MOVE_COST (mode, to, 0);
4040      else
4041	return 3;
4042    }
4043
4044  switch (to)
4045    {
4046    case PR_REGS:
4047      /* Moving between PR registers takes two insns.  */
4048      if (from == PR_REGS)
4049	return 3;
4050      /* Moving between PR and anything but GR is impossible.  */
4051      if (from != GR_REGS)
4052	return MEMORY_MOVE_COST (mode, to, 0);
4053      break;
4054
4055    case BR_REGS:
4056      /* Moving between BR and anything but GR is impossible.  */
4057      if (from != GR_REGS && from != GR_AND_BR_REGS)
4058	return MEMORY_MOVE_COST (mode, to, 0);
4059      break;
4060
4061    case AR_I_REGS:
4062    case AR_M_REGS:
4063      /* Moving between AR and anything but GR is impossible.  */
4064      if (from != GR_REGS)
4065	return MEMORY_MOVE_COST (mode, to, 0);
4066      break;
4067
4068    case GR_REGS:
4069    case FR_REGS:
4070    case GR_AND_FR_REGS:
4071    case GR_AND_BR_REGS:
4072    case ALL_REGS:
4073      break;
4074
4075    default:
4076      abort ();
4077    }
4078
4079  return 2;
4080}
4081
4082/* This function returns the register class required for a secondary
4083   register when copying between one of the registers in CLASS, and X,
4084   using MODE.  A return value of NO_REGS means that no secondary register
4085   is required.  */
4086
4087enum reg_class
4088ia64_secondary_reload_class (class, mode, x)
4089     enum reg_class class;
4090     enum machine_mode mode ATTRIBUTE_UNUSED;
4091     rtx x;
4092{
4093  int regno = -1;
4094
4095  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4096    regno = true_regnum (x);
4097
4098  switch (class)
4099    {
4100    case BR_REGS:
4101    case AR_M_REGS:
4102    case AR_I_REGS:
4103      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4104	 interaction.  We end up with two pseudos with overlapping lifetimes
4105	 both of which are equiv to the same constant, and both which need
4106	 both of which are equiv to the same constant, and both of which need
4107	 changes depending on the path length, which means the qty_first_reg
4108	 check in make_regs_eqv can give different answers at different times.
4109	 At some point I'll probably need a reload_indi pattern to handle
4110	 this.
4111
4112	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4113	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4114	 non-general registers for good measure.  */
4115      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4116	return GR_REGS;
4117
4118      /* This is needed if a pseudo used as a call_operand gets spilled to a
4119	 stack slot.  */
4120      if (GET_CODE (x) == MEM)
4121	return GR_REGS;
4122      break;
4123
4124    case FR_REGS:
4125      /* Need to go through general registers to get to other class regs.  */
4126      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4127	return GR_REGS;
4128
4129      /* This can happen when a paradoxical subreg is an operand to the
4130	 muldi3 pattern.  */
4131      /* ??? This shouldn't be necessary after instruction scheduling is
4132	 enabled, because paradoxical subregs are not accepted by
4133	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4134	 stop the paradoxical subreg stupidity in the *_operand functions
4135	 in recog.c.  */
4136      if (GET_CODE (x) == MEM
4137	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4138	      || GET_MODE (x) == QImode))
4139	return GR_REGS;
4140
4141      /* This can happen because of the ior/and/etc patterns that accept FP
4142	 registers as operands.  If the third operand is a constant, then it
4143	 needs to be reloaded into a FP register.  */
4144      if (GET_CODE (x) == CONST_INT)
4145	return GR_REGS;
4146
4147      /* This can happen because of register elimination in a muldi3 insn.
4148	 E.g. `26107 * (unsigned long)&u'.  */
4149      if (GET_CODE (x) == PLUS)
4150	return GR_REGS;
4151      break;
4152
4153    case PR_REGS:
4154      /* ??? This happens if we cse/gcse a BImode value across a call,
4155	 and the function has a nonlocal goto.  This is because global
4156	 does not allocate call crossing pseudos to hard registers when
4157	 current_function_has_nonlocal_goto is true.  This is relatively
4158	 common for C++ programs that use exceptions.  To reproduce,
4159	 return NO_REGS and compile libstdc++.  */
4160      if (GET_CODE (x) == MEM)
4161	return GR_REGS;
4162
4163      /* This can happen when we take a BImode subreg of a DImode value,
4164	 and that DImode value winds up in some non-GR register.  */
4165      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4166	return GR_REGS;
4167      break;
4168
4169    case GR_REGS:
4170      /* Since we have no offsettable memory addresses, we need a temporary
4171	 to hold the address of the second word.  */
4172      if (mode == TImode)
4173	return GR_REGS;
4174      break;
4175
4176    default:
4177      break;
4178    }
4179
4180  return NO_REGS;
4181}
4182
4183/* Emit text to declare externally defined variables and functions, because
4184   the Intel assembler does not support undefined externals.  */
4185
4186void
4187ia64_asm_output_external (file, decl, name)
4188     FILE *file;
4189     tree decl;
4190     const char *name;
4191{
4192  int save_referenced;
4193
4194  /* GNU as does not need anything here, but the HP linker does need
4195     something for external functions.  */
4196
4197  if (TARGET_GNU_AS
4198      && (!TARGET_HPUX_LD
4199	  || TREE_CODE (decl) != FUNCTION_DECL
4200	  || strstr(name, "__builtin_") == name))
4201    return;
4202
4203  /* ??? The Intel assembler creates a reference that needs to be satisfied by
4204     the linker when we do this, so we need to be careful not to do this for
4205     builtin functions which have no library equivalent.  Unfortunately, we
4206     can't tell here whether or not a function will actually be called by
4207     expand_expr, so we pull in library functions even if we may not need
4208     them later.  */
4209  if (! strcmp (name, "__builtin_next_arg")
4210      || ! strcmp (name, "alloca")
4211      || ! strcmp (name, "__builtin_constant_p")
4212      || ! strcmp (name, "__builtin_args_info"))
4213    return;
4214
4215  if (TARGET_HPUX_LD)
4216    ia64_hpux_add_extern_decl (name);
4217  else
4218    {
4219      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4220         restore it.  */
4221      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4222      if (TREE_CODE (decl) == FUNCTION_DECL)
4223        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4224      (*targetm.asm_out.globalize_label) (file, name);
4225      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4226    }
4227}
4228
4229/* Parse the -mfixed-range= option string.  */
4230
4231static void
4232fix_range (const_str)
4233     const char *const_str;
4234{
4235  int i, first, last;
4236  char *str, *dash, *comma;
4237
4238  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4239     REG2 are either register names or register numbers.  The effect
4240     of this option is to mark the registers in the range from REG1 to
4241     REG2 as ``fixed'' so they won't be used by the compiler.  This is
4242     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4243
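  /* For example, -mfixed-range=f32-f127 marks f32 through f127 as both
     fixed and call-used, which is what the loop below does for each
     range in the list.  */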
4244  i = strlen (const_str);
4245  str = (char *) alloca (i + 1);
4246  memcpy (str, const_str, i + 1);
4247
4248  while (1)
4249    {
4250      dash = strchr (str, '-');
4251      if (!dash)
4252	{
4253	  warning ("value of -mfixed-range must have form REG1-REG2");
4254	  return;
4255	}
4256      *dash = '\0';
4257
4258      comma = strchr (dash + 1, ',');
4259      if (comma)
4260	*comma = '\0';
4261
4262      first = decode_reg_name (str);
4263      if (first < 0)
4264	{
4265	  warning ("unknown register name: %s", str);
4266	  return;
4267	}
4268
4269      last = decode_reg_name (dash + 1);
4270      if (last < 0)
4271	{
4272	  warning ("unknown register name: %s", dash + 1);
4273	  return;
4274	}
4275
4276      *dash = '-';
4277
4278      if (first > last)
4279	{
4280	  warning ("%s-%s is an empty range", str, dash + 1);
4281	  return;
4282	}
4283
4284      for (i = first; i <= last; ++i)
4285	fixed_regs[i] = call_used_regs[i] = 1;
4286
4287      if (!comma)
4288	break;
4289
4290      *comma = ',';
4291      str = comma + 1;
4292    }
4293}
4294
4295static struct machine_function *
4296ia64_init_machine_status ()
4297{
4298  return ggc_alloc_cleared (sizeof (struct machine_function));
4299}
4300
4301/* Handle TARGET_OPTIONS switches.  */
4302
4303void
4304ia64_override_options ()
4305{
4306  if (TARGET_AUTO_PIC)
4307    target_flags |= MASK_CONST_GP;
4308
4309  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4310    {
4311      warning ("cannot optimize floating point division for both latency and throughput");
4312      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4313    }
4314
4315  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4316    {
4317      warning ("cannot optimize integer division for both latency and throughput");
4318      target_flags &= ~MASK_INLINE_INT_DIV_THR;
4319    }
4320
4321  if (ia64_fixed_range_string)
4322    fix_range (ia64_fixed_range_string);
4323
4324  if (ia64_tls_size_string)
4325    {
4326      char *end;
4327      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4328      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4329	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4330      else
4331	ia64_tls_size = tmp;
4332    }
4333
4334  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4335  flag_schedule_insns_after_reload = 0;
4336
4337  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4338
4339  init_machine_status = ia64_init_machine_status;
4340
4341  /* Tell the compiler which flavor of TFmode we're using.  */
4342  if (INTEL_EXTENDED_IEEE_FORMAT)
4343    real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4344}
4345
4346static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4347static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4348static enum attr_type ia64_safe_type PARAMS((rtx));
4349
4350static enum attr_itanium_requires_unit0
4351ia64_safe_itanium_requires_unit0 (insn)
4352     rtx insn;
4353{
4354  if (recog_memoized (insn) >= 0)
4355    return get_attr_itanium_requires_unit0 (insn);
4356  else
4357    return ITANIUM_REQUIRES_UNIT0_NO;
4358}
4359
4360static enum attr_itanium_class
4361ia64_safe_itanium_class (insn)
4362     rtx insn;
4363{
4364  if (recog_memoized (insn) >= 0)
4365    return get_attr_itanium_class (insn);
4366  else
4367    return ITANIUM_CLASS_UNKNOWN;
4368}
4369
4370static enum attr_type
4371ia64_safe_type (insn)
4372     rtx insn;
4373{
4374  if (recog_memoized (insn) >= 0)
4375    return get_attr_type (insn);
4376  else
4377    return TYPE_UNKNOWN;
4378}
4379
4380/* The following collection of routines emit instruction group stop bits as
4381   necessary to avoid dependencies.  */
4382
4383/* Need to track some additional registers as far as serialization is
4384   concerned so we can properly handle br.call and br.ret.  We could
4385   make these registers visible to gcc, but since these registers are
4386   never explicitly used in gcc generated code, it seems wasteful to
4387   do so (plus it would make the call and return patterns needlessly
4388   complex).  */
4389#define REG_GP		(GR_REG (1))
4390#define REG_RP		(BR_REG (0))
4391#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4392/* This is used for volatile asms which may require a stop bit immediately
4393   before and after them.  */
4394#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4395#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4396#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4397
4398/* For each register, we keep track of how it has been written in the
4399   current instruction group.
4400
4401   If a register is written unconditionally (no qualifying predicate),
4402   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4403
4404   If a register is written if its qualifying predicate P is true, we
4405   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4406   may be written again by the complement of P (P^1) and when this happens,
4407   WRITE_COUNT gets set to 2.
4408
4409   The result of this is that whenever an insn attempts to write a register
4410   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4411
4412   If a predicate register is written by a floating-point insn, we set
4413   WRITTEN_BY_FP to true.
4414
4415   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4416   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4417
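/* For example, a move to r4 predicated on p6 leaves WRITE_COUNT at 1 with
   FIRST_PRED recording p6; a second write to r4 under p7 (the complement)
   raises WRITE_COUNT to 2 without needing a stop bit, whereas a second
   write under any other predicate, or an unconditional one, requires an
   insn group barrier first.  */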
4418struct reg_write_state
4419{
4420  unsigned int write_count : 2;
4421  unsigned int first_pred : 16;
4422  unsigned int written_by_fp : 1;
4423  unsigned int written_by_and : 1;
4424  unsigned int written_by_or : 1;
4425};
4426
4427/* Cumulative info for the current instruction group.  */
4428struct reg_write_state rws_sum[NUM_REGS];
4429/* Info for the current instruction.  This gets copied to rws_sum after a
4430   stop bit is emitted.  */
4431struct reg_write_state rws_insn[NUM_REGS];
4432
4433/* Indicates whether this is the first instruction after a stop bit,
4434   in which case we don't need another stop bit.  Without this, we hit
4435   the abort in ia64_variable_issue when scheduling an alloc.  */
4436static int first_instruction;
4437
4438/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4439   RTL for one instruction.  */
4440struct reg_flags
4441{
4442  unsigned int is_write : 1;	/* Is register being written?  */
4443  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4444  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4445  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4446  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4447  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4448};
4449
4450static void rws_update PARAMS ((struct reg_write_state *, int,
4451				struct reg_flags, int));
4452static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4453static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4454static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4455static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4456static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4457static void init_insn_group_barriers PARAMS ((void));
4458static int group_barrier_needed_p PARAMS ((rtx));
4459static int safe_group_barrier_needed_p PARAMS ((rtx));
4460
4461/* Update *RWS for REGNO, which is being written by the current instruction,
4462   with predicate PRED, and associated register flags in FLAGS.  */
4463
4464static void
4465rws_update (rws, regno, flags, pred)
4466     struct reg_write_state *rws;
4467     int regno;
4468     struct reg_flags flags;
4469     int pred;
4470{
4471  if (pred)
4472    rws[regno].write_count++;
4473  else
4474    rws[regno].write_count = 2;
4475  rws[regno].written_by_fp |= flags.is_fp;
4476  /* ??? Not tracking and/or across differing predicates.  */
4477  rws[regno].written_by_and = flags.is_and;
4478  rws[regno].written_by_or = flags.is_or;
4479  rws[regno].first_pred = pred;
4480}
4481
4482/* Handle an access to register REGNO of type FLAGS using predicate register
4483   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4484   a dependency with an earlier instruction in the same group.  */
4485
4486static int
4487rws_access_regno (regno, flags, pred)
4488     int regno;
4489     struct reg_flags flags;
4490     int pred;
4491{
4492  int need_barrier = 0;
4493
4494  if (regno >= NUM_REGS)
4495    abort ();
4496
4497  if (! PR_REGNO_P (regno))
4498    flags.is_and = flags.is_or = 0;
4499
4500  if (flags.is_write)
4501    {
4502      int write_count;
4503
4504      /* One insn writes same reg multiple times?  */
4505      if (rws_insn[regno].write_count > 0)
4506	abort ();
4507
4508      /* Update info for current instruction.  */
4509      rws_update (rws_insn, regno, flags, pred);
4510      write_count = rws_sum[regno].write_count;
4511
4512      switch (write_count)
4513	{
4514	case 0:
4515	  /* The register has not been written yet.  */
4516	  rws_update (rws_sum, regno, flags, pred);
4517	  break;
4518
4519	case 1:
4520	  /* The register has been written via a predicate.  If this is
4521	     not a complementary predicate, then we need a barrier.  */
4522	  /* ??? This assumes that P and P+1 are always complementary
4523	     predicates for P even.  */
4524	  if (flags.is_and && rws_sum[regno].written_by_and)
4525	    ;
4526	  else if (flags.is_or && rws_sum[regno].written_by_or)
4527	    ;
4528	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4529	    need_barrier = 1;
4530	  rws_update (rws_sum, regno, flags, pred);
4531	  break;
4532
4533	case 2:
4534	  /* The register has been unconditionally written already.  We
4535	     need a barrier.  */
4536	  if (flags.is_and && rws_sum[regno].written_by_and)
4537	    ;
4538	  else if (flags.is_or && rws_sum[regno].written_by_or)
4539	    ;
4540	  else
4541	    need_barrier = 1;
4542	  rws_sum[regno].written_by_and = flags.is_and;
4543	  rws_sum[regno].written_by_or = flags.is_or;
4544	  break;
4545
4546	default:
4547	  abort ();
4548	}
4549    }
4550  else
4551    {
4552      if (flags.is_branch)
4553	{
4554	  /* Branches have several RAW exceptions that allow us to avoid
4555	     barriers.  */
4556
4557	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4558	    /* RAW dependencies on branch regs are permissible as long
4559	       as the writer is a non-branch instruction.  Since we
4560	       never generate code that uses a branch register written
4561	       by a branch instruction, handling this case is
4562	       easy.  */
4563	    return 0;
4564
4565	  if (REGNO_REG_CLASS (regno) == PR_REGS
4566	      && ! rws_sum[regno].written_by_fp)
4567	    /* The predicates of a branch are available within the
4568	       same insn group as long as the predicate was written by
4569	       something other than a floating-point instruction.  */
4570	    return 0;
4571	}
4572
4573      if (flags.is_and && rws_sum[regno].written_by_and)
4574	return 0;
4575      if (flags.is_or && rws_sum[regno].written_by_or)
4576	return 0;
4577
4578      switch (rws_sum[regno].write_count)
4579	{
4580	case 0:
4581	  /* The register has not been written yet.  */
4582	  break;
4583
4584	case 1:
4585	  /* The register has been written via a predicate.  If this is
4586	     not a complementary predicate, then we need a barrier.  */
4587	  /* ??? This assumes that P and P+1 are always complementary
4588	     predicates for P even.  */
4589	  if ((rws_sum[regno].first_pred ^ 1) != pred)
4590	    need_barrier = 1;
4591	  break;
4592
4593	case 2:
4594	  /* The register has been unconditionally written already.  We
4595	     need a barrier.  */
4596	  need_barrier = 1;
4597	  break;
4598
4599	default:
4600	  abort ();
4601	}
4602    }
4603
4604  return need_barrier;
4605}
4606
4607static int
4608rws_access_reg (reg, flags, pred)
4609     rtx reg;
4610     struct reg_flags flags;
4611     int pred;
4612{
4613  int regno = REGNO (reg);
4614  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4615
4616  if (n == 1)
4617    return rws_access_regno (regno, flags, pred);
4618  else
4619    {
4620      int need_barrier = 0;
4621      while (--n >= 0)
4622	need_barrier |= rws_access_regno (regno + n, flags, pred);
4623      return need_barrier;
4624    }
4625}
4626
4627/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4628   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
4629
4630static void
4631update_set_flags (x, pflags, ppred, pcond)
4632     rtx x;
4633     struct reg_flags *pflags;
4634     int *ppred;
4635     rtx *pcond;
4636{
4637  rtx src = SET_SRC (x);
4638
4639  *pcond = 0;
4640
4641  switch (GET_CODE (src))
4642    {
4643    case CALL:
4644      return;
4645
4646    case IF_THEN_ELSE:
4647      if (SET_DEST (x) == pc_rtx)
4648	/* X is a conditional branch.  */
4649	return;
4650      else
4651	{
4652	  int is_complemented = 0;
4653
4654	  /* X is a conditional move.  */
4655	  rtx cond = XEXP (src, 0);
4656	  if (GET_CODE (cond) == EQ)
4657	    is_complemented = 1;
4658	  cond = XEXP (cond, 0);
4659	  if (GET_CODE (cond) != REG
4660	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4661	    abort ();
4662	  *pcond = cond;
4663	  if (XEXP (src, 1) == SET_DEST (x)
4664	      || XEXP (src, 2) == SET_DEST (x))
4665	    {
4666	      /* X is a conditional move that conditionally writes the
4667		 destination.  */
4668
4669	      /* We need another complement in this case.  */
4670	      if (XEXP (src, 1) == SET_DEST (x))
4671		is_complemented = ! is_complemented;
4672
4673	      *ppred = REGNO (cond);
4674	      if (is_complemented)
4675		++*ppred;
4676	    }
4677
4678	  /* ??? If this is a conditional write to the dest, then this
4679	     instruction does not actually read one source.  This probably
4680	     doesn't matter, because that source is also the dest.  */
4681	  /* ??? Multiple writes to predicate registers are allowed
4682	     if they are all AND type compares, or if they are all OR
4683	     type compares.  We do not generate such instructions
4684	     currently.  */
4685	}
4686      /* ... fall through ...  */
4687
4688    default:
4689      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4690	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4691	/* Set pflags->is_fp to 1 so that we know we're dealing
4692	   with a floating point comparison when processing the
4693	   destination of the SET.  */
4694	pflags->is_fp = 1;
4695
4696      /* Discover if this is a parallel comparison.  We only handle
4697	 and.orcm and or.andcm at present, since we must retain a
4698	 strict inverse on the predicate pair.  */
4699      else if (GET_CODE (src) == AND)
4700	pflags->is_and = 1;
4701      else if (GET_CODE (src) == IOR)
4702	pflags->is_or = 1;
4703
4704      break;
4705    }
4706}
4707
4708/* Subroutine of rtx_needs_barrier; this function determines whether the
4709   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
4710   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
4711   for this insn.  */
4712
4713static int
4714set_src_needs_barrier (x, flags, pred, cond)
4715     rtx x;
4716     struct reg_flags flags;
4717     int pred;
4718     rtx cond;
4719{
4720  int need_barrier = 0;
4721  rtx dst;
4722  rtx src = SET_SRC (x);
4723
4724  if (GET_CODE (src) == CALL)
4725    /* We don't need to worry about the result registers that
4726       get written by subroutine call.  */
4727    return rtx_needs_barrier (src, flags, pred);
4728  else if (SET_DEST (x) == pc_rtx)
4729    {
4730      /* X is a conditional branch.  */
4731      /* ??? This seems redundant, as the caller sets this bit for
4732	 all JUMP_INSNs.  */
4733      flags.is_branch = 1;
4734      return rtx_needs_barrier (src, flags, pred);
4735    }
4736
4737  need_barrier = rtx_needs_barrier (src, flags, pred);
4738
4739  /* This instruction unconditionally uses a predicate register.  */
4740  if (cond)
4741    need_barrier |= rws_access_reg (cond, flags, 0);
4742
4743  dst = SET_DEST (x);
4744  if (GET_CODE (dst) == ZERO_EXTRACT)
4745    {
4746      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4747      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4748      dst = XEXP (dst, 0);
4749    }
4750  return need_barrier;
4751}
4752
4753/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4754   Return 1 if this access creates a dependency with an earlier instruction
4755   in the same group.  */
4756
4757static int
4758rtx_needs_barrier (x, flags, pred)
4759     rtx x;
4760     struct reg_flags flags;
4761     int pred;
4762{
4763  int i, j;
4764  int is_complemented = 0;
4765  int need_barrier = 0;
4766  const char *format_ptr;
4767  struct reg_flags new_flags;
4768  rtx cond = 0;
4769
4770  if (! x)
4771    return 0;
4772
4773  new_flags = flags;
4774
4775  switch (GET_CODE (x))
4776    {
4777    case SET:
4778      update_set_flags (x, &new_flags, &pred, &cond);
4779      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4780      if (GET_CODE (SET_SRC (x)) != CALL)
4781	{
4782	  new_flags.is_write = 1;
4783	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4784	}
4785      break;
4786
4787    case CALL:
4788      new_flags.is_write = 0;
4789      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4790
4791      /* Avoid multiple register writes, in case this is a pattern with
4792	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
4793      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4794	{
4795	  new_flags.is_write = 1;
4796	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4797	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4798	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4799	}
4800      break;
4801
4802    case COND_EXEC:
4803      /* X is a predicated instruction.  */
4804
4805      cond = COND_EXEC_TEST (x);
4806      if (pred)
4807	abort ();
4808      need_barrier = rtx_needs_barrier (cond, flags, 0);
4809
4810      if (GET_CODE (cond) == EQ)
4811	is_complemented = 1;
4812      cond = XEXP (cond, 0);
4813      if (GET_CODE (cond) != REG
4814	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4815	abort ();
4816      pred = REGNO (cond);
4817      if (is_complemented)
4818	++pred;
4819
4820      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4821      return need_barrier;
4822
4823    case CLOBBER:
4824    case USE:
4825      /* Clobber & use are for earlier compiler-phases only.  */
4826      break;
4827
4828    case ASM_OPERANDS:
4829    case ASM_INPUT:
4830      /* We always emit stop bits for traditional asms.  We emit stop bits
4831	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
4832      if (GET_CODE (x) != ASM_OPERANDS
4833	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4834	{
4835	  /* Avoid writing the register multiple times if we have multiple
4836	     asm outputs.  This avoids an abort in rws_access_reg.  */
4837	  if (! rws_insn[REG_VOLATILE].write_count)
4838	    {
4839	      new_flags.is_write = 1;
4840	      rws_access_regno (REG_VOLATILE, new_flags, pred);
4841	    }
4842	  return 1;
4843	}
4844
4845      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4846	 We cannot just fall through here, since then we would be confused
4847	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
4848	 a traditional asm, unlike its normal usage.  */
4849
4850      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4851	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4852	  need_barrier = 1;
4853      break;
4854
4855    case PARALLEL:
4856      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4857	{
4858	  rtx pat = XVECEXP (x, 0, i);
4859	  if (GET_CODE (pat) == SET)
4860	    {
4861	      update_set_flags (pat, &new_flags, &pred, &cond);
4862	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4863	    }
4864	  else if (GET_CODE (pat) == USE
4865		   || GET_CODE (pat) == CALL
4866		   || GET_CODE (pat) == ASM_OPERANDS)
4867	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4868	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4869	    abort ();
4870	}
4871      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4872	{
4873	  rtx pat = XVECEXP (x, 0, i);
4874	  if (GET_CODE (pat) == SET)
4875	    {
4876	      if (GET_CODE (SET_SRC (pat)) != CALL)
4877		{
4878		  new_flags.is_write = 1;
4879		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4880						     pred);
4881		}
4882	    }
4883	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4884	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4885	}
4886      break;
4887
4888    case SUBREG:
4889      x = SUBREG_REG (x);
4890      /* FALLTHRU */
4891    case REG:
4892      if (REGNO (x) == AR_UNAT_REGNUM)
4893	{
4894	  for (i = 0; i < 64; ++i)
4895	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4896	}
4897      else
4898	need_barrier = rws_access_reg (x, flags, pred);
4899      break;
4900
4901    case MEM:
4902      /* Find the regs used in memory address computation.  */
4903      new_flags.is_write = 0;
4904      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4905      break;
4906
4907    case CONST_INT:   case CONST_DOUBLE:
4908    case SYMBOL_REF:  case LABEL_REF:     case CONST:
4909      break;
4910
4911      /* Operators with side-effects.  */
4912    case POST_INC:    case POST_DEC:
4913      if (GET_CODE (XEXP (x, 0)) != REG)
4914	abort ();
4915
4916      new_flags.is_write = 0;
4917      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4918      new_flags.is_write = 1;
4919      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4920      break;
4921
4922    case POST_MODIFY:
4923      if (GET_CODE (XEXP (x, 0)) != REG)
4924	abort ();
4925
4926      new_flags.is_write = 0;
4927      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4928      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4929      new_flags.is_write = 1;
4930      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4931      break;
4932
4933      /* Handle common unary and binary ops for efficiency.  */
4934    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
4935    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
4936    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
4937    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
4938    case NE:       case EQ:      case GE:      case GT:        case LE:
4939    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
4940      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4941      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4942      break;
4943
4944    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
4945    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
4946    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
4947    case SQRT:     case FFS:
4948      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4949      break;
4950
4951    case UNSPEC:
4952      switch (XINT (x, 1))
4953	{
4954	case UNSPEC_LTOFF_DTPMOD:
4955	case UNSPEC_LTOFF_DTPREL:
4956	case UNSPEC_DTPREL:
4957	case UNSPEC_LTOFF_TPREL:
4958	case UNSPEC_TPREL:
4959	case UNSPEC_PRED_REL_MUTEX:
4960	case UNSPEC_PIC_CALL:
4961        case UNSPEC_MF:
4962        case UNSPEC_FETCHADD_ACQ:
4963	case UNSPEC_BSP_VALUE:
4964	case UNSPEC_FLUSHRS:
4965	case UNSPEC_BUNDLE_SELECTOR:
4966          break;
4967
4968	case UNSPEC_GR_SPILL:
4969	case UNSPEC_GR_RESTORE:
4970	  {
4971	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4972	    HOST_WIDE_INT bit = (offset >> 3) & 63;
4973
4974	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4975	    new_flags.is_write = (XINT (x, 1) == 1);
4976	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4977					      new_flags, pred);
4978	    break;
4979	  }
4980
4981	case UNSPEC_FR_SPILL:
4982	case UNSPEC_FR_RESTORE:
4983	case UNSPEC_POPCNT:
4984	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4985	  break;
4986
4987        case UNSPEC_ADDP4:
4988	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4989	  break;
4990
4991	case UNSPEC_FR_RECIP_APPROX:
4992	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4993	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4994	  break;
4995
4996        case UNSPEC_CMPXCHG_ACQ:
4997	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4998	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4999	  break;
5000
5001	default:
5002	  abort ();
5003	}
5004      break;
5005
5006    case UNSPEC_VOLATILE:
5007      switch (XINT (x, 1))
5008	{
5009	case UNSPECV_ALLOC:
5010	  /* Alloc must always be the first instruction of a group.
5011	     We force this by always returning true.  */
5012	  /* ??? We might get better scheduling if we explicitly check for
5013	     input/local/output register dependencies, and modify the
5014	     scheduler so that alloc is always reordered to the start of
5015	     the current group.  We could then eliminate all of the
5016	     first_instruction code.  */
5017	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5018
5019	  new_flags.is_write = 1;
5020	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5021	  return 1;
5022
5023	case UNSPECV_SET_BSP:
5024	  need_barrier = 1;
5025          break;
5026
5027	case UNSPECV_BLOCKAGE:
5028	case UNSPECV_INSN_GROUP_BARRIER:
5029	case UNSPECV_BREAK:
5030	case UNSPECV_PSAC_ALL:
5031	case UNSPECV_PSAC_NORMAL:
5032	  return 0;
5033
5034	default:
5035	  abort ();
5036	}
5037      break;
5038
5039    case RETURN:
5040      new_flags.is_write = 0;
5041      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5042      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5043
5044      new_flags.is_write = 1;
5045      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5046      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5047      break;
5048
5049    default:
5050      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5051      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5052	switch (format_ptr[i])
5053	  {
5054	  case '0':	/* unused field */
5055	  case 'i':	/* integer */
5056	  case 'n':	/* note */
5057	  case 'w':	/* wide integer */
5058	  case 's':	/* pointer to string */
5059	  case 'S':	/* optional pointer to string */
5060	    break;
5061
5062	  case 'e':
5063	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5064	      need_barrier = 1;
5065	    break;
5066
5067	  case 'E':
5068	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5069	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5070		need_barrier = 1;
5071	    break;
5072
5073	  default:
5074	    abort ();
5075	  }
5076      break;
5077    }
5078  return need_barrier;
5079}
5080
5081/* Clear out the state for group_barrier_needed_p at the start of a
5082   sequence of insns.  */
5083
5084static void
5085init_insn_group_barriers ()
5086{
5087  memset (rws_sum, 0, sizeof (rws_sum));
5088  first_instruction = 1;
5089}
5090
5091/* Given the current state, recorded by previous calls to this function,
5092   determine whether a group barrier (a stop bit) is necessary before INSN.
5093   Return nonzero if so.  */
5094
5095static int
5096group_barrier_needed_p (insn)
5097     rtx insn;
5098{
5099  rtx pat;
5100  int need_barrier = 0;
5101  struct reg_flags flags;
5102
5103  memset (&flags, 0, sizeof (flags));
5104  switch (GET_CODE (insn))
5105    {
5106    case NOTE:
5107      break;
5108
5109    case BARRIER:
5110      /* A barrier doesn't imply an instruction group boundary.  */
5111      break;
5112
5113    case CODE_LABEL:
5114      memset (rws_insn, 0, sizeof (rws_insn));
5115      return 1;
5116
5117    case CALL_INSN:
5118      flags.is_branch = 1;
5119      flags.is_sibcall = SIBLING_CALL_P (insn);
5120      memset (rws_insn, 0, sizeof (rws_insn));
5121
5122      /* Don't bundle a call following another call.  */
5123      if ((pat = prev_active_insn (insn))
5124	  && GET_CODE (pat) == CALL_INSN)
5125	{
5126	  need_barrier = 1;
5127	  break;
5128	}
5129
5130      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5131      break;
5132
5133    case JUMP_INSN:
5134      flags.is_branch = 1;
5135
5136      /* Don't bundle a jump following a call.  */
5137      if ((pat = prev_active_insn (insn))
5138	  && GET_CODE (pat) == CALL_INSN)
5139	{
5140	  need_barrier = 1;
5141	  break;
5142	}
5143      /* FALLTHRU */
5144
5145    case INSN:
5146      if (GET_CODE (PATTERN (insn)) == USE
5147	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5148	/* Don't care about USE and CLOBBER "insns"---those are used to
5149	   indicate to the optimizer that it shouldn't get rid of
5150	   certain operations.  */
5151	break;
5152
5153      pat = PATTERN (insn);
5154
5155      /* Ug.  Hack hacks hacked elsewhere.  */
5156      switch (recog_memoized (insn))
5157	{
5158	  /* We play dependency tricks with the epilogue in order
5159	     to get proper schedules.  Undo this for dv analysis.  */
5160	case CODE_FOR_epilogue_deallocate_stack:
5161	case CODE_FOR_prologue_allocate_stack:
5162	  pat = XVECEXP (pat, 0, 0);
5163	  break;
5164
5165	  /* The pattern we use for br.cloop confuses the code above.
5166	     The second element of the vector is representative.  */
5167	case CODE_FOR_doloop_end_internal:
5168	  pat = XVECEXP (pat, 0, 1);
5169	  break;
5170
5171	  /* Doesn't generate code.  */
5172	case CODE_FOR_pred_rel_mutex:
5173	case CODE_FOR_prologue_use:
5174	  return 0;
5175
5176	default:
5177	  break;
5178	}
5179
5180      memset (rws_insn, 0, sizeof (rws_insn));
5181      need_barrier = rtx_needs_barrier (pat, flags, 0);
5182
5183      /* Check to see if the previous instruction was a volatile
5184	 asm.  */
5185      if (! need_barrier)
5186	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5187      break;
5188
5189    default:
5190      abort ();
5191    }
5192
5193  if (first_instruction)
5194    {
5195      need_barrier = 0;
5196      first_instruction = 0;
5197    }
5198
5199  return need_barrier;
5200}
5201
5202/* Like group_barrier_needed_p, but do not clobber the current state.  */
5203
5204static int
5205safe_group_barrier_needed_p (insn)
5206     rtx insn;
5207{
5208  struct reg_write_state rws_saved[NUM_REGS];
5209  int saved_first_instruction;
5210  int t;
5211
5212  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5213  saved_first_instruction = first_instruction;
5214
5215  t = group_barrier_needed_p (insn);
5216
5217  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5218  first_instruction = saved_first_instruction;
5219
5220  return t;
5221}
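/* The scheduling hooks below use safe_group_barrier_needed_p when they only
   want to ask whether an insn would need a stop bit, without committing the
   answer to rws_sum; plain group_barrier_needed_p is used where the state
   update is wanted.  */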
5222
5223	/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
5224	   as necessary to eliminate dependencies.  This function assumes that
5225   a final instruction scheduling pass has been run which has already
5226   inserted most of the necessary stop bits.  This function only inserts
5227   new ones at basic block boundaries, since these are invisible to the
5228   scheduler.  */
5229
5230static void
5231emit_insn_group_barriers (dump, insns)
5232     FILE *dump;
5233     rtx insns;
5234{
5235  rtx insn;
5236  rtx last_label = 0;
5237  int insns_since_last_label = 0;
5238
5239  init_insn_group_barriers ();
5240
5241  for (insn = insns; insn; insn = NEXT_INSN (insn))
5242    {
5243      if (GET_CODE (insn) == CODE_LABEL)
5244	{
5245	  if (insns_since_last_label)
5246	    last_label = insn;
5247	  insns_since_last_label = 0;
5248	}
5249      else if (GET_CODE (insn) == NOTE
5250	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5251	{
5252	  if (insns_since_last_label)
5253	    last_label = insn;
5254	  insns_since_last_label = 0;
5255	}
5256      else if (GET_CODE (insn) == INSN
5257	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5258	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5259	{
5260	  init_insn_group_barriers ();
5261	  last_label = 0;
5262	}
5263      else if (INSN_P (insn))
5264	{
5265	  insns_since_last_label = 1;
5266
5267	  if (group_barrier_needed_p (insn))
5268	    {
5269	      if (last_label)
5270		{
5271		  if (dump)
5272		    fprintf (dump, "Emitting stop before label %d\n",
5273			     INSN_UID (last_label));
5274		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5275		  insn = last_label;
5276
5277		  init_insn_group_barriers ();
5278		  last_label = 0;
5279		}
5280	    }
5281	}
5282    }
5283}
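/* For example, if the last real insn of a block writes a register that the
   first insn after the following label reads, the loop above emits an
   insn_group_barrier before that label; the barrier is rendered as a stop bit
   (";;") in the assembly output, ending the group at the block boundary.  */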
5284
5285	/* Like emit_insn_group_barriers, but used when no final scheduling pass has
5286	   been run.  In that case this function must emit all necessary group barriers.  */
5287
5288static void
5289emit_all_insn_group_barriers (dump, insns)
5290     FILE *dump ATTRIBUTE_UNUSED;
5291     rtx insns;
5292{
5293  rtx insn;
5294
5295  init_insn_group_barriers ();
5296
5297  for (insn = insns; insn; insn = NEXT_INSN (insn))
5298    {
5299      if (GET_CODE (insn) == BARRIER)
5300	{
5301	  rtx last = prev_active_insn (insn);
5302
5303	  if (! last)
5304	    continue;
5305	  if (GET_CODE (last) == JUMP_INSN
5306	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5307	    last = prev_active_insn (last);
5308	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5309	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5310
5311	  init_insn_group_barriers ();
5312	}
5313      else if (INSN_P (insn))
5314	{
5315	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5316	    init_insn_group_barriers ();
5317	  else if (group_barrier_needed_p (insn))
5318	    {
5319	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5320	      init_insn_group_barriers ();
5321	      group_barrier_needed_p (insn);
5322	    }
5323	}
5324    }
5325}
5326
5327static int errata_find_address_regs PARAMS ((rtx *, void *));
5328static void errata_emit_nops PARAMS ((rtx));
5329static void fixup_errata PARAMS ((void));
5330
5331	/* This structure is used to track some details about the previous insn
5332	   groups so we can determine if it may be necessary to insert NOPs to
5333	   work around hardware errata.  */
5334static struct group
5335{
5336  HARD_REG_SET p_reg_set;
5337  HARD_REG_SET gr_reg_conditionally_set;
5338} last_group[2];
5339
5340/* Index into the last_group array.  */
5341static int group_idx;
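/* fixup_errata below flips GROUP_IDX every time it sees a stop bit, so
   last_group[group_idx] always describes the instruction group currently
   being scanned and last_group[group_idx ^ 1] the one before it.  */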
5342
5343/* Called through for_each_rtx; determines if a hard register that was
5344   conditionally set in the previous group is used as an address register.
5345   It ensures that for_each_rtx returns 1 in that case.  */
5346static int
5347errata_find_address_regs (xp, data)
5348     rtx *xp;
5349     void *data ATTRIBUTE_UNUSED;
5350{
5351  rtx x = *xp;
5352  if (GET_CODE (x) != MEM)
5353    return 0;
5354  x = XEXP (x, 0);
5355  if (GET_CODE (x) == POST_MODIFY)
5356    x = XEXP (x, 0);
5357  if (GET_CODE (x) == REG)
5358    {
5359      struct group *prev_group = last_group + (group_idx ^ 1);
5360      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5361			     REGNO (x)))
5362	return 1;
5363      return -1;
5364    }
5365  return 0;
5366}
5367
5368/* Called for each insn; this function keeps track of the state in
5369   last_group and emits additional NOPs if necessary to work around
5370   an Itanium A/B step erratum.  */
5371static void
5372errata_emit_nops (insn)
5373     rtx insn;
5374{
5375  struct group *this_group = last_group + group_idx;
5376  struct group *prev_group = last_group + (group_idx ^ 1);
5377  rtx pat = PATTERN (insn);
5378  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5379  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5380  enum attr_type type;
5381  rtx set = real_pat;
5382
5383  if (GET_CODE (real_pat) == USE
5384      || GET_CODE (real_pat) == CLOBBER
5385      || GET_CODE (real_pat) == ASM_INPUT
5386      || GET_CODE (real_pat) == ADDR_VEC
5387      || GET_CODE (real_pat) == ADDR_DIFF_VEC
5388      || asm_noperands (PATTERN (insn)) >= 0)
5389    return;
5390
5391  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5392     parts of it.  */
5393
5394  if (GET_CODE (set) == PARALLEL)
5395    {
5396      int i;
5397      set = XVECEXP (real_pat, 0, 0);
5398      for (i = 1; i < XVECLEN (real_pat, 0); i++)
5399	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5400	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5401	  {
5402	    set = 0;
5403	    break;
5404	  }
5405    }
5406
5407  if (set && GET_CODE (set) != SET)
5408    set = 0;
5409
5410  type  = get_attr_type (insn);
5411
5412  if (type == TYPE_F
5413      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5414    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5415
5416  if ((type == TYPE_M || type == TYPE_A) && cond && set
5417      && REG_P (SET_DEST (set))
5418      && GET_CODE (SET_SRC (set)) != PLUS
5419      && GET_CODE (SET_SRC (set)) != MINUS
5420      && (GET_CODE (SET_SRC (set)) != ASHIFT
5421	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5422      && (GET_CODE (SET_SRC (set)) != MEM
5423	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5424      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5425    {
5426      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5427	  || ! REG_P (XEXP (cond, 0)))
5428	abort ();
5429
5430      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5431	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5432    }
5433  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5434    {
5435      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5436      emit_insn_before (gen_nop (), insn);
5437      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5438      group_idx = 0;
5439      memset (last_group, 0, sizeof last_group);
5440    }
5441}
5442
5443/* Emit extra nops if they are required to work around hardware errata.  */
5444
5445static void
5446fixup_errata ()
5447{
5448  rtx insn;
5449
5450  if (! TARGET_B_STEP)
5451    return;
5452
5453  group_idx = 0;
5454  memset (last_group, 0, sizeof last_group);
5455
5456  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5457    {
5458      if (!INSN_P (insn))
5459	continue;
5460
5461      if (ia64_safe_type (insn) == TYPE_S)
5462	{
5463	  group_idx ^= 1;
5464	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5465	}
5466      else
5467	errata_emit_nops (insn);
5468    }
5469}
5470
5471/* Instruction scheduling support.  */
5472/* Describe one bundle.  */
5473
5474struct bundle
5475{
5476  /* Zero if there's no possibility of a stop in this bundle other than
5477     at the end, otherwise the position of the optional stop bit.  */
5478  int possible_stop;
5479  /* The types of the three slots.  */
5480  enum attr_type t[3];
5481  /* The pseudo op to be emitted into the assembler output.  */
5482  const char *name;
5483};
5484
5485#define NR_BUNDLES 10
5486
5487/* A list of all available bundles.  */
5488
5489static const struct bundle bundle[NR_BUNDLES] =
5490{
5491  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5492  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5493  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5494  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5495#if NR_BUNDLES == 10
5496  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5497  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5498#endif
5499  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5500  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5501  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5502	  /* .mfi needs to occur earlier in this table than .mlx, so that .mlx is
5503	     only chosen when it matches an L type insn.  Otherwise we'd try to
5504	     generate L type nops.  */
5505  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5506};
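/* POSSIBLE_STOP is the slot index at which execution resumes after the
   optional mid-bundle stop: 2 for .mii (i.e. "M I ;; I") and 1 for .mmi
   (i.e. "M ;; M I").  A value of zero means the template has no mid-bundle
   stop, so a stop can only be placed at the end of the bundle.  */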
5507
5508/* Describe a packet of instructions.  Packets consist of two bundles that
5509   are visible to the hardware in one scheduling window.  */
5510
5511struct ia64_packet
5512{
5513  const struct bundle *t1, *t2;
5514  /* Precomputed value of the first split issue in this packet if a cycle
5515     starts at its beginning.  */
5516  int first_split;
5517  /* For convenience, the insn types are replicated here so we don't have
5518     to go through T1 and T2 all the time.  */
5519  enum attr_type t[6];
5520};
5521
5522/* An array containing all possible packets.  */
5523#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5524static struct ia64_packet packets[NR_PACKETS];
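/* A packet is simply an ordered pair of bundles; e.g. .mii followed by .mfb
   gives T[] = { M, I, I, M, F, B }.  With NR_BUNDLES == 10 this yields 100
   packets, which are filled in (and their FIRST_SPLIT precomputed) the first
   time ia64_sched_init runs.  */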
5525
5526/* Map attr_type to a string with the name.  */
5527
5528static const char *const type_names[] =
5529{
5530  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5531};
5532
5533/* Nonzero if we should insert stop bits into the schedule.  */
5534int ia64_final_schedule = 0;
5535
5536static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5537static rtx ia64_single_set PARAMS ((rtx));
5538static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5539static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5540static void maybe_rotate PARAMS ((FILE *));
5541static void finish_last_head PARAMS ((FILE *, int));
5542static void rotate_one_bundle PARAMS ((FILE *));
5543static void rotate_two_bundles PARAMS ((FILE *));
5544static void nop_cycles_until PARAMS ((int, FILE *));
5545static void cycle_end_fill_slots PARAMS ((FILE *));
5546static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5547static int get_split PARAMS ((const struct ia64_packet *, int));
5548static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5549				   const struct ia64_packet *, int));
5550static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5551				      rtx *, enum attr_type *, int));
5552static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5553static void dump_current_packet PARAMS ((FILE *));
5554static void schedule_stop PARAMS ((FILE *));
5555static rtx gen_nop_type PARAMS ((enum attr_type));
5556static void ia64_emit_nops PARAMS ((void));
5557
5558/* Map a bundle number to its pseudo-op.  */
5559
5560const char *
5561get_bundle_name (b)
5562     int b;
5563{
5564  return bundle[b].name;
5565}
5566
5567/* Compute the slot which will cause a split issue in packet P if the
5568   current cycle begins at slot BEGIN.  */
5569
5570static int
5571itanium_split_issue (p, begin)
5572     const struct ia64_packet *p;
5573     int begin;
5574{
5575  int type_count[TYPE_S];
5576  int i;
5577  int split = 6;
5578
5579  if (begin < 3)
5580    {
5581      /* Always split before and after MMF.  */
5582      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5583	return 3;
5584      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5585	return 3;
5586      /* Always split after MBB and BBB.  */
5587      if (p->t[1] == TYPE_B)
5588	return 3;
5589      /* Split after first bundle in MIB BBB combination.  */
5590      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5591	return 3;
5592    }
5593
5594  memset (type_count, 0, sizeof type_count);
5595  for (i = begin; i < split; i++)
5596    {
5597      enum attr_type t0 = p->t[i];
5598      /* An MLX bundle reserves the same units as an MFI bundle.  */
5599      enum attr_type t = (t0 == TYPE_L ? TYPE_F
5600			  : t0 == TYPE_X ? TYPE_I
5601			  : t0);
5602
5603      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5604	 2 integer per cycle.  */
5605      int max = (t == TYPE_B ? 3 : 2);
5606      if (type_count[t] == max)
5607	return i;
5608
5609      type_count[t]++;
5610    }
5611  return split;
5612}
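/* Example: for a packet built from two .mii bundles starting at slot 0, the
   counts above reach two I-type insns by slot 2, so the third I insn (slot 4)
   causes the split and the function returns 4.  An MMF bundle is special-cased
   and always splits at slot 3.  */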
5613
5614/* Return the maximum number of instructions a cpu can issue.  */
5615
5616static int
5617ia64_issue_rate ()
5618{
5619  return 6;
5620}
5621
5622/* Helper function - like single_set, but look inside COND_EXEC.  */
5623
5624static rtx
5625ia64_single_set (insn)
5626     rtx insn;
5627{
5628  rtx x = PATTERN (insn), ret;
5629  if (GET_CODE (x) == COND_EXEC)
5630    x = COND_EXEC_CODE (x);
5631  if (GET_CODE (x) == SET)
5632    return x;
5633
5634  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5635     Although they are not classical single set, the second set is there just
5636     to protect it from moving past FP-relative stack accesses.  */
5637  switch (recog_memoized (insn))
5638    {
5639    case CODE_FOR_prologue_allocate_stack:
5640    case CODE_FOR_epilogue_deallocate_stack:
5641      ret = XVECEXP (x, 0, 0);
5642      break;
5643
5644    default:
5645      ret = single_set_2 (insn, x);
5646      break;
5647    }
5648
5649  return ret;
5650}
5651
5652	/* Adjust the cost of a scheduling dependency.  Return the new cost of the
5653	   dependency of INSN on DEP_INSN described by LINK.  COST is the current cost.  */
5654
5655static int
5656ia64_adjust_cost (insn, link, dep_insn, cost)
5657     rtx insn, link, dep_insn;
5658     int cost;
5659{
5660  enum attr_type dep_type;
5661  enum attr_itanium_class dep_class;
5662  enum attr_itanium_class insn_class;
5663  rtx dep_set, set, src, addr;
5664
5665  if (GET_CODE (PATTERN (insn)) == CLOBBER
5666      || GET_CODE (PATTERN (insn)) == USE
5667      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5668      || GET_CODE (PATTERN (dep_insn)) == USE
5669      /* @@@ Not accurate for indirect calls.  */
5670      || GET_CODE (insn) == CALL_INSN
5671      || ia64_safe_type (insn) == TYPE_S)
5672    return 0;
5673
5674  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5675      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5676    return 0;
5677
5678  dep_type = ia64_safe_type (dep_insn);
5679  dep_class = ia64_safe_itanium_class (dep_insn);
5680  insn_class = ia64_safe_itanium_class (insn);
5681
5682  /* Compares that feed a conditional branch can execute in the same
5683     cycle.  */
5684  dep_set = ia64_single_set (dep_insn);
5685  set = ia64_single_set (insn);
5686
5687  if (dep_type != TYPE_F
5688      && dep_set
5689      && GET_CODE (SET_DEST (dep_set)) == REG
5690      && PR_REG (REGNO (SET_DEST (dep_set)))
5691      && GET_CODE (insn) == JUMP_INSN)
5692    return 0;
5693
5694  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5695    {
5696	      /* ??? Can't find any information in the documentation about whether
5697	 a sequence
5698	   st [rx] = ra
5699	   ld rb = [ry]
5700	 splits issue.  Assume it doesn't.  */
5701      return 0;
5702    }
5703
5704  src = set ? SET_SRC (set) : 0;
5705  addr = 0;
5706  if (set)
5707    {
5708      if (GET_CODE (SET_DEST (set)) == MEM)
5709	addr = XEXP (SET_DEST (set), 0);
5710      else if (GET_CODE (SET_DEST (set)) == SUBREG
5711	       && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5712	addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5713      else
5714	{
5715	  addr = src;
5716	  if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5717	    addr = XVECEXP (addr, 0, 0);
5718	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5719	    addr = XEXP (addr, 0);
5720
5721	  /* Note that LO_SUM is used for GOT loads.  */
5722	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
5723	    addr = XEXP (addr, 0);
5724	  else
5725	    addr = 0;
5726	}
5727    }
5728
5729  if (addr && GET_CODE (addr) == POST_MODIFY)
5730    addr = XEXP (addr, 0);
5731
5732  set = ia64_single_set (dep_insn);
5733
5734  if ((dep_class == ITANIUM_CLASS_IALU
5735       || dep_class == ITANIUM_CLASS_ILOG
5736       || dep_class == ITANIUM_CLASS_LD)
5737      && (insn_class == ITANIUM_CLASS_LD
5738	  || insn_class == ITANIUM_CLASS_ST))
5739    {
5740      if (! addr || ! set)
5741	abort ();
5742      /* This isn't completely correct - an IALU that feeds an address has
5743	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5744	 otherwise.  Unfortunately there's no good way to describe this.  */
5745      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5746	return cost + 1;
5747    }
5748
5749  if ((dep_class == ITANIUM_CLASS_IALU
5750       || dep_class == ITANIUM_CLASS_ILOG
5751       || dep_class == ITANIUM_CLASS_LD)
5752      && (insn_class == ITANIUM_CLASS_MMMUL
5753	  || insn_class == ITANIUM_CLASS_MMSHF
5754	  || insn_class == ITANIUM_CLASS_MMSHFI))
5755    return 3;
5756
5757  if (dep_class == ITANIUM_CLASS_FMAC
5758      && (insn_class == ITANIUM_CLASS_FMISC
5759	  || insn_class == ITANIUM_CLASS_FCVTFX
5760	  || insn_class == ITANIUM_CLASS_XMPY))
5761    return 7;
5762
5763  if ((dep_class == ITANIUM_CLASS_FMAC
5764       || dep_class == ITANIUM_CLASS_FMISC
5765       || dep_class == ITANIUM_CLASS_FCVTFX
5766       || dep_class == ITANIUM_CLASS_XMPY)
5767      && insn_class == ITANIUM_CLASS_STF)
5768    return 8;
5769
5770  /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5771     but HP engineers say any non-MM operation.  */
5772  if ((dep_class == ITANIUM_CLASS_MMMUL
5773       || dep_class == ITANIUM_CLASS_MMSHF
5774       || dep_class == ITANIUM_CLASS_MMSHFI)
5775      && insn_class != ITANIUM_CLASS_MMMUL
5776      && insn_class != ITANIUM_CLASS_MMSHF
5777      && insn_class != ITANIUM_CLASS_MMSHFI)
5778    return 4;
5779
5780  return cost;
5781}
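/* Two concrete effects of the rules above: an IALU/ILOG/LD result that is
   used as the address of a following load or store costs one extra cycle,
   and an MM multiply or shift feeding any non-MM consumer is given a latency
   of 4 cycles.  */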
5782
5783/* Describe the current state of the Itanium pipeline.  */
5784static struct
5785{
5786  /* The first slot that is used in the current cycle.  */
5787  int first_slot;
5788  /* The next slot to fill.  */
5789  int cur;
5790  /* The packet we have selected for the current issue window.  */
5791  const struct ia64_packet *packet;
5792  /* The position of the split issue that occurs due to issue width
5793     limitations (6 if there's no split issue).  */
5794  int split;
5795  /* Record data about the insns scheduled so far in the same issue
5796     window.  The elements up to but not including FIRST_SLOT belong
5797     to the previous cycle, the ones starting with FIRST_SLOT belong
5798     to the current cycle.  */
5799  enum attr_type types[6];
5800  rtx insns[6];
5801  int stopbit[6];
5802  /* Nonzero if we decided to schedule a stop bit.  */
5803  int last_was_stop;
5804} sched_data;
5805
5806/* Temporary arrays; they have enough elements to hold all insns that
5807	   can be ready at the same time while scheduling the current block.
5808   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
5809static rtx *sched_ready;
5810static enum attr_type *sched_types;
5811
5812/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5813   of packet P.  */
5814
5815static int
5816insn_matches_slot (p, itype, slot, insn)
5817     const struct ia64_packet *p;
5818     enum attr_type itype;
5819     int slot;
5820     rtx insn;
5821{
5822  enum attr_itanium_requires_unit0 u0;
5823  enum attr_type stype = p->t[slot];
5824
5825  if (insn)
5826    {
5827      u0 = ia64_safe_itanium_requires_unit0 (insn);
5828      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5829	{
5830	  int i;
5831	  for (i = sched_data.first_slot; i < slot; i++)
5832	    if (p->t[i] == stype
5833		|| (stype == TYPE_F && p->t[i] == TYPE_L)
5834		|| (stype == TYPE_I && p->t[i] == TYPE_X))
5835	      return 0;
5836	}
5837      if (GET_CODE (insn) == CALL_INSN)
5838	{
5839	  /* Reject calls in multiway branch packets.  We want to limit
5840	     the number of multiway branches we generate (since the branch
5841	     predictor is limited), and this seems to work fairly well.
5842	     (If we didn't do this, we'd have to add another test here to
5843	     force calls into the third slot of the bundle.)  */
5844	  if (slot < 3)
5845	    {
5846	      if (p->t[1] == TYPE_B)
5847		return 0;
5848	    }
5849	  else
5850	    {
5851	      if (p->t[4] == TYPE_B)
5852		return 0;
5853	    }
5854	}
5855    }
5856
5857  if (itype == stype)
5858    return 1;
5859  if (itype == TYPE_A)
5860    return stype == TYPE_M || stype == TYPE_I;
5861  return 0;
5862}
5863
5864/* Like emit_insn_before, but skip cycle_display notes.
5865   ??? When cycle display notes are implemented, update this.  */
5866
5867static void
5868ia64_emit_insn_before (insn, before)
5869     rtx insn, before;
5870{
5871  emit_insn_before (insn, before);
5872}
5873
5874/* When rotating a bundle out of the issue window, insert a bundle selector
5875   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
5876   is either 0 or 3, depending on whether we want to emit a bundle selector
5877   for the first bundle or the second bundle in the current issue window.
5878
5879   The selector insns are emitted this late because the selected packet can
5880   be changed until parts of it get rotated out.  */
5881
5882static void
5883finish_last_head (dump, start)
5884     FILE *dump;
5885     int start;
5886{
5887  const struct ia64_packet *p = sched_data.packet;
5888  const struct bundle *b = start == 0 ? p->t1 : p->t2;
5889  int bundle_type = b - bundle;
5890  rtx insn;
5891  int i;
5892
5893  if (! ia64_final_schedule)
5894    return;
5895
5896  for (i = start; sched_data.insns[i] == 0; i++)
5897    if (i == start + 3)
5898      abort ();
5899  insn = sched_data.insns[i];
5900
5901  if (dump)
5902    fprintf (dump, "//    Emitting template before %d: %s\n",
5903	     INSN_UID (insn), b->name);
5904
5905  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5906}
5907
5908/* We can't schedule more insns this cycle.  Fix up the scheduling state
5909   and advance FIRST_SLOT and CUR.
5910   We have to distribute the insns that are currently found between
5911   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
5912   far, they are stored successively in the fields starting at FIRST_SLOT;
5913   now they must be moved to the correct slots.
5914   DUMP is the current scheduling dump file, or NULL.  */
5915
5916static void
5917cycle_end_fill_slots (dump)
5918     FILE *dump;
5919{
5920  const struct ia64_packet *packet = sched_data.packet;
5921  int slot, i;
5922  enum attr_type tmp_types[6];
5923  rtx tmp_insns[6];
5924
5925  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5926  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5927
5928  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5929    {
5930      enum attr_type t = tmp_types[i];
5931      if (t != ia64_safe_type (tmp_insns[i]))
5932	abort ();
5933      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5934	{
5935	  if (slot > sched_data.split)
5936	    abort ();
5937	  if (dump)
5938	    fprintf (dump, "// Packet needs %s, have %s\n",
5939		     type_names[packet->t[slot]], type_names[t]);
5940	  sched_data.types[slot] = packet->t[slot];
5941	  sched_data.insns[slot] = 0;
5942	  sched_data.stopbit[slot] = 0;
5943
5944	  /* ??? TYPE_L instructions always fill up two slots, but we don't
5945	     support TYPE_L nops.  */
5946	  if (packet->t[slot] == TYPE_L)
5947	    abort ();
5948
5949	  slot++;
5950	}
5951
5952      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
5953	 actual slot type later.  */
5954      sched_data.types[slot] = packet->t[slot];
5955      sched_data.insns[slot] = tmp_insns[i];
5956      sched_data.stopbit[slot] = 0;
5957      slot++;
5958
5959      /* TYPE_L instructions always fill up two slots.  */
5960      if (t == TYPE_L)
5961	{
5962	  sched_data.types[slot] = packet->t[slot];
5963	  sched_data.insns[slot] = 0;
5964	  sched_data.stopbit[slot] = 0;
5965	  slot++;
5966	}
5967    }
5968
5969  /* This isn't right - there's no need to pad out until the forced split;
5970     the CPU will automatically split if an insn isn't ready.  */
5971#if 0
5972  while (slot < sched_data.split)
5973    {
5974      sched_data.types[slot] = packet->t[slot];
5975      sched_data.insns[slot] = 0;
5976      sched_data.stopbit[slot] = 0;
5977      slot++;
5978    }
5979#endif
5980
5981  sched_data.first_slot = sched_data.cur = slot;
5982}
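/* For example, with an .mfi packet and two pending insns of type M and I,
   the M insn lands in slot 0, slot 1 is recorded as an empty F slot (insn 0),
   and the I insn lands in slot 2; the empty slot becomes a real NOP later,
   when ia64_emit_nops walks the insn stream.  */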
5983
5984/* Bundle rotations, as described in the Itanium optimization manual.
5985   We can rotate either one or both bundles out of the issue window.
5986   DUMP is the current scheduling dump file, or NULL.  */
5987
5988static void
5989rotate_one_bundle (dump)
5990     FILE *dump;
5991{
5992  if (dump)
5993    fprintf (dump, "// Rotating one bundle.\n");
5994
5995  finish_last_head (dump, 0);
5996  if (sched_data.cur > 3)
5997    {
5998      sched_data.cur -= 3;
5999      sched_data.first_slot -= 3;
6000      memmove (sched_data.types,
6001	       sched_data.types + 3,
6002	       sched_data.cur * sizeof *sched_data.types);
6003      memmove (sched_data.stopbit,
6004	       sched_data.stopbit + 3,
6005	       sched_data.cur * sizeof *sched_data.stopbit);
6006      memmove (sched_data.insns,
6007	       sched_data.insns + 3,
6008	       sched_data.cur * sizeof *sched_data.insns);
6009      sched_data.packet
6010	= &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
6011    }
6012  else
6013    {
6014      sched_data.cur = 0;
6015      sched_data.first_slot = 0;
6016    }
6017}
6018
6019static void
6020rotate_two_bundles (dump)
6021     FILE *dump;
6022{
6023  if (dump)
6024    fprintf (dump, "// Rotating two bundles.\n");
6025
6026  if (sched_data.cur == 0)
6027    return;
6028
6029  finish_last_head (dump, 0);
6030  if (sched_data.cur > 3)
6031    finish_last_head (dump, 3);
6032  sched_data.cur = 0;
6033  sched_data.first_slot = 0;
6034}
6035
6036/* We're beginning a new block.  Initialize data structures as necessary.  */
6037
6038static void
6039ia64_sched_init (dump, sched_verbose, max_ready)
6040     FILE *dump ATTRIBUTE_UNUSED;
6041     int sched_verbose ATTRIBUTE_UNUSED;
6042     int max_ready;
6043{
6044  static int initialized = 0;
6045
6046  if (! initialized)
6047    {
6048      int b1, b2, i;
6049
6050      initialized = 1;
6051
6052      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
6053	{
6054	  const struct bundle *t1 = bundle + b1;
6055	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6056	    {
6057	      const struct bundle *t2 = bundle + b2;
6058
6059	      packets[i].t1 = t1;
6060	      packets[i].t2 = t2;
6061	    }
6062	}
6063      for (i = 0; i < NR_PACKETS; i++)
6064	{
6065	  int j;
6066	  for (j = 0; j < 3; j++)
6067	    packets[i].t[j] = packets[i].t1->t[j];
6068	  for (j = 0; j < 3; j++)
6069	    packets[i].t[j + 3] = packets[i].t2->t[j];
6070	  packets[i].first_split = itanium_split_issue (packets + i, 0);
6071	}
6072
6073    }
6074
6075  init_insn_group_barriers ();
6076
6077  memset (&sched_data, 0, sizeof sched_data);
6078  sched_types = (enum attr_type *) xmalloc (max_ready
6079					    * sizeof (enum attr_type));
6080  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
6081}
6082
6083/* See if the packet P can match the insns we have already scheduled.  Return
6084   nonzero if so.  In *PSLOT, we store the first slot that is available for
6085   more instructions if we choose this packet.
6086	   SPLIT holds the last slot we can use; there's a split issue after it, so
6087	   scheduling beyond it would cause us to use more than one cycle.  */
6088
6089static int
6090packet_matches_p (p, split, pslot)
6091     const struct ia64_packet *p;
6092     int split;
6093     int *pslot;
6094{
6095  int filled = sched_data.cur;
6096  int first = sched_data.first_slot;
6097  int i, slot;
6098
6099  /* First, check if the first of the two bundles must be a specific one (due
6100     to stop bits).  */
6101  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6102    return 0;
6103  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6104    return 0;
6105
6106  for (i = 0; i < first; i++)
6107    if (! insn_matches_slot (p, sched_data.types[i], i,
6108			     sched_data.insns[i]))
6109      return 0;
6110  for (i = slot = first; i < filled; i++)
6111    {
6112      while (slot < split)
6113	{
6114	  if (insn_matches_slot (p, sched_data.types[i], slot,
6115				 sched_data.insns[i]))
6116	    break;
6117	  slot++;
6118	}
6119      if (slot == split)
6120	return 0;
6121      slot++;
6122    }
6123
6124  if (pslot)
6125    *pslot = slot;
6126  return 1;
6127}
6128
6129/* A frontend for itanium_split_issue.  For a packet P and a slot
6130   number FIRST that describes the start of the current clock cycle,
6131   return the slot number of the first split issue.  This function
6132   uses the cached number found in P if possible.  */
6133
6134static int
6135get_split (p, first)
6136     const struct ia64_packet *p;
6137     int first;
6138{
6139  if (first == 0)
6140    return p->first_split;
6141  return itanium_split_issue (p, first);
6142}
6143
6144/* Given N_READY insns in the array READY, whose types are found in the
6145   corresponding array TYPES, return the insn that is best suited to be
6146   scheduled in slot SLOT of packet P.  */
6147
6148static int
6149find_best_insn (ready, types, n_ready, p, slot)
6150     rtx *ready;
6151     enum attr_type *types;
6152     int n_ready;
6153     const struct ia64_packet *p;
6154     int slot;
6155{
6156  int best = -1;
6157  int best_pri = 0;
6158  while (n_ready-- > 0)
6159    {
6160      rtx insn = ready[n_ready];
6161      if (! insn)
6162	continue;
6163      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6164	break;
6165      /* If we have equally good insns, one of which has a stricter
6166	 slot requirement, prefer the one with the stricter requirement.  */
6167      if (best >= 0 && types[n_ready] == TYPE_A)
6168	continue;
6169      if (insn_matches_slot (p, types[n_ready], slot, insn))
6170	{
6171	  best = n_ready;
6172	  best_pri = INSN_PRIORITY (ready[best]);
6173
6174	  /* If there's no way we could get a stricter requirement, stop
6175	     looking now.  */
6176	  if (types[n_ready] != TYPE_A
6177	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6178	    break;
6179	  break;
6180	}
6181    }
6182  return best;
6183}
6184
6185/* Select the best packet to use given the current scheduler state and the
6186   current ready list.
6187   READY is an array holding N_READY ready insns; TYPES is a corresponding
6188   array that holds their types.  Store the best packet in *PPACKET and the
6189   number of insns that can be scheduled in the current cycle in *PBEST.  */
6190
6191static void
6192find_best_packet (pbest, ppacket, ready, types, n_ready)
6193     int *pbest;
6194     const struct ia64_packet **ppacket;
6195     rtx *ready;
6196     enum attr_type *types;
6197     int n_ready;
6198{
6199  int first = sched_data.first_slot;
6200  int best = 0;
6201  int lowest_end = 6;
6202  const struct ia64_packet *best_packet = NULL;
6203  int i;
6204
6205  for (i = 0; i < NR_PACKETS; i++)
6206    {
6207      const struct ia64_packet *p = packets + i;
6208      int slot;
6209      int split = get_split (p, first);
6210      int win = 0;
6211      int first_slot, last_slot;
6212      int b_nops = 0;
6213
6214      if (! packet_matches_p (p, split, &first_slot))
6215	continue;
6216
6217      memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6218
6219      win = 0;
6220      last_slot = 6;
6221      for (slot = first_slot; slot < split; slot++)
6222	{
6223	  int insn_nr;
6224
6225	  /* Disallow a degenerate case where the first bundle doesn't
6226	     contain anything but NOPs!  */
6227	  if (first_slot == 0 && win == 0 && slot == 3)
6228	    {
6229	      win = -1;
6230	      break;
6231	    }
6232
6233	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6234	  if (insn_nr >= 0)
6235	    {
6236	      sched_ready[insn_nr] = 0;
6237	      last_slot = slot;
6238	      win++;
6239	    }
6240	  else if (p->t[slot] == TYPE_B)
6241	    b_nops++;
6242	}
6243      /* We must disallow MBB/BBB packets if any of their B slots would be
6244	 filled with nops.  */
6245      if (last_slot < 3)
6246	{
6247	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6248	    win = -1;
6249	}
6250      else
6251	{
6252	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6253	    win = -1;
6254	}
6255
6256      if (win > best
6257	  || (win == best && last_slot < lowest_end))
6258	{
6259	  best = win;
6260	  lowest_end = last_slot;
6261	  best_packet = p;
6262	}
6263    }
6264  *pbest = best;
6265  *ppacket = best_packet;
6266}
6267
6268/* Reorder the ready list so that the insns that can be issued in this cycle
6269   are found in the correct order at the end of the list.
6270   DUMP is the scheduling dump file, or NULL.  READY points to the start,
6271   E_READY to the end of the ready list.  MAY_FAIL determines what should be
6272   done if no insns can be scheduled in this cycle: if it is zero, we abort,
6273   otherwise we return 0.
6274   Return 1 if any insns can be scheduled in this cycle.  */
6275
6276static int
6277itanium_reorder (dump, ready, e_ready, may_fail)
6278     FILE *dump;
6279     rtx *ready;
6280     rtx *e_ready;
6281     int may_fail;
6282{
6283  const struct ia64_packet *best_packet;
6284  int n_ready = e_ready - ready;
6285  int first = sched_data.first_slot;
6286  int i, best, best_split, filled;
6287
6288  for (i = 0; i < n_ready; i++)
6289    sched_types[i] = ia64_safe_type (ready[i]);
6290
6291  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6292
6293  if (best == 0)
6294    {
6295      if (may_fail)
6296	return 0;
6297      abort ();
6298    }
6299
6300  if (dump)
6301    {
6302      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6303	       best_packet->t1->name,
6304	       best_packet->t2 ? best_packet->t2->name : "(none)", best);
6305    }
6306
6307  best_split = itanium_split_issue (best_packet, first);
6308  packet_matches_p (best_packet, best_split, &filled);
6309
6310  for (i = filled; i < best_split; i++)
6311    {
6312      int insn_nr;
6313
6314      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6315      if (insn_nr >= 0)
6316	{
6317	  rtx insn = ready[insn_nr];
6318	  memmove (ready + insn_nr, ready + insn_nr + 1,
6319		   (n_ready - insn_nr - 1) * sizeof (rtx));
6320	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6321		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6322	  ready[--n_ready] = insn;
6323	}
6324    }
6325
6326  sched_data.packet = best_packet;
6327  sched_data.split = best_split;
6328  return 1;
6329}
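/* The net effect of the above: the insns chosen for the packet are moved to
   the end of the READY array, in slot order, so that the scheduler's usual
   "issue from the back of the ready list" behaviour emits them in exactly the
   positions find_best_insn picked for them.  */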
6330
6331/* Dump information about the current scheduling state to file DUMP.  */
6332
6333static void
6334dump_current_packet (dump)
6335     FILE *dump;
6336{
6337  int i;
6338  fprintf (dump, "//    %d slots filled:", sched_data.cur);
6339  for (i = 0; i < sched_data.first_slot; i++)
6340    {
6341      rtx insn = sched_data.insns[i];
6342      fprintf (dump, " %s", type_names[sched_data.types[i]]);
6343      if (insn)
6344	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6345      if (sched_data.stopbit[i])
6346	fprintf (dump, " ;;");
6347    }
6348  fprintf (dump, " :::");
6349  for (i = sched_data.first_slot; i < sched_data.cur; i++)
6350    {
6351      rtx insn = sched_data.insns[i];
6352      enum attr_type t = ia64_safe_type (insn);
6353      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6354    }
6355  fprintf (dump, "\n");
6356}
6357
6358/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
6359   NULL.  */
6360
6361static void
6362schedule_stop (dump)
6363     FILE *dump;
6364{
6365  const struct ia64_packet *best = sched_data.packet;
6366  int i;
6367  int best_stop = 6;
6368
6369  if (dump)
6370    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6371
6372  if (sched_data.cur == 0)
6373    {
6374      if (dump)
6375	fprintf (dump, "//   At start of bundle, so nothing to do.\n");
6376
6377      rotate_two_bundles (NULL);
6378      return;
6379    }
6380
6381  for (i = -1; i < NR_PACKETS; i++)
6382    {
6383      /* This is a slight hack to give the current packet the first chance.
6384	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
6385      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6386      int split = get_split (p, sched_data.first_slot);
6387      const struct bundle *compare;
6388      int next, stoppos;
6389
6390      if (! packet_matches_p (p, split, &next))
6391	continue;
6392
6393      compare = next > 3 ? p->t2 : p->t1;
6394
6395      stoppos = 3;
6396      if (compare->possible_stop)
6397	stoppos = compare->possible_stop;
6398      if (next > 3)
6399	stoppos += 3;
6400
6401      if (stoppos < next || stoppos >= best_stop)
6402	{
6403	  if (compare->possible_stop == 0)
6404	    continue;
6405	  stoppos = (next > 3 ? 6 : 3);
6406	}
6407      if (stoppos < next || stoppos >= best_stop)
6408	continue;
6409
6410      if (dump)
6411	fprintf (dump, "//   switching from %s %s to %s %s (stop at %d)\n",
6412		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6413		 stoppos);
6414
6415      best_stop = stoppos;
6416      best = p;
6417    }
6418
6419  sched_data.packet = best;
6420  cycle_end_fill_slots (dump);
6421  while (sched_data.cur < best_stop)
6422    {
6423      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6424      sched_data.insns[sched_data.cur] = 0;
6425      sched_data.stopbit[sched_data.cur] = 0;
6426      sched_data.cur++;
6427    }
6428  sched_data.stopbit[sched_data.cur - 1] = 1;
6429  sched_data.first_slot = best_stop;
6430
6431  if (dump)
6432    dump_current_packet (dump);
6433}
6434
6435/* If necessary, perform one or two rotations on the scheduling state.
6436   This should only be called if we are starting a new cycle.  */
6437
6438static void
6439maybe_rotate (dump)
6440     FILE *dump;
6441{
6442  cycle_end_fill_slots (dump);
6443  if (sched_data.cur == 6)
6444    rotate_two_bundles (dump);
6445  else if (sched_data.cur >= 3)
6446    rotate_one_bundle (dump);
6447  sched_data.first_slot = sched_data.cur;
6448}
6449
6450/* The clock cycle when ia64_sched_reorder was last called.  */
6451static int prev_cycle;
6452
6453	/* The first slot used in the previous cycle.  This is the saved
6454	   value of sched_data.first_slot.  */
6455static int prev_first;
6456
6457/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
6458   pad out the delay between MM (shifts, etc.) and integer operations.  */
6459
6460static void
6461nop_cycles_until (clock_var, dump)
6462     int clock_var;
6463     FILE *dump;
6464{
6465  int prev_clock = prev_cycle;
6466  int cycles_left = clock_var - prev_clock;
6467  bool did_stop = false;
6468
6469  /* Finish the previous cycle; pad it out with NOPs.  */
6470  if (sched_data.cur == 3)
6471    {
6472      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6473      did_stop = true;
6474      maybe_rotate (dump);
6475    }
6476  else if (sched_data.cur > 0)
6477    {
6478      int need_stop = 0;
6479      int split = itanium_split_issue (sched_data.packet, prev_first);
6480
6481      if (sched_data.cur < 3 && split > 3)
6482	{
6483	  split = 3;
6484	  need_stop = 1;
6485	}
6486
6487      if (split > sched_data.cur)
6488	{
6489	  int i;
6490	  for (i = sched_data.cur; i < split; i++)
6491	    {
6492	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6493	      sched_data.types[i] = sched_data.packet->t[i];
6494	      sched_data.insns[i] = t;
6495	      sched_data.stopbit[i] = 0;
6496	    }
6497	  sched_data.cur = split;
6498	}
6499
6500      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6501	  && cycles_left > 1)
6502	{
6503	  int i;
6504	  for (i = sched_data.cur; i < 6; i++)
6505	    {
6506	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6507	      sched_data.types[i] = sched_data.packet->t[i];
6508	      sched_data.insns[i] = t;
6509	      sched_data.stopbit[i] = 0;
6510	    }
6511	  sched_data.cur = 6;
6512	  cycles_left--;
6513	  need_stop = 1;
6514	}
6515
6516      if (need_stop || sched_data.cur == 6)
6517	{
6518	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6519	  did_stop = true;
6520	}
6521      maybe_rotate (dump);
6522    }
6523
6524  cycles_left--;
6525  while (cycles_left > 0)
6526    {
6527      sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6528      sched_emit_insn (gen_nop_type (TYPE_M));
6529      sched_emit_insn (gen_nop_type (TYPE_I));
6530      if (cycles_left > 1)
6531	{
6532	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6533	  cycles_left--;
6534	}
6535      sched_emit_insn (gen_nop_type (TYPE_I));
6536      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6537      did_stop = true;
6538      cycles_left--;
6539    }
6540
6541  if (did_stop)
6542    init_insn_group_barriers ();
6543}
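/* Each iteration of the final loop above fills one (or, with the extra
   mid-bundle stop, two) idle cycles with an .mii bundle consisting entirely
   of nops: bundle selector 0, nop.m, nop.i, an optional stop after the second
   slot, nop.i, and a closing stop bit.  */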
6544
6545	/* We are about to begin issuing insns for this clock cycle.
6546   Override the default sort algorithm to better slot instructions.  */
6547
6548static int
6549ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6550		    reorder_type, clock_var)
6551     FILE *dump ATTRIBUTE_UNUSED;
6552     int sched_verbose ATTRIBUTE_UNUSED;
6553     rtx *ready;
6554     int *pn_ready;
6555     int reorder_type, clock_var;
6556{
6557  int n_asms;
6558  int n_ready = *pn_ready;
6559  rtx *e_ready = ready + n_ready;
6560  rtx *insnp;
6561
6562  if (sched_verbose)
6563    {
6564      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6565      dump_current_packet (dump);
6566    }
6567
6568	  /* Work around the pipeline flush that will occur if the results of
6569     an MM instruction are accessed before the result is ready.  Intel
6570     documentation says this only happens with IALU, ISHF, ILOG, LD,
6571     and ST consumers, but experimental evidence shows that *any* non-MM
6572	     type instruction will incur the flush.  */
6573  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6574    {
6575      for (insnp = ready; insnp < e_ready; insnp++)
6576	{
6577	  rtx insn = *insnp, link;
6578	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6579
6580	  if (t == ITANIUM_CLASS_MMMUL
6581	      || t == ITANIUM_CLASS_MMSHF
6582	      || t == ITANIUM_CLASS_MMSHFI)
6583	    continue;
6584
6585	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6586	    if (REG_NOTE_KIND (link) == 0)
6587	      {
6588		rtx other = XEXP (link, 0);
6589		enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6590		if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6591		  {
6592		    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6593		    goto out;
6594		  }
6595	      }
6596	}
6597    }
6598 out:
6599
6600  prev_first = sched_data.first_slot;
6601  prev_cycle = clock_var;
6602
6603  if (reorder_type == 0)
6604    maybe_rotate (sched_verbose ? dump : NULL);
6605
6606  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6607  n_asms = 0;
6608  for (insnp = ready; insnp < e_ready; insnp++)
6609    if (insnp < e_ready)
6610      {
6611	rtx insn = *insnp;
6612	enum attr_type t = ia64_safe_type (insn);
6613	if (t == TYPE_UNKNOWN)
6614	  {
6615	    if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6616		|| asm_noperands (PATTERN (insn)) >= 0)
6617	      {
6618		rtx lowest = ready[n_asms];
6619		ready[n_asms] = insn;
6620		*insnp = lowest;
6621		n_asms++;
6622	      }
6623	    else
6624	      {
6625		rtx highest = ready[n_ready - 1];
6626		ready[n_ready - 1] = insn;
6627		*insnp = highest;
6628		if (ia64_final_schedule && group_barrier_needed_p (insn))
6629		  {
6630		    schedule_stop (sched_verbose ? dump : NULL);
6631		    sched_data.last_was_stop = 1;
6632		    maybe_rotate (sched_verbose ? dump : NULL);
6633		  }
6634
6635		return 1;
6636	      }
6637	  }
6638      }
6639  if (n_asms < n_ready)
6640    {
6641      /* Some normal insns to process.  Skip the asms.  */
6642      ready += n_asms;
6643      n_ready -= n_asms;
6644    }
6645  else if (n_ready > 0)
6646    {
6647      /* Only asm insns left.  */
6648      if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6649	{
6650	  schedule_stop (sched_verbose ? dump : NULL);
6651	  sched_data.last_was_stop = 1;
6652	  maybe_rotate (sched_verbose ? dump : NULL);
6653	}
6654      cycle_end_fill_slots (sched_verbose ? dump : NULL);
6655      return 1;
6656    }
6657
6658  if (ia64_final_schedule)
6659    {
6660      int nr_need_stop = 0;
6661
6662      for (insnp = ready; insnp < e_ready; insnp++)
6663	if (safe_group_barrier_needed_p (*insnp))
6664	  nr_need_stop++;
6665
6666      /* Schedule a stop bit if
6667          - all insns require a stop bit, or
6668          - we are starting a new cycle and _any_ insns require a stop bit.
6669         The reason for the latter is that if our schedule is accurate, then
6670         the additional stop won't decrease performance at this point (since
6671	 there's a split issue at this point anyway), but it gives us more
6672         freedom when scheduling the currently ready insns.  */
6673      if ((reorder_type == 0 && nr_need_stop)
6674	  || (reorder_type == 1 && n_ready == nr_need_stop))
6675	{
6676	  schedule_stop (sched_verbose ? dump : NULL);
6677	  sched_data.last_was_stop = 1;
6678	  maybe_rotate (sched_verbose ? dump : NULL);
6679	  if (reorder_type == 1)
6680	    return 0;
6681	}
6682      else
6683	{
6684	  int deleted = 0;
6685	  insnp = e_ready;
6686	  /* Move down everything that needs a stop bit, preserving relative
6687	     order.  */
6688	  while (insnp-- > ready + deleted)
6689	    while (insnp >= ready + deleted)
6690	      {
6691		rtx insn = *insnp;
6692		if (! safe_group_barrier_needed_p (insn))
6693		  break;
6694		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6695		*ready = insn;
6696		deleted++;
6697	      }
6698	  n_ready -= deleted;
6699	  ready += deleted;
6700	  if (deleted != nr_need_stop)
6701	    abort ();
6702	}
6703    }
6704
6705  return itanium_reorder (sched_verbose ? dump : NULL,
6706			  ready, e_ready, reorder_type == 1);
6707}
6708
6709static int
6710ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6711     FILE *dump;
6712     int sched_verbose;
6713     rtx *ready;
6714     int *pn_ready;
6715     int clock_var;
6716{
6717  return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6718				      pn_ready, 0, clock_var);
6719}
6720
6721/* Like ia64_sched_reorder, but called after issuing each insn.
6722   Override the default sort algorithm to better slot instructions.  */
6723
6724static int
6725ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6726     FILE *dump ATTRIBUTE_UNUSED;
6727     int sched_verbose ATTRIBUTE_UNUSED;
6728     rtx *ready;
6729     int *pn_ready;
6730     int clock_var;
6731{
6732  if (sched_data.last_was_stop)
6733    return 0;
6734
6735  /* Detect one special case and try to optimize it.
6736     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6737     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
6738  if (sched_data.first_slot == 1
6739      && sched_data.stopbit[0]
6740      && ((sched_data.cur == 4
6741	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6742	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6743	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6744	  || (sched_data.cur == 3
6745	      && (sched_data.types[1] == TYPE_M
6746		  || sched_data.types[1] == TYPE_A)
6747	      && (sched_data.types[2] != TYPE_M
6748		  && sched_data.types[2] != TYPE_I
6749		  && sched_data.types[2] != TYPE_A))))
6750
6751    {
6752      int i, best;
6753      rtx stop = sched_data.insns[1];
6754
6755      /* Search backward for the stop bit that must be there.  */
6756      while (1)
6757	{
6758	  int insn_code;
6759
6760	  stop = PREV_INSN (stop);
6761	  if (GET_CODE (stop) != INSN)
6762	    abort ();
6763	  insn_code = recog_memoized (stop);
6764
6765	  /* Ignore .pred.rel.mutex.
6766
6767	     ??? Update this to ignore cycle display notes too
6768	     ??? once those are implemented  */
6769	  if (insn_code == CODE_FOR_pred_rel_mutex
6770	      || insn_code == CODE_FOR_prologue_use)
6771	    continue;
6772
6773	  if (insn_code == CODE_FOR_insn_group_barrier)
6774	    break;
6775	  abort ();
6776	}
6777
6778      /* Adjust the stop bit's slot selector.  */
6779      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6780	abort ();
6781      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6782
6783      sched_data.stopbit[0] = 0;
6784      sched_data.stopbit[2] = 1;
6785
6786      sched_data.types[5] = sched_data.types[3];
6787      sched_data.types[4] = sched_data.types[2];
6788      sched_data.types[3] = sched_data.types[1];
6789      sched_data.insns[5] = sched_data.insns[3];
6790      sched_data.insns[4] = sched_data.insns[2];
6791      sched_data.insns[3] = sched_data.insns[1];
6792      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6793      sched_data.cur += 2;
6794      sched_data.first_slot = 3;
6795      for (i = 0; i < NR_PACKETS; i++)
6796	{
6797	  const struct ia64_packet *p = packets + i;
6798	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6799	    {
6800	      sched_data.packet = p;
6801	      break;
6802	    }
6803	}
6804      rotate_one_bundle (sched_verbose ? dump : NULL);
6805
6806      best = 6;
6807      for (i = 0; i < NR_PACKETS; i++)
6808	{
6809	  const struct ia64_packet *p = packets + i;
6810	  int split = get_split (p, sched_data.first_slot);
6811	  int next;
6812
6813	  /* Disallow multiway branches here.  */
6814	  if (p->t[1] == TYPE_B)
6815	    continue;
6816
6817	  if (packet_matches_p (p, split, &next) && next < best)
6818	    {
6819	      best = next;
6820	      sched_data.packet = p;
6821	      sched_data.split = split;
6822	    }
6823	}
6824      if (best == 6)
6825	abort ();
6826    }
6827
6828  if (*pn_ready > 0)
6829    {
6830      int more = ia64_internal_sched_reorder (dump, sched_verbose,
6831					      ready, pn_ready, 1,
6832					      clock_var);
6833      if (more)
6834	return more;
6835      /* Did we schedule a stop?  If so, finish this cycle.  */
6836      if (sched_data.cur == sched_data.first_slot)
6837	return 0;
6838    }
6839
6840  if (sched_verbose)
6841    fprintf (dump, "//   Can't issue more this cycle; updating type array.\n");
6842
6843  cycle_end_fill_slots (sched_verbose ? dump : NULL);
6844  if (sched_verbose)
6845    dump_current_packet (dump);
6846  return 0;
6847}
6848
6849/* We are about to issue INSN.  Return the number of insns left on the
6850   ready queue that can be issued this cycle.  */
6851
6852static int
6853ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6854     FILE *dump;
6855     int sched_verbose;
6856     rtx insn;
6857     int can_issue_more ATTRIBUTE_UNUSED;
6858{
6859  enum attr_type t = ia64_safe_type (insn);
6860
6861  if (sched_data.last_was_stop)
6862    {
6863      int t = sched_data.first_slot;
6864      if (t == 0)
6865	t = 3;
6866      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6867      init_insn_group_barriers ();
6868      sched_data.last_was_stop = 0;
6869    }
6870
6871  if (t == TYPE_UNKNOWN)
6872    {
6873      if (sched_verbose)
6874	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6875      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6876	  || asm_noperands (PATTERN (insn)) >= 0)
6877	{
6878	  /* This must be some kind of asm.  Clear the scheduling state.  */
6879	  rotate_two_bundles (sched_verbose ? dump : NULL);
6880	  if (ia64_final_schedule)
6881	    group_barrier_needed_p (insn);
6882	}
6883      return 1;
6884    }
6885
6886  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
6887     important state info.  Don't delete this test.  */
6888  if (ia64_final_schedule
6889      && group_barrier_needed_p (insn))
6890    abort ();
6891
6892  sched_data.stopbit[sched_data.cur] = 0;
6893  sched_data.insns[sched_data.cur] = insn;
6894  sched_data.types[sched_data.cur] = t;
6895
6896  sched_data.cur++;
6897  if (sched_verbose)
6898    fprintf (dump, "// Scheduling insn %d of type %s\n",
6899	     INSN_UID (insn), type_names[t]);
6900
6901  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6902    {
6903      schedule_stop (sched_verbose ? dump : NULL);
6904      sched_data.last_was_stop = 1;
6905    }
6906
6907  return 1;
6908}
6909
6910/* Free data allocated by ia64_sched_init.  */
6911
6912static void
6913ia64_sched_finish (dump, sched_verbose)
6914     FILE *dump;
6915     int sched_verbose;
6916{
6917  if (sched_verbose)
6918    fprintf (dump, "// Finishing schedule.\n");
6919  rotate_two_bundles (NULL);
6920  free (sched_types);
6921  free (sched_ready);
6922}
6923
6924/* Emit pseudo-ops for the assembler to describe predicate relations.
6925   At present this assumes that we only consider predicate pairs to
6926   be mutex, and that the assembler can deduce proper values from
6927   straight-line code.  */
6928
6929static void
6930emit_predicate_relation_info ()
6931{
6932  basic_block bb;
6933
6934  FOR_EACH_BB_REVERSE (bb)
6935    {
6936      int r;
6937      rtx head = bb->head;
6938
6939      /* We only need such notes at code labels.  */
6940      if (GET_CODE (head) != CODE_LABEL)
6941	continue;
6942      if (GET_CODE (NEXT_INSN (head)) == NOTE
6943	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6944	head = NEXT_INSN (head);
6945
6946      for (r = PR_REG (0); r < PR_REG (64); r += 2)
6947	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6948	  {
6949	    rtx p = gen_rtx_REG (BImode, r);
6950	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6951	    if (head == bb->end)
6952	      bb->end = n;
6953	    head = n;
6954	  }
6955    }
6956
6957  /* Look for conditional calls that do not return, and protect predicate
6958     relations around them.  Otherwise the assembler will assume the call
6959     returns, and complain about uses of call-clobbered predicates after
6960     the call.  */
6961  FOR_EACH_BB_REVERSE (bb)
6962    {
6963      rtx insn = bb->head;
6964
6965      while (1)
6966	{
6967	  if (GET_CODE (insn) == CALL_INSN
6968	      && GET_CODE (PATTERN (insn)) == COND_EXEC
6969	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6970	    {
6971	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6972	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6973	      if (bb->head == insn)
6974		bb->head = b;
6975	      if (bb->end == insn)
6976		bb->end = a;
6977	    }
6978
6979	  if (insn == bb->end)
6980	    break;
6981	  insn = NEXT_INSN (insn);
6982	}
6983    }
6984}
6985
6986/* Generate a NOP instruction of type T.  We will never generate L type
6987   nops.  */
6988
6989static rtx
6990gen_nop_type (t)
6991     enum attr_type t;
6992{
6993  switch (t)
6994    {
6995    case TYPE_M:
6996      return gen_nop_m ();
6997    case TYPE_I:
6998      return gen_nop_i ();
6999    case TYPE_B:
7000      return gen_nop_b ();
7001    case TYPE_F:
7002      return gen_nop_f ();
7003    case TYPE_X:
7004      return gen_nop_x ();
7005    default:
7006      abort ();
7007    }
7008}
7009
7010/* After the last scheduling pass, fill in NOPs.  It's easier to do this
7011   here than while scheduling.  */
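
/* For illustration only (hypothetical assembly): if scheduling placed a real
   instruction in just the M slot of an .mfi bundle, this pass completes the
   bundle as

	{ .mfi
	  ld8 r14 = [r32]
	  nop.f 0
	  nop.i 0
	}

   so that every slot of every selected bundle template is occupied.  */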
7012
7013static void
7014ia64_emit_nops ()
7015{
7016  rtx insn;
7017  const struct bundle *b = 0;
7018  int bundle_pos = 0;
7019
7020  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7021    {
7022      rtx pat;
7023      enum attr_type t;
7024      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
7025      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
7026	continue;
7027      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
7028	  || GET_CODE (insn) == CODE_LABEL)
7029	{
7030	  if (b)
7031	    while (bundle_pos < 3)
7032	      {
7033		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7034		bundle_pos++;
7035	      }
7036	  if (GET_CODE (insn) != CODE_LABEL)
7037	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
7038	  else
7039	    b = 0;
7040	  bundle_pos = 0;
7041	  continue;
7042	}
7043      else if (GET_CODE (pat) == UNSPEC_VOLATILE
7044	       && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
7045	{
7046	  int t = INTVAL (XVECEXP (pat, 0, 0));
7047	  if (b)
7048	    while (bundle_pos < t)
7049	      {
7050		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7051		bundle_pos++;
7052	      }
7053	  continue;
7054	}
7055
7056      if (bundle_pos == 3)
7057	b = 0;
7058
7059      if (b && INSN_P (insn))
7060	{
7061	  t = ia64_safe_type (insn);
7062	  if (asm_noperands (PATTERN (insn)) >= 0
7063	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
7064	    {
7065	      while (bundle_pos < 3)
7066		{
7067		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7068		  bundle_pos++;
7069		}
7070	      continue;
7071	    }
7072
7073	  if (t == TYPE_UNKNOWN)
7074	    continue;
7075	  while (bundle_pos < 3)
7076	    {
7077	      if (t == b->t[bundle_pos]
7078		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
7079				      || b->t[bundle_pos] == TYPE_I)))
7080		break;
7081
7082	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7083	      bundle_pos++;
7084	    }
7085	  if (bundle_pos < 3)
7086	    bundle_pos++;
7087	}
7088    }
7089}
7090
7091/* Perform machine dependent operations on the rtl chain INSNS.  */
7092
7093void
7094ia64_reorg (insns)
7095     rtx insns;
7096{
7097  /* We are freeing block_for_insn in the toplev to keep compatibility
7098     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7099  compute_bb_for_insn ();
7100
7101  /* If optimizing, we'll have split before scheduling.  */
7102  if (optimize == 0)
7103    split_all_insns (0);
7104
7105  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7106     non-optimizing bootstrap.  */
7107  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7108
7109  if (ia64_flag_schedule_insns2)
7110    {
7111      timevar_push (TV_SCHED2);
7112      ia64_final_schedule = 1;
7113      schedule_ebbs (rtl_dump_file);
7114      ia64_final_schedule = 0;
7115      timevar_pop (TV_SCHED2);
7116
7117      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
7118	 place as they were during scheduling.  */
7119      emit_insn_group_barriers (rtl_dump_file, insns);
7120      ia64_emit_nops ();
7121    }
7122  else
7123    emit_all_insn_group_barriers (rtl_dump_file, insns);
7124
7125  /* A call must not be the last instruction in a function, so that the
7126     return address is still within the function and unwinding works
7127     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7128  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7129    {
7130      rtx insn;
7131      int saw_stop = 0;
7132
7133      insn = get_last_insn ();
7134      if (! INSN_P (insn))
7135        insn = prev_active_insn (insn);
7136      if (GET_CODE (insn) == INSN
7137	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7138	  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7139	{
7140	  saw_stop = 1;
7141	  insn = prev_active_insn (insn);
7142	}
7143      if (GET_CODE (insn) == CALL_INSN)
7144	{
7145	  if (! saw_stop)
7146	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7147	  emit_insn (gen_break_f ());
7148	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7149	}
7150    }
7151
7152  fixup_errata ();
7153  emit_predicate_relation_info ();
7154}
7155
7156/* Return true if REGNO is used by the epilogue.  */
7157
7158int
7159ia64_epilogue_uses (regno)
7160     int regno;
7161{
7162  switch (regno)
7163    {
7164    case R_GR (1):
7165      /* When a function makes a call through a function descriptor, we
7166         will write a (potentially) new value to "gp".  After returning
7167         from such a call, we need to make sure the function restores the
7168         original gp-value, even if the function itself does not use the
7169         gp anymore.  */
7170      return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC));
7171
7172    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7173    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7174      /* For functions defined with the syscall_linkage attribute, all
7175	 input registers are marked as live at all function exits.  This
7176	 prevents the register allocator from using the input registers,
7177	 which in turn makes it possible to restart a system call after
7178	 an interrupt without having to save/restore the input registers.
7179	 This also prevents kernel data from leaking to application code.  */
7180      return lookup_attribute ("syscall_linkage",
7181	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7182
7183    case R_BR (0):
7184      /* Conditional return patterns can't represent the use of `b0' as
7185         the return address, so we force the value live this way.  */
7186      return 1;
7187
7188    case AR_PFS_REGNUM:
7189      /* Likewise for ar.pfs, which is used by br.ret.  */
7190      return 1;
7191
7192    default:
7193      return 0;
7194    }
7195}
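
/* Illustrative sketch (hypothetical declaration, not from this file): how
   the syscall_linkage attribute tested above is typically applied.  */
#if 0
extern long sys_entry (long nr, long arg0, long arg1)
     __attribute__ ((syscall_linkage));
#endif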
7196
7197/* Return true if REGNO is used by the frame unwinder.  */
7198
7199int
7200ia64_eh_uses (regno)
7201     int regno;
7202{
7203  if (! reload_completed)
7204    return 0;
7205
7206  if (current_frame_info.reg_save_b0
7207      && regno == current_frame_info.reg_save_b0)
7208    return 1;
7209  if (current_frame_info.reg_save_pr
7210      && regno == current_frame_info.reg_save_pr)
7211    return 1;
7212  if (current_frame_info.reg_save_ar_pfs
7213      && regno == current_frame_info.reg_save_ar_pfs)
7214    return 1;
7215  if (current_frame_info.reg_save_ar_unat
7216      && regno == current_frame_info.reg_save_ar_unat)
7217    return 1;
7218  if (current_frame_info.reg_save_ar_lc
7219      && regno == current_frame_info.reg_save_ar_lc)
7220    return 1;
7221
7222  return 0;
7223}
7224
7225/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7226
7227   We add @ to the name if this goes in small data/bss.  We can only put
7228   a variable in small data/bss if it is defined in this module or a module
7229   that we are statically linked with.  We can't check the second condition,
7230   but TREE_STATIC gives us the first one.  */
7231
7232/* ??? If we had IPA, we could check the second condition.  We could support
7233   programmer added section attributes if the variable is not defined in this
7234   module.  */
7235
7236/* ??? See the v850 port for a cleaner way to do this.  */
7237
7238/* ??? We could also support own long data here, generating movl/add/ld8
7239   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
7240   code faster because there is one less load.  This also includes incomplete
7241   types which can't go in sdata/sbss.  */
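
/* A sketch (made-up declarations) of what the predicate below decides,
   assuming small data is enabled and a typical -G style size threshold
   such as 8 bytes.  */
#if 0
static int counter;					/* small -> .sbss     */
int limit __attribute__ ((section (".sdata"))) = 10;	/* explicit .sdata    */
static char big_buffer[4096];				/* too large -> .bss  */
#endif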
7242
7243static bool
7244ia64_in_small_data_p (exp)
7245     tree exp;
7246{
7247  if (TARGET_NO_SDATA)
7248    return false;
7249
7250  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7251    {
7252      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7253      if (strcmp (section, ".sdata") == 0
7254	  || strcmp (section, ".sbss") == 0)
7255	return true;
7256    }
7257  else
7258    {
7259      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7260
7261      /* If this is an incomplete type with size 0, then we can't put it
7262	 in sdata because it might be too big when completed.  */
7263      if (size > 0 && size <= ia64_section_threshold)
7264	return true;
7265    }
7266
7267  return false;
7268}
7269
7270static void
7271ia64_encode_section_info (decl, first)
7272     tree decl;
7273     int first ATTRIBUTE_UNUSED;
7274{
7275  const char *symbol_str;
7276  bool is_local;
7277  rtx symbol;
7278  char encoding = 0;
7279
7280  if (TREE_CODE (decl) == FUNCTION_DECL)
7281    {
7282      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7283      return;
7284    }
7285
7286  /* Careful not to prod global register variables.  */
7287  if (TREE_CODE (decl) != VAR_DECL
7288      || GET_CODE (DECL_RTL (decl)) != MEM
7289      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7290    return;
7291
7292  symbol = XEXP (DECL_RTL (decl), 0);
7293  symbol_str = XSTR (symbol, 0);
7294
7295  is_local = (*targetm.binds_local_p) (decl);
7296
7297  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7298    encoding = " GLil"[decl_tls_model (decl)];
7299  /* Determine if DECL will wind up in .sdata/.sbss.  */
7300  else if (is_local && ia64_in_small_data_p (decl))
7301    encoding = 's';
7302
7303  /* Finally, encode this into the symbol string.  */
7304  if (encoding)
7305    {
7306      char *newstr;
7307      size_t len;
7308
7309      if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7310	{
7311	  if (encoding == symbol_str[1])
7312	    return;
7313	  /* ??? Sdata became thread or thread became not thread.  Lose.  */
7314	  abort ();
7315	}
7316
7317      len = strlen (symbol_str);
7318      newstr = alloca (len + 3);
7319      newstr[0] = ENCODE_SECTION_INFO_CHAR;
7320      newstr[1] = encoding;
7321      memcpy (newstr + 2, symbol_str, len + 1);
7322
7323      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7324    }
7325
7326  /* This decl is marked as being in small data/bss but it shouldn't be;
7327     one likely explanation for this is that the decl has been moved into
7328     a different section from the one it was in when encode_section_info
7329     was first called.  Remove the encoding.  */
7330  else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7331    XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7332}
7333
7334static const char *
7335ia64_strip_name_encoding (str)
7336     const char *str;
7337{
7338  if (str[0] == ENCODE_SECTION_INFO_CHAR)
7339    str += 2;
7340  if (str[0] == '*')
7341    str++;
7342  return str;
7343}
7344
7345/* True if it is OK to do sibling call optimization for the specified
7346   call expression EXP.  DECL will be the called function, or NULL if
7347   this is an indirect call.  */
7348bool
7349ia64_function_ok_for_sibcall (decl)
7350     tree decl;
7351{
7352  /* Direct calls are always ok.  */
7353  if (decl)
7354    return true;
7355
7356  /* If TARGET_CONST_GP is in effect, then our caller expects us to
7357     return with our current GP.  This means that we'll always have
7358     a GP reload after an indirect call.  */
7359  return !ia64_epilogue_uses (R_GR (1));
7360}
7361
7362/* Output assembly directives for prologue regions.  */
7363
7364/* True if the current basic block is the last block of the function.  */
7365
7366static bool last_block;
7367
7368/* True if we need a copy_state command at the start of the next block.  */
7369
7370static bool need_copy_state;
7371
7372/* The function emits unwind directives for the start of an epilogue.  */
7373
7374static void
7375process_epilogue ()
7376{
7377  /* If this isn't the last block of the function, then we need to label the
7378     current state, and copy it back in at the start of the next block.  */
7379
7380  if (!last_block)
7381    {
7382      fprintf (asm_out_file, "\t.label_state 1\n");
7383      need_copy_state = true;
7384    }
7385
7386  fprintf (asm_out_file, "\t.restore sp\n");
7387}
7388
7389/* This function processes a SET pattern looking for specific patterns
7390   which result in emitting an assembly directive required for unwinding.  */
7391
7392static int
7393process_set (asm_out_file, pat)
7394     FILE *asm_out_file;
7395     rtx pat;
7396{
7397  rtx src = SET_SRC (pat);
7398  rtx dest = SET_DEST (pat);
7399  int src_regno, dest_regno;
7400
7401  /* Look for the ALLOC insn.  */
7402  if (GET_CODE (src) == UNSPEC_VOLATILE
7403      && XINT (src, 1) == UNSPECV_ALLOC
7404      && GET_CODE (dest) == REG)
7405    {
7406      dest_regno = REGNO (dest);
7407
7408      /* If this isn't the final destination for ar.pfs, the alloc
7409	 shouldn't have been marked frame related.  */
7410      if (dest_regno != current_frame_info.reg_save_ar_pfs)
7411	abort ();
7412
7413      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7414	       ia64_dbx_register_number (dest_regno));
7415      return 1;
7416    }
7417
7418  /* Look for SP = ....  */
7419  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7420    {
7421      if (GET_CODE (src) == PLUS)
7422        {
7423	  rtx op0 = XEXP (src, 0);
7424	  rtx op1 = XEXP (src, 1);
7425	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7426	    {
7427	      if (INTVAL (op1) < 0)
7428		{
7429		  fputs ("\t.fframe ", asm_out_file);
7430		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7431			   -INTVAL (op1));
7432		  fputc ('\n', asm_out_file);
7433		}
7434	      else
7435		process_epilogue ();
7436	    }
7437	  else
7438	    abort ();
7439	}
7440      else if (GET_CODE (src) == REG
7441	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7442	process_epilogue ();
7443      else
7444	abort ();
7445
7446      return 1;
7447    }
7448
7449  /* Register move we need to look at.  */
7450  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7451    {
7452      src_regno = REGNO (src);
7453      dest_regno = REGNO (dest);
7454
7455      switch (src_regno)
7456	{
7457	case BR_REG (0):
7458	  /* Saving return address pointer.  */
7459	  if (dest_regno != current_frame_info.reg_save_b0)
7460	    abort ();
7461	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7462		   ia64_dbx_register_number (dest_regno));
7463	  return 1;
7464
7465	case PR_REG (0):
7466	  if (dest_regno != current_frame_info.reg_save_pr)
7467	    abort ();
7468	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7469		   ia64_dbx_register_number (dest_regno));
7470	  return 1;
7471
7472	case AR_UNAT_REGNUM:
7473	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7474	    abort ();
7475	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7476		   ia64_dbx_register_number (dest_regno));
7477	  return 1;
7478
7479	case AR_LC_REGNUM:
7480	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7481	    abort ();
7482	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7483		   ia64_dbx_register_number (dest_regno));
7484	  return 1;
7485
7486	case STACK_POINTER_REGNUM:
7487	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7488	      || ! frame_pointer_needed)
7489	    abort ();
7490	  fprintf (asm_out_file, "\t.vframe r%d\n",
7491		   ia64_dbx_register_number (dest_regno));
7492	  return 1;
7493
7494	default:
7495	  /* Everything else should indicate being stored to memory.  */
7496	  abort ();
7497	}
7498    }
7499
7500  /* Memory store we need to look at.  */
7501  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7502    {
7503      long off;
7504      rtx base;
7505      const char *saveop;
7506
7507      if (GET_CODE (XEXP (dest, 0)) == REG)
7508	{
7509	  base = XEXP (dest, 0);
7510	  off = 0;
7511	}
7512      else if (GET_CODE (XEXP (dest, 0)) == PLUS
7513	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7514	{
7515	  base = XEXP (XEXP (dest, 0), 0);
7516	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7517	}
7518      else
7519	abort ();
7520
7521      if (base == hard_frame_pointer_rtx)
7522	{
7523	  saveop = ".savepsp";
7524	  off = - off;
7525	}
7526      else if (base == stack_pointer_rtx)
7527	saveop = ".savesp";
7528      else
7529	abort ();
7530
7531      src_regno = REGNO (src);
7532      switch (src_regno)
7533	{
7534	case BR_REG (0):
7535	  if (current_frame_info.reg_save_b0 != 0)
7536	    abort ();
7537	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7538	  return 1;
7539
7540	case PR_REG (0):
7541	  if (current_frame_info.reg_save_pr != 0)
7542	    abort ();
7543	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7544	  return 1;
7545
7546	case AR_LC_REGNUM:
7547	  if (current_frame_info.reg_save_ar_lc != 0)
7548	    abort ();
7549	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7550	  return 1;
7551
7552	case AR_PFS_REGNUM:
7553	  if (current_frame_info.reg_save_ar_pfs != 0)
7554	    abort ();
7555	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7556	  return 1;
7557
7558	case AR_UNAT_REGNUM:
7559	  if (current_frame_info.reg_save_ar_unat != 0)
7560	    abort ();
7561	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7562	  return 1;
7563
7564	case GR_REG (4):
7565	case GR_REG (5):
7566	case GR_REG (6):
7567	case GR_REG (7):
7568	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7569		   1 << (src_regno - GR_REG (4)));
7570	  return 1;
7571
7572	case BR_REG (1):
7573	case BR_REG (2):
7574	case BR_REG (3):
7575	case BR_REG (4):
7576	case BR_REG (5):
7577	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
7578		   1 << (src_regno - BR_REG (1)));
7579	  return 1;
7580
7581	case FR_REG (2):
7582	case FR_REG (3):
7583	case FR_REG (4):
7584	case FR_REG (5):
7585	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
7586		   1 << (src_regno - FR_REG (2)));
7587	  return 1;
7588
7589	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7590	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7591	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7592	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7593	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7594		   1 << (src_regno - FR_REG (12)));
7595	  return 1;
7596
7597	default:
7598	  return 0;
7599	}
7600    }
7601
7602  return 0;
7603}
7604
7605
7606/* This function looks at a single insn and emits any directives
7607   required to unwind this insn.  */
7608void
7609process_for_unwind_directive (asm_out_file, insn)
7610     FILE *asm_out_file;
7611     rtx insn;
7612{
7613  if (flag_unwind_tables
7614      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7615    {
7616      rtx pat;
7617
7618      if (GET_CODE (insn) == NOTE
7619	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7620	{
7621	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7622
7623	  /* Restore unwind state from immediately before the epilogue.  */
7624	  if (need_copy_state)
7625	    {
7626	      fprintf (asm_out_file, "\t.body\n");
7627	      fprintf (asm_out_file, "\t.copy_state 1\n");
7628	      need_copy_state = false;
7629	    }
7630	}
7631
7632      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7633	return;
7634
7635      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7636      if (pat)
7637	pat = XEXP (pat, 0);
7638      else
7639	pat = PATTERN (insn);
7640
7641      switch (GET_CODE (pat))
7642        {
7643	case SET:
7644	  process_set (asm_out_file, pat);
7645	  break;
7646
7647	case PARALLEL:
7648	  {
7649	    int par_index;
7650	    int limit = XVECLEN (pat, 0);
7651	    for (par_index = 0; par_index < limit; par_index++)
7652	      {
7653		rtx x = XVECEXP (pat, 0, par_index);
7654		if (GET_CODE (x) == SET)
7655		  process_set (asm_out_file, x);
7656	      }
7657	    break;
7658	  }
7659
7660	default:
7661	  abort ();
7662	}
7663    }
7664}
7665
7666
7667void
7668ia64_init_builtins ()
7669{
7670  tree psi_type_node = build_pointer_type (integer_type_node);
7671  tree pdi_type_node = build_pointer_type (long_integer_type_node);
7672
7673  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7674  tree si_ftype_psi_si_si
7675    = build_function_type_list (integer_type_node,
7676				psi_type_node, integer_type_node,
7677				integer_type_node, NULL_TREE);
7678
7679  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7680  tree di_ftype_pdi_di_di
7681    = build_function_type_list (long_integer_type_node,
7682				pdi_type_node, long_integer_type_node,
7683				long_integer_type_node, NULL_TREE);
7684  /* __sync_synchronize */
7685  tree void_ftype_void
7686    = build_function_type (void_type_node, void_list_node);
7687
7688  /* __sync_lock_test_and_set_si */
7689  tree si_ftype_psi_si
7690    = build_function_type_list (integer_type_node,
7691				psi_type_node, integer_type_node, NULL_TREE);
7692
7693  /* __sync_lock_test_and_set_di */
7694  tree di_ftype_pdi_di
7695    = build_function_type_list (long_integer_type_node,
7696				pdi_type_node, long_integer_type_node,
7697				NULL_TREE);
7698
7699  /* __sync_lock_release_si */
7700  tree void_ftype_psi
7701    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7702
7703  /* __sync_lock_release_di */
7704  tree void_ftype_pdi
7705    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7706
7707#define def_builtin(name, type, code) \
7708  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7709
7710  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7711	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7712  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7713	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7714  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7715	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7716  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7717	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7718
7719  def_builtin ("__sync_synchronize", void_ftype_void,
7720	       IA64_BUILTIN_SYNCHRONIZE);
7721
7722  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7723	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7724  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7725	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7726  def_builtin ("__sync_lock_release_si", void_ftype_psi,
7727	       IA64_BUILTIN_LOCK_RELEASE_SI);
7728  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7729	       IA64_BUILTIN_LOCK_RELEASE_DI);
7730
7731  def_builtin ("__builtin_ia64_bsp",
7732	       build_function_type (ptr_type_node, void_list_node),
7733	       IA64_BUILTIN_BSP);
7734
7735  def_builtin ("__builtin_ia64_flushrs",
7736	       build_function_type (void_type_node, void_list_node),
7737	       IA64_BUILTIN_FLUSHRS);
7738
7739  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7740	       IA64_BUILTIN_FETCH_AND_ADD_SI);
7741  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7742	       IA64_BUILTIN_FETCH_AND_SUB_SI);
7743  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7744	       IA64_BUILTIN_FETCH_AND_OR_SI);
7745  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7746	       IA64_BUILTIN_FETCH_AND_AND_SI);
7747  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7748	       IA64_BUILTIN_FETCH_AND_XOR_SI);
7749  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7750	       IA64_BUILTIN_FETCH_AND_NAND_SI);
7751
7752  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7753	       IA64_BUILTIN_ADD_AND_FETCH_SI);
7754  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7755	       IA64_BUILTIN_SUB_AND_FETCH_SI);
7756  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7757	       IA64_BUILTIN_OR_AND_FETCH_SI);
7758  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7759	       IA64_BUILTIN_AND_AND_FETCH_SI);
7760  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7761	       IA64_BUILTIN_XOR_AND_FETCH_SI);
7762  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7763	       IA64_BUILTIN_NAND_AND_FETCH_SI);
7764
7765  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7766	       IA64_BUILTIN_FETCH_AND_ADD_DI);
7767  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7768	       IA64_BUILTIN_FETCH_AND_SUB_DI);
7769  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7770	       IA64_BUILTIN_FETCH_AND_OR_DI);
7771  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7772	       IA64_BUILTIN_FETCH_AND_AND_DI);
7773  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7774	       IA64_BUILTIN_FETCH_AND_XOR_DI);
7775  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7776	       IA64_BUILTIN_FETCH_AND_NAND_DI);
7777
7778  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7779	       IA64_BUILTIN_ADD_AND_FETCH_DI);
7780  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7781	       IA64_BUILTIN_SUB_AND_FETCH_DI);
7782  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7783	       IA64_BUILTIN_OR_AND_FETCH_DI);
7784  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7785	       IA64_BUILTIN_AND_AND_FETCH_DI);
7786  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7787	       IA64_BUILTIN_XOR_AND_FETCH_DI);
7788  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7789	       IA64_BUILTIN_NAND_AND_FETCH_DI);
7790
7791#undef def_builtin
7792}
7793
7794/* Expand fetch_and_op intrinsics.  The basic code sequence is:
7795
7796     mf
7797     tmp = [ptr];
7798     do {
7799       ret = tmp;
7800       ar.ccv = tmp;
7801       tmp <op>= value;
7802       cmpxchgsz.acq tmp = [ptr], tmp
7803     } while (tmp != ret)
7804*/
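
/* Illustrative sketch (made-up application code): using the fetch_and_op
   builtins expanded here.  __sync_fetch_and_add_si returns the value the
   word held before the addition; a small constant increment like this one
   maps onto the single fetchadd.acq special case below.  */
#if 0
static int ticket;

static int
take_ticket (void)
{
  return __sync_fetch_and_add_si (&ticket, 1);	/* old value of TICKET */
}
#endif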
7805
7806static rtx
7807ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7808     optab binoptab;
7809     enum machine_mode mode;
7810     tree arglist;
7811     rtx target;
7812{
7813  rtx ret, label, tmp, ccv, insn, mem, value;
7814  tree arg0, arg1;
7815
7816  arg0 = TREE_VALUE (arglist);
7817  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7818  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7819#ifdef POINTERS_EXTEND_UNSIGNED
7820  if (GET_MODE(mem) != Pmode)
7821    mem = convert_memory_address (Pmode, mem);
7822#endif
7823  value = expand_expr (arg1, NULL_RTX, mode, 0);
7824
7825  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7826  MEM_VOLATILE_P (mem) = 1;
7827
7828  if (target && register_operand (target, mode))
7829    ret = target;
7830  else
7831    ret = gen_reg_rtx (mode);
7832
7833  emit_insn (gen_mf ());
7834
7835  /* Special case for fetchadd instructions.  */
7836  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7837    {
7838      if (mode == SImode)
7839        insn = gen_fetchadd_acq_si (ret, mem, value);
7840      else
7841        insn = gen_fetchadd_acq_di (ret, mem, value);
7842      emit_insn (insn);
7843      return ret;
7844    }
7845
7846  tmp = gen_reg_rtx (mode);
7847  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7848  emit_move_insn (tmp, mem);
7849
7850  label = gen_label_rtx ();
7851  emit_label (label);
7852  emit_move_insn (ret, tmp);
7853  emit_move_insn (ccv, tmp);
7854
7855  /* Perform the specific operation.  NAND is special-cased: it arrives
7856     here as one_cmpl_optab, so complement TMP first and then AND it with VALUE.  */
7857  if (binoptab == one_cmpl_optab)
7858    {
7859      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7860      binoptab = and_optab;
7861    }
7862  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7863
7864  if (mode == SImode)
7865    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7866  else
7867    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7868  emit_insn (insn);
7869
7870  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7871
7872  return ret;
7873}
7874
7875/* Expand op_and_fetch intrinsics.  The basic code sequence is:
7876
7877     mf
7878     tmp = [ptr];
7879     do {
7880       old = tmp;
7881       ar.ccv = tmp;
7882       ret = tmp <op> value;
7883       cmpxchgsz.acq tmp = [ptr], ret
7884     } while (tmp != old)
7885*/
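
/* Illustrative sketch (made-up application code): the op_and_fetch variants
   return the value after the operation, so __sync_sub_and_fetch_si acts
   like an atomic "x -= n; return x;".  */
#if 0
static int refcount;

static int
release_ref (void)
{
  return __sync_sub_and_fetch_si (&refcount, 1);  /* new value; 0 == last */
}
#endif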
7886
7887static rtx
7888ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7889     optab binoptab;
7890     enum machine_mode mode;
7891     tree arglist;
7892     rtx target;
7893{
7894  rtx old, label, tmp, ret, ccv, insn, mem, value;
7895  tree arg0, arg1;
7896
7897  arg0 = TREE_VALUE (arglist);
7898  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7899  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7900#ifdef POINTERS_EXTEND_UNSIGNED
7901  if (GET_MODE(mem) != Pmode)
7902    mem = convert_memory_address (Pmode, mem);
7903#endif
7904
7905  value = expand_expr (arg1, NULL_RTX, mode, 0);
7906
7907  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7908  MEM_VOLATILE_P (mem) = 1;
7909
7910  if (target && ! register_operand (target, mode))
7911    target = NULL_RTX;
7912
7913  emit_insn (gen_mf ());
7914  tmp = gen_reg_rtx (mode);
7915  old = gen_reg_rtx (mode);
7916  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7917
7918  emit_move_insn (tmp, mem);
7919
7920  label = gen_label_rtx ();
7921  emit_label (label);
7922  emit_move_insn (old, tmp);
7923  emit_move_insn (ccv, tmp);
7924
7925  /* Perform the specific operation.  NAND is special-cased: it arrives
7926     here as one_cmpl_optab, so complement TMP first and then AND it with VALUE.  */
7927  if (binoptab == one_cmpl_optab)
7928    {
7929      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7930      binoptab = and_optab;
7931    }
7932  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7933
7934  if (mode == SImode)
7935    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7936  else
7937    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7938  emit_insn (insn);
7939
7940  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7941
7942  return ret;
7943}
7944
7945/* Expand val_ and bool_compare_and_swap.  For val_ we want:
7946
7947     ar.ccv = oldval
7948     mf
7949     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7950     return ret
7951
7952   For bool_ it's the same except return ret == oldval.
7953*/
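
/* Illustrative sketch (made-up application code): a lock-free update loop
   built on the compare-and-swap builtins expanded here.
   __sync_bool_compare_and_swap_si returns nonzero iff the word still held
   CUR and was replaced by V.  */
#if 0
static int high_water;

static void
note_value (int v)
{
  int cur;
  do
    cur = high_water;
  while (cur < v
	 && ! __sync_bool_compare_and_swap_si (&high_water, cur, v));
}
#endif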
7954
7955static rtx
7956ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7957     enum machine_mode mode;
7958     int boolp;
7959     tree arglist;
7960     rtx target;
7961{
7962  tree arg0, arg1, arg2;
7963  rtx mem, old, new, ccv, tmp, insn;
7964
7965  arg0 = TREE_VALUE (arglist);
7966  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7967  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7968  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7969  old = expand_expr (arg1, NULL_RTX, mode, 0);
7970  new = expand_expr (arg2, NULL_RTX, mode, 0);
7971
7972  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7973  MEM_VOLATILE_P (mem) = 1;
7974
7975  if (! register_operand (old, mode))
7976    old = copy_to_mode_reg (mode, old);
7977  if (! register_operand (new, mode))
7978    new = copy_to_mode_reg (mode, new);
7979
7980  if (! boolp && target && register_operand (target, mode))
7981    tmp = target;
7982  else
7983    tmp = gen_reg_rtx (mode);
7984
7985  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7986  if (mode == DImode)
7987    emit_move_insn (ccv, old);
7988  else
7989    {
7990      rtx ccvtmp = gen_reg_rtx (DImode);
7991      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
7992      emit_move_insn (ccv, ccvtmp);
7993    }
7994  emit_insn (gen_mf ());
7995  if (mode == SImode)
7996    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7997  else
7998    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7999  emit_insn (insn);
8000
8001  if (boolp)
8002    {
8003      if (! target)
8004	target = gen_reg_rtx (mode);
8005      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8006    }
8007  else
8008    return tmp;
8009}
8010
8011/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
8012
8013static rtx
8014ia64_expand_lock_test_and_set (mode, arglist, target)
8015     enum machine_mode mode;
8016     tree arglist;
8017     rtx target;
8018{
8019  tree arg0, arg1;
8020  rtx mem, new, ret, insn;
8021
8022  arg0 = TREE_VALUE (arglist);
8023  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8024  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8025  new = expand_expr (arg1, NULL_RTX, mode, 0);
8026
8027  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8028  MEM_VOLATILE_P (mem) = 1;
8029  if (! register_operand (new, mode))
8030    new = copy_to_mode_reg (mode, new);
8031
8032  if (target && register_operand (target, mode))
8033    ret = target;
8034  else
8035    ret = gen_reg_rtx (mode);
8036
8037  if (mode == SImode)
8038    insn = gen_xchgsi (ret, mem, new);
8039  else
8040    insn = gen_xchgdi (ret, mem, new);
8041  emit_insn (insn);
8042
8043  return ret;
8044}
8045
8046/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8047
8048static rtx
8049ia64_expand_lock_release (mode, arglist, target)
8050     enum machine_mode mode;
8051     tree arglist;
8052     rtx target ATTRIBUTE_UNUSED;
8053{
8054  tree arg0;
8055  rtx mem;
8056
8057  arg0 = TREE_VALUE (arglist);
8058  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8059
8060  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8061  MEM_VOLATILE_P (mem) = 1;
8062
8063  emit_move_insn (mem, const0_rtx);
8064
8065  return const0_rtx;
8066}
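
/* Illustrative sketch (made-up application code): a minimal spin lock built
   from the two builtins expanded above.  __sync_lock_test_and_set_si is an
   atomic exchange with acquire semantics that returns the previous value;
   __sync_lock_release_si stores zero with release semantics.  */
#if 0
static int lock_word;

static void
spin_lock (void)
{
  while (__sync_lock_test_and_set_si (&lock_word, 1) != 0)
    continue;			/* spin until the old value was 0 */
}

static void
spin_unlock (void)
{
  __sync_lock_release_si (&lock_word);
}
#endif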
8067
8068rtx
8069ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8070     tree exp;
8071     rtx target;
8072     rtx subtarget ATTRIBUTE_UNUSED;
8073     enum machine_mode mode ATTRIBUTE_UNUSED;
8074     int ignore ATTRIBUTE_UNUSED;
8075{
8076  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8077  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8078  tree arglist = TREE_OPERAND (exp, 1);
8079
8080  switch (fcode)
8081    {
8082    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8083    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8084    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8085    case IA64_BUILTIN_LOCK_RELEASE_SI:
8086    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8087    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8088    case IA64_BUILTIN_FETCH_AND_OR_SI:
8089    case IA64_BUILTIN_FETCH_AND_AND_SI:
8090    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8091    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8092    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8093    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8094    case IA64_BUILTIN_OR_AND_FETCH_SI:
8095    case IA64_BUILTIN_AND_AND_FETCH_SI:
8096    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8097    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8098      mode = SImode;
8099      break;
8100
8101    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8102    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8103    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8104    case IA64_BUILTIN_LOCK_RELEASE_DI:
8105    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8106    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8107    case IA64_BUILTIN_FETCH_AND_OR_DI:
8108    case IA64_BUILTIN_FETCH_AND_AND_DI:
8109    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8110    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8111    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8112    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8113    case IA64_BUILTIN_OR_AND_FETCH_DI:
8114    case IA64_BUILTIN_AND_AND_FETCH_DI:
8115    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8116    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8117      mode = DImode;
8118      break;
8119
8120    default:
8121      break;
8122    }
8123
8124  switch (fcode)
8125    {
8126    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8127    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8128      return ia64_expand_compare_and_swap (mode, 1, arglist, target);
8129
8130    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8131    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8132      return ia64_expand_compare_and_swap (mode, 0, arglist, target);
8133
8134    case IA64_BUILTIN_SYNCHRONIZE:
8135      emit_insn (gen_mf ());
8136      return const0_rtx;
8137
8138    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8139    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8140      return ia64_expand_lock_test_and_set (mode, arglist, target);
8141
8142    case IA64_BUILTIN_LOCK_RELEASE_SI:
8143    case IA64_BUILTIN_LOCK_RELEASE_DI:
8144      return ia64_expand_lock_release (mode, arglist, target);
8145
8146    case IA64_BUILTIN_BSP:
8147      if (! target || ! register_operand (target, DImode))
8148	target = gen_reg_rtx (DImode);
8149      emit_insn (gen_bsp_value (target));
8150      return target;
8151
8152    case IA64_BUILTIN_FLUSHRS:
8153      emit_insn (gen_flushrs ());
8154      return const0_rtx;
8155
8156    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8157    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8158      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8159
8160    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8161    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8162      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8163
8164    case IA64_BUILTIN_FETCH_AND_OR_SI:
8165    case IA64_BUILTIN_FETCH_AND_OR_DI:
8166      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8167
8168    case IA64_BUILTIN_FETCH_AND_AND_SI:
8169    case IA64_BUILTIN_FETCH_AND_AND_DI:
8170      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8171
8172    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8173    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8174      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8175
8176    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8177    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8178      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8179
8180    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8181    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8182      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8183
8184    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8185    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8186      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8187
8188    case IA64_BUILTIN_OR_AND_FETCH_SI:
8189    case IA64_BUILTIN_OR_AND_FETCH_DI:
8190      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8191
8192    case IA64_BUILTIN_AND_AND_FETCH_SI:
8193    case IA64_BUILTIN_AND_AND_FETCH_DI:
8194      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8195
8196    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8197    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8198      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8199
8200    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8201    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8202      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8203
8204    default:
8205      break;
8206    }
8207
8208  return NULL_RTX;
8209}
8210
8211/* On HP-UX IA64, aggregate parameters are passed in the most significant
8212   bits of the stack slot.  */
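
/* Illustrative sketch (hypothetical type): a three-byte aggregate falls
   under the AGGREGATE_TYPE_P exception below (size < UNITS_PER_WORD) and is
   padded upward, i.e. placed in the most significant bytes of its slot.  */
#if 0
struct rgb { unsigned char r, g, b; };	/* int_size_in_bytes == 3 */
extern void emit_pixel (struct rgb);
#endif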
8213
8214enum direction
8215ia64_hpux_function_arg_padding (mode, type)
8216     enum machine_mode mode;
8217     tree type;
8218{
8219   /* Exception to normal case for structures/unions/etc.  */
8220
8221   if (type && AGGREGATE_TYPE_P (type)
8222       && int_size_in_bytes (type) < UNITS_PER_WORD)
8223     return upward;
8224
8225   /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8226      hardwired to be true.  */
8227
8228   return((mode == BLKmode
8229       ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8230          && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8231       : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8232      ? downward : upward);
8233}
8234
8235/* Linked list of all external functions that are to be emitted by GCC.
8236   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8237   order to avoid putting out names that are never really used.  */
8238
8239struct extern_func_list
8240{
8241  struct extern_func_list *next; /* next external */
8242  char *name;                    /* name of the external */
8243} *extern_func_head = 0;
8244
8245static void
8246ia64_hpux_add_extern_decl (name)
8247        const char *name;
8248{
8249  struct extern_func_list *p;
8250
8251  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8252  p->name = xmalloc (strlen (name) + 1);
8253  strcpy(p->name, name);
8254  p->next = extern_func_head;
8255  extern_func_head = p;
8256}
8257
8258/* Print out the list of used global functions.  */
8259
8260void
8261ia64_hpux_asm_file_end (file)
8262	FILE *file;
8263{
8264  while (extern_func_head)
8265    {
8266      const char *real_name;
8267      tree decl;
8268
8269      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8270      decl = maybe_get_identifier (real_name);
8271
8272      if (!decl
8273	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8274        {
8275	  if (decl)
8276	    TREE_ASM_WRITTEN (decl) = 1;
8277	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
8278	  fprintf (file, "%s", TYPE_ASM_OP);
8279	  assemble_name (file, extern_func_head->name);
8280	  putc (',', file);
8281	  fprintf (file, TYPE_OPERAND_FMT, "function");
8282	  putc ('\n', file);
8283        }
8284      extern_func_head = extern_func_head->next;
8285    }
8286}
8287
8288
8289/* Switch to the section to which we should output X.  The only thing
8290   special we do here is to honor small data.  */
8291
8292static void
8293ia64_select_rtx_section (mode, x, align)
8294     enum machine_mode mode;
8295     rtx x;
8296     unsigned HOST_WIDE_INT align;
8297{
8298  if (GET_MODE_SIZE (mode) > 0
8299      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8300    sdata_section ();
8301  else
8302    default_elf_select_rtx_section (mode, x, align);
8303}
8304
8305/* It is illegal to have relocations in shared segments on AIX and HPUX.
8306   Pretend flag_pic is always set.  */
8307
8308static void
8309ia64_rwreloc_select_section (exp, reloc, align)
8310     tree exp;
8311     int reloc;
8312     unsigned HOST_WIDE_INT align;
8313{
8314  default_elf_select_section_1 (exp, reloc, align, true);
8315}
8316
8317static void
8318ia64_rwreloc_unique_section (decl, reloc)
8319     tree decl;
8320     int reloc;
8321{
8322  default_unique_section_1 (decl, reloc, true);
8323}
8324
8325static void
8326ia64_rwreloc_select_rtx_section (mode, x, align)
8327     enum machine_mode mode;
8328     rtx x;
8329     unsigned HOST_WIDE_INT align;
8330{
8331  int save_pic = flag_pic;
8332  flag_pic = 1;
8333  ia64_select_rtx_section (mode, x, align);
8334  flag_pic = save_pic;
8335}
8336
8337static unsigned int
8338ia64_rwreloc_section_type_flags (decl, name, reloc)
8339     tree decl;
8340     const char *name;
8341     int reloc;
8342{
8343  return default_section_type_flags_1 (decl, name, reloc, true);
8344}
8345
8346
8347/* Output the assembler code for a thunk function.  THUNK is the
8348   declaration for the thunk function itself, FUNCTION is the decl for
8349   the target function.  DELTA is an immediate constant offset to be
8350   added to THIS.  If VCALL_OFFSET is non-zero, the word at
8351   *(*this + vcall_offset) should be added to THIS.  */
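
/* Equivalent C for the adjustment the emitted thunk applies to the incoming
   `this' pointer before tail-calling FUNCTION; a sketch for exposition only,
   never compiled.  */
#if 0
static void *
adjust_this (char *this, long delta, long vcall_offset)
{
  this += delta;				/* constant offset           */
  if (vcall_offset)
    {
      char *vptr = *(char **) this;		/* load the vtable pointer   */
      this += *(long *) (vptr + vcall_offset);	/* add the stored adjustment */
    }
  return this;
}
#endif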
8352
8353static void
8354ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8355     FILE *file;
8356     tree thunk ATTRIBUTE_UNUSED;
8357     HOST_WIDE_INT delta;
8358     HOST_WIDE_INT vcall_offset;
8359     tree function;
8360{
8361  rtx this, insn, funexp;
8362
8363  reload_completed = 1;
8364  no_new_pseudos = 1;
8365
8366  /* Set things up as ia64_expand_prologue might.  */
8367  last_scratch_gr_reg = 15;
8368
8369  memset (&current_frame_info, 0, sizeof (current_frame_info));
8370  current_frame_info.spill_cfa_off = -16;
8371  current_frame_info.n_input_regs = 1;
8372  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8373
8374  if (!TARGET_REG_NAMES)
8375    reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8376
8377  /* Mark the end of the (empty) prologue.  */
8378  emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8379
8380  this = gen_rtx_REG (Pmode, IN_REG (0));
8381
8382  /* Apply the constant offset, if required.  */
8383  if (delta)
8384    {
8385      rtx delta_rtx = GEN_INT (delta);
8386
8387      if (!CONST_OK_FOR_I (delta))
8388	{
8389	  rtx tmp = gen_rtx_REG (Pmode, 2);
8390	  emit_move_insn (tmp, delta_rtx);
8391	  delta_rtx = tmp;
8392	}
8393      emit_insn (gen_adddi3 (this, this, delta_rtx));
8394    }
8395
8396  /* Apply the offset from the vtable, if required.  */
8397  if (vcall_offset)
8398    {
8399      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8400      rtx tmp = gen_rtx_REG (Pmode, 2);
8401
8402      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8403
8404      if (!CONST_OK_FOR_J (vcall_offset))
8405	{
8406	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8407	  emit_move_insn (tmp2, vcall_offset_rtx);
8408	  vcall_offset_rtx = tmp2;
8409	}
8410      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8411
8412      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8413
8414      emit_insn (gen_adddi3 (this, this, tmp));
8415    }
8416
8417  /* Generate a tail call to the target function.  */
8418  if (! TREE_USED (function))
8419    {
8420      assemble_external (function);
8421      TREE_USED (function) = 1;
8422    }
8423  funexp = XEXP (DECL_RTL (function), 0);
8424  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8425  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8426  insn = get_last_insn ();
8427  SIBLING_CALL_P (insn) = 1;
8428
8429  /* Code generation for calls relies on splitting.  */
8430  reload_completed = 1;
8431  try_split (PATTERN (insn), insn, 0);
8432
8433  emit_barrier ();
8434
8435  /* Run just enough of rest_of_compilation to get the insns emitted.
8436     There's not really enough bulk here to make other passes such as
8437     instruction scheduling worth while.  Note that use_thunk calls
8438     assemble_start_function and assemble_end_function.  */
8439
8440  insn = get_insns ();
8441  emit_all_insn_group_barriers (NULL, insn);
8442  shorten_branches (insn);
8443  final_start_function (insn, file, 1);
8444  final (insn, file, 1, 0);
8445  final_end_function ();
8446
8447  reload_completed = 0;
8448  no_new_pseudos = 0;
8449}
8450
8451#include "gt-ia64.h"
8452