ia64.c revision 119256
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3   Contributed by James E. Wilson <wilson@cygnus.com> and
4   		  David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GNU CC.
7
8GNU CC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 2, or (at your option)
11any later version.
12
13GNU CC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU CC; see the file COPYING.  If not, write to
20the Free Software Foundation, 59 Temple Place - Suite 330,
21Boston, MA 02111-1307, USA.  */
22
23#include "config.h"
24#include "system.h"
25#include "rtl.h"
26#include "tree.h"
27#include "regs.h"
28#include "hard-reg-set.h"
29#include "real.h"
30#include "insn-config.h"
31#include "conditions.h"
32#include "output.h"
33#include "insn-attr.h"
34#include "flags.h"
35#include "recog.h"
36#include "expr.h"
37#include "optabs.h"
38#include "except.h"
39#include "function.h"
40#include "ggc.h"
41#include "basic-block.h"
42#include "toplev.h"
43#include "sched-int.h"
44#include "timevar.h"
45#include "target.h"
46#include "target-def.h"
47#include "tm_p.h"
48#include "langhooks.h"
49
50/* This is used for communication between ASM_OUTPUT_LABEL and
51   ASM_OUTPUT_LABELREF.  */
52int ia64_asm_output_label = 0;
53
54/* Define the information needed to generate branch and scc insns.  This is
55   stored from the compare operation.  */
56struct rtx_def * ia64_compare_op0;
57struct rtx_def * ia64_compare_op1;
58
59/* Register names for ia64_expand_prologue.  */
60static const char * const ia64_reg_numbers[96] =
61{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
62  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
63  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
64  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
65  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
66  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
67  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
68  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
69  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
70  "r104","r105","r106","r107","r108","r109","r110","r111",
71  "r112","r113","r114","r115","r116","r117","r118","r119",
72  "r120","r121","r122","r123","r124","r125","r126","r127"};
73
74/* ??? These strings could be shared with REGISTER_NAMES.  */
75static const char * const ia64_input_reg_names[8] =
76{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
77
78/* ??? These strings could be shared with REGISTER_NAMES.  */
79static const char * const ia64_local_reg_names[80] =
80{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
81  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
82  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
83  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
84  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
85  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
86  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
87  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
88  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
89  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
90
91/* ??? These strings could be shared with REGISTER_NAMES.  */
92static const char * const ia64_output_reg_names[8] =
93{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
94
95/* String used with the -mfixed-range= option.  */
96const char *ia64_fixed_range_string;
97
98/* Determines whether we use adds, addl, or movl to generate our
99   TLS immediate offsets.  */
100int ia64_tls_size = 22;
101
102/* String used with the -mtls-size= option.  */
103const char *ia64_tls_size_string;
104
105/* Determines whether we run our final scheduling pass or not.  We always
106   avoid the normal second scheduling pass.  */
107static int ia64_flag_schedule_insns2;
108
109/* Variables which are this size or smaller are put in the sdata/sbss
110   sections.  */
111
112unsigned int ia64_section_threshold;
113
114/* Structure to be filled in by ia64_compute_frame_size with register
115   save masks and offsets for the current function.  */
116
117struct ia64_frame_info
118{
119  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
120				   the caller's scratch area.  */
121  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
122  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
123  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
124  HARD_REG_SET mask;		/* mask of saved registers.  */
125  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
126				   registers or long-term scratches.  */
127  int n_spilled;		/* number of spilled registers.  */
128  int reg_fp;			/* register for fp.  */
129  int reg_save_b0;		/* save register for b0.  */
130  int reg_save_pr;		/* save register for prs.  */
131  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
132  int reg_save_ar_unat;		/* save register for ar.unat.  */
133  int reg_save_ar_lc;		/* save register for ar.lc.  */
134  int reg_save_gp;		/* save register for gp.  */
135  int n_input_regs;		/* number of input registers used.  */
136  int n_local_regs;		/* number of local registers used.  */
137  int n_output_regs;		/* number of output registers used.  */
138  int n_rotate_regs;		/* number of rotating registers used.  */
139
140  char need_regstk;		/* true if a .regstk directive needed.  */
141  char initialized;		/* true if the data is finalized.  */
142};
143
144/* Current frame information calculated by ia64_compute_frame_size.  */
145static struct ia64_frame_info current_frame_info;
146
147static rtx gen_tls_get_addr PARAMS ((void));
148static rtx gen_thread_pointer PARAMS ((void));
149static int find_gr_spill PARAMS ((int));
150static int next_scratch_gr_reg PARAMS ((void));
151static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
152static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
153static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
154static void finish_spill_pointers PARAMS ((void));
155static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
156static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
157static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
158static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
159static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
160static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));
161
162static enum machine_mode hfa_element_mode PARAMS ((tree, int));
163static void fix_range PARAMS ((const char *));
164static struct machine_function * ia64_init_machine_status PARAMS ((void));
165static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
166static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
167static void emit_predicate_relation_info PARAMS ((void));
168static bool ia64_in_small_data_p PARAMS ((tree));
169static void ia64_encode_section_info PARAMS ((tree, int));
170static const char *ia64_strip_name_encoding PARAMS ((const char *));
171static void process_epilogue PARAMS ((void));
172static int process_set PARAMS ((FILE *, rtx));
173
174static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
175					     tree, rtx));
176static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
177					     tree, rtx));
178static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
179						 tree, rtx));
180static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
181						  tree, rtx));
182static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
183static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
184static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
185static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
186static void ia64_output_function_end_prologue PARAMS ((FILE *));
187
188static int ia64_issue_rate PARAMS ((void));
189static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
190static void ia64_sched_init PARAMS ((FILE *, int, int));
191static void ia64_sched_finish PARAMS ((FILE *, int));
192static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
193						int *, int, int));
194static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
195static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
196static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
197
198static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
199					  HOST_WIDE_INT, tree));
200
201static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
202					     unsigned HOST_WIDE_INT));
203static void ia64_rwreloc_select_section PARAMS ((tree, int,
204					         unsigned HOST_WIDE_INT))
205     ATTRIBUTE_UNUSED;
206static void ia64_rwreloc_unique_section PARAMS ((tree, int))
207     ATTRIBUTE_UNUSED;
208static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
209					             unsigned HOST_WIDE_INT))
210     ATTRIBUTE_UNUSED;
211static unsigned int ia64_rwreloc_section_type_flags
212     PARAMS ((tree, const char *, int))
213     ATTRIBUTE_UNUSED;
214
215static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
216     ATTRIBUTE_UNUSED;
217
218/* Table of valid machine attributes.  */
219static const struct attribute_spec ia64_attribute_table[] =
220{
221  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
222  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
223  { NULL,              0, 0, false, false, false, NULL }
224};
225
226/* Initialize the GCC target structure.  */
227#undef TARGET_ATTRIBUTE_TABLE
228#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
229
230#undef TARGET_INIT_BUILTINS
231#define TARGET_INIT_BUILTINS ia64_init_builtins
232
233#undef TARGET_EXPAND_BUILTIN
234#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
235
236#undef TARGET_ASM_BYTE_OP
237#define TARGET_ASM_BYTE_OP "\tdata1\t"
238#undef TARGET_ASM_ALIGNED_HI_OP
239#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
240#undef TARGET_ASM_ALIGNED_SI_OP
241#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
242#undef TARGET_ASM_ALIGNED_DI_OP
243#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
244#undef TARGET_ASM_UNALIGNED_HI_OP
245#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
246#undef TARGET_ASM_UNALIGNED_SI_OP
247#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
248#undef TARGET_ASM_UNALIGNED_DI_OP
249#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
250#undef TARGET_ASM_INTEGER
251#define TARGET_ASM_INTEGER ia64_assemble_integer
252
253#undef TARGET_ASM_FUNCTION_PROLOGUE
254#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
255#undef TARGET_ASM_FUNCTION_END_PROLOGUE
256#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
257#undef TARGET_ASM_FUNCTION_EPILOGUE
258#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
259
260#undef TARGET_IN_SMALL_DATA_P
261#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
262#undef TARGET_ENCODE_SECTION_INFO
263#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
264#undef TARGET_STRIP_NAME_ENCODING
265#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding
266
267#undef TARGET_SCHED_ADJUST_COST
268#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
269#undef TARGET_SCHED_ISSUE_RATE
270#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
271#undef TARGET_SCHED_VARIABLE_ISSUE
272#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
273#undef TARGET_SCHED_INIT
274#define TARGET_SCHED_INIT ia64_sched_init
275#undef TARGET_SCHED_FINISH
276#define TARGET_SCHED_FINISH ia64_sched_finish
277#undef TARGET_SCHED_REORDER
278#define TARGET_SCHED_REORDER ia64_sched_reorder
279#undef TARGET_SCHED_REORDER2
280#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
281
282#ifdef HAVE_AS_TLS
283#undef TARGET_HAVE_TLS
284#define TARGET_HAVE_TLS true
285#endif
286
287#undef TARGET_ASM_OUTPUT_MI_THUNK
288#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
289#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
290#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
291
292struct gcc_target targetm = TARGET_INITIALIZER;
293
294/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
295
296int
297call_operand (op, mode)
298     rtx op;
299     enum machine_mode mode;
300{
301  if (mode != GET_MODE (op) && mode != VOIDmode)
302    return 0;
303
304  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
305	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
306}
307
308/* Return 1 if OP refers to a symbol in the sdata section.  */
309
310int
311sdata_symbolic_operand (op, mode)
312     rtx op;
313     enum machine_mode mode ATTRIBUTE_UNUSED;
314{
315  switch (GET_CODE (op))
316    {
317    case CONST:
318      if (GET_CODE (XEXP (op, 0)) != PLUS
319	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
320	break;
321      op = XEXP (XEXP (op, 0), 0);
322      /* FALLTHRU */
323
324    case SYMBOL_REF:
325      if (CONSTANT_POOL_ADDRESS_P (op))
326	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
327      else
328	{
329	  const char *str = XSTR (op, 0);
330          return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
331	}
332
333    default:
334      break;
335    }
336
337  return 0;
338}
339
340/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
341
342int
343got_symbolic_operand (op, mode)
344     rtx op;
345     enum machine_mode mode ATTRIBUTE_UNUSED;
346{
347  switch (GET_CODE (op))
348    {
349    case CONST:
350      op = XEXP (op, 0);
351      if (GET_CODE (op) != PLUS)
352	return 0;
353      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
354	return 0;
355      op = XEXP (op, 1);
356      if (GET_CODE (op) != CONST_INT)
357	return 0;
358
359	return 1;
360
361      /* Ok if we're not using GOT entries at all.  */
362      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
363	return 1;
364
365      /* "Ok" while emitting rtl, since otherwise we won't be provided
366	 with the entire offset during emission, which makes it very
367	 hard to split the offset into high and low parts.  */
368      if (rtx_equal_function_value_matters)
369	return 1;
370
371      /* Force the low 14 bits of the constant to zero so that we do not
372	 use up so many GOT entries.  */
373      return (INTVAL (op) & 0x3fff) == 0;
374
375    case SYMBOL_REF:
376    case LABEL_REF:
377      return 1;
378
379    default:
380      break;
381    }
382  return 0;
383}
384
385/* Return 1 if OP refers to a symbol.  */
386
387int
388symbolic_operand (op, mode)
389     rtx op;
390     enum machine_mode mode ATTRIBUTE_UNUSED;
391{
392  switch (GET_CODE (op))
393    {
394    case CONST:
395    case SYMBOL_REF:
396    case LABEL_REF:
397      return 1;
398
399    default:
400      break;
401    }
402  return 0;
403}
404
405/* Return tls_model if OP refers to a TLS symbol.  */
406
407int
408tls_symbolic_operand (op, mode)
409     rtx op;
410     enum machine_mode mode ATTRIBUTE_UNUSED;
411{
412  const char *str;
413
414  if (GET_CODE (op) != SYMBOL_REF)
415    return 0;
416  str = XSTR (op, 0);
417  if (str[0] != ENCODE_SECTION_INFO_CHAR)
418    return 0;
419  switch (str[1])
420    {
421    case 'G':
422      return TLS_MODEL_GLOBAL_DYNAMIC;
423    case 'L':
424      return TLS_MODEL_LOCAL_DYNAMIC;
425    case 'i':
426      return TLS_MODEL_INITIAL_EXEC;
427    case 'l':
428      return TLS_MODEL_LOCAL_EXEC;
429    }
430  return 0;
431}
432
433
434/* Return 1 if OP refers to a function.  */
435
436int
437function_operand (op, mode)
438     rtx op;
439     enum machine_mode mode ATTRIBUTE_UNUSED;
440{
441  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
442    return 1;
443  else
444    return 0;
445}
446
447/* Return 1 if OP is setjmp or a similar function.  */
448
449/* ??? This is an unsatisfying solution.  Should rethink.  */
450
451int
452setjmp_operand (op, mode)
453     rtx op;
454     enum machine_mode mode ATTRIBUTE_UNUSED;
455{
456  const char *name;
457  int retval = 0;
458
459  if (GET_CODE (op) != SYMBOL_REF)
460    return 0;
461
462  name = XSTR (op, 0);
463
464  /* The following code is borrowed from special_function_p in calls.c.  */
465
466  /* Disregard prefix _, __ or __x.  */
467  if (name[0] == '_')
468    {
469      if (name[1] == '_' && name[2] == 'x')
470	name += 3;
471      else if (name[1] == '_')
472	name += 2;
473      else
474	name += 1;
475    }
476
477  if (name[0] == 's')
478    {
479      retval
480	= ((name[1] == 'e'
481	    && (! strcmp (name, "setjmp")
482		|| ! strcmp (name, "setjmp_syscall")))
483	   || (name[1] == 'i'
484	       && ! strcmp (name, "sigsetjmp"))
485	   || (name[1] == 'a'
486	       && ! strcmp (name, "savectx")));
487    }
488  else if ((name[0] == 'q' && name[1] == 's'
489	    && ! strcmp (name, "qsetjmp"))
490	   || (name[0] == 'v' && name[1] == 'f'
491	       && ! strcmp (name, "vfork")))
492    retval = 1;
493
494  return retval;
495}
496
497/* Return 1 if OP is a general operand, but when pic exclude symbolic
498   operands.  */
499
500/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
501   from PREDICATE_CODES.  */
502
503int
504move_operand (op, mode)
505     rtx op;
506     enum machine_mode mode;
507{
508  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
509    return 0;
510
511  return general_operand (op, mode);
512}
513
514/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
515
516int
517gr_register_operand (op, mode)
518     rtx op;
519     enum machine_mode mode;
520{
521  if (! register_operand (op, mode))
522    return 0;
523  if (GET_CODE (op) == SUBREG)
524    op = SUBREG_REG (op);
525  if (GET_CODE (op) == REG)
526    {
527      unsigned int regno = REGNO (op);
528      if (regno < FIRST_PSEUDO_REGISTER)
529	return GENERAL_REGNO_P (regno);
530    }
531  return 1;
532}
533
534/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
535
536int
537fr_register_operand (op, mode)
538     rtx op;
539     enum machine_mode mode;
540{
541  if (! register_operand (op, mode))
542    return 0;
543  if (GET_CODE (op) == SUBREG)
544    op = SUBREG_REG (op);
545  if (GET_CODE (op) == REG)
546    {
547      unsigned int regno = REGNO (op);
548      if (regno < FIRST_PSEUDO_REGISTER)
549	return FR_REGNO_P (regno);
550    }
551  return 1;
552}
553
554/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
555
556int
557grfr_register_operand (op, mode)
558     rtx op;
559     enum machine_mode mode;
560{
561  if (! register_operand (op, mode))
562    return 0;
563  if (GET_CODE (op) == SUBREG)
564    op = SUBREG_REG (op);
565  if (GET_CODE (op) == REG)
566    {
567      unsigned int regno = REGNO (op);
568      if (regno < FIRST_PSEUDO_REGISTER)
569	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
570    }
571  return 1;
572}
573
574/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
575
576int
577gr_nonimmediate_operand (op, mode)
578     rtx op;
579     enum machine_mode mode;
580{
581  if (! nonimmediate_operand (op, mode))
582    return 0;
583  if (GET_CODE (op) == SUBREG)
584    op = SUBREG_REG (op);
585  if (GET_CODE (op) == REG)
586    {
587      unsigned int regno = REGNO (op);
588      if (regno < FIRST_PSEUDO_REGISTER)
589	return GENERAL_REGNO_P (regno);
590    }
591  return 1;
592}
593
594/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
595
596int
597fr_nonimmediate_operand (op, mode)
598     rtx op;
599     enum machine_mode mode;
600{
601  if (! nonimmediate_operand (op, mode))
602    return 0;
603  if (GET_CODE (op) == SUBREG)
604    op = SUBREG_REG (op);
605  if (GET_CODE (op) == REG)
606    {
607      unsigned int regno = REGNO (op);
608      if (regno < FIRST_PSEUDO_REGISTER)
609	return FR_REGNO_P (regno);
610    }
611  return 1;
612}
613
614/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
615
616int
617grfr_nonimmediate_operand (op, mode)
618     rtx op;
619     enum machine_mode mode;
620{
621  if (! nonimmediate_operand (op, mode))
622    return 0;
623  if (GET_CODE (op) == SUBREG)
624    op = SUBREG_REG (op);
625  if (GET_CODE (op) == REG)
626    {
627      unsigned int regno = REGNO (op);
628      if (regno < FIRST_PSEUDO_REGISTER)
629	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
630    }
631  return 1;
632}
633
634/* Return 1 if OP is a GR register operand, or zero.  */
635
636int
637gr_reg_or_0_operand (op, mode)
638     rtx op;
639     enum machine_mode mode;
640{
641  return (op == const0_rtx || gr_register_operand (op, mode));
642}
643
644/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
645
646int
647gr_reg_or_5bit_operand (op, mode)
648     rtx op;
649     enum machine_mode mode;
650{
651  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
652	  || GET_CODE (op) == CONSTANT_P_RTX
653	  || gr_register_operand (op, mode));
654}
655
656/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
657
658int
659gr_reg_or_6bit_operand (op, mode)
660     rtx op;
661     enum machine_mode mode;
662{
663  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
664	  || GET_CODE (op) == CONSTANT_P_RTX
665	  || gr_register_operand (op, mode));
666}
667
668/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
669
670int
671gr_reg_or_8bit_operand (op, mode)
672     rtx op;
673     enum machine_mode mode;
674{
675  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
676	  || GET_CODE (op) == CONSTANT_P_RTX
677	  || gr_register_operand (op, mode));
678}
679
680/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
681
682int
683grfr_reg_or_8bit_operand (op, mode)
684     rtx op;
685     enum machine_mode mode;
686{
687  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
688	  || GET_CODE (op) == CONSTANT_P_RTX
689	  || grfr_register_operand (op, mode));
690}
691
692/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
693   operand.  */
694
695int
696gr_reg_or_8bit_adjusted_operand (op, mode)
697     rtx op;
698     enum machine_mode mode;
699{
700  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
701	  || GET_CODE (op) == CONSTANT_P_RTX
702	  || gr_register_operand (op, mode));
703}
704
705/* Return 1 if OP is a register operand, or is valid for both an 8 bit
706   immediate and an 8 bit adjusted immediate operand.  This is necessary
707   because when we emit a compare, we don't know what the condition will be,
708   so we need the union of the immediates accepted by GT and LT.  */
709
710int
711gr_reg_or_8bit_and_adjusted_operand (op, mode)
712     rtx op;
713     enum machine_mode mode;
714{
715  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
716	   && CONST_OK_FOR_L (INTVAL (op)))
717	  || GET_CODE (op) == CONSTANT_P_RTX
718	  || gr_register_operand (op, mode));
719}
720
721/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
722
723int
724gr_reg_or_14bit_operand (op, mode)
725     rtx op;
726     enum machine_mode mode;
727{
728  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
729	  || GET_CODE (op) == CONSTANT_P_RTX
730	  || gr_register_operand (op, mode));
731}
732
733/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
734
735int
736gr_reg_or_22bit_operand (op, mode)
737     rtx op;
738     enum machine_mode mode;
739{
740  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
741	  || GET_CODE (op) == CONSTANT_P_RTX
742	  || gr_register_operand (op, mode));
743}
744
745/* Return 1 if OP is a 6 bit immediate operand.  */
746
747int
748shift_count_operand (op, mode)
749     rtx op;
750     enum machine_mode mode ATTRIBUTE_UNUSED;
751{
752  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
753	  || GET_CODE (op) == CONSTANT_P_RTX);
754}
755
756/* Return 1 if OP is a 5 bit immediate operand.  */
757
758int
759shift_32bit_count_operand (op, mode)
760     rtx op;
761     enum machine_mode mode ATTRIBUTE_UNUSED;
762{
763  return ((GET_CODE (op) == CONST_INT
764	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
765	  || GET_CODE (op) == CONSTANT_P_RTX);
766}
767
768/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
769
770int
771shladd_operand (op, mode)
772     rtx op;
773     enum machine_mode mode ATTRIBUTE_UNUSED;
774{
775  return (GET_CODE (op) == CONST_INT
776	  && (INTVAL (op) == 2 || INTVAL (op) == 4
777	      || INTVAL (op) == 8 || INTVAL (op) == 16));
778}
779
780/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
781
782int
783fetchadd_operand (op, mode)
784     rtx op;
785     enum machine_mode mode ATTRIBUTE_UNUSED;
786{
787  return (GET_CODE (op) == CONST_INT
788          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
789              INTVAL (op) == -4  || INTVAL (op) == -1 ||
790              INTVAL (op) == 1   || INTVAL (op) == 4  ||
791              INTVAL (op) == 8   || INTVAL (op) == 16));
792}
793
794/* Return 1 if OP is a floating-point constant zero, one, or a register.  */
795
796int
797fr_reg_or_fp01_operand (op, mode)
798     rtx op;
799     enum machine_mode mode;
800{
801  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
802	  || fr_register_operand (op, mode));
803}
804
805/* Like nonimmediate_operand, but don't allow MEMs that try to use a
806   POST_MODIFY with a REG as displacement.  */
807
808int
809destination_operand (op, mode)
810     rtx op;
811     enum machine_mode mode;
812{
813  if (! nonimmediate_operand (op, mode))
814    return 0;
815  if (GET_CODE (op) == MEM
816      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
817      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
818    return 0;
819  return 1;
820}
821
822/* Like memory_operand, but don't allow post-increments.  */
823
824int
825not_postinc_memory_operand (op, mode)
826     rtx op;
827     enum machine_mode mode;
828{
829  return (memory_operand (op, mode)
830	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
831}
832
833/* Return 1 if this is a comparison operator, which accepts an normal 8-bit
834   signed immediate operand.  */
835
836int
837normal_comparison_operator (op, mode)
838    register rtx op;
839    enum machine_mode mode;
840{
841  enum rtx_code code = GET_CODE (op);
842  return ((mode == VOIDmode || GET_MODE (op) == mode)
843	  && (code == EQ || code == NE
844	      || code == GT || code == LE || code == GTU || code == LEU));
845}
846
847/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
848   signed immediate operand.  */
849
850int
851adjusted_comparison_operator (op, mode)
852    register rtx op;
853    enum machine_mode mode;
854{
855  enum rtx_code code = GET_CODE (op);
856  return ((mode == VOIDmode || GET_MODE (op) == mode)
857	  && (code == LT || code == GE || code == LTU || code == GEU));
858}
859
860/* Return 1 if this is a signed inequality operator.  */
861
862int
863signed_inequality_operator (op, mode)
864    register rtx op;
865    enum machine_mode mode;
866{
867  enum rtx_code code = GET_CODE (op);
868  return ((mode == VOIDmode || GET_MODE (op) == mode)
869	  && (code == GE || code == GT
870	      || code == LE || code == LT));
871}
872
873/* Return 1 if this operator is valid for predication.  */
874
875int
876predicate_operator (op, mode)
877    register rtx op;
878    enum machine_mode mode;
879{
880  enum rtx_code code = GET_CODE (op);
881  return ((GET_MODE (op) == mode || mode == VOIDmode)
882	  && (code == EQ || code == NE));
883}
884
885/* Return 1 if this operator can be used in a conditional operation.  */
886
887int
888condop_operator (op, mode)
889    register rtx op;
890    enum machine_mode mode;
891{
892  enum rtx_code code = GET_CODE (op);
893  return ((GET_MODE (op) == mode || mode == VOIDmode)
894	  && (code == PLUS || code == MINUS || code == AND
895	      || code == IOR || code == XOR));
896}
897
898/* Return 1 if this is the ar.lc register.  */
899
900int
901ar_lc_reg_operand (op, mode)
902     register rtx op;
903     enum machine_mode mode;
904{
905  return (GET_MODE (op) == DImode
906	  && (mode == DImode || mode == VOIDmode)
907	  && GET_CODE (op) == REG
908	  && REGNO (op) == AR_LC_REGNUM);
909}
910
911/* Return 1 if this is the ar.ccv register.  */
912
913int
914ar_ccv_reg_operand (op, mode)
915     register rtx op;
916     enum machine_mode mode;
917{
918  return ((GET_MODE (op) == mode || mode == VOIDmode)
919	  && GET_CODE (op) == REG
920	  && REGNO (op) == AR_CCV_REGNUM);
921}
922
923/* Return 1 if this is the ar.pfs register.  */
924
925int
926ar_pfs_reg_operand (op, mode)
927     register rtx op;
928     enum machine_mode mode;
929{
930  return ((GET_MODE (op) == mode || mode == VOIDmode)
931	  && GET_CODE (op) == REG
932	  && REGNO (op) == AR_PFS_REGNUM);
933}
934
935/* Like general_operand, but don't allow (mem (addressof)).  */
936
937int
938general_tfmode_operand (op, mode)
939     rtx op;
940     enum machine_mode mode;
941{
942  if (! general_operand (op, mode))
943    return 0;
944  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
945    return 0;
946  return 1;
947}
948
949/* Similarly.  */
950
951int
952destination_tfmode_operand (op, mode)
953     rtx op;
954     enum machine_mode mode;
955{
956  if (! destination_operand (op, mode))
957    return 0;
958  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
959    return 0;
960  return 1;
961}
962
963/* Similarly.  */
964
965int
966tfreg_or_fp01_operand (op, mode)
967     rtx op;
968     enum machine_mode mode;
969{
970  if (GET_CODE (op) == SUBREG)
971    return 0;
972  return fr_reg_or_fp01_operand (op, mode);
973}
974
975/* Return 1 if OP is valid as a base register in a reg + offset address.  */
976
977int
978basereg_operand (op, mode)
979     rtx op;
980     enum machine_mode mode;
981{
982  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
983     checks from pa.c basereg_operand as well?  Seems to be OK without them
984     in test runs.  */
985
986  return (register_operand (op, mode) &&
987	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
988}
989
990/* Return 1 if the operands of a move are ok.  */
991
992int
993ia64_move_ok (dst, src)
994     rtx dst, src;
995{
996  /* If we're under init_recog_no_volatile, we'll not be able to use
997     memory_operand.  So check the code directly and don't worry about
998     the validity of the underlying address, which should have been
999     checked elsewhere anyway.  */
1000  if (GET_CODE (dst) != MEM)
1001    return 1;
1002  if (GET_CODE (src) == MEM)
1003    return 0;
1004  if (register_operand (src, VOIDmode))
1005    return 1;
1006
1007  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
1008  if (INTEGRAL_MODE_P (GET_MODE (dst)))
1009    return src == const0_rtx;
1010  else
1011    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1012}
1013
1014/* Return 0 if we are doing C++ code.  This optimization fails with
1015   C++ because of GNAT c++/6685.  */
1016
1017int
1018addp4_optimize_ok (op1, op2)
1019     rtx op1, op2;
1020{
1021
1022  if (!strcmp (lang_hooks.name, "GNU C++"))
1023    return 0;
1024
1025  return (basereg_operand (op1, GET_MODE(op1)) !=
1026	  basereg_operand (op2, GET_MODE(op2)));
1027}
1028
1029/* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
1030   Return the length of the field, or <= 0 on failure.  */
1031
1032int
1033ia64_depz_field_mask (rop, rshift)
1034     rtx rop, rshift;
1035{
1036  unsigned HOST_WIDE_INT op = INTVAL (rop);
1037  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1038
1039  /* Get rid of the zero bits we're shifting in.  */
1040  op >>= shift;
1041
1042  /* We must now have a solid block of 1's at bit 0.  */
1043  return exact_log2 (op + 1);
1044}
1045
1046/* Expand a symbolic constant load.  */
1047/* ??? Should generalize this, so that we can also support 32 bit pointers.  */
1048
1049void
1050ia64_expand_load_address (dest, src, scratch)
1051      rtx dest, src, scratch;
1052{
1053  rtx temp;
1054
1055  /* The destination could be a MEM during initial rtl generation,
1056     which isn't a valid destination for the PIC load address patterns.  */
1057  if (! register_operand (dest, DImode))
1058    if (! scratch || ! register_operand (scratch, DImode))
1059      temp = gen_reg_rtx (DImode);
1060    else
1061      temp = scratch;
1062  else
1063    temp = dest;
1064
1065  if (tls_symbolic_operand (src, Pmode))
1066    abort ();
1067
1068  if (TARGET_AUTO_PIC)
1069    emit_insn (gen_load_gprel64 (temp, src));
1070  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
1071    emit_insn (gen_load_fptr (temp, src));
1072  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
1073           && sdata_symbolic_operand (src, VOIDmode))
1074    emit_insn (gen_load_gprel (temp, src));
1075  else if (GET_CODE (src) == CONST
1076	   && GET_CODE (XEXP (src, 0)) == PLUS
1077	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1078	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1079    {
1080      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1081      rtx sym = XEXP (XEXP (src, 0), 0);
1082      HOST_WIDE_INT ofs, hi, lo;
1083
1084      /* Split the offset into a sign extended 14-bit low part
1085	 and a complementary high part.  */
1086      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1087      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1088      hi = ofs - lo;
1089
1090      if (! scratch)
1091	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);
1092
1093      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
1094				  scratch));
1095      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
1096    }
1097  else
1098    {
1099      rtx insn;
1100      if (! scratch)
1101	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);
1102
1103      insn = emit_insn (gen_load_symptr (temp, src, scratch));
1104#ifdef POINTERS_EXTEND_UNSIGNED
1105      if (GET_MODE (temp) != GET_MODE (src))
1106	src = convert_memory_address (GET_MODE (temp), src);
1107#endif
1108      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
1109    }
1110
1111  if (temp != dest)
1112    {
1113      if (GET_MODE (dest) != GET_MODE (temp))
1114	temp = convert_to_mode (GET_MODE (dest), temp, 0);
1115      emit_move_insn (dest, temp);
1116    }
1117}
1118
1119static GTY(()) rtx gen_tls_tga;
1120static rtx
1121gen_tls_get_addr ()
1122{
1123  if (!gen_tls_tga)
1124    {
1125      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1126     }
1127  return gen_tls_tga;
1128}
1129
1130static GTY(()) rtx thread_pointer_rtx;
1131static rtx
1132gen_thread_pointer ()
1133{
1134  if (!thread_pointer_rtx)
1135    {
1136      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1137      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1138    }
1139  return thread_pointer_rtx;
1140}
1141
1142rtx
1143ia64_expand_move (op0, op1)
1144     rtx op0, op1;
1145{
1146  enum machine_mode mode = GET_MODE (op0);
1147
1148  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1149    op1 = force_reg (mode, op1);
1150
1151  if (mode == Pmode || mode == ptr_mode)
1152    {
1153      enum tls_model tls_kind;
1154      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1155	{
1156	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1157
1158	  switch (tls_kind)
1159	    {
1160	    case TLS_MODEL_GLOBAL_DYNAMIC:
1161	      start_sequence ();
1162
1163	      tga_op1 = gen_reg_rtx (Pmode);
1164	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1165	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1166	      RTX_UNCHANGING_P (tga_op1) = 1;
1167
1168	      tga_op2 = gen_reg_rtx (Pmode);
1169	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1170	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1171	      RTX_UNCHANGING_P (tga_op2) = 1;
1172
1173	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1174						 LCT_CONST, Pmode, 2, tga_op1,
1175						 Pmode, tga_op2, Pmode);
1176
1177	      insns = get_insns ();
1178	      end_sequence ();
1179
1180	      emit_libcall_block (insns, op0, tga_ret, op1);
1181	      return NULL_RTX;
1182
1183	    case TLS_MODEL_LOCAL_DYNAMIC:
1184	      /* ??? This isn't the completely proper way to do local-dynamic
1185		 If the call to __tls_get_addr is used only by a single symbol,
1186		 then we should (somehow) move the dtprel to the second arg
1187		 to avoid the extra add.  */
1188	      start_sequence ();
1189
1190	      tga_op1 = gen_reg_rtx (Pmode);
1191	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1192	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1193	      RTX_UNCHANGING_P (tga_op1) = 1;
1194
1195	      tga_op2 = const0_rtx;
1196
1197	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1198						 LCT_CONST, Pmode, 2, tga_op1,
1199						 Pmode, tga_op2, Pmode);
1200
1201	      insns = get_insns ();
1202	      end_sequence ();
1203
1204	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1205					UNSPEC_LD_BASE);
1206	      tmp = gen_reg_rtx (Pmode);
1207	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1208
1209	      if (register_operand (op0, Pmode))
1210		tga_ret = op0;
1211	      else
1212		tga_ret = gen_reg_rtx (Pmode);
1213	      if (TARGET_TLS64)
1214		{
1215		  emit_insn (gen_load_dtprel (tga_ret, op1));
1216		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
1217		}
1218	      else
1219		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
1220	      if (tga_ret == op0)
1221		return NULL_RTX;
1222	      op1 = tga_ret;
1223	      break;
1224
1225	    case TLS_MODEL_INITIAL_EXEC:
1226	      tmp = gen_reg_rtx (Pmode);
1227	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
1228	      tmp = gen_rtx_MEM (Pmode, tmp);
1229	      RTX_UNCHANGING_P (tmp) = 1;
1230	      tmp = force_reg (Pmode, tmp);
1231
1232	      if (register_operand (op0, Pmode))
1233		op1 = op0;
1234	      else
1235		op1 = gen_reg_rtx (Pmode);
1236	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
1237	      if (op1 == op0)
1238		return NULL_RTX;
1239	      break;
1240
1241	    case TLS_MODEL_LOCAL_EXEC:
1242	      if (register_operand (op0, Pmode))
1243		tmp = op0;
1244	      else
1245		tmp = gen_reg_rtx (Pmode);
1246	      if (TARGET_TLS64)
1247		{
1248		  emit_insn (gen_load_tprel (tmp, op1));
1249		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
1250		}
1251	      else
1252		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
1253	      if (tmp == op0)
1254		return NULL_RTX;
1255	      op1 = tmp;
1256	      break;
1257
1258	    default:
1259	      abort ();
1260	    }
1261	}
1262      else if (!TARGET_NO_PIC &&
1263	       (symbolic_operand (op1, Pmode) ||
1264		symbolic_operand (op1, ptr_mode)))
1265	{
1266	  /* Before optimization starts, delay committing to any particular
1267	     type of PIC address load.  If this function gets deferred, we
1268	     may acquire information that changes the value of the
1269	     sdata_symbolic_operand predicate.
1270
1271	     But don't delay for function pointers.  Loading a function address
1272	     actually loads the address of the descriptor not the function.
1273	     If we represent these as SYMBOL_REFs, then they get cse'd with
1274	     calls, and we end up with calls to the descriptor address instead
1275	     of calls to the function address.  Functions are not candidates
1276	     for sdata anyways.
1277
1278	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
1279	     notes.  Don't delay for pool addresses on general principals;
1280	     they'll never become non-local behind our back.  */
1281
1282	  if (rtx_equal_function_value_matters
1283	      && GET_CODE (op1) != LABEL_REF
1284	      && ! (GET_CODE (op1) == SYMBOL_REF
1285		    && (SYMBOL_REF_FLAG (op1)
1286			|| CONSTANT_POOL_ADDRESS_P (op1)
1287			|| STRING_POOL_ADDRESS_P (op1))))
1288	    if (GET_MODE (op1) == DImode)
1289	      emit_insn (gen_movdi_symbolic (op0, op1));
1290	    else
1291	      emit_insn (gen_movsi_symbolic (op0, op1));
1292	  else
1293	    ia64_expand_load_address (op0, op1, NULL_RTX);
1294	  return NULL_RTX;
1295	}
1296    }
1297
1298  return op1;
1299}
1300
1301/* Split a post-reload TImode reference into two DImode components.  */
1302
1303rtx
1304ia64_split_timode (out, in, scratch)
1305     rtx out[2];
1306     rtx in, scratch;
1307{
1308  switch (GET_CODE (in))
1309    {
1310    case REG:
1311      out[0] = gen_rtx_REG (DImode, REGNO (in));
1312      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
1313      return NULL_RTX;
1314
1315    case MEM:
1316      {
1317	rtx base = XEXP (in, 0);
1318
1319	switch (GET_CODE (base))
1320	  {
1321	  case REG:
1322	    out[0] = adjust_address (in, DImode, 0);
1323	    break;
1324	  case POST_MODIFY:
1325	    base = XEXP (base, 0);
1326	    out[0] = adjust_address (in, DImode, 0);
1327	    break;
1328
1329	  /* Since we're changing the mode, we need to change to POST_MODIFY
1330	     as well to preserve the size of the increment.  Either that or
1331	     do the update in two steps, but we've already got this scratch
1332	     register handy so let's use it.  */
1333	  case POST_INC:
1334	    base = XEXP (base, 0);
1335	    out[0]
1336	      = change_address (in, DImode,
1337				gen_rtx_POST_MODIFY
1338				(Pmode, base, plus_constant (base, 16)));
1339	    break;
1340	  case POST_DEC:
1341	    base = XEXP (base, 0);
1342	    out[0]
1343	      = change_address (in, DImode,
1344				gen_rtx_POST_MODIFY
1345				(Pmode, base, plus_constant (base, -16)));
1346	    break;
1347	  default:
1348	    abort ();
1349	  }
1350
1351	if (scratch == NULL_RTX)
1352	  abort ();
1353	out[1] = change_address (in, DImode, scratch);
1354	return gen_adddi3 (scratch, base, GEN_INT (8));
1355      }
1356
1357    case CONST_INT:
1358    case CONST_DOUBLE:
1359      split_double (in, &out[0], &out[1]);
1360      return NULL_RTX;
1361
1362    default:
1363      abort ();
1364    }
1365}
1366
1367/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
1368   through memory plus an extra GR scratch register.  Except that you can
1369   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1370   SECONDARY_RELOAD_CLASS, but not both.
1371
1372   We got into problems in the first place by allowing a construct like
1373   (subreg:TF (reg:TI)), which we got from a union containing a long double.
1374   This solution attempts to prevent this situation from occurring.  When
1375   we see something like the above, we spill the inner register to memory.  */
1376
1377rtx
1378spill_tfmode_operand (in, force)
1379     rtx in;
1380     int force;
1381{
1382  if (GET_CODE (in) == SUBREG
1383      && GET_MODE (SUBREG_REG (in)) == TImode
1384      && GET_CODE (SUBREG_REG (in)) == REG)
1385    {
1386      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
1387      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1388    }
1389  else if (force && GET_CODE (in) == REG)
1390    {
1391      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
1392      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
1393    }
1394  else if (GET_CODE (in) == MEM
1395	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1396    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
1397  else
1398    return in;
1399}
1400
1401/* Emit comparison instruction if necessary, returning the expression
1402   that holds the compare result in the proper mode.  */
1403
1404rtx
1405ia64_expand_compare (code, mode)
1406     enum rtx_code code;
1407     enum machine_mode mode;
1408{
1409  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1410  rtx cmp;
1411
1412  /* If we have a BImode input, then we already have a compare result, and
1413     do not need to emit another comparison.  */
1414  if (GET_MODE (op0) == BImode)
1415    {
1416      if ((code == NE || code == EQ) && op1 == const0_rtx)
1417	cmp = op0;
1418      else
1419	abort ();
1420    }
1421  else
1422    {
1423      cmp = gen_reg_rtx (BImode);
1424      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1425			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1426      code = NE;
1427    }
1428
1429  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1430}
1431
1432/* Emit the appropriate sequence for a call.  */
1433void
1434ia64_expand_call (retval, addr, nextarg, sibcall_p)
1435     rtx retval;
1436     rtx addr;
1437     rtx nextarg ATTRIBUTE_UNUSED;
1438     int sibcall_p;
1439{
1440  rtx insn, b0;
1441
1442  addr = XEXP (addr, 0);
1443  b0 = gen_rtx_REG (DImode, R_BR (0));
1444
1445  /* ??? Should do this for functions known to bind local too.  */
1446  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1447    {
1448      if (sibcall_p)
1449	insn = gen_sibcall_nogp (addr);
1450      else if (! retval)
1451	insn = gen_call_nogp (addr, b0);
1452      else
1453	insn = gen_call_value_nogp (retval, addr, b0);
1454      insn = emit_call_insn (insn);
1455    }
1456  else
1457    {
1458      if (sibcall_p)
1459	insn = gen_sibcall_gp (addr);
1460      else if (! retval)
1461	insn = gen_call_gp (addr, b0);
1462      else
1463	insn = gen_call_value_gp (retval, addr, b0);
1464      insn = emit_call_insn (insn);
1465
1466      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1467    }
1468
1469  if (sibcall_p)
1470    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1471}
1472void
1473ia64_reload_gp ()
1474{
1475  rtx tmp;
1476
1477  if (current_frame_info.reg_save_gp)
1478    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1479  else
1480    {
1481      HOST_WIDE_INT offset;
1482
1483      offset = (current_frame_info.spill_cfa_off
1484	        + current_frame_info.spill_size);
1485      if (frame_pointer_needed)
1486        {
1487          tmp = hard_frame_pointer_rtx;
1488          offset = -offset;
1489        }
1490      else
1491        {
1492          tmp = stack_pointer_rtx;
1493          offset = current_frame_info.total_size - offset;
1494        }
1495
1496      if (CONST_OK_FOR_I (offset))
1497        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1498			       tmp, GEN_INT (offset)));
1499      else
1500        {
1501          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1502          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1503			         pic_offset_table_rtx, tmp));
1504        }
1505
1506      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1507    }
1508
1509  emit_move_insn (pic_offset_table_rtx, tmp);
1510}
1511
1512void
1513ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
1514		 noreturn_p, sibcall_p)
1515     rtx retval, addr, retaddr, scratch_r, scratch_b;
1516     int noreturn_p, sibcall_p;
1517{
1518  rtx insn;
1519  bool is_desc = false;
1520
1521  /* If we find we're calling through a register, then we're actually
1522     calling through a descriptor, so load up the values.  */
1523  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1524    {
1525      rtx tmp;
1526      bool addr_dead_p;
1527
1528      /* ??? We are currently constrained to *not* use peep2, because
1529	 we can legitimiately change the global lifetime of the GP
1530	 (in the form of killing where previously live).  This is
1531	 because a call through a descriptor doesn't use the previous
1532	 value of the GP, while a direct call does, and we do not
1533	 commit to either form until the split here.
1534
1535	 That said, this means that we lack precise life info for
1536	 whether ADDR is dead after this call.  This is not terribly
1537	 important, since we can fix things up essentially for free
1538	 with the POST_DEC below, but it's nice to not use it when we
1539	 can immediately tell it's not necessary.  */
1540      addr_dead_p = ((noreturn_p || sibcall_p
1541		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1542					    REGNO (addr)))
1543		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1544
1545      /* Load the code address into scratch_b.  */
1546      tmp = gen_rtx_POST_INC (Pmode, addr);
1547      tmp = gen_rtx_MEM (Pmode, tmp);
1548      emit_move_insn (scratch_r, tmp);
1549      emit_move_insn (scratch_b, scratch_r);
1550
1551      /* Load the GP address.  If ADDR is not dead here, then we must
1552	 revert the change made above via the POST_INCREMENT.  */
1553      if (!addr_dead_p)
1554	tmp = gen_rtx_POST_DEC (Pmode, addr);
1555      else
1556	tmp = addr;
1557      tmp = gen_rtx_MEM (Pmode, tmp);
1558      emit_move_insn (pic_offset_table_rtx, tmp);
1559
1560      is_desc = true;
1561      addr = scratch_b;
1562    }
1563
1564  if (sibcall_p)
1565    insn = gen_sibcall_nogp (addr);
1566  else if (retval)
1567    insn = gen_call_value_nogp (retval, addr, retaddr);
1568  else
1569    insn = gen_call_nogp (addr, retaddr);
1570  emit_call_insn (insn);
1571
1572  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1573    ia64_reload_gp ();
1574}
1575
1576/* Begin the assembly file.  */
1577
1578void
1579emit_safe_across_calls (f)
1580     FILE *f;
1581{
1582  unsigned int rs, re;
1583  int out_state;
1584
1585  rs = 1;
1586  out_state = 0;
1587  while (1)
1588    {
1589      while (rs < 64 && call_used_regs[PR_REG (rs)])
1590	rs++;
1591      if (rs >= 64)
1592	break;
1593      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1594	continue;
1595      if (out_state == 0)
1596	{
1597	  fputs ("\t.pred.safe_across_calls ", f);
1598	  out_state = 1;
1599	}
1600      else
1601	fputc (',', f);
1602      if (re == rs + 1)
1603	fprintf (f, "p%u", rs);
1604      else
1605	fprintf (f, "p%u-p%u", rs, re - 1);
1606      rs = re + 1;
1607    }
1608  if (out_state)
1609    fputc ('\n', f);
1610}
1611
1612/* Helper function for ia64_compute_frame_size: find an appropriate general
1613   register to spill some special register to.  SPECIAL_SPILL_MASK contains
1614   bits in GR0 to GR31 that have already been allocated by this routine.
1615   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1616
1617static int
1618find_gr_spill (try_locals)
1619     int try_locals;
1620{
1621  int regno;
1622
1623  /* If this is a leaf function, first try an otherwise unused
1624     call-clobbered register.  */
1625  if (current_function_is_leaf)
1626    {
1627      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1628	if (! regs_ever_live[regno]
1629	    && call_used_regs[regno]
1630	    && ! fixed_regs[regno]
1631	    && ! global_regs[regno]
1632	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1633	  {
1634	    current_frame_info.gr_used_mask |= 1 << regno;
1635	    return regno;
1636	  }
1637    }
1638
1639  if (try_locals)
1640    {
1641      regno = current_frame_info.n_local_regs;
1642      /* If there is a frame pointer, then we can't use loc79, because
1643	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
1644	 reg_name switching code in ia64_expand_prologue.  */
1645      if (regno < (80 - frame_pointer_needed))
1646	{
1647	  current_frame_info.n_local_regs = regno + 1;
1648	  return LOC_REG (0) + regno;
1649	}
1650    }
1651
1652  /* Failed to find a general register to spill to.  Must use stack.  */
1653  return 0;
1654}
1655
1656/* In order to make for nice schedules, we try to allocate every temporary
1657   to a different register.  We must of course stay away from call-saved,
1658   fixed, and global registers.  We must also stay away from registers
1659   allocated in current_frame_info.gr_used_mask, since those include regs
1660   used all through the prologue.
1661
1662   Any register allocated here must be used immediately.  The idea is to
1663   aid scheduling, not to solve data flow problems.  */
1664
1665static int last_scratch_gr_reg;
1666
1667static int
1668next_scratch_gr_reg ()
1669{
1670  int i, regno;
1671
1672  for (i = 0; i < 32; ++i)
1673    {
1674      regno = (last_scratch_gr_reg + i + 1) & 31;
1675      if (call_used_regs[regno]
1676	  && ! fixed_regs[regno]
1677	  && ! global_regs[regno]
1678	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1679	{
1680	  last_scratch_gr_reg = regno;
1681	  return regno;
1682	}
1683    }
1684
1685  /* There must be _something_ available.  */
1686  abort ();
1687}
1688
1689/* Helper function for ia64_compute_frame_size, called through
1690   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
1691
1692static void
1693mark_reg_gr_used_mask (reg, data)
1694     rtx reg;
1695     void *data ATTRIBUTE_UNUSED;
1696{
1697  unsigned int regno = REGNO (reg);
1698  if (regno < 32)
1699    {
1700      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1701      for (i = 0; i < n; ++i)
1702	current_frame_info.gr_used_mask |= 1 << (regno + i);
1703    }
1704}
1705
1706/* Returns the number of bytes offset between the frame pointer and the stack
1707   pointer for the current function.  SIZE is the number of bytes of space
1708   needed for local variables.  */
1709
1710static void
1711ia64_compute_frame_size (size)
1712     HOST_WIDE_INT size;
1713{
1714  HOST_WIDE_INT total_size;
1715  HOST_WIDE_INT spill_size = 0;
1716  HOST_WIDE_INT extra_spill_size = 0;
1717  HOST_WIDE_INT pretend_args_size;
1718  HARD_REG_SET mask;
1719  int n_spilled = 0;
1720  int spilled_gr_p = 0;
1721  int spilled_fr_p = 0;
1722  unsigned int regno;
1723  int i;
1724
1725  if (current_frame_info.initialized)
1726    return;
1727
1728  memset (&current_frame_info, 0, sizeof current_frame_info);
1729  CLEAR_HARD_REG_SET (mask);
1730
1731  /* Don't allocate scratches to the return register.  */
1732  diddle_return_value (mark_reg_gr_used_mask, NULL);
1733
1734  /* Don't allocate scratches to the EH scratch registers.  */
1735  if (cfun->machine->ia64_eh_epilogue_sp)
1736    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
1737  if (cfun->machine->ia64_eh_epilogue_bsp)
1738    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
1739
1740  /* Find the size of the register stack frame.  We have only 80 local
1741     registers, because we reserve 8 for the inputs and 8 for the
1742     outputs.  */
1743
1744  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
1745     since we'll be adjusting that down later.  */
1746  regno = LOC_REG (78) + ! frame_pointer_needed;
1747  for (; regno >= LOC_REG (0); regno--)
1748    if (regs_ever_live[regno])
1749      break;
1750  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
1751
1752  /* For functions marked with the syscall_linkage attribute, we must mark
1753     all eight input registers as in use, so that locals aren't visible to
1754     the caller.  */
1755
1756  if (cfun->machine->n_varargs > 0
1757      || lookup_attribute ("syscall_linkage",
1758			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
1759    current_frame_info.n_input_regs = 8;
1760  else
1761    {
1762      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
1763	if (regs_ever_live[regno])
1764	  break;
1765      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
1766    }
1767
1768  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
1769    if (regs_ever_live[regno])
1770      break;
1771  i = regno - OUT_REG (0) + 1;
1772
1773  /* When -p profiling, we need one output register for the mcount argument.
1774     Likwise for -a profiling for the bb_init_func argument.  For -ax
1775     profiling, we need two output registers for the two bb_init_trace_func
1776     arguments.  */
1777  if (current_function_profile)
1778    i = MAX (i, 1);
1779  current_frame_info.n_output_regs = i;
1780
1781  /* ??? No rotating register support yet.  */
1782  current_frame_info.n_rotate_regs = 0;
1783
1784  /* Discover which registers need spilling, and how much room that
1785     will take.  Begin with floating point and general registers,
1786     which will always wind up on the stack.  */
1787
1788  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
1789    if (regs_ever_live[regno] && ! call_used_regs[regno])
1790      {
1791	SET_HARD_REG_BIT (mask, regno);
1792	spill_size += 16;
1793	n_spilled += 1;
1794	spilled_fr_p = 1;
1795      }
1796
1797  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1798    if (regs_ever_live[regno] && ! call_used_regs[regno])
1799      {
1800	SET_HARD_REG_BIT (mask, regno);
1801	spill_size += 8;
1802	n_spilled += 1;
1803	spilled_gr_p = 1;
1804      }
1805
1806  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
1807    if (regs_ever_live[regno] && ! call_used_regs[regno])
1808      {
1809	SET_HARD_REG_BIT (mask, regno);
1810	spill_size += 8;
1811	n_spilled += 1;
1812      }
1813
1814  /* Now come all special registers that might get saved in other
1815     general registers.  */
1816
1817  if (frame_pointer_needed)
1818    {
1819      current_frame_info.reg_fp = find_gr_spill (1);
1820      /* If we did not get a register, then we take LOC79.  This is guaranteed
1821	 to be free, even if regs_ever_live is already set, because this is
1822	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
1823	 as we don't count loc79 above.  */
1824      if (current_frame_info.reg_fp == 0)
1825	{
1826	  current_frame_info.reg_fp = LOC_REG (79);
1827	  current_frame_info.n_local_regs++;
1828	}
1829    }
1830
1831  if (! current_function_is_leaf)
1832    {
1833      /* Emit a save of BR0 if we call other functions.  Do this even
1834	 if this function doesn't return, as EH depends on this to be
1835	 able to unwind the stack.  */
1836      SET_HARD_REG_BIT (mask, BR_REG (0));
1837
1838      current_frame_info.reg_save_b0 = find_gr_spill (1);
1839      if (current_frame_info.reg_save_b0 == 0)
1840	{
1841	  spill_size += 8;
1842	  n_spilled += 1;
1843	}
1844
1845      /* Similarly for ar.pfs.  */
1846      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1847      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1848      if (current_frame_info.reg_save_ar_pfs == 0)
1849	{
1850	  extra_spill_size += 8;
1851	  n_spilled += 1;
1852	}
1853
1854      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
1855	 registers are clobbered, so we fall back to the stack.  */
1856      current_frame_info.reg_save_gp
1857	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
1858      if (current_frame_info.reg_save_gp == 0)
1859	{
1860	  SET_HARD_REG_BIT (mask, GR_REG (1));
1861	  spill_size += 8;
1862	  n_spilled += 1;
1863	}
1864    }
1865  else
1866    {
1867      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
1868	{
1869	  SET_HARD_REG_BIT (mask, BR_REG (0));
1870	  spill_size += 8;
1871	  n_spilled += 1;
1872	}
1873
1874      if (regs_ever_live[AR_PFS_REGNUM])
1875	{
1876	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
1877	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
1878	  if (current_frame_info.reg_save_ar_pfs == 0)
1879	    {
1880	      extra_spill_size += 8;
1881	      n_spilled += 1;
1882	    }
1883	}
1884    }
1885
1886  /* Unwind descriptor hackery: things are most efficient if we allocate
1887     consecutive GR save registers for RP, PFS, FP in that order. However,
1888     it is absolutely critical that FP get the only hard register that's
1889     guaranteed to be free, so we allocated it first.  If all three did
1890     happen to be allocated hard regs, and are consecutive, rearrange them
1891     into the preferred order now.  */
1892  if (current_frame_info.reg_fp != 0
1893      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
1894      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
1895    {
1896      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
1897      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
1898      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
1899    }
1900
1901  /* See if we need to store the predicate register block.  */
1902  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1903    if (regs_ever_live[regno] && ! call_used_regs[regno])
1904      break;
1905  if (regno <= PR_REG (63))
1906    {
1907      SET_HARD_REG_BIT (mask, PR_REG (0));
1908      current_frame_info.reg_save_pr = find_gr_spill (1);
1909      if (current_frame_info.reg_save_pr == 0)
1910	{
1911	  extra_spill_size += 8;
1912	  n_spilled += 1;
1913	}
1914
1915      /* ??? Mark them all as used so that register renaming and such
1916	 are free to use them.  */
1917      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
1918	regs_ever_live[regno] = 1;
1919    }
1920
1921  /* If we're forced to use st8.spill, we're forced to save and restore
1922     ar.unat as well.  The check for existing liveness allows inline asm
1923     to touch ar.unat.  */
1924  if (spilled_gr_p || cfun->machine->n_varargs
1925      || regs_ever_live[AR_UNAT_REGNUM])
1926    {
1927      regs_ever_live[AR_UNAT_REGNUM] = 1;
1928      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
1929      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
1930      if (current_frame_info.reg_save_ar_unat == 0)
1931	{
1932	  extra_spill_size += 8;
1933	  n_spilled += 1;
1934	}
1935    }
1936
1937  if (regs_ever_live[AR_LC_REGNUM])
1938    {
1939      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
1940      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
1941      if (current_frame_info.reg_save_ar_lc == 0)
1942	{
1943	  extra_spill_size += 8;
1944	  n_spilled += 1;
1945	}
1946    }
1947
1948  /* If we have an odd number of words of pretend arguments written to
1949     the stack, then the FR save area will be unaligned.  We round the
1950     size of this area up to keep things 16 byte aligned.  */
1951  if (spilled_fr_p)
1952    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
1953  else
1954    pretend_args_size = current_function_pretend_args_size;
1955
1956  total_size = (spill_size + extra_spill_size + size + pretend_args_size
1957		+ current_function_outgoing_args_size);
1958  total_size = IA64_STACK_ALIGN (total_size);
1959
1960  /* We always use the 16-byte scratch area provided by the caller, but
1961     if we are a leaf function, there's no one to which we need to provide
1962     a scratch area.  */
1963  if (current_function_is_leaf)
1964    total_size = MAX (0, total_size - 16);
1965
1966  current_frame_info.total_size = total_size;
1967  current_frame_info.spill_cfa_off = pretend_args_size - 16;
1968  current_frame_info.spill_size = spill_size;
1969  current_frame_info.extra_spill_size = extra_spill_size;
1970  COPY_HARD_REG_SET (current_frame_info.mask, mask);
1971  current_frame_info.n_spilled = n_spilled;
1972  current_frame_info.initialized = reload_completed;
1973}
1974
1975/* Compute the initial difference between the specified pair of registers.  */
1976
1977HOST_WIDE_INT
1978ia64_initial_elimination_offset (from, to)
1979     int from, to;
1980{
1981  HOST_WIDE_INT offset;
1982
1983  ia64_compute_frame_size (get_frame_size ());
1984  switch (from)
1985    {
1986    case FRAME_POINTER_REGNUM:
1987      if (to == HARD_FRAME_POINTER_REGNUM)
1988	{
1989	  if (current_function_is_leaf)
1990	    offset = -current_frame_info.total_size;
1991	  else
1992	    offset = -(current_frame_info.total_size
1993		       - current_function_outgoing_args_size - 16);
1994	}
1995      else if (to == STACK_POINTER_REGNUM)
1996	{
1997	  if (current_function_is_leaf)
1998	    offset = 0;
1999	  else
2000	    offset = 16 + current_function_outgoing_args_size;
2001	}
2002      else
2003	abort ();
2004      break;
2005
2006    case ARG_POINTER_REGNUM:
2007      /* Arguments start above the 16 byte save area, unless stdarg
2008	 in which case we store through the 16 byte save area.  */
2009      if (to == HARD_FRAME_POINTER_REGNUM)
2010	offset = 16 - current_function_pretend_args_size;
2011      else if (to == STACK_POINTER_REGNUM)
2012	offset = (current_frame_info.total_size
2013		  + 16 - current_function_pretend_args_size);
2014      else
2015	abort ();
2016      break;
2017
2018    case RETURN_ADDRESS_POINTER_REGNUM:
2019      offset = 0;
2020      break;
2021
2022    default:
2023      abort ();
2024    }
2025
2026  return offset;
2027}
2028
2029/* If there are more than a trivial number of register spills, we use
2030   two interleaved iterators so that we can get two memory references
2031   per insn group.
2032
2033   In order to simplify things in the prologue and epilogue expanders,
2034   we use helper functions to fix up the memory references after the
2035   fact with the appropriate offsets to a POST_MODIFY memory mode.
2036   The following data structure tracks the state of the two iterators
2037   while insns are being emitted.  */
2038
2039struct spill_fill_data
2040{
2041  rtx init_after;		/* point at which to emit initializations */
2042  rtx init_reg[2];		/* initial base register */
2043  rtx iter_reg[2];		/* the iterator registers */
2044  rtx *prev_addr[2];		/* address of last memory use */
2045  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2046  HOST_WIDE_INT prev_off[2];	/* last offset */
2047  int n_iter;			/* number of iterators in use */
2048  int next_iter;		/* next iterator to use */
2049  unsigned int save_gr_used_mask;
2050};
2051
2052static struct spill_fill_data spill_fill_data;
2053
2054static void
2055setup_spill_pointers (n_spills, init_reg, cfa_off)
2056     int n_spills;
2057     rtx init_reg;
2058     HOST_WIDE_INT cfa_off;
2059{
2060  int i;
2061
2062  spill_fill_data.init_after = get_last_insn ();
2063  spill_fill_data.init_reg[0] = init_reg;
2064  spill_fill_data.init_reg[1] = init_reg;
2065  spill_fill_data.prev_addr[0] = NULL;
2066  spill_fill_data.prev_addr[1] = NULL;
2067  spill_fill_data.prev_insn[0] = NULL;
2068  spill_fill_data.prev_insn[1] = NULL;
2069  spill_fill_data.prev_off[0] = cfa_off;
2070  spill_fill_data.prev_off[1] = cfa_off;
2071  spill_fill_data.next_iter = 0;
2072  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2073
2074  spill_fill_data.n_iter = 1 + (n_spills > 2);
2075  for (i = 0; i < spill_fill_data.n_iter; ++i)
2076    {
2077      int regno = next_scratch_gr_reg ();
2078      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2079      current_frame_info.gr_used_mask |= 1 << regno;
2080    }
2081}
2082
2083static void
2084finish_spill_pointers ()
2085{
2086  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2087}
2088
2089static rtx
2090spill_restore_mem (reg, cfa_off)
2091     rtx reg;
2092     HOST_WIDE_INT cfa_off;
2093{
2094  int iter = spill_fill_data.next_iter;
2095  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2096  rtx disp_rtx = GEN_INT (disp);
2097  rtx mem;
2098
2099  if (spill_fill_data.prev_addr[iter])
2100    {
2101      if (CONST_OK_FOR_N (disp))
2102	{
2103	  *spill_fill_data.prev_addr[iter]
2104	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2105				   gen_rtx_PLUS (DImode,
2106						 spill_fill_data.iter_reg[iter],
2107						 disp_rtx));
2108	  REG_NOTES (spill_fill_data.prev_insn[iter])
2109	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2110				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2111	}
2112      else
2113	{
2114	  /* ??? Could use register post_modify for loads.  */
2115	  if (! CONST_OK_FOR_I (disp))
2116	    {
2117	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2118	      emit_move_insn (tmp, disp_rtx);
2119	      disp_rtx = tmp;
2120	    }
2121	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2122				 spill_fill_data.iter_reg[iter], disp_rtx));
2123	}
2124    }
2125  /* Micro-optimization: if we've created a frame pointer, it's at
2126     CFA 0, which may allow the real iterator to be initialized lower,
2127     slightly increasing parallelism.  Also, if there are few saves
2128     it may eliminate the iterator entirely.  */
2129  else if (disp == 0
2130	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2131	   && frame_pointer_needed)
2132    {
2133      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2134      set_mem_alias_set (mem, get_varargs_alias_set ());
2135      return mem;
2136    }
2137  else
2138    {
2139      rtx seq, insn;
2140
2141      if (disp == 0)
2142	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2143			 spill_fill_data.init_reg[iter]);
2144      else
2145	{
2146	  start_sequence ();
2147
2148	  if (! CONST_OK_FOR_I (disp))
2149	    {
2150	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2151	      emit_move_insn (tmp, disp_rtx);
2152	      disp_rtx = tmp;
2153	    }
2154
2155	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2156				 spill_fill_data.init_reg[iter],
2157				 disp_rtx));
2158
2159	  seq = get_insns ();
2160	  end_sequence ();
2161	}
2162
2163      /* Careful for being the first insn in a sequence.  */
2164      if (spill_fill_data.init_after)
2165	insn = emit_insn_after (seq, spill_fill_data.init_after);
2166      else
2167	{
2168	  rtx first = get_insns ();
2169	  if (first)
2170	    insn = emit_insn_before (seq, first);
2171	  else
2172	    insn = emit_insn (seq);
2173	}
2174      spill_fill_data.init_after = insn;
2175
2176      /* If DISP is 0, we may or may not have a further adjustment
2177	 afterward.  If we do, then the load/store insn may be modified
2178	 to be a post-modify.  If we don't, then this copy may be
2179	 eliminated by copyprop_hardreg_forward, which makes this
2180	 insn garbage, which runs afoul of the sanity check in
2181	 propagate_one_insn.  So mark this insn as legal to delete.  */
2182      if (disp == 0)
2183	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2184					     REG_NOTES (insn));
2185    }
2186
2187  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2188
2189  /* ??? Not all of the spills are for varargs, but some of them are.
2190     The rest of the spills belong in an alias set of their own.  But
2191     it doesn't actually hurt to include them here.  */
2192  set_mem_alias_set (mem, get_varargs_alias_set ());
2193
2194  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2195  spill_fill_data.prev_off[iter] = cfa_off;
2196
2197  if (++iter >= spill_fill_data.n_iter)
2198    iter = 0;
2199  spill_fill_data.next_iter = iter;
2200
2201  return mem;
2202}
2203
2204static void
2205do_spill (move_fn, reg, cfa_off, frame_reg)
2206     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2207     rtx reg, frame_reg;
2208     HOST_WIDE_INT cfa_off;
2209{
2210  int iter = spill_fill_data.next_iter;
2211  rtx mem, insn;
2212
2213  mem = spill_restore_mem (reg, cfa_off);
2214  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2215  spill_fill_data.prev_insn[iter] = insn;
2216
2217  if (frame_reg)
2218    {
2219      rtx base;
2220      HOST_WIDE_INT off;
2221
2222      RTX_FRAME_RELATED_P (insn) = 1;
2223
2224      /* Don't even pretend that the unwind code can intuit its way
2225	 through a pair of interleaved post_modify iterators.  Just
2226	 provide the correct answer.  */
2227
2228      if (frame_pointer_needed)
2229	{
2230	  base = hard_frame_pointer_rtx;
2231	  off = - cfa_off;
2232	}
2233      else
2234	{
2235	  base = stack_pointer_rtx;
2236	  off = current_frame_info.total_size - cfa_off;
2237	}
2238
2239      REG_NOTES (insn)
2240	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2241		gen_rtx_SET (VOIDmode,
2242			     gen_rtx_MEM (GET_MODE (reg),
2243					  plus_constant (base, off)),
2244			     frame_reg),
2245		REG_NOTES (insn));
2246    }
2247}
2248
2249static void
2250do_restore (move_fn, reg, cfa_off)
2251     rtx (*move_fn) PARAMS ((rtx, rtx, rtx));
2252     rtx reg;
2253     HOST_WIDE_INT cfa_off;
2254{
2255  int iter = spill_fill_data.next_iter;
2256  rtx insn;
2257
2258  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2259				GEN_INT (cfa_off)));
2260  spill_fill_data.prev_insn[iter] = insn;
2261}
2262
2263/* Wrapper functions that discards the CONST_INT spill offset.  These
2264   exist so that we can give gr_spill/gr_fill the offset they need and
2265   use a consistant function interface.  */
2266
2267static rtx
2268gen_movdi_x (dest, src, offset)
2269     rtx dest, src;
2270     rtx offset ATTRIBUTE_UNUSED;
2271{
2272  return gen_movdi (dest, src);
2273}
2274
2275static rtx
2276gen_fr_spill_x (dest, src, offset)
2277     rtx dest, src;
2278     rtx offset ATTRIBUTE_UNUSED;
2279{
2280  return gen_fr_spill (dest, src);
2281}
2282
2283static rtx
2284gen_fr_restore_x (dest, src, offset)
2285     rtx dest, src;
2286     rtx offset ATTRIBUTE_UNUSED;
2287{
2288  return gen_fr_restore (dest, src);
2289}
2290
2291/* Called after register allocation to add any instructions needed for the
2292   prologue.  Using a prologue insn is favored compared to putting all of the
2293   instructions in output_function_prologue(), since it allows the scheduler
2294   to intermix instructions with the saves of the caller saved registers.  In
2295   some cases, it might be necessary to emit a barrier instruction as the last
2296   insn to prevent such scheduling.
2297
2298   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2299   so that the debug info generation code can handle them properly.
2300
2301   The register save area is layed out like so:
2302   cfa+16
2303	[ varargs spill area ]
2304	[ fr register spill area ]
2305	[ br register spill area ]
2306	[ ar register spill area ]
2307	[ pr register spill area ]
2308	[ gr register spill area ] */
2309
2310/* ??? Get inefficient code when the frame size is larger than can fit in an
2311   adds instruction.  */
2312
2313void
2314ia64_expand_prologue ()
2315{
2316  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2317  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2318  rtx reg, alt_reg;
2319
2320  ia64_compute_frame_size (get_frame_size ());
2321  last_scratch_gr_reg = 15;
2322
2323  /* If there is no epilogue, then we don't need some prologue insns.
2324     We need to avoid emitting the dead prologue insns, because flow
2325     will complain about them.  */
2326  if (optimize)
2327    {
2328      edge e;
2329
2330      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2331	if ((e->flags & EDGE_FAKE) == 0
2332	    && (e->flags & EDGE_FALLTHRU) != 0)
2333	  break;
2334      epilogue_p = (e != NULL);
2335    }
2336  else
2337    epilogue_p = 1;
2338
2339  /* Set the local, input, and output register names.  We need to do this
2340     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2341     half.  If we use in/loc/out register names, then we get assembler errors
2342     in crtn.S because there is no alloc insn or regstk directive in there.  */
2343  if (! TARGET_REG_NAMES)
2344    {
2345      int inputs = current_frame_info.n_input_regs;
2346      int locals = current_frame_info.n_local_regs;
2347      int outputs = current_frame_info.n_output_regs;
2348
2349      for (i = 0; i < inputs; i++)
2350	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2351      for (i = 0; i < locals; i++)
2352	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2353      for (i = 0; i < outputs; i++)
2354	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2355    }
2356
2357  /* Set the frame pointer register name.  The regnum is logically loc79,
2358     but of course we'll not have allocated that many locals.  Rather than
2359     worrying about renumbering the existing rtxs, we adjust the name.  */
2360  /* ??? This code means that we can never use one local register when
2361     there is a frame pointer.  loc79 gets wasted in this case, as it is
2362     renamed to a register that will never be used.  See also the try_locals
2363     code in find_gr_spill.  */
2364  if (current_frame_info.reg_fp)
2365    {
2366      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2367      reg_names[HARD_FRAME_POINTER_REGNUM]
2368	= reg_names[current_frame_info.reg_fp];
2369      reg_names[current_frame_info.reg_fp] = tmp;
2370    }
2371
2372  /* Fix up the return address placeholder.  */
2373  /* ??? We can fail if __builtin_return_address is used, and we didn't
2374     allocate a register in which to save b0.  I can't think of a way to
2375     eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and
2376     then be sure that I got the right one.  Further, reload doesn't seem
2377     to care if an eliminable register isn't used, and "eliminates" it
2378     anyway.  */
2379  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM]
2380      && current_frame_info.reg_save_b0 != 0)
2381    XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0;
2382
2383  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2384  if (current_frame_info.n_local_regs == 0
2385      && current_frame_info.n_output_regs == 0
2386      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2387      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2388    {
2389      /* If there is no alloc, but there are input registers used, then we
2390	 need a .regstk directive.  */
2391      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2392      ar_pfs_save_reg = NULL_RTX;
2393    }
2394  else
2395    {
2396      current_frame_info.need_regstk = 0;
2397
2398      if (current_frame_info.reg_save_ar_pfs)
2399	regno = current_frame_info.reg_save_ar_pfs;
2400      else
2401	regno = next_scratch_gr_reg ();
2402      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2403
2404      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2405				   GEN_INT (current_frame_info.n_input_regs),
2406				   GEN_INT (current_frame_info.n_local_regs),
2407				   GEN_INT (current_frame_info.n_output_regs),
2408				   GEN_INT (current_frame_info.n_rotate_regs)));
2409      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2410    }
2411
2412  /* Set up frame pointer, stack pointer, and spill iterators.  */
2413
2414  n_varargs = cfun->machine->n_varargs;
2415  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2416			stack_pointer_rtx, 0);
2417
2418  if (frame_pointer_needed)
2419    {
2420      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2421      RTX_FRAME_RELATED_P (insn) = 1;
2422    }
2423
2424  if (current_frame_info.total_size != 0)
2425    {
2426      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2427      rtx offset;
2428
2429      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2430	offset = frame_size_rtx;
2431      else
2432	{
2433	  regno = next_scratch_gr_reg ();
2434 	  offset = gen_rtx_REG (DImode, regno);
2435	  emit_move_insn (offset, frame_size_rtx);
2436	}
2437
2438      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2439				    stack_pointer_rtx, offset));
2440
2441      if (! frame_pointer_needed)
2442	{
2443	  RTX_FRAME_RELATED_P (insn) = 1;
2444	  if (GET_CODE (offset) != CONST_INT)
2445	    {
2446	      REG_NOTES (insn)
2447		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2448			gen_rtx_SET (VOIDmode,
2449				     stack_pointer_rtx,
2450				     gen_rtx_PLUS (DImode,
2451						   stack_pointer_rtx,
2452						   frame_size_rtx)),
2453			REG_NOTES (insn));
2454	    }
2455	}
2456
2457      /* ??? At this point we must generate a magic insn that appears to
2458	 modify the stack pointer, the frame pointer, and all spill
2459	 iterators.  This would allow the most scheduling freedom.  For
2460	 now, just hard stop.  */
2461      emit_insn (gen_blockage ());
2462    }
2463
2464  /* Must copy out ar.unat before doing any integer spills.  */
2465  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2466    {
2467      if (current_frame_info.reg_save_ar_unat)
2468	ar_unat_save_reg
2469	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2470      else
2471	{
2472	  alt_regno = next_scratch_gr_reg ();
2473	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2474	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2475	}
2476
2477      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2478      insn = emit_move_insn (ar_unat_save_reg, reg);
2479      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2480
2481      /* Even if we're not going to generate an epilogue, we still
2482	 need to save the register so that EH works.  */
2483      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2484	emit_insn (gen_prologue_use (ar_unat_save_reg));
2485    }
2486  else
2487    ar_unat_save_reg = NULL_RTX;
2488
2489  /* Spill all varargs registers.  Do this before spilling any GR registers,
2490     since we want the UNAT bits for the GR registers to override the UNAT
2491     bits from varargs, which we don't care about.  */
2492
2493  cfa_off = -16;
2494  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2495    {
2496      reg = gen_rtx_REG (DImode, regno);
2497      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2498    }
2499
2500  /* Locate the bottom of the register save area.  */
2501  cfa_off = (current_frame_info.spill_cfa_off
2502	     + current_frame_info.spill_size
2503	     + current_frame_info.extra_spill_size);
2504
2505  /* Save the predicate register block either in a register or in memory.  */
2506  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2507    {
2508      reg = gen_rtx_REG (DImode, PR_REG (0));
2509      if (current_frame_info.reg_save_pr != 0)
2510	{
2511	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2512	  insn = emit_move_insn (alt_reg, reg);
2513
2514	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2515	     64 hard registers.  */
2516	  RTX_FRAME_RELATED_P (insn) = 1;
2517	  REG_NOTES (insn)
2518	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2519			gen_rtx_SET (VOIDmode, alt_reg, reg),
2520			REG_NOTES (insn));
2521
2522	  /* Even if we're not going to generate an epilogue, we still
2523	     need to save the register so that EH works.  */
2524	  if (! epilogue_p)
2525	    emit_insn (gen_prologue_use (alt_reg));
2526	}
2527      else
2528	{
2529	  alt_regno = next_scratch_gr_reg ();
2530	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2531	  insn = emit_move_insn (alt_reg, reg);
2532	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2533	  cfa_off -= 8;
2534	}
2535    }
2536
2537  /* Handle AR regs in numerical order.  All of them get special handling.  */
2538  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2539      && current_frame_info.reg_save_ar_unat == 0)
2540    {
2541      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2542      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2543      cfa_off -= 8;
2544    }
2545
2546  /* The alloc insn already copied ar.pfs into a general register.  The
2547     only thing we have to do now is copy that register to a stack slot
2548     if we'd not allocated a local register for the job.  */
2549  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2550      && current_frame_info.reg_save_ar_pfs == 0)
2551    {
2552      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2553      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2554      cfa_off -= 8;
2555    }
2556
2557  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2558    {
2559      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2560      if (current_frame_info.reg_save_ar_lc != 0)
2561	{
2562	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2563	  insn = emit_move_insn (alt_reg, reg);
2564	  RTX_FRAME_RELATED_P (insn) = 1;
2565
2566	  /* Even if we're not going to generate an epilogue, we still
2567	     need to save the register so that EH works.  */
2568	  if (! epilogue_p)
2569	    emit_insn (gen_prologue_use (alt_reg));
2570	}
2571      else
2572	{
2573	  alt_regno = next_scratch_gr_reg ();
2574	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2575	  emit_move_insn (alt_reg, reg);
2576	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2577	  cfa_off -= 8;
2578	}
2579    }
2580
2581  if (current_frame_info.reg_save_gp)
2582    {
2583      insn = emit_move_insn (gen_rtx_REG (DImode,
2584					  current_frame_info.reg_save_gp),
2585			     pic_offset_table_rtx);
2586      /* We don't know for sure yet if this is actually needed, since
2587	 we've not split the PIC call patterns.  If all of the calls
2588	 are indirect, and not followed by any uses of the gp, then
2589	 this save is dead.  Allow it to go away.  */
2590      REG_NOTES (insn)
2591	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2592    }
2593
2594  /* We should now be at the base of the gr/br/fr spill area.  */
2595  if (cfa_off != (current_frame_info.spill_cfa_off
2596		  + current_frame_info.spill_size))
2597    abort ();
2598
2599  /* Spill all general registers.  */
2600  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2601    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2602      {
2603	reg = gen_rtx_REG (DImode, regno);
2604	do_spill (gen_gr_spill, reg, cfa_off, reg);
2605	cfa_off -= 8;
2606      }
2607
2608  /* Handle BR0 specially -- it may be getting stored permanently in
2609     some GR register.  */
2610  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2611    {
2612      reg = gen_rtx_REG (DImode, BR_REG (0));
2613      if (current_frame_info.reg_save_b0 != 0)
2614	{
2615	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2616	  insn = emit_move_insn (alt_reg, reg);
2617	  RTX_FRAME_RELATED_P (insn) = 1;
2618
2619	  /* Even if we're not going to generate an epilogue, we still
2620	     need to save the register so that EH works.  */
2621	  if (! epilogue_p)
2622	    emit_insn (gen_prologue_use (alt_reg));
2623	}
2624      else
2625	{
2626	  alt_regno = next_scratch_gr_reg ();
2627	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2628	  emit_move_insn (alt_reg, reg);
2629	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2630	  cfa_off -= 8;
2631	}
2632    }
2633
2634  /* Spill the rest of the BR registers.  */
2635  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2636    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2637      {
2638	alt_regno = next_scratch_gr_reg ();
2639	alt_reg = gen_rtx_REG (DImode, alt_regno);
2640	reg = gen_rtx_REG (DImode, regno);
2641	emit_move_insn (alt_reg, reg);
2642	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2643	cfa_off -= 8;
2644      }
2645
2646  /* Align the frame and spill all FR registers.  */
2647  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2648    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2649      {
2650        if (cfa_off & 15)
2651	  abort ();
2652	reg = gen_rtx_REG (TFmode, regno);
2653	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2654	cfa_off -= 16;
2655      }
2656
2657  if (cfa_off != current_frame_info.spill_cfa_off)
2658    abort ();
2659
2660  finish_spill_pointers ();
2661}
2662
2663/* Called after register allocation to add any instructions needed for the
2664   epilogue.  Using an epilogue insn is favored compared to putting all of the
2665   instructions in output_function_prologue(), since it allows the scheduler
2666   to intermix instructions with the saves of the caller saved registers.  In
2667   some cases, it might be necessary to emit a barrier instruction as the last
2668   insn to prevent such scheduling.  */
2669
2670void
2671ia64_expand_epilogue (sibcall_p)
2672     int sibcall_p;
2673{
2674  rtx insn, reg, alt_reg, ar_unat_save_reg;
2675  int regno, alt_regno, cfa_off;
2676
2677  ia64_compute_frame_size (get_frame_size ());
2678
2679  /* If there is a frame pointer, then we use it instead of the stack
2680     pointer, so that the stack pointer does not need to be valid when
2681     the epilogue starts.  See EXIT_IGNORE_STACK.  */
2682  if (frame_pointer_needed)
2683    setup_spill_pointers (current_frame_info.n_spilled,
2684			  hard_frame_pointer_rtx, 0);
2685  else
2686    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2687			  current_frame_info.total_size);
2688
2689  if (current_frame_info.total_size != 0)
2690    {
2691      /* ??? At this point we must generate a magic insn that appears to
2692         modify the spill iterators and the frame pointer.  This would
2693	 allow the most scheduling freedom.  For now, just hard stop.  */
2694      emit_insn (gen_blockage ());
2695    }
2696
2697  /* Locate the bottom of the register save area.  */
2698  cfa_off = (current_frame_info.spill_cfa_off
2699	     + current_frame_info.spill_size
2700	     + current_frame_info.extra_spill_size);
2701
2702  /* Restore the predicate registers.  */
2703  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2704    {
2705      if (current_frame_info.reg_save_pr != 0)
2706	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2707      else
2708	{
2709	  alt_regno = next_scratch_gr_reg ();
2710	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2711	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2712	  cfa_off -= 8;
2713	}
2714      reg = gen_rtx_REG (DImode, PR_REG (0));
2715      emit_move_insn (reg, alt_reg);
2716    }
2717
2718  /* Restore the application registers.  */
2719
2720  /* Load the saved unat from the stack, but do not restore it until
2721     after the GRs have been restored.  */
2722  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2723    {
2724      if (current_frame_info.reg_save_ar_unat != 0)
2725        ar_unat_save_reg
2726	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2727      else
2728	{
2729	  alt_regno = next_scratch_gr_reg ();
2730	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2731	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2732	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2733	  cfa_off -= 8;
2734	}
2735    }
2736  else
2737    ar_unat_save_reg = NULL_RTX;
2738
2739  if (current_frame_info.reg_save_ar_pfs != 0)
2740    {
2741      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2742      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2743      emit_move_insn (reg, alt_reg);
2744    }
2745  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2746    {
2747      alt_regno = next_scratch_gr_reg ();
2748      alt_reg = gen_rtx_REG (DImode, alt_regno);
2749      do_restore (gen_movdi_x, alt_reg, cfa_off);
2750      cfa_off -= 8;
2751      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2752      emit_move_insn (reg, alt_reg);
2753    }
2754
2755  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2756    {
2757      if (current_frame_info.reg_save_ar_lc != 0)
2758	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2759      else
2760	{
2761	  alt_regno = next_scratch_gr_reg ();
2762	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2763	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2764	  cfa_off -= 8;
2765	}
2766      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2767      emit_move_insn (reg, alt_reg);
2768    }
2769
2770  /* We should now be at the base of the gr/br/fr spill area.  */
2771  if (cfa_off != (current_frame_info.spill_cfa_off
2772		  + current_frame_info.spill_size))
2773    abort ();
2774
2775  /* The GP may be stored on the stack in the prologue, but it's
2776     never restored in the epilogue.  Skip the stack slot.  */
2777  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
2778    cfa_off -= 8;
2779
2780  /* Restore all general registers.  */
2781  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
2782    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2783      {
2784	reg = gen_rtx_REG (DImode, regno);
2785	do_restore (gen_gr_restore, reg, cfa_off);
2786	cfa_off -= 8;
2787      }
2788
2789  /* Restore the branch registers.  Handle B0 specially, as it may
2790     have gotten stored in some GR register.  */
2791  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2792    {
2793      if (current_frame_info.reg_save_b0 != 0)
2794	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2795      else
2796	{
2797	  alt_regno = next_scratch_gr_reg ();
2798	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2799	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2800	  cfa_off -= 8;
2801	}
2802      reg = gen_rtx_REG (DImode, BR_REG (0));
2803      emit_move_insn (reg, alt_reg);
2804    }
2805
2806  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2807    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2808      {
2809	alt_regno = next_scratch_gr_reg ();
2810	alt_reg = gen_rtx_REG (DImode, alt_regno);
2811	do_restore (gen_movdi_x, alt_reg, cfa_off);
2812	cfa_off -= 8;
2813	reg = gen_rtx_REG (DImode, regno);
2814	emit_move_insn (reg, alt_reg);
2815      }
2816
2817  /* Restore floating point registers.  */
2818  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2819    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2820      {
2821        if (cfa_off & 15)
2822	  abort ();
2823	reg = gen_rtx_REG (TFmode, regno);
2824	do_restore (gen_fr_restore_x, reg, cfa_off);
2825	cfa_off -= 16;
2826      }
2827
2828  /* Restore ar.unat for real.  */
2829  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2830    {
2831      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2832      emit_move_insn (reg, ar_unat_save_reg);
2833    }
2834
2835  if (cfa_off != current_frame_info.spill_cfa_off)
2836    abort ();
2837
2838  finish_spill_pointers ();
2839
2840  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
2841    {
2842      /* ??? At this point we must generate a magic insn that appears to
2843         modify the spill iterators, the stack pointer, and the frame
2844	 pointer.  This would allow the most scheduling freedom.  For now,
2845	 just hard stop.  */
2846      emit_insn (gen_blockage ());
2847    }
2848
2849  if (cfun->machine->ia64_eh_epilogue_sp)
2850    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
2851  else if (frame_pointer_needed)
2852    {
2853      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
2854      RTX_FRAME_RELATED_P (insn) = 1;
2855    }
2856  else if (current_frame_info.total_size)
2857    {
2858      rtx offset, frame_size_rtx;
2859
2860      frame_size_rtx = GEN_INT (current_frame_info.total_size);
2861      if (CONST_OK_FOR_I (current_frame_info.total_size))
2862	offset = frame_size_rtx;
2863      else
2864	{
2865	  regno = next_scratch_gr_reg ();
2866	  offset = gen_rtx_REG (DImode, regno);
2867	  emit_move_insn (offset, frame_size_rtx);
2868	}
2869
2870      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
2871				    offset));
2872
2873      RTX_FRAME_RELATED_P (insn) = 1;
2874      if (GET_CODE (offset) != CONST_INT)
2875	{
2876	  REG_NOTES (insn)
2877	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2878			gen_rtx_SET (VOIDmode,
2879				     stack_pointer_rtx,
2880				     gen_rtx_PLUS (DImode,
2881						   stack_pointer_rtx,
2882						   frame_size_rtx)),
2883			REG_NOTES (insn));
2884	}
2885    }
2886
2887  if (cfun->machine->ia64_eh_epilogue_bsp)
2888    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
2889
2890  if (! sibcall_p)
2891    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
2892  else
2893    {
2894      int fp = GR_REG (2);
2895      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
2896	 first available call clobbered register.  If there was a frame_pointer
2897	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
2898	 so we have to make sure we're using the string "r2" when emitting
2899	 the register name for the assmbler.  */
2900      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
2901	fp = HARD_FRAME_POINTER_REGNUM;
2902
2903      /* We must emit an alloc to force the input registers to become output
2904	 registers.  Otherwise, if the callee tries to pass its parameters
2905	 through to another call without an intervening alloc, then these
2906	 values get lost.  */
2907      /* ??? We don't need to preserve all input registers.  We only need to
2908	 preserve those input registers used as arguments to the sibling call.
2909	 It is unclear how to compute that number here.  */
2910      if (current_frame_info.n_input_regs != 0)
2911	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
2912			      GEN_INT (0), GEN_INT (0),
2913			      GEN_INT (current_frame_info.n_input_regs),
2914			      GEN_INT (0)));
2915    }
2916}
2917
2918/* Return 1 if br.ret can do all the work required to return from a
2919   function.  */
2920
2921int
2922ia64_direct_return ()
2923{
2924  if (reload_completed && ! frame_pointer_needed)
2925    {
2926      ia64_compute_frame_size (get_frame_size ());
2927
2928      return (current_frame_info.total_size == 0
2929	      && current_frame_info.n_spilled == 0
2930	      && current_frame_info.reg_save_b0 == 0
2931	      && current_frame_info.reg_save_pr == 0
2932	      && current_frame_info.reg_save_ar_pfs == 0
2933	      && current_frame_info.reg_save_ar_unat == 0
2934	      && current_frame_info.reg_save_ar_lc == 0);
2935    }
2936  return 0;
2937}
2938
2939int
2940ia64_hard_regno_rename_ok (from, to)
2941     int from;
2942     int to;
2943{
2944  /* Don't clobber any of the registers we reserved for the prologue.  */
2945  if (to == current_frame_info.reg_fp
2946      || to == current_frame_info.reg_save_b0
2947      || to == current_frame_info.reg_save_pr
2948      || to == current_frame_info.reg_save_ar_pfs
2949      || to == current_frame_info.reg_save_ar_unat
2950      || to == current_frame_info.reg_save_ar_lc)
2951    return 0;
2952
2953  if (from == current_frame_info.reg_fp
2954      || from == current_frame_info.reg_save_b0
2955      || from == current_frame_info.reg_save_pr
2956      || from == current_frame_info.reg_save_ar_pfs
2957      || from == current_frame_info.reg_save_ar_unat
2958      || from == current_frame_info.reg_save_ar_lc)
2959    return 0;
2960
2961  /* Don't use output registers outside the register frame.  */
2962  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
2963    return 0;
2964
2965  /* Retain even/oddness on predicate register pairs.  */
2966  if (PR_REGNO_P (from) && PR_REGNO_P (to))
2967    return (from & 1) == (to & 1);
2968
2969  return 1;
2970}
2971
2972/* Target hook for assembling integer objects.  Handle word-sized
2973   aligned objects and detect the cases when @fptr is needed.  */
2974
2975static bool
2976ia64_assemble_integer (x, size, aligned_p)
2977     rtx x;
2978     unsigned int size;
2979     int aligned_p;
2980{
2981  if (size == (TARGET_ILP32 ? 4 : 8)
2982      && aligned_p
2983      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
2984      && GET_CODE (x) == SYMBOL_REF
2985      && SYMBOL_REF_FLAG (x))
2986    {
2987      if (TARGET_ILP32)
2988	fputs ("\tdata4\t@fptr(", asm_out_file);
2989      else
2990	fputs ("\tdata8\t@fptr(", asm_out_file);
2991      output_addr_const (asm_out_file, x);
2992      fputs (")\n", asm_out_file);
2993      return true;
2994    }
2995  return default_assemble_integer (x, size, aligned_p);
2996}
2997
2998/* Emit the function prologue.  */
2999
3000static void
3001ia64_output_function_prologue (file, size)
3002     FILE *file;
3003     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3004{
3005  int mask, grsave, grsave_prev;
3006
3007  if (current_frame_info.need_regstk)
3008    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3009	     current_frame_info.n_input_regs,
3010	     current_frame_info.n_local_regs,
3011	     current_frame_info.n_output_regs,
3012	     current_frame_info.n_rotate_regs);
3013
3014  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3015    return;
3016
3017  /* Emit the .prologue directive.  */
3018
3019  mask = 0;
3020  grsave = grsave_prev = 0;
3021  if (current_frame_info.reg_save_b0 != 0)
3022    {
3023      mask |= 8;
3024      grsave = grsave_prev = current_frame_info.reg_save_b0;
3025    }
3026  if (current_frame_info.reg_save_ar_pfs != 0
3027      && (grsave_prev == 0
3028	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3029    {
3030      mask |= 4;
3031      if (grsave_prev == 0)
3032	grsave = current_frame_info.reg_save_ar_pfs;
3033      grsave_prev = current_frame_info.reg_save_ar_pfs;
3034    }
3035  if (current_frame_info.reg_fp != 0
3036      && (grsave_prev == 0
3037	  || current_frame_info.reg_fp == grsave_prev + 1))
3038    {
3039      mask |= 2;
3040      if (grsave_prev == 0)
3041	grsave = HARD_FRAME_POINTER_REGNUM;
3042      grsave_prev = current_frame_info.reg_fp;
3043    }
3044  if (current_frame_info.reg_save_pr != 0
3045      && (grsave_prev == 0
3046	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3047    {
3048      mask |= 1;
3049      if (grsave_prev == 0)
3050	grsave = current_frame_info.reg_save_pr;
3051    }
3052
3053  if (mask)
3054    fprintf (file, "\t.prologue %d, %d\n", mask,
3055	     ia64_dbx_register_number (grsave));
3056  else
3057    fputs ("\t.prologue\n", file);
3058
3059  /* Emit a .spill directive, if necessary, to relocate the base of
3060     the register spill area.  */
3061  if (current_frame_info.spill_cfa_off != -16)
3062    fprintf (file, "\t.spill %ld\n",
3063	     (long) (current_frame_info.spill_cfa_off
3064		     + current_frame_info.spill_size));
3065}
3066
3067/* Emit the .body directive at the scheduled end of the prologue.  */
3068
3069static void
3070ia64_output_function_end_prologue (file)
3071     FILE *file;
3072{
3073  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3074    return;
3075
3076  fputs ("\t.body\n", file);
3077}
3078
3079/* Emit the function epilogue.  */
3080
3081static void
3082ia64_output_function_epilogue (file, size)
3083     FILE *file ATTRIBUTE_UNUSED;
3084     HOST_WIDE_INT size ATTRIBUTE_UNUSED;
3085{
3086  int i;
3087
3088  /* Reset from the function's potential modifications.  */
3089  XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM;
3090
3091  if (current_frame_info.reg_fp)
3092    {
3093      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3094      reg_names[HARD_FRAME_POINTER_REGNUM]
3095	= reg_names[current_frame_info.reg_fp];
3096      reg_names[current_frame_info.reg_fp] = tmp;
3097    }
3098  if (! TARGET_REG_NAMES)
3099    {
3100      for (i = 0; i < current_frame_info.n_input_regs; i++)
3101	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3102      for (i = 0; i < current_frame_info.n_local_regs; i++)
3103	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3104      for (i = 0; i < current_frame_info.n_output_regs; i++)
3105	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3106    }
3107
3108  current_frame_info.initialized = 0;
3109}
3110
3111int
3112ia64_dbx_register_number (regno)
3113     int regno;
3114{
3115  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3116     from its home at loc79 to something inside the register frame.  We
3117     must perform the same renumbering here for the debug info.  */
3118  if (current_frame_info.reg_fp)
3119    {
3120      if (regno == HARD_FRAME_POINTER_REGNUM)
3121	regno = current_frame_info.reg_fp;
3122      else if (regno == current_frame_info.reg_fp)
3123	regno = HARD_FRAME_POINTER_REGNUM;
3124    }
3125
3126  if (IN_REGNO_P (regno))
3127    return 32 + regno - IN_REG (0);
3128  else if (LOC_REGNO_P (regno))
3129    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3130  else if (OUT_REGNO_P (regno))
3131    return (32 + current_frame_info.n_input_regs
3132	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3133  else
3134    return regno;
3135}
3136
3137void
3138ia64_initialize_trampoline (addr, fnaddr, static_chain)
3139     rtx addr, fnaddr, static_chain;
3140{
3141  rtx addr_reg, eight = GEN_INT (8);
3142
3143  /* Load up our iterator.  */
3144  addr_reg = gen_reg_rtx (Pmode);
3145  emit_move_insn (addr_reg, addr);
3146
3147  /* The first two words are the fake descriptor:
3148     __ia64_trampoline, ADDR+16.  */
3149  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3150		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3151  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3152
3153  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3154		  copy_to_reg (plus_constant (addr, 16)));
3155  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3156
3157  /* The third word is the target descriptor.  */
3158  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3159  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3160
3161  /* The fourth word is the static chain.  */
3162  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3163}
3164
3165/* Do any needed setup for a variadic function.  CUM has not been updated
3166   for the last named argument which has type TYPE and mode MODE.
3167
3168   We generate the actual spill instructions during prologue generation.  */
3169
3170void
3171ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
3172     CUMULATIVE_ARGS cum;
3173     int             int_mode;
3174     tree            type;
3175     int *           pretend_size;
3176     int	     second_time ATTRIBUTE_UNUSED;
3177{
3178  /* Skip the current argument.  */
3179  ia64_function_arg_advance (&cum, int_mode, type, 1);
3180
3181  if (cum.words < MAX_ARGUMENT_SLOTS)
3182    {
3183      int n = MAX_ARGUMENT_SLOTS - cum.words;
3184      *pretend_size = n * UNITS_PER_WORD;
3185      cfun->machine->n_varargs = n;
3186    }
3187}
3188
3189/* Check whether TYPE is a homogeneous floating point aggregate.  If
3190   it is, return the mode of the floating point type that appears
3191   in all leafs.  If it is not, return VOIDmode.
3192
3193   An aggregate is a homogeneous floating point aggregate is if all
3194   fields/elements in it have the same floating point type (e.g,
3195   SFmode).  128-bit quad-precision floats are excluded.  */
3196
3197static enum machine_mode
3198hfa_element_mode (type, nested)
3199     tree type;
3200     int nested;
3201{
3202  enum machine_mode element_mode = VOIDmode;
3203  enum machine_mode mode;
3204  enum tree_code code = TREE_CODE (type);
3205  int know_element_mode = 0;
3206  tree t;
3207
3208  switch (code)
3209    {
3210    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3211    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3212    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3213    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3214    case FUNCTION_TYPE:
3215      return VOIDmode;
3216
3217      /* Fortran complex types are supposed to be HFAs, so we need to handle
3218	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3219	 types though.  */
3220    case COMPLEX_TYPE:
3221      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3222	  && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT))
3223	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
3224			      * BITS_PER_UNIT, MODE_FLOAT, 0);
3225      else
3226	return VOIDmode;
3227
3228    case REAL_TYPE:
3229      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3230	 mode if this is contained within an aggregate.  */
3231      if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT))
3232	return TYPE_MODE (type);
3233      else
3234	return VOIDmode;
3235
3236    case ARRAY_TYPE:
3237      return hfa_element_mode (TREE_TYPE (type), 1);
3238
3239    case RECORD_TYPE:
3240    case UNION_TYPE:
3241    case QUAL_UNION_TYPE:
3242      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3243	{
3244	  if (TREE_CODE (t) != FIELD_DECL)
3245	    continue;
3246
3247	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3248	  if (know_element_mode)
3249	    {
3250	      if (mode != element_mode)
3251		return VOIDmode;
3252	    }
3253	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3254	    return VOIDmode;
3255	  else
3256	    {
3257	      know_element_mode = 1;
3258	      element_mode = mode;
3259	    }
3260	}
3261      return element_mode;
3262
3263    default:
3264      /* If we reach here, we probably have some front-end specific type
3265	 that the backend doesn't know about.  This can happen via the
3266	 aggregate_value_p call in init_function_start.  All we can do is
3267	 ignore unknown tree types.  */
3268      return VOIDmode;
3269    }
3270
3271  return VOIDmode;
3272}
3273
3274/* Return rtx for register where argument is passed, or zero if it is passed
3275   on the stack.  */
3276
3277/* ??? 128-bit quad-precision floats are always passed in general
3278   registers.  */
3279
3280rtx
3281ia64_function_arg (cum, mode, type, named, incoming)
3282     CUMULATIVE_ARGS *cum;
3283     enum machine_mode mode;
3284     tree type;
3285     int named;
3286     int incoming;
3287{
3288  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3289  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3290		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3291	       / UNITS_PER_WORD);
3292  int offset = 0;
3293  enum machine_mode hfa_mode = VOIDmode;
3294
3295  /* Integer and float arguments larger than 8 bytes start at the next even
3296     boundary.  Aggregates larger than 8 bytes start at the next even boundary
3297     if the aggregate has 16 byte alignment.  Net effect is that types with
3298     alignment greater than 8 start at the next even boundary.  */
3299  /* ??? The ABI does not specify how to handle aggregates with alignment from
3300     9 to 15 bytes, or greater than 16.   We handle them all as if they had
3301     16 byte alignment.  Such aggregates can occur only if gcc extensions are
3302     used.  */
3303  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3304       : (words > 1))
3305      && (cum->words & 1))
3306    offset = 1;
3307
3308  /* If all argument slots are used, then it must go on the stack.  */
3309  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3310    return 0;
3311
3312  /* Check for and handle homogeneous FP aggregates.  */
3313  if (type)
3314    hfa_mode = hfa_element_mode (type, 0);
3315
3316  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3317     and unprototyped hfas are passed specially.  */
3318  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3319    {
3320      rtx loc[16];
3321      int i = 0;
3322      int fp_regs = cum->fp_regs;
3323      int int_regs = cum->words + offset;
3324      int hfa_size = GET_MODE_SIZE (hfa_mode);
3325      int byte_size;
3326      int args_byte_size;
3327
3328      /* If prototyped, pass it in FR regs then GR regs.
3329	 If not prototyped, pass it in both FR and GR regs.
3330
3331	 If this is an SFmode aggregate, then it is possible to run out of
3332	 FR regs while GR regs are still left.  In that case, we pass the
3333	 remaining part in the GR regs.  */
3334
3335      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3336	 of the argument, the last FP register, or the last argument slot.  */
3337
3338      byte_size = ((mode == BLKmode)
3339		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3340      args_byte_size = int_regs * UNITS_PER_WORD;
3341      offset = 0;
3342      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3343	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3344	{
3345	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3346				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3347							      + fp_regs)),
3348				      GEN_INT (offset));
3349	  offset += hfa_size;
3350	  args_byte_size += hfa_size;
3351	  fp_regs++;
3352	}
3353
3354      /* If no prototype, then the whole thing must go in GR regs.  */
3355      if (! cum->prototype)
3356	offset = 0;
3357      /* If this is an SFmode aggregate, then we might have some left over
3358	 that needs to go in GR regs.  */
3359      else if (byte_size != offset)
3360	int_regs += offset / UNITS_PER_WORD;
3361
3362      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3363
3364      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3365	{
3366	  enum machine_mode gr_mode = DImode;
3367
3368	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3369	     then this goes in a GR reg left adjusted/little endian, right
3370	     adjusted/big endian.  */
3371	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3372	     always right adjusted/little endian.  */
3373	  if (offset & 0x4)
3374	    gr_mode = SImode;
3375	  /* If we have an even 4 byte hunk because the aggregate is a
3376	     multiple of 4 bytes in size, then this goes in a GR reg right
3377	     adjusted/little endian.  */
3378	  else if (byte_size - offset == 4)
3379	    gr_mode = SImode;
3380	  /* Complex floats need to have float mode.  */
3381	  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3382	    gr_mode = hfa_mode;
3383
3384	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3385				      gen_rtx_REG (gr_mode, (basereg
3386							     + int_regs)),
3387				      GEN_INT (offset));
3388	  offset += GET_MODE_SIZE (gr_mode);
3389	  int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD
3390		      ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD;
3391	}
3392
3393      /* If we ended up using just one location, just return that one loc.  */
3394      if (i == 1)
3395	return XEXP (loc[0], 0);
3396      else
3397	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3398    }
3399
3400  /* Integral and aggregates go in general registers.  If we have run out of
3401     FR registers, then FP values must also go in general registers.  This can
3402     happen when we have a SFmode HFA.  */
3403  else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT)
3404          || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3405    {
3406      int byte_size = ((mode == BLKmode)
3407                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3408      if (BYTES_BIG_ENDIAN
3409	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3410	&& byte_size < UNITS_PER_WORD
3411	&& byte_size > 0)
3412	{
3413	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3414					  gen_rtx_REG (DImode,
3415						       (basereg + cum->words
3416							+ offset)),
3417					  const0_rtx);
3418	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3419	}
3420      else
3421	return gen_rtx_REG (mode, basereg + cum->words + offset);
3422
3423    }
3424
3425  /* If there is a prototype, then FP values go in a FR register when
3426     named, and in a GR registeer when unnamed.  */
3427  else if (cum->prototype)
3428    {
3429      if (! named)
3430	return gen_rtx_REG (mode, basereg + cum->words + offset);
3431      else
3432	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3433    }
3434  /* If there is no prototype, then FP values go in both FR and GR
3435     registers.  */
3436  else
3437    {
3438      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3439				      gen_rtx_REG (mode, (FR_ARG_FIRST
3440							  + cum->fp_regs)),
3441				      const0_rtx);
3442      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3443				      gen_rtx_REG (mode,
3444						   (basereg + cum->words
3445						    + offset)),
3446				      const0_rtx);
3447
3448      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3449    }
3450}
3451
3452/* Return number of words, at the beginning of the argument, that must be
3453   put in registers.  0 is the argument is entirely in registers or entirely
3454   in memory.  */
3455
3456int
3457ia64_function_arg_partial_nregs (cum, mode, type, named)
3458     CUMULATIVE_ARGS *cum;
3459     enum machine_mode mode;
3460     tree type;
3461     int named ATTRIBUTE_UNUSED;
3462{
3463  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3464		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3465	       / UNITS_PER_WORD);
3466  int offset = 0;
3467
3468  /* Arguments with alignment larger than 8 bytes start at the next even
3469     boundary.  */
3470  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3471       : (words > 1))
3472      && (cum->words & 1))
3473    offset = 1;
3474
3475  /* If all argument slots are used, then it must go on the stack.  */
3476  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3477    return 0;
3478
3479  /* It doesn't matter whether the argument goes in FR or GR regs.  If
3480     it fits within the 8 argument slots, then it goes entirely in
3481     registers.  If it extends past the last argument slot, then the rest
3482     goes on the stack.  */
3483
3484  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3485    return 0;
3486
3487  return MAX_ARGUMENT_SLOTS - cum->words - offset;
3488}
3489
3490/* Update CUM to point after this argument.  This is patterned after
3491   ia64_function_arg.  */
3492
3493void
3494ia64_function_arg_advance (cum, mode, type, named)
3495     CUMULATIVE_ARGS *cum;
3496     enum machine_mode mode;
3497     tree type;
3498     int named;
3499{
3500  int words = (((mode == BLKmode ? int_size_in_bytes (type)
3501		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
3502	       / UNITS_PER_WORD);
3503  int offset = 0;
3504  enum machine_mode hfa_mode = VOIDmode;
3505
3506  /* If all arg slots are already full, then there is nothing to do.  */
3507  if (cum->words >= MAX_ARGUMENT_SLOTS)
3508    return;
3509
3510  /* Arguments with alignment larger than 8 bytes start at the next even
3511     boundary.  */
3512  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3513       : (words > 1))
3514      && (cum->words & 1))
3515    offset = 1;
3516
3517  cum->words += words + offset;
3518
3519  /* Check for and handle homogeneous FP aggregates.  */
3520  if (type)
3521    hfa_mode = hfa_element_mode (type, 0);
3522
3523  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3524     and unprototyped hfas are passed specially.  */
3525  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3526    {
3527      int fp_regs = cum->fp_regs;
3528      /* This is the original value of cum->words + offset.  */
3529      int int_regs = cum->words - words;
3530      int hfa_size = GET_MODE_SIZE (hfa_mode);
3531      int byte_size;
3532      int args_byte_size;
3533
3534      /* If prototyped, pass it in FR regs then GR regs.
3535	 If not prototyped, pass it in both FR and GR regs.
3536
3537	 If this is an SFmode aggregate, then it is possible to run out of
3538	 FR regs while GR regs are still left.  In that case, we pass the
3539	 remaining part in the GR regs.  */
3540
3541      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3542	 of the argument, the last FP register, or the last argument slot.  */
3543
3544      byte_size = ((mode == BLKmode)
3545		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3546      args_byte_size = int_regs * UNITS_PER_WORD;
3547      offset = 0;
3548      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3549	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3550	{
3551	  offset += hfa_size;
3552	  args_byte_size += hfa_size;
3553	  fp_regs++;
3554	}
3555
3556      cum->fp_regs = fp_regs;
3557    }
3558
3559  /* Integral and aggregates go in general registers.  If we have run out of
3560     FR registers, then FP values must also go in general registers.  This can
3561     happen when we have a SFmode HFA.  */
3562  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3563    cum->int_regs = cum->words;
3564
3565  /* If there is a prototype, then FP values go in a FR register when
3566     named, and in a GR registeer when unnamed.  */
3567  else if (cum->prototype)
3568    {
3569      if (! named)
3570	cum->int_regs = cum->words;
3571      else
3572	/* ??? Complex types should not reach here.  */
3573	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3574    }
3575  /* If there is no prototype, then FP values go in both FR and GR
3576     registers.  */
3577  else
3578    {
3579      /* ??? Complex types should not reach here.  */
3580      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3581      cum->int_regs = cum->words;
3582    }
3583}
3584
3585/* Variable sized types are passed by reference.  */
3586/* ??? At present this is a GCC extension to the IA-64 ABI.  */
3587
3588int
3589ia64_function_arg_pass_by_reference (cum, mode, type, named)
3590     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
3591     enum machine_mode mode ATTRIBUTE_UNUSED;
3592     tree type;
3593     int named ATTRIBUTE_UNUSED;
3594{
3595  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3596}
3597
3598
3599/* Implement va_arg.  */
3600
3601rtx
3602ia64_va_arg (valist, type)
3603     tree valist, type;
3604{
3605  tree t;
3606
3607  /* Variable sized types are passed by reference.  */
3608  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3609    {
3610      rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type));
3611      return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
3612    }
3613
3614  /* Arguments with alignment larger than 8 bytes start at the next even
3615     boundary.  */
3616  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3617    {
3618      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3619		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3620      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3621		 build_int_2 (-2 * UNITS_PER_WORD, -1));
3622      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3623      TREE_SIDE_EFFECTS (t) = 1;
3624      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3625    }
3626
3627  return std_expand_builtin_va_arg (valist, type);
3628}
3629
3630/* Return 1 if function return value returned in memory.  Return 0 if it is
3631   in a register.  */
3632
3633int
3634ia64_return_in_memory (valtype)
3635     tree valtype;
3636{
3637  enum machine_mode mode;
3638  enum machine_mode hfa_mode;
3639  HOST_WIDE_INT byte_size;
3640
3641  mode = TYPE_MODE (valtype);
3642  byte_size = GET_MODE_SIZE (mode);
3643  if (mode == BLKmode)
3644    {
3645      byte_size = int_size_in_bytes (valtype);
3646      if (byte_size < 0)
3647	return 1;
3648    }
3649
3650  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3651
3652  hfa_mode = hfa_element_mode (valtype, 0);
3653  if (hfa_mode != VOIDmode)
3654    {
3655      int hfa_size = GET_MODE_SIZE (hfa_mode);
3656
3657      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3658	return 1;
3659      else
3660	return 0;
3661    }
3662  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3663    return 1;
3664  else
3665    return 0;
3666}
3667
3668/* Return rtx for register that holds the function return value.  */
3669
3670rtx
3671ia64_function_value (valtype, func)
3672     tree valtype;
3673     tree func ATTRIBUTE_UNUSED;
3674{
3675  enum machine_mode mode;
3676  enum machine_mode hfa_mode;
3677
3678  mode = TYPE_MODE (valtype);
3679  hfa_mode = hfa_element_mode (valtype, 0);
3680
3681  if (hfa_mode != VOIDmode)
3682    {
3683      rtx loc[8];
3684      int i;
3685      int hfa_size;
3686      int byte_size;
3687      int offset;
3688
3689      hfa_size = GET_MODE_SIZE (hfa_mode);
3690      byte_size = ((mode == BLKmode)
3691		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
3692      offset = 0;
3693      for (i = 0; offset < byte_size; i++)
3694	{
3695	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3696				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
3697				      GEN_INT (offset));
3698	  offset += hfa_size;
3699	}
3700
3701      if (i == 1)
3702	return XEXP (loc[0], 0);
3703      else
3704	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3705    }
3706  else if (FLOAT_TYPE_P (valtype) &&
3707           ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT))
3708    return gen_rtx_REG (mode, FR_ARG_FIRST);
3709  else
3710    {
3711      if (BYTES_BIG_ENDIAN
3712	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
3713	{
3714	  rtx loc[8];
3715	  int offset;
3716	  int bytesize;
3717	  int i;
3718
3719	  offset = 0;
3720	  bytesize = int_size_in_bytes (valtype);
3721	  for (i = 0; offset < bytesize; i++)
3722	    {
3723	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3724					  gen_rtx_REG (DImode,
3725						       GR_RET_FIRST + i),
3726					  GEN_INT (offset));
3727	      offset += UNITS_PER_WORD;
3728	    }
3729	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3730	}
3731      else
3732	return gen_rtx_REG (mode, GR_RET_FIRST);
3733    }
3734}
3735
3736/* Print a memory address as an operand to reference that memory location.  */
3737
3738/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
3739   also call this from ia64_print_operand for memory addresses.  */
3740
3741void
3742ia64_print_operand_address (stream, address)
3743     FILE * stream ATTRIBUTE_UNUSED;
3744     rtx    address ATTRIBUTE_UNUSED;
3745{
3746}
3747
3748/* Print an operand to an assembler instruction.
3749   C	Swap and print a comparison operator.
3750   D	Print an FP comparison operator.
3751   E    Print 32 - constant, for SImode shifts as extract.
3752   e    Print 64 - constant, for DImode rotates.
3753   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
3754        a floating point register emitted normally.
3755   I	Invert a predicate register by adding 1.
3756   J    Select the proper predicate register for a condition.
3757   j    Select the inverse predicate register for a condition.
3758   O	Append .acq for volatile load.
3759   P	Postincrement of a MEM.
3760   Q	Append .rel for volatile store.
3761   S	Shift amount for shladd instruction.
3762   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
3763	for Intel assembler.
3764   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
3765	for Intel assembler.
3766   r	Print register name, or constant 0 as r0.  HP compatibility for
3767	Linux kernel.  */
3768void
3769ia64_print_operand (file, x, code)
3770     FILE * file;
3771     rtx    x;
3772     int    code;
3773{
3774  const char *str;
3775
3776  switch (code)
3777    {
3778    case 0:
3779      /* Handled below.  */
3780      break;
3781
3782    case 'C':
3783      {
3784	enum rtx_code c = swap_condition (GET_CODE (x));
3785	fputs (GET_RTX_NAME (c), file);
3786	return;
3787      }
3788
3789    case 'D':
3790      switch (GET_CODE (x))
3791	{
3792	case NE:
3793	  str = "neq";
3794	  break;
3795	case UNORDERED:
3796	  str = "unord";
3797	  break;
3798	case ORDERED:
3799	  str = "ord";
3800	  break;
3801	default:
3802	  str = GET_RTX_NAME (GET_CODE (x));
3803	  break;
3804	}
3805      fputs (str, file);
3806      return;
3807
3808    case 'E':
3809      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
3810      return;
3811
3812    case 'e':
3813      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
3814      return;
3815
3816    case 'F':
3817      if (x == CONST0_RTX (GET_MODE (x)))
3818	str = reg_names [FR_REG (0)];
3819      else if (x == CONST1_RTX (GET_MODE (x)))
3820	str = reg_names [FR_REG (1)];
3821      else if (GET_CODE (x) == REG)
3822	str = reg_names [REGNO (x)];
3823      else
3824	abort ();
3825      fputs (str, file);
3826      return;
3827
3828    case 'I':
3829      fputs (reg_names [REGNO (x) + 1], file);
3830      return;
3831
3832    case 'J':
3833    case 'j':
3834      {
3835	unsigned int regno = REGNO (XEXP (x, 0));
3836	if (GET_CODE (x) == EQ)
3837	  regno += 1;
3838	if (code == 'j')
3839	  regno ^= 1;
3840        fputs (reg_names [regno], file);
3841      }
3842      return;
3843
3844    case 'O':
3845      if (MEM_VOLATILE_P (x))
3846	fputs(".acq", file);
3847      return;
3848
3849    case 'P':
3850      {
3851	HOST_WIDE_INT value;
3852
3853	switch (GET_CODE (XEXP (x, 0)))
3854	  {
3855	  default:
3856	    return;
3857
3858	  case POST_MODIFY:
3859	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
3860	    if (GET_CODE (x) == CONST_INT)
3861	      value = INTVAL (x);
3862	    else if (GET_CODE (x) == REG)
3863	      {
3864		fprintf (file, ", %s", reg_names[REGNO (x)]);
3865		return;
3866	      }
3867	    else
3868	      abort ();
3869	    break;
3870
3871	  case POST_INC:
3872	    value = GET_MODE_SIZE (GET_MODE (x));
3873	    break;
3874
3875	  case POST_DEC:
3876	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
3877	    break;
3878	  }
3879
3880	putc (',', file);
3881	putc (' ', file);
3882	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
3883	return;
3884      }
3885
3886    case 'Q':
3887      if (MEM_VOLATILE_P (x))
3888	fputs(".rel", file);
3889      return;
3890
3891    case 'S':
3892      fprintf (file, "%d", exact_log2 (INTVAL (x)));
3893      return;
3894
3895    case 'T':
3896      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3897	{
3898	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
3899	  return;
3900	}
3901      break;
3902
3903    case 'U':
3904      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
3905	{
3906	  const char *prefix = "0x";
3907	  if (INTVAL (x) & 0x80000000)
3908	    {
3909	      fprintf (file, "0xffffffff");
3910	      prefix = "";
3911	    }
3912	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
3913	  return;
3914	}
3915      break;
3916
3917    case 'r':
3918      /* If this operand is the constant zero, write it as register zero.
3919	 Any register, zero, or CONST_INT value is OK here.  */
3920      if (GET_CODE (x) == REG)
3921	fputs (reg_names[REGNO (x)], file);
3922      else if (x == CONST0_RTX (GET_MODE (x)))
3923	fputs ("r0", file);
3924      else if (GET_CODE (x) == CONST_INT)
3925	output_addr_const (file, x);
3926      else
3927	output_operand_lossage ("invalid %%r value");
3928      return;
3929
3930    case '+':
3931      {
3932	const char *which;
3933
3934	/* For conditional branches, returns or calls, substitute
3935	   sptk, dptk, dpnt, or spnt for %s.  */
3936	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
3937	if (x)
3938	  {
3939	    int pred_val = INTVAL (XEXP (x, 0));
3940
3941	    /* Guess top and bottom 10% statically predicted.  */
3942	    if (pred_val < REG_BR_PROB_BASE / 50)
3943	      which = ".spnt";
3944	    else if (pred_val < REG_BR_PROB_BASE / 2)
3945	      which = ".dpnt";
3946	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
3947	      which = ".dptk";
3948	    else
3949	      which = ".sptk";
3950	  }
3951	else if (GET_CODE (current_output_insn) == CALL_INSN)
3952	  which = ".sptk";
3953	else
3954	  which = ".dptk";
3955
3956	fputs (which, file);
3957	return;
3958      }
3959
3960    case ',':
3961      x = current_insn_predicate;
3962      if (x)
3963	{
3964	  unsigned int regno = REGNO (XEXP (x, 0));
3965	  if (GET_CODE (x) == EQ)
3966	    regno += 1;
3967          fprintf (file, "(%s) ", reg_names [regno]);
3968	}
3969      return;
3970
3971    default:
3972      output_operand_lossage ("ia64_print_operand: unknown code");
3973      return;
3974    }
3975
3976  switch (GET_CODE (x))
3977    {
3978      /* This happens for the spill/restore instructions.  */
3979    case POST_INC:
3980    case POST_DEC:
3981    case POST_MODIFY:
3982      x = XEXP (x, 0);
3983      /* ... fall through ...  */
3984
3985    case REG:
3986      fputs (reg_names [REGNO (x)], file);
3987      break;
3988
3989    case MEM:
3990      {
3991	rtx addr = XEXP (x, 0);
3992	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
3993	  addr = XEXP (addr, 0);
3994	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
3995	break;
3996      }
3997
3998    default:
3999      output_addr_const (file, x);
4000      break;
4001    }
4002
4003  return;
4004}
4005
4006/* Calulate the cost of moving data from a register in class FROM to
4007   one in class TO, using MODE.  */
4008
4009int
4010ia64_register_move_cost (mode, from, to)
4011     enum machine_mode mode;
4012     enum reg_class from, to;
4013{
4014  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4015  if (to == ADDL_REGS)
4016    to = GR_REGS;
4017  if (from == ADDL_REGS)
4018    from = GR_REGS;
4019
4020  /* All costs are symmetric, so reduce cases by putting the
4021     lower number class as the destination.  */
4022  if (from < to)
4023    {
4024      enum reg_class tmp = to;
4025      to = from, from = tmp;
4026    }
4027
4028  /* Moving from FR<->GR in TFmode must be more expensive than 2,
4029     so that we get secondary memory reloads.  Between FR_REGS,
4030     we have to make this at least as expensive as MEMORY_MOVE_COST
4031     to avoid spectacularly poor register class preferencing.  */
4032  if (mode == TFmode)
4033    {
4034      if (to != GR_REGS || from != GR_REGS)
4035        return MEMORY_MOVE_COST (mode, to, 0);
4036      else
4037	return 3;
4038    }
4039
4040  switch (to)
4041    {
4042    case PR_REGS:
4043      /* Moving between PR registers takes two insns.  */
4044      if (from == PR_REGS)
4045	return 3;
4046      /* Moving between PR and anything but GR is impossible.  */
4047      if (from != GR_REGS)
4048	return MEMORY_MOVE_COST (mode, to, 0);
4049      break;
4050
4051    case BR_REGS:
4052      /* Moving between BR and anything but GR is impossible.  */
4053      if (from != GR_REGS && from != GR_AND_BR_REGS)
4054	return MEMORY_MOVE_COST (mode, to, 0);
4055      break;
4056
4057    case AR_I_REGS:
4058    case AR_M_REGS:
4059      /* Moving between AR and anything but GR is impossible.  */
4060      if (from != GR_REGS)
4061	return MEMORY_MOVE_COST (mode, to, 0);
4062      break;
4063
4064    case GR_REGS:
4065    case FR_REGS:
4066    case GR_AND_FR_REGS:
4067    case GR_AND_BR_REGS:
4068    case ALL_REGS:
4069      break;
4070
4071    default:
4072      abort ();
4073    }
4074
4075  return 2;
4076}
4077
4078/* This function returns the register class required for a secondary
4079   register when copying between one of the registers in CLASS, and X,
4080   using MODE.  A return value of NO_REGS means that no secondary register
4081   is required.  */
4082
4083enum reg_class
4084ia64_secondary_reload_class (class, mode, x)
4085     enum reg_class class;
4086     enum machine_mode mode ATTRIBUTE_UNUSED;
4087     rtx x;
4088{
4089  int regno = -1;
4090
4091  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4092    regno = true_regnum (x);
4093
4094  switch (class)
4095    {
4096    case BR_REGS:
4097    case AR_M_REGS:
4098    case AR_I_REGS:
4099      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4100	 interaction.  We end up with two pseudos with overlapping lifetimes
4101	 both of which are equiv to the same constant, and both which need
4102	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4103	 changes depending on the path length, which means the qty_first_reg
4104	 check in make_regs_eqv can give different answers at different times.
4105	 At some point I'll probably need a reload_indi pattern to handle
4106	 this.
4107
4108	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4109	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4110	 non-general registers for good measure.  */
4111      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4112	return GR_REGS;
4113
4114      /* This is needed if a pseudo used as a call_operand gets spilled to a
4115	 stack slot.  */
4116      if (GET_CODE (x) == MEM)
4117	return GR_REGS;
4118      break;
4119
4120    case FR_REGS:
4121      /* Need to go through general regsters to get to other class regs.  */
4122      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4123	return GR_REGS;
4124
4125      /* This can happen when a paradoxical subreg is an operand to the
4126	 muldi3 pattern.  */
4127      /* ??? This shouldn't be necessary after instruction scheduling is
4128	 enabled, because paradoxical subregs are not accepted by
4129	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4130	 stop the paradoxical subreg stupidity in the *_operand functions
4131	 in recog.c.  */
4132      if (GET_CODE (x) == MEM
4133	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4134	      || GET_MODE (x) == QImode))
4135	return GR_REGS;
4136
4137      /* This can happen because of the ior/and/etc patterns that accept FP
4138	 registers as operands.  If the third operand is a constant, then it
4139	 needs to be reloaded into a FP register.  */
4140      if (GET_CODE (x) == CONST_INT)
4141	return GR_REGS;
4142
4143      /* This can happen because of register elimination in a muldi3 insn.
4144	 E.g. `26107 * (unsigned long)&u'.  */
4145      if (GET_CODE (x) == PLUS)
4146	return GR_REGS;
4147      break;
4148
4149    case PR_REGS:
4150      /* ??? This happens if we cse/gcse a BImode value across a call,
4151	 and the function has a nonlocal goto.  This is because global
4152	 does not allocate call crossing pseudos to hard registers when
4153	 current_function_has_nonlocal_goto is true.  This is relatively
4154	 common for C++ programs that use exceptions.  To reproduce,
4155	 return NO_REGS and compile libstdc++.  */
4156      if (GET_CODE (x) == MEM)
4157	return GR_REGS;
4158
4159      /* This can happen when we take a BImode subreg of a DImode value,
4160	 and that DImode value winds up in some non-GR register.  */
4161      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4162	return GR_REGS;
4163      break;
4164
4165    case GR_REGS:
4166      /* Since we have no offsettable memory addresses, we need a temporary
4167	 to hold the address of the second word.  */
4168      if (mode == TImode)
4169	return GR_REGS;
4170      break;
4171
4172    default:
4173      break;
4174    }
4175
4176  return NO_REGS;
4177}
4178
4179/* Emit text to declare externally defined variables and functions, because
4180   the Intel assembler does not support undefined externals.  */
4181
4182void
4183ia64_asm_output_external (file, decl, name)
4184     FILE *file;
4185     tree decl;
4186     const char *name;
4187{
4188  int save_referenced;
4189
4190  /* GNU as does not need anything here, but the HP linker does need
4191     something for external functions.  */
4192
4193  if (TARGET_GNU_AS
4194      && (!TARGET_HPUX_LD
4195	  || TREE_CODE (decl) != FUNCTION_DECL
4196	  || strstr(name, "__builtin_") == name))
4197    return;
4198
4199  /* ??? The Intel assembler creates a reference that needs to be satisfied by
4200     the linker when we do this, so we need to be careful not to do this for
4201     builtin functions which have no library equivalent.  Unfortunately, we
4202     can't tell here whether or not a function will actually be called by
4203     expand_expr, so we pull in library functions even if we may not need
4204     them later.  */
4205  if (! strcmp (name, "__builtin_next_arg")
4206      || ! strcmp (name, "alloca")
4207      || ! strcmp (name, "__builtin_constant_p")
4208      || ! strcmp (name, "__builtin_args_info"))
4209    return;
4210
4211  if (TARGET_HPUX_LD)
4212    ia64_hpux_add_extern_decl (name);
4213  else
4214    {
4215      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4216         restore it.  */
4217      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4218      if (TREE_CODE (decl) == FUNCTION_DECL)
4219        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4220      (*targetm.asm_out.globalize_label) (file, name);
4221      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4222    }
4223}
4224
4225/* Parse the -mfixed-range= option string.  */
4226
4227static void
4228fix_range (const_str)
4229     const char *const_str;
4230{
4231  int i, first, last;
4232  char *str, *dash, *comma;
4233
4234  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4235     REG2 are either register names or register numbers.  The effect
4236     of this option is to mark the registers in the range from REG1 to
4237     REG2 as ``fixed'' so they won't be used by the compiler.  This is
4238     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4239
4240  i = strlen (const_str);
4241  str = (char *) alloca (i + 1);
4242  memcpy (str, const_str, i + 1);
4243
4244  while (1)
4245    {
4246      dash = strchr (str, '-');
4247      if (!dash)
4248	{
4249	  warning ("value of -mfixed-range must have form REG1-REG2");
4250	  return;
4251	}
4252      *dash = '\0';
4253
4254      comma = strchr (dash + 1, ',');
4255      if (comma)
4256	*comma = '\0';
4257
4258      first = decode_reg_name (str);
4259      if (first < 0)
4260	{
4261	  warning ("unknown register name: %s", str);
4262	  return;
4263	}
4264
4265      last = decode_reg_name (dash + 1);
4266      if (last < 0)
4267	{
4268	  warning ("unknown register name: %s", dash + 1);
4269	  return;
4270	}
4271
4272      *dash = '-';
4273
4274      if (first > last)
4275	{
4276	  warning ("%s-%s is an empty range", str, dash + 1);
4277	  return;
4278	}
4279
4280      for (i = first; i <= last; ++i)
4281	fixed_regs[i] = call_used_regs[i] = 1;
4282
4283      if (!comma)
4284	break;
4285
4286      *comma = ',';
4287      str = comma + 1;
4288    }
4289}
4290
4291static struct machine_function *
4292ia64_init_machine_status ()
4293{
4294  return ggc_alloc_cleared (sizeof (struct machine_function));
4295}
4296
4297/* Handle TARGET_OPTIONS switches.  */
4298
4299void
4300ia64_override_options ()
4301{
4302  if (TARGET_AUTO_PIC)
4303    target_flags |= MASK_CONST_GP;
4304
4305  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4306    {
4307      warning ("cannot optimize floating point division for both latency and throughput");
4308      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4309    }
4310
4311  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4312    {
4313      warning ("cannot optimize integer division for both latency and throughput");
4314      target_flags &= ~MASK_INLINE_INT_DIV_THR;
4315    }
4316
4317  if (ia64_fixed_range_string)
4318    fix_range (ia64_fixed_range_string);
4319
4320  if (ia64_tls_size_string)
4321    {
4322      char *end;
4323      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4324      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4325	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4326      else
4327	ia64_tls_size = tmp;
4328    }
4329
4330  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4331  flag_schedule_insns_after_reload = 0;
4332
4333  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4334
4335  init_machine_status = ia64_init_machine_status;
4336
4337  /* Tell the compiler which flavor of TFmode we're using.  */
4338  if (INTEL_EXTENDED_IEEE_FORMAT)
4339    real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
4340}
4341
4342static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx));
4343static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx));
4344static enum attr_type ia64_safe_type PARAMS((rtx));
4345
4346static enum attr_itanium_requires_unit0
4347ia64_safe_itanium_requires_unit0 (insn)
4348     rtx insn;
4349{
4350  if (recog_memoized (insn) >= 0)
4351    return get_attr_itanium_requires_unit0 (insn);
4352  else
4353    return ITANIUM_REQUIRES_UNIT0_NO;
4354}
4355
4356static enum attr_itanium_class
4357ia64_safe_itanium_class (insn)
4358     rtx insn;
4359{
4360  if (recog_memoized (insn) >= 0)
4361    return get_attr_itanium_class (insn);
4362  else
4363    return ITANIUM_CLASS_UNKNOWN;
4364}
4365
4366static enum attr_type
4367ia64_safe_type (insn)
4368     rtx insn;
4369{
4370  if (recog_memoized (insn) >= 0)
4371    return get_attr_type (insn);
4372  else
4373    return TYPE_UNKNOWN;
4374}
4375
4376/* The following collection of routines emit instruction group stop bits as
4377   necessary to avoid dependencies.  */
4378
4379/* Need to track some additional registers as far as serialization is
4380   concerned so we can properly handle br.call and br.ret.  We could
4381   make these registers visible to gcc, but since these registers are
4382   never explicitly used in gcc generated code, it seems wasteful to
4383   do so (plus it would make the call and return patterns needlessly
4384   complex).  */
4385#define REG_GP		(GR_REG (1))
4386#define REG_RP		(BR_REG (0))
4387#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4388/* This is used for volatile asms which may require a stop bit immediately
4389   before and after them.  */
4390#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4391#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4392#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4393
4394/* For each register, we keep track of how it has been written in the
4395   current instruction group.
4396
4397   If a register is written unconditionally (no qualifying predicate),
4398   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4399
4400   If a register is written if its qualifying predicate P is true, we
4401   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4402   may be written again by the complement of P (P^1) and when this happens,
4403   WRITE_COUNT gets set to 2.
4404
4405   The result of this is that whenever an insn attempts to write a register
4406   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4407
4408   If a predicate register is written by a floating-point insn, we set
4409   WRITTEN_BY_FP to true.
4410
4411   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4412   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4413
4414struct reg_write_state
4415{
4416  unsigned int write_count : 2;
4417  unsigned int first_pred : 16;
4418  unsigned int written_by_fp : 1;
4419  unsigned int written_by_and : 1;
4420  unsigned int written_by_or : 1;
4421};
4422
4423/* Cumulative info for the current instruction group.  */
4424struct reg_write_state rws_sum[NUM_REGS];
4425/* Info for the current instruction.  This gets copied to rws_sum after a
4426   stop bit is emitted.  */
4427struct reg_write_state rws_insn[NUM_REGS];
4428
4429/* Indicates whether this is the first instruction after a stop bit,
4430   in which case we don't need another stop bit.  Without this, we hit
4431   the abort in ia64_variable_issue when scheduling an alloc.  */
4432static int first_instruction;
4433
4434/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4435   RTL for one instruction.  */
4436struct reg_flags
4437{
4438  unsigned int is_write : 1;	/* Is register being written?  */
4439  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4440  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4441  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4442  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4443  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4444};
4445
4446static void rws_update PARAMS ((struct reg_write_state *, int,
4447				struct reg_flags, int));
4448static int rws_access_regno PARAMS ((int, struct reg_flags, int));
4449static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
4450static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *));
4451static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx));
4452static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
4453static void init_insn_group_barriers PARAMS ((void));
4454static int group_barrier_needed_p PARAMS ((rtx));
4455static int safe_group_barrier_needed_p PARAMS ((rtx));
4456
4457/* Update *RWS for REGNO, which is being written by the current instruction,
4458   with predicate PRED, and associated register flags in FLAGS.  */
4459
4460static void
4461rws_update (rws, regno, flags, pred)
4462     struct reg_write_state *rws;
4463     int regno;
4464     struct reg_flags flags;
4465     int pred;
4466{
4467  if (pred)
4468    rws[regno].write_count++;
4469  else
4470    rws[regno].write_count = 2;
4471  rws[regno].written_by_fp |= flags.is_fp;
4472  /* ??? Not tracking and/or across differing predicates.  */
4473  rws[regno].written_by_and = flags.is_and;
4474  rws[regno].written_by_or = flags.is_or;
4475  rws[regno].first_pred = pred;
4476}
4477
4478/* Handle an access to register REGNO of type FLAGS using predicate register
4479   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4480   a dependency with an earlier instruction in the same group.  */
4481
4482static int
4483rws_access_regno (regno, flags, pred)
4484     int regno;
4485     struct reg_flags flags;
4486     int pred;
4487{
4488  int need_barrier = 0;
4489
4490  if (regno >= NUM_REGS)
4491    abort ();
4492
4493  if (! PR_REGNO_P (regno))
4494    flags.is_and = flags.is_or = 0;
4495
4496  if (flags.is_write)
4497    {
4498      int write_count;
4499
4500      /* One insn writes same reg multiple times?  */
4501      if (rws_insn[regno].write_count > 0)
4502	abort ();
4503
4504      /* Update info for current instruction.  */
4505      rws_update (rws_insn, regno, flags, pred);
4506      write_count = rws_sum[regno].write_count;
4507
4508      switch (write_count)
4509	{
4510	case 0:
4511	  /* The register has not been written yet.  */
4512	  rws_update (rws_sum, regno, flags, pred);
4513	  break;
4514
4515	case 1:
4516	  /* The register has been written via a predicate.  If this is
4517	     not a complementary predicate, then we need a barrier.  */
4518	  /* ??? This assumes that P and P+1 are always complementary
4519	     predicates for P even.  */
4520	  if (flags.is_and && rws_sum[regno].written_by_and)
4521	    ;
4522	  else if (flags.is_or && rws_sum[regno].written_by_or)
4523	    ;
4524	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4525	    need_barrier = 1;
4526	  rws_update (rws_sum, regno, flags, pred);
4527	  break;
4528
4529	case 2:
4530	  /* The register has been unconditionally written already.  We
4531	     need a barrier.  */
4532	  if (flags.is_and && rws_sum[regno].written_by_and)
4533	    ;
4534	  else if (flags.is_or && rws_sum[regno].written_by_or)
4535	    ;
4536	  else
4537	    need_barrier = 1;
4538	  rws_sum[regno].written_by_and = flags.is_and;
4539	  rws_sum[regno].written_by_or = flags.is_or;
4540	  break;
4541
4542	default:
4543	  abort ();
4544	}
4545    }
4546  else
4547    {
4548      if (flags.is_branch)
4549	{
4550	  /* Branches have several RAW exceptions that allow to avoid
4551	     barriers.  */
4552
4553	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4554	    /* RAW dependencies on branch regs are permissible as long
4555	       as the writer is a non-branch instruction.  Since we
4556	       never generate code that uses a branch register written
4557	       by a branch instruction, handling this case is
4558	       easy.  */
4559	    return 0;
4560
4561	  if (REGNO_REG_CLASS (regno) == PR_REGS
4562	      && ! rws_sum[regno].written_by_fp)
4563	    /* The predicates of a branch are available within the
4564	       same insn group as long as the predicate was written by
4565	       something other than a floating-point instruction.  */
4566	    return 0;
4567	}
4568
4569      if (flags.is_and && rws_sum[regno].written_by_and)
4570	return 0;
4571      if (flags.is_or && rws_sum[regno].written_by_or)
4572	return 0;
4573
4574      switch (rws_sum[regno].write_count)
4575	{
4576	case 0:
4577	  /* The register has not been written yet.  */
4578	  break;
4579
4580	case 1:
4581	  /* The register has been written via a predicate.  If this is
4582	     not a complementary predicate, then we need a barrier.  */
4583	  /* ??? This assumes that P and P+1 are always complementary
4584	     predicates for P even.  */
4585	  if ((rws_sum[regno].first_pred ^ 1) != pred)
4586	    need_barrier = 1;
4587	  break;
4588
4589	case 2:
4590	  /* The register has been unconditionally written already.  We
4591	     need a barrier.  */
4592	  need_barrier = 1;
4593	  break;
4594
4595	default:
4596	  abort ();
4597	}
4598    }
4599
4600  return need_barrier;
4601}
4602
4603static int
4604rws_access_reg (reg, flags, pred)
4605     rtx reg;
4606     struct reg_flags flags;
4607     int pred;
4608{
4609  int regno = REGNO (reg);
4610  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
4611
4612  if (n == 1)
4613    return rws_access_regno (regno, flags, pred);
4614  else
4615    {
4616      int need_barrier = 0;
4617      while (--n >= 0)
4618	need_barrier |= rws_access_regno (regno + n, flags, pred);
4619      return need_barrier;
4620    }
4621}
4622
4623/* Examine X, which is a SET rtx, and update the flags, the predicate, and
4624   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
4625
4626static void
4627update_set_flags (x, pflags, ppred, pcond)
4628     rtx x;
4629     struct reg_flags *pflags;
4630     int *ppred;
4631     rtx *pcond;
4632{
4633  rtx src = SET_SRC (x);
4634
4635  *pcond = 0;
4636
4637  switch (GET_CODE (src))
4638    {
4639    case CALL:
4640      return;
4641
4642    case IF_THEN_ELSE:
4643      if (SET_DEST (x) == pc_rtx)
4644	/* X is a conditional branch.  */
4645	return;
4646      else
4647	{
4648	  int is_complemented = 0;
4649
4650	  /* X is a conditional move.  */
4651	  rtx cond = XEXP (src, 0);
4652	  if (GET_CODE (cond) == EQ)
4653	    is_complemented = 1;
4654	  cond = XEXP (cond, 0);
4655	  if (GET_CODE (cond) != REG
4656	      && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4657	    abort ();
4658	  *pcond = cond;
4659	  if (XEXP (src, 1) == SET_DEST (x)
4660	      || XEXP (src, 2) == SET_DEST (x))
4661	    {
4662	      /* X is a conditional move that conditionally writes the
4663		 destination.  */
4664
4665	      /* We need another complement in this case.  */
4666	      if (XEXP (src, 1) == SET_DEST (x))
4667		is_complemented = ! is_complemented;
4668
4669	      *ppred = REGNO (cond);
4670	      if (is_complemented)
4671		++*ppred;
4672	    }
4673
4674	  /* ??? If this is a conditional write to the dest, then this
4675	     instruction does not actually read one source.  This probably
4676	     doesn't matter, because that source is also the dest.  */
4677	  /* ??? Multiple writes to predicate registers are allowed
4678	     if they are all AND type compares, or if they are all OR
4679	     type compares.  We do not generate such instructions
4680	     currently.  */
4681	}
4682      /* ... fall through ...  */
4683
4684    default:
4685      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
4686	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
4687	/* Set pflags->is_fp to 1 so that we know we're dealing
4688	   with a floating point comparison when processing the
4689	   destination of the SET.  */
4690	pflags->is_fp = 1;
4691
4692      /* Discover if this is a parallel comparison.  We only handle
4693	 and.orcm and or.andcm at present, since we must retain a
4694	 strict inverse on the predicate pair.  */
4695      else if (GET_CODE (src) == AND)
4696	pflags->is_and = 1;
4697      else if (GET_CODE (src) == IOR)
4698	pflags->is_or = 1;
4699
4700      break;
4701    }
4702}
4703
4704/* Subroutine of rtx_needs_barrier; this function determines whether the
4705   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
4706   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
4707   for this insn.  */
4708
4709static int
4710set_src_needs_barrier (x, flags, pred, cond)
4711     rtx x;
4712     struct reg_flags flags;
4713     int pred;
4714     rtx cond;
4715{
4716  int need_barrier = 0;
4717  rtx dst;
4718  rtx src = SET_SRC (x);
4719
4720  if (GET_CODE (src) == CALL)
4721    /* We don't need to worry about the result registers that
4722       get written by subroutine call.  */
4723    return rtx_needs_barrier (src, flags, pred);
4724  else if (SET_DEST (x) == pc_rtx)
4725    {
4726      /* X is a conditional branch.  */
4727      /* ??? This seems redundant, as the caller sets this bit for
4728	 all JUMP_INSNs.  */
4729      flags.is_branch = 1;
4730      return rtx_needs_barrier (src, flags, pred);
4731    }
4732
4733  need_barrier = rtx_needs_barrier (src, flags, pred);
4734
4735  /* This instruction unconditionally uses a predicate register.  */
4736  if (cond)
4737    need_barrier |= rws_access_reg (cond, flags, 0);
4738
4739  dst = SET_DEST (x);
4740  if (GET_CODE (dst) == ZERO_EXTRACT)
4741    {
4742      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
4743      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
4744      dst = XEXP (dst, 0);
4745    }
4746  return need_barrier;
4747}
4748
4749/* Handle an access to rtx X of type FLAGS using predicate register PRED.
4750   Return 1 is this access creates a dependency with an earlier instruction
4751   in the same group.  */
4752
4753static int
4754rtx_needs_barrier (x, flags, pred)
4755     rtx x;
4756     struct reg_flags flags;
4757     int pred;
4758{
4759  int i, j;
4760  int is_complemented = 0;
4761  int need_barrier = 0;
4762  const char *format_ptr;
4763  struct reg_flags new_flags;
4764  rtx cond = 0;
4765
4766  if (! x)
4767    return 0;
4768
4769  new_flags = flags;
4770
4771  switch (GET_CODE (x))
4772    {
4773    case SET:
4774      update_set_flags (x, &new_flags, &pred, &cond);
4775      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
4776      if (GET_CODE (SET_SRC (x)) != CALL)
4777	{
4778	  new_flags.is_write = 1;
4779	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
4780	}
4781      break;
4782
4783    case CALL:
4784      new_flags.is_write = 0;
4785      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
4786
4787      /* Avoid multiple register writes, in case this is a pattern with
4788	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
4789      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
4790	{
4791	  new_flags.is_write = 1;
4792	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
4793	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
4794	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
4795	}
4796      break;
4797
4798    case COND_EXEC:
4799      /* X is a predicated instruction.  */
4800
4801      cond = COND_EXEC_TEST (x);
4802      if (pred)
4803	abort ();
4804      need_barrier = rtx_needs_barrier (cond, flags, 0);
4805
4806      if (GET_CODE (cond) == EQ)
4807	is_complemented = 1;
4808      cond = XEXP (cond, 0);
4809      if (GET_CODE (cond) != REG
4810	  && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
4811	abort ();
4812      pred = REGNO (cond);
4813      if (is_complemented)
4814	++pred;
4815
4816      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
4817      return need_barrier;
4818
4819    case CLOBBER:
4820    case USE:
4821      /* Clobber & use are for earlier compiler-phases only.  */
4822      break;
4823
4824    case ASM_OPERANDS:
4825    case ASM_INPUT:
4826      /* We always emit stop bits for traditional asms.  We emit stop bits
4827	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
4828      if (GET_CODE (x) != ASM_OPERANDS
4829	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
4830	{
4831	  /* Avoid writing the register multiple times if we have multiple
4832	     asm outputs.  This avoids an abort in rws_access_reg.  */
4833	  if (! rws_insn[REG_VOLATILE].write_count)
4834	    {
4835	      new_flags.is_write = 1;
4836	      rws_access_regno (REG_VOLATILE, new_flags, pred);
4837	    }
4838	  return 1;
4839	}
4840
4841      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
4842	 We can not just fall through here since then we would be confused
4843	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
4844	 traditional asms unlike their normal usage.  */
4845
4846      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
4847	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
4848	  need_barrier = 1;
4849      break;
4850
4851    case PARALLEL:
4852      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4853	{
4854	  rtx pat = XVECEXP (x, 0, i);
4855	  if (GET_CODE (pat) == SET)
4856	    {
4857	      update_set_flags (pat, &new_flags, &pred, &cond);
4858	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
4859	    }
4860	  else if (GET_CODE (pat) == USE
4861		   || GET_CODE (pat) == CALL
4862		   || GET_CODE (pat) == ASM_OPERANDS)
4863	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4864	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
4865	    abort ();
4866	}
4867      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
4868	{
4869	  rtx pat = XVECEXP (x, 0, i);
4870	  if (GET_CODE (pat) == SET)
4871	    {
4872	      if (GET_CODE (SET_SRC (pat)) != CALL)
4873		{
4874		  new_flags.is_write = 1;
4875		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
4876						     pred);
4877		}
4878	    }
4879	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
4880	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
4881	}
4882      break;
4883
4884    case SUBREG:
4885      x = SUBREG_REG (x);
4886      /* FALLTHRU */
4887    case REG:
4888      if (REGNO (x) == AR_UNAT_REGNUM)
4889	{
4890	  for (i = 0; i < 64; ++i)
4891	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
4892	}
4893      else
4894	need_barrier = rws_access_reg (x, flags, pred);
4895      break;
4896
4897    case MEM:
4898      /* Find the regs used in memory address computation.  */
4899      new_flags.is_write = 0;
4900      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4901      break;
4902
4903    case CONST_INT:   case CONST_DOUBLE:
4904    case SYMBOL_REF:  case LABEL_REF:     case CONST:
4905      break;
4906
4907      /* Operators with side-effects.  */
4908    case POST_INC:    case POST_DEC:
4909      if (GET_CODE (XEXP (x, 0)) != REG)
4910	abort ();
4911
4912      new_flags.is_write = 0;
4913      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4914      new_flags.is_write = 1;
4915      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4916      break;
4917
4918    case POST_MODIFY:
4919      if (GET_CODE (XEXP (x, 0)) != REG)
4920	abort ();
4921
4922      new_flags.is_write = 0;
4923      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
4924      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4925      new_flags.is_write = 1;
4926      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
4927      break;
4928
4929      /* Handle common unary and binary ops for efficiency.  */
4930    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
4931    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
4932    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
4933    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
4934    case NE:       case EQ:      case GE:      case GT:        case LE:
4935    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
4936      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
4937      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
4938      break;
4939
4940    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
4941    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
4942    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
4943    case SQRT:     case FFS:
4944      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
4945      break;
4946
4947    case UNSPEC:
4948      switch (XINT (x, 1))
4949	{
4950	case UNSPEC_LTOFF_DTPMOD:
4951	case UNSPEC_LTOFF_DTPREL:
4952	case UNSPEC_DTPREL:
4953	case UNSPEC_LTOFF_TPREL:
4954	case UNSPEC_TPREL:
4955	case UNSPEC_PRED_REL_MUTEX:
4956	case UNSPEC_PIC_CALL:
4957        case UNSPEC_MF:
4958        case UNSPEC_FETCHADD_ACQ:
4959	case UNSPEC_BSP_VALUE:
4960	case UNSPEC_FLUSHRS:
4961	case UNSPEC_BUNDLE_SELECTOR:
4962          break;
4963
4964	case UNSPEC_GR_SPILL:
4965	case UNSPEC_GR_RESTORE:
4966	  {
4967	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
4968	    HOST_WIDE_INT bit = (offset >> 3) & 63;
4969
4970	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4971	    new_flags.is_write = (XINT (x, 1) == 1);
4972	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
4973					      new_flags, pred);
4974	    break;
4975	  }
4976
4977	case UNSPEC_FR_SPILL:
4978	case UNSPEC_FR_RESTORE:
4979	case UNSPEC_POPCNT:
4980	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4981	  break;
4982
4983        case UNSPEC_ADDP4:
4984	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4985	  break;
4986
4987	case UNSPEC_FR_RECIP_APPROX:
4988	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
4989	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4990	  break;
4991
4992        case UNSPEC_CMPXCHG_ACQ:
4993	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
4994	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
4995	  break;
4996
4997	default:
4998	  abort ();
4999	}
5000      break;
5001
5002    case UNSPEC_VOLATILE:
5003      switch (XINT (x, 1))
5004	{
5005	case UNSPECV_ALLOC:
5006	  /* Alloc must always be the first instruction of a group.
5007	     We force this by always returning true.  */
5008	  /* ??? We might get better scheduling if we explicitly check for
5009	     input/local/output register dependencies, and modify the
5010	     scheduler so that alloc is always reordered to the start of
5011	     the current group.  We could then eliminate all of the
5012	     first_instruction code.  */
5013	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5014
5015	  new_flags.is_write = 1;
5016	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5017	  return 1;
5018
5019	case UNSPECV_SET_BSP:
5020	  need_barrier = 1;
5021          break;
5022
5023	case UNSPECV_BLOCKAGE:
5024	case UNSPECV_INSN_GROUP_BARRIER:
5025	case UNSPECV_BREAK:
5026	case UNSPECV_PSAC_ALL:
5027	case UNSPECV_PSAC_NORMAL:
5028	  return 0;
5029
5030	default:
5031	  abort ();
5032	}
5033      break;
5034
5035    case RETURN:
5036      new_flags.is_write = 0;
5037      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5038      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5039
5040      new_flags.is_write = 1;
5041      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5042      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5043      break;
5044
5045    default:
5046      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5047      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5048	switch (format_ptr[i])
5049	  {
5050	  case '0':	/* unused field */
5051	  case 'i':	/* integer */
5052	  case 'n':	/* note */
5053	  case 'w':	/* wide integer */
5054	  case 's':	/* pointer to string */
5055	  case 'S':	/* optional pointer to string */
5056	    break;
5057
5058	  case 'e':
5059	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5060	      need_barrier = 1;
5061	    break;
5062
5063	  case 'E':
5064	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5065	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5066		need_barrier = 1;
5067	    break;
5068
5069	  default:
5070	    abort ();
5071	  }
5072      break;
5073    }
5074  return need_barrier;
5075}
5076
5077/* Clear out the state for group_barrier_needed_p at the start of a
5078   sequence of insns.  */
5079
5080static void
5081init_insn_group_barriers ()
5082{
5083  memset (rws_sum, 0, sizeof (rws_sum));
5084  first_instruction = 1;
5085}
5086
5087/* Given the current state, recorded by previous calls to this function,
5088   determine whether a group barrier (a stop bit) is necessary before INSN.
5089   Return nonzero if so.  */
5090
5091static int
5092group_barrier_needed_p (insn)
5093     rtx insn;
5094{
5095  rtx pat;
5096  int need_barrier = 0;
5097  struct reg_flags flags;
5098
5099  memset (&flags, 0, sizeof (flags));
5100  switch (GET_CODE (insn))
5101    {
5102    case NOTE:
5103      break;
5104
5105    case BARRIER:
5106      /* A barrier doesn't imply an instruction group boundary.  */
5107      break;
5108
5109    case CODE_LABEL:
5110      memset (rws_insn, 0, sizeof (rws_insn));
5111      return 1;
5112
5113    case CALL_INSN:
5114      flags.is_branch = 1;
5115      flags.is_sibcall = SIBLING_CALL_P (insn);
5116      memset (rws_insn, 0, sizeof (rws_insn));
5117
5118      /* Don't bundle a call following another call.  */
5119      if ((pat = prev_active_insn (insn))
5120	  && GET_CODE (pat) == CALL_INSN)
5121	{
5122	  need_barrier = 1;
5123	  break;
5124	}
5125
5126      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5127      break;
5128
5129    case JUMP_INSN:
5130      flags.is_branch = 1;
5131
5132      /* Don't bundle a jump following a call.  */
5133      if ((pat = prev_active_insn (insn))
5134	  && GET_CODE (pat) == CALL_INSN)
5135	{
5136	  need_barrier = 1;
5137	  break;
5138	}
5139      /* FALLTHRU */
5140
5141    case INSN:
5142      if (GET_CODE (PATTERN (insn)) == USE
5143	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5144	/* Don't care about USE and CLOBBER "insns"---those are used to
5145	   indicate to the optimizer that it shouldn't get rid of
5146	   certain operations.  */
5147	break;
5148
5149      pat = PATTERN (insn);
5150
5151      /* Ug.  Hack hacks hacked elsewhere.  */
5152      switch (recog_memoized (insn))
5153	{
5154	  /* We play dependency tricks with the epilogue in order
5155	     to get proper schedules.  Undo this for dv analysis.  */
5156	case CODE_FOR_epilogue_deallocate_stack:
5157	case CODE_FOR_prologue_allocate_stack:
5158	  pat = XVECEXP (pat, 0, 0);
5159	  break;
5160
5161	  /* The pattern we use for br.cloop confuses the code above.
5162	     The second element of the vector is representative.  */
5163	case CODE_FOR_doloop_end_internal:
5164	  pat = XVECEXP (pat, 0, 1);
5165	  break;
5166
5167	  /* Doesn't generate code.  */
5168	case CODE_FOR_pred_rel_mutex:
5169	case CODE_FOR_prologue_use:
5170	  return 0;
5171
5172	default:
5173	  break;
5174	}
5175
5176      memset (rws_insn, 0, sizeof (rws_insn));
5177      need_barrier = rtx_needs_barrier (pat, flags, 0);
5178
5179      /* Check to see if the previous instruction was a volatile
5180	 asm.  */
5181      if (! need_barrier)
5182	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5183      break;
5184
5185    default:
5186      abort ();
5187    }
5188
5189  if (first_instruction)
5190    {
5191      need_barrier = 0;
5192      first_instruction = 0;
5193    }
5194
5195  return need_barrier;
5196}
5197
5198/* Like group_barrier_needed_p, but do not clobber the current state.  */
5199
5200static int
5201safe_group_barrier_needed_p (insn)
5202     rtx insn;
5203{
5204  struct reg_write_state rws_saved[NUM_REGS];
5205  int saved_first_instruction;
5206  int t;
5207
5208  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5209  saved_first_instruction = first_instruction;
5210
5211  t = group_barrier_needed_p (insn);
5212
5213  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5214  first_instruction = saved_first_instruction;
5215
5216  return t;
5217}
5218
5219/* INSNS is an chain of instructions.  Scan the chain, and insert stop bits
5220   as necessary to eliminate dependendencies.  This function assumes that
5221   a final instruction scheduling pass has been run which has already
5222   inserted most of the necessary stop bits.  This function only inserts
5223   new ones at basic block boundaries, since these are invisible to the
5224   scheduler.  */
5225
5226static void
5227emit_insn_group_barriers (dump, insns)
5228     FILE *dump;
5229     rtx insns;
5230{
5231  rtx insn;
5232  rtx last_label = 0;
5233  int insns_since_last_label = 0;
5234
5235  init_insn_group_barriers ();
5236
5237  for (insn = insns; insn; insn = NEXT_INSN (insn))
5238    {
5239      if (GET_CODE (insn) == CODE_LABEL)
5240	{
5241	  if (insns_since_last_label)
5242	    last_label = insn;
5243	  insns_since_last_label = 0;
5244	}
5245      else if (GET_CODE (insn) == NOTE
5246	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5247	{
5248	  if (insns_since_last_label)
5249	    last_label = insn;
5250	  insns_since_last_label = 0;
5251	}
5252      else if (GET_CODE (insn) == INSN
5253	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5254	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5255	{
5256	  init_insn_group_barriers ();
5257	  last_label = 0;
5258	}
5259      else if (INSN_P (insn))
5260	{
5261	  insns_since_last_label = 1;
5262
5263	  if (group_barrier_needed_p (insn))
5264	    {
5265	      if (last_label)
5266		{
5267		  if (dump)
5268		    fprintf (dump, "Emitting stop before label %d\n",
5269			     INSN_UID (last_label));
5270		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5271		  insn = last_label;
5272
5273		  init_insn_group_barriers ();
5274		  last_label = 0;
5275		}
5276	    }
5277	}
5278    }
5279}
5280
5281/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5282   This function has to emit all necessary group barriers.  */
5283
5284static void
5285emit_all_insn_group_barriers (dump, insns)
5286     FILE *dump ATTRIBUTE_UNUSED;
5287     rtx insns;
5288{
5289  rtx insn;
5290
5291  init_insn_group_barriers ();
5292
5293  for (insn = insns; insn; insn = NEXT_INSN (insn))
5294    {
5295      if (GET_CODE (insn) == BARRIER)
5296	{
5297	  rtx last = prev_active_insn (insn);
5298
5299	  if (! last)
5300	    continue;
5301	  if (GET_CODE (last) == JUMP_INSN
5302	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5303	    last = prev_active_insn (last);
5304	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5305	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5306
5307	  init_insn_group_barriers ();
5308	}
5309      else if (INSN_P (insn))
5310	{
5311	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5312	    init_insn_group_barriers ();
5313	  else if (group_barrier_needed_p (insn))
5314	    {
5315	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5316	      init_insn_group_barriers ();
5317	      group_barrier_needed_p (insn);
5318	    }
5319	}
5320    }
5321}
5322
5323static int errata_find_address_regs PARAMS ((rtx *, void *));
5324static void errata_emit_nops PARAMS ((rtx));
5325static void fixup_errata PARAMS ((void));
5326
5327/* This structure is used to track some details about the previous insns
5328   groups so we can determine if it may be necessary to insert NOPs to
5329   workaround hardware errata.  */
5330static struct group
5331{
5332  HARD_REG_SET p_reg_set;
5333  HARD_REG_SET gr_reg_conditionally_set;
5334} last_group[2];
5335
5336/* Index into the last_group array.  */
5337static int group_idx;
5338
5339/* Called through for_each_rtx; determines if a hard register that was
5340   conditionally set in the previous group is used as an address register.
5341   It ensures that for_each_rtx returns 1 in that case.  */
5342static int
5343errata_find_address_regs (xp, data)
5344     rtx *xp;
5345     void *data ATTRIBUTE_UNUSED;
5346{
5347  rtx x = *xp;
5348  if (GET_CODE (x) != MEM)
5349    return 0;
5350  x = XEXP (x, 0);
5351  if (GET_CODE (x) == POST_MODIFY)
5352    x = XEXP (x, 0);
5353  if (GET_CODE (x) == REG)
5354    {
5355      struct group *prev_group = last_group + (group_idx ^ 1);
5356      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5357			     REGNO (x)))
5358	return 1;
5359      return -1;
5360    }
5361  return 0;
5362}
5363
5364/* Called for each insn; this function keeps track of the state in
5365   last_group and emits additional NOPs if necessary to work around
5366   an Itanium A/B step erratum.  */
5367static void
5368errata_emit_nops (insn)
5369     rtx insn;
5370{
5371  struct group *this_group = last_group + group_idx;
5372  struct group *prev_group = last_group + (group_idx ^ 1);
5373  rtx pat = PATTERN (insn);
5374  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5375  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5376  enum attr_type type;
5377  rtx set = real_pat;
5378
5379  if (GET_CODE (real_pat) == USE
5380      || GET_CODE (real_pat) == CLOBBER
5381      || GET_CODE (real_pat) == ASM_INPUT
5382      || GET_CODE (real_pat) == ADDR_VEC
5383      || GET_CODE (real_pat) == ADDR_DIFF_VEC
5384      || asm_noperands (PATTERN (insn)) >= 0)
5385    return;
5386
5387  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5388     parts of it.  */
5389
5390  if (GET_CODE (set) == PARALLEL)
5391    {
5392      int i;
5393      set = XVECEXP (real_pat, 0, 0);
5394      for (i = 1; i < XVECLEN (real_pat, 0); i++)
5395	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5396	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5397	  {
5398	    set = 0;
5399	    break;
5400	  }
5401    }
5402
5403  if (set && GET_CODE (set) != SET)
5404    set = 0;
5405
5406  type  = get_attr_type (insn);
5407
5408  if (type == TYPE_F
5409      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5410    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5411
5412  if ((type == TYPE_M || type == TYPE_A) && cond && set
5413      && REG_P (SET_DEST (set))
5414      && GET_CODE (SET_SRC (set)) != PLUS
5415      && GET_CODE (SET_SRC (set)) != MINUS
5416      && (GET_CODE (SET_SRC (set)) != ASHIFT
5417	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5418      && (GET_CODE (SET_SRC (set)) != MEM
5419	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5420      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5421    {
5422      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5423	  || ! REG_P (XEXP (cond, 0)))
5424	abort ();
5425
5426      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5427	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5428    }
5429  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5430    {
5431      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5432      emit_insn_before (gen_nop (), insn);
5433      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5434      group_idx = 0;
5435      memset (last_group, 0, sizeof last_group);
5436    }
5437}
5438
5439/* Emit extra nops if they are required to work around hardware errata.  */
5440
5441static void
5442fixup_errata ()
5443{
5444  rtx insn;
5445
5446  if (! TARGET_B_STEP)
5447    return;
5448
5449  group_idx = 0;
5450  memset (last_group, 0, sizeof last_group);
5451
5452  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5453    {
5454      if (!INSN_P (insn))
5455	continue;
5456
5457      if (ia64_safe_type (insn) == TYPE_S)
5458	{
5459	  group_idx ^= 1;
5460	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5461	}
5462      else
5463	errata_emit_nops (insn);
5464    }
5465}
5466
5467/* Instruction scheduling support.  */
5468/* Describe one bundle.  */
5469
5470struct bundle
5471{
5472  /* Zero if there's no possibility of a stop in this bundle other than
5473     at the end, otherwise the position of the optional stop bit.  */
5474  int possible_stop;
5475  /* The types of the three slots.  */
5476  enum attr_type t[3];
5477  /* The pseudo op to be emitted into the assembler output.  */
5478  const char *name;
5479};
5480
5481#define NR_BUNDLES 10
5482
5483/* A list of all available bundles.  */
5484
5485static const struct bundle bundle[NR_BUNDLES] =
5486{
5487  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
5488  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
5489  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
5490  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
5491#if NR_BUNDLES == 10
5492  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
5493  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
5494#endif
5495  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
5496  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
5497  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
5498  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
5499     it matches an L type insn.  Otherwise we'll try to generate L type
5500     nops.  */
5501  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
5502};
5503
5504/* Describe a packet of instructions.  Packets consist of two bundles that
5505   are visible to the hardware in one scheduling window.  */
5506
5507struct ia64_packet
5508{
5509  const struct bundle *t1, *t2;
5510  /* Precomputed value of the first split issue in this packet if a cycle
5511     starts at its beginning.  */
5512  int first_split;
5513  /* For convenience, the insn types are replicated here so we don't have
5514     to go through T1 and T2 all the time.  */
5515  enum attr_type t[6];
5516};
5517
5518/* An array containing all possible packets.  */
5519#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
5520static struct ia64_packet packets[NR_PACKETS];
5521
5522/* Map attr_type to a string with the name.  */
5523
5524static const char *const type_names[] =
5525{
5526  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
5527};
5528
5529/* Nonzero if we should insert stop bits into the schedule.  */
5530int ia64_final_schedule = 0;
5531
5532static int itanium_split_issue PARAMS ((const struct ia64_packet *, int));
5533static rtx ia64_single_set PARAMS ((rtx));
5534static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx));
5535static void ia64_emit_insn_before PARAMS ((rtx, rtx));
5536static void maybe_rotate PARAMS ((FILE *));
5537static void finish_last_head PARAMS ((FILE *, int));
5538static void rotate_one_bundle PARAMS ((FILE *));
5539static void rotate_two_bundles PARAMS ((FILE *));
5540static void nop_cycles_until PARAMS ((int, FILE *));
5541static void cycle_end_fill_slots PARAMS ((FILE *));
5542static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
5543static int get_split PARAMS ((const struct ia64_packet *, int));
5544static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
5545				   const struct ia64_packet *, int));
5546static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
5547				      rtx *, enum attr_type *, int));
5548static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
5549static void dump_current_packet PARAMS ((FILE *));
5550static void schedule_stop PARAMS ((FILE *));
5551static rtx gen_nop_type PARAMS ((enum attr_type));
5552static void ia64_emit_nops PARAMS ((void));
5553
5554/* Map a bundle number to its pseudo-op.  */
5555
5556const char *
5557get_bundle_name (b)
5558     int b;
5559{
5560  return bundle[b].name;
5561}
5562
5563/* Compute the slot which will cause a split issue in packet P if the
5564   current cycle begins at slot BEGIN.  */
5565
5566static int
5567itanium_split_issue (p, begin)
5568     const struct ia64_packet *p;
5569     int begin;
5570{
5571  int type_count[TYPE_S];
5572  int i;
5573  int split = 6;
5574
5575  if (begin < 3)
5576    {
5577      /* Always split before and after MMF.  */
5578      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
5579	return 3;
5580      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
5581	return 3;
5582      /* Always split after MBB and BBB.  */
5583      if (p->t[1] == TYPE_B)
5584	return 3;
5585      /* Split after first bundle in MIB BBB combination.  */
5586      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
5587	return 3;
5588    }
5589
5590  memset (type_count, 0, sizeof type_count);
5591  for (i = begin; i < split; i++)
5592    {
5593      enum attr_type t0 = p->t[i];
5594      /* An MLX bundle reserves the same units as an MFI bundle.  */
5595      enum attr_type t = (t0 == TYPE_L ? TYPE_F
5596			  : t0 == TYPE_X ? TYPE_I
5597			  : t0);
5598
5599      /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and
5600	 2 integer per cycle.  */
5601      int max = (t == TYPE_B ? 3 : 2);
5602      if (type_count[t] == max)
5603	return i;
5604
5605      type_count[t]++;
5606    }
5607  return split;
5608}
5609
5610/* Return the maximum number of instructions a cpu can issue.  */
5611
5612static int
5613ia64_issue_rate ()
5614{
5615  return 6;
5616}
5617
5618/* Helper function - like single_set, but look inside COND_EXEC.  */
5619
5620static rtx
5621ia64_single_set (insn)
5622     rtx insn;
5623{
5624  rtx x = PATTERN (insn), ret;
5625  if (GET_CODE (x) == COND_EXEC)
5626    x = COND_EXEC_CODE (x);
5627  if (GET_CODE (x) == SET)
5628    return x;
5629
5630  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
5631     Although they are not classical single set, the second set is there just
5632     to protect it from moving past FP-relative stack accesses.  */
5633  switch (recog_memoized (insn))
5634    {
5635    case CODE_FOR_prologue_allocate_stack:
5636    case CODE_FOR_epilogue_deallocate_stack:
5637      ret = XVECEXP (x, 0, 0);
5638      break;
5639
5640    default:
5641      ret = single_set_2 (insn, x);
5642      break;
5643    }
5644
5645  return ret;
5646}
5647
5648/* Adjust the cost of a scheduling dependency.  Return the new cost of
5649   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
5650
5651static int
5652ia64_adjust_cost (insn, link, dep_insn, cost)
5653     rtx insn, link, dep_insn;
5654     int cost;
5655{
5656  enum attr_type dep_type;
5657  enum attr_itanium_class dep_class;
5658  enum attr_itanium_class insn_class;
5659  rtx dep_set, set, src, addr;
5660
5661  if (GET_CODE (PATTERN (insn)) == CLOBBER
5662      || GET_CODE (PATTERN (insn)) == USE
5663      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
5664      || GET_CODE (PATTERN (dep_insn)) == USE
5665      /* @@@ Not accurate for indirect calls.  */
5666      || GET_CODE (insn) == CALL_INSN
5667      || ia64_safe_type (insn) == TYPE_S)
5668    return 0;
5669
5670  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
5671      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
5672    return 0;
5673
5674  dep_type = ia64_safe_type (dep_insn);
5675  dep_class = ia64_safe_itanium_class (dep_insn);
5676  insn_class = ia64_safe_itanium_class (insn);
5677
5678  /* Compares that feed a conditional branch can execute in the same
5679     cycle.  */
5680  dep_set = ia64_single_set (dep_insn);
5681  set = ia64_single_set (insn);
5682
5683  if (dep_type != TYPE_F
5684      && dep_set
5685      && GET_CODE (SET_DEST (dep_set)) == REG
5686      && PR_REG (REGNO (SET_DEST (dep_set)))
5687      && GET_CODE (insn) == JUMP_INSN)
5688    return 0;
5689
5690  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
5691    {
5692      /* ??? Can't find any information in the documenation about whether
5693	 a sequence
5694	   st [rx] = ra
5695	   ld rb = [ry]
5696	 splits issue.  Assume it doesn't.  */
5697      return 0;
5698    }
5699
5700  src = set ? SET_SRC (set) : 0;
5701  addr = 0;
5702  if (set)
5703    {
5704      if (GET_CODE (SET_DEST (set)) == MEM)
5705	addr = XEXP (SET_DEST (set), 0);
5706      else if (GET_CODE (SET_DEST (set)) == SUBREG
5707	       && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM)
5708	addr = XEXP (SUBREG_REG (SET_DEST (set)), 0);
5709      else
5710	{
5711	  addr = src;
5712	  if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0)
5713	    addr = XVECEXP (addr, 0, 0);
5714	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
5715	    addr = XEXP (addr, 0);
5716
5717	  /* Note that LO_SUM is used for GOT loads.  */
5718	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
5719	    addr = XEXP (addr, 0);
5720	  else
5721	    addr = 0;
5722	}
5723    }
5724
5725  if (addr && GET_CODE (addr) == POST_MODIFY)
5726    addr = XEXP (addr, 0);
5727
5728  set = ia64_single_set (dep_insn);
5729
5730  if ((dep_class == ITANIUM_CLASS_IALU
5731       || dep_class == ITANIUM_CLASS_ILOG
5732       || dep_class == ITANIUM_CLASS_LD)
5733      && (insn_class == ITANIUM_CLASS_LD
5734	  || insn_class == ITANIUM_CLASS_ST))
5735    {
5736      if (! addr || ! set)
5737	abort ();
5738      /* This isn't completely correct - an IALU that feeds an address has
5739	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
5740	 otherwise.  Unfortunately there's no good way to describe this.  */
5741      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
5742	return cost + 1;
5743    }
5744
5745  if ((dep_class == ITANIUM_CLASS_IALU
5746       || dep_class == ITANIUM_CLASS_ILOG
5747       || dep_class == ITANIUM_CLASS_LD)
5748      && (insn_class == ITANIUM_CLASS_MMMUL
5749	  || insn_class == ITANIUM_CLASS_MMSHF
5750	  || insn_class == ITANIUM_CLASS_MMSHFI))
5751    return 3;
5752
5753  if (dep_class == ITANIUM_CLASS_FMAC
5754      && (insn_class == ITANIUM_CLASS_FMISC
5755	  || insn_class == ITANIUM_CLASS_FCVTFX
5756	  || insn_class == ITANIUM_CLASS_XMPY))
5757    return 7;
5758
5759  if ((dep_class == ITANIUM_CLASS_FMAC
5760       || dep_class == ITANIUM_CLASS_FMISC
5761       || dep_class == ITANIUM_CLASS_FCVTFX
5762       || dep_class == ITANIUM_CLASS_XMPY)
5763      && insn_class == ITANIUM_CLASS_STF)
5764    return 8;
5765
5766  /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
5767     but HP engineers say any non-MM operation.  */
5768  if ((dep_class == ITANIUM_CLASS_MMMUL
5769       || dep_class == ITANIUM_CLASS_MMSHF
5770       || dep_class == ITANIUM_CLASS_MMSHFI)
5771      && insn_class != ITANIUM_CLASS_MMMUL
5772      && insn_class != ITANIUM_CLASS_MMSHF
5773      && insn_class != ITANIUM_CLASS_MMSHFI)
5774    return 4;
5775
5776  return cost;
5777}
5778
5779/* Describe the current state of the Itanium pipeline.  */
5780static struct
5781{
5782  /* The first slot that is used in the current cycle.  */
5783  int first_slot;
5784  /* The next slot to fill.  */
5785  int cur;
5786  /* The packet we have selected for the current issue window.  */
5787  const struct ia64_packet *packet;
5788  /* The position of the split issue that occurs due to issue width
5789     limitations (6 if there's no split issue).  */
5790  int split;
5791  /* Record data about the insns scheduled so far in the same issue
5792     window.  The elements up to but not including FIRST_SLOT belong
5793     to the previous cycle, the ones starting with FIRST_SLOT belong
5794     to the current cycle.  */
5795  enum attr_type types[6];
5796  rtx insns[6];
5797  int stopbit[6];
5798  /* Nonzero if we decided to schedule a stop bit.  */
5799  int last_was_stop;
5800} sched_data;
5801
5802/* Temporary arrays; they have enough elements to hold all insns that
5803   can be ready at the same time while scheduling of the current block.
5804   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
5805static rtx *sched_ready;
5806static enum attr_type *sched_types;
5807
5808/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
5809   of packet P.  */
5810
5811static int
5812insn_matches_slot (p, itype, slot, insn)
5813     const struct ia64_packet *p;
5814     enum attr_type itype;
5815     int slot;
5816     rtx insn;
5817{
5818  enum attr_itanium_requires_unit0 u0;
5819  enum attr_type stype = p->t[slot];
5820
5821  if (insn)
5822    {
5823      u0 = ia64_safe_itanium_requires_unit0 (insn);
5824      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
5825	{
5826	  int i;
5827	  for (i = sched_data.first_slot; i < slot; i++)
5828	    if (p->t[i] == stype
5829		|| (stype == TYPE_F && p->t[i] == TYPE_L)
5830		|| (stype == TYPE_I && p->t[i] == TYPE_X))
5831	      return 0;
5832	}
5833      if (GET_CODE (insn) == CALL_INSN)
5834	{
5835	  /* Reject calls in multiway branch packets.  We want to limit
5836	     the number of multiway branches we generate (since the branch
5837	     predictor is limited), and this seems to work fairly well.
5838	     (If we didn't do this, we'd have to add another test here to
5839	     force calls into the third slot of the bundle.)  */
5840	  if (slot < 3)
5841	    {
5842	      if (p->t[1] == TYPE_B)
5843		return 0;
5844	    }
5845	  else
5846	    {
5847	      if (p->t[4] == TYPE_B)
5848		return 0;
5849	    }
5850	}
5851    }
5852
5853  if (itype == stype)
5854    return 1;
5855  if (itype == TYPE_A)
5856    return stype == TYPE_M || stype == TYPE_I;
5857  return 0;
5858}
5859
5860/* Like emit_insn_before, but skip cycle_display notes.
5861   ??? When cycle display notes are implemented, update this.  */
5862
5863static void
5864ia64_emit_insn_before (insn, before)
5865     rtx insn, before;
5866{
5867  emit_insn_before (insn, before);
5868}
5869
5870/* When rotating a bundle out of the issue window, insert a bundle selector
5871   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
5872   is either 0 or 3, depending on whether we want to emit a bundle selector
5873   for the first bundle or the second bundle in the current issue window.
5874
5875   The selector insns are emitted this late because the selected packet can
5876   be changed until parts of it get rotated out.  */
5877
5878static void
5879finish_last_head (dump, start)
5880     FILE *dump;
5881     int start;
5882{
5883  const struct ia64_packet *p = sched_data.packet;
5884  const struct bundle *b = start == 0 ? p->t1 : p->t2;
5885  int bundle_type = b - bundle;
5886  rtx insn;
5887  int i;
5888
5889  if (! ia64_final_schedule)
5890    return;
5891
5892  for (i = start; sched_data.insns[i] == 0; i++)
5893    if (i == start + 3)
5894      abort ();
5895  insn = sched_data.insns[i];
5896
5897  if (dump)
5898    fprintf (dump, "//    Emitting template before %d: %s\n",
5899	     INSN_UID (insn), b->name);
5900
5901  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
5902}
5903
5904/* We can't schedule more insns this cycle.  Fix up the scheduling state
5905   and advance FIRST_SLOT and CUR.
5906   We have to distribute the insns that are currently found between
5907   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
5908   far, they are stored successively in the fields starting at FIRST_SLOT;
5909   now they must be moved to the correct slots.
5910   DUMP is the current scheduling dump file, or NULL.  */
5911
5912static void
5913cycle_end_fill_slots (dump)
5914     FILE *dump;
5915{
5916  const struct ia64_packet *packet = sched_data.packet;
5917  int slot, i;
5918  enum attr_type tmp_types[6];
5919  rtx tmp_insns[6];
5920
5921  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
5922  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
5923
5924  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
5925    {
5926      enum attr_type t = tmp_types[i];
5927      if (t != ia64_safe_type (tmp_insns[i]))
5928	abort ();
5929      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
5930	{
5931	  if (slot > sched_data.split)
5932	    abort ();
5933	  if (dump)
5934	    fprintf (dump, "// Packet needs %s, have %s\n",
5935		     type_names[packet->t[slot]], type_names[t]);
5936	  sched_data.types[slot] = packet->t[slot];
5937	  sched_data.insns[slot] = 0;
5938	  sched_data.stopbit[slot] = 0;
5939
5940	  /* ??? TYPE_L instructions always fill up two slots, but we don't
5941	     support TYPE_L nops.  */
5942	  if (packet->t[slot] == TYPE_L)
5943	    abort ();
5944
5945	  slot++;
5946	}
5947
5948      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
5949	 actual slot type later.  */
5950      sched_data.types[slot] = packet->t[slot];
5951      sched_data.insns[slot] = tmp_insns[i];
5952      sched_data.stopbit[slot] = 0;
5953      slot++;
5954
5955      /* TYPE_L instructions always fill up two slots.  */
5956      if (t == TYPE_L)
5957	{
5958	  sched_data.types[slot] = packet->t[slot];
5959	  sched_data.insns[slot] = 0;
5960	  sched_data.stopbit[slot] = 0;
5961	  slot++;
5962	}
5963    }
5964
5965  /* This isn't right - there's no need to pad out until the forced split;
5966     the CPU will automatically split if an insn isn't ready.  */
5967#if 0
5968  while (slot < sched_data.split)
5969    {
5970      sched_data.types[slot] = packet->t[slot];
5971      sched_data.insns[slot] = 0;
5972      sched_data.stopbit[slot] = 0;
5973      slot++;
5974    }
5975#endif
5976
5977  sched_data.first_slot = sched_data.cur = slot;
5978}
5979
5980/* Bundle rotations, as described in the Itanium optimization manual.
5981   We can rotate either one or both bundles out of the issue window.
5982   DUMP is the current scheduling dump file, or NULL.  */
5983
5984static void
5985rotate_one_bundle (dump)
5986     FILE *dump;
5987{
5988  if (dump)
5989    fprintf (dump, "// Rotating one bundle.\n");
5990
5991  finish_last_head (dump, 0);
5992  if (sched_data.cur > 3)
5993    {
5994      sched_data.cur -= 3;
5995      sched_data.first_slot -= 3;
5996      memmove (sched_data.types,
5997	       sched_data.types + 3,
5998	       sched_data.cur * sizeof *sched_data.types);
5999      memmove (sched_data.stopbit,
6000	       sched_data.stopbit + 3,
6001	       sched_data.cur * sizeof *sched_data.stopbit);
6002      memmove (sched_data.insns,
6003	       sched_data.insns + 3,
6004	       sched_data.cur * sizeof *sched_data.insns);
6005      sched_data.packet
6006	= &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES];
6007    }
6008  else
6009    {
6010      sched_data.cur = 0;
6011      sched_data.first_slot = 0;
6012    }
6013}
6014
6015static void
6016rotate_two_bundles (dump)
6017     FILE *dump;
6018{
6019  if (dump)
6020    fprintf (dump, "// Rotating two bundles.\n");
6021
6022  if (sched_data.cur == 0)
6023    return;
6024
6025  finish_last_head (dump, 0);
6026  if (sched_data.cur > 3)
6027    finish_last_head (dump, 3);
6028  sched_data.cur = 0;
6029  sched_data.first_slot = 0;
6030}
6031
6032/* We're beginning a new block.  Initialize data structures as necessary.  */
6033
6034static void
6035ia64_sched_init (dump, sched_verbose, max_ready)
6036     FILE *dump ATTRIBUTE_UNUSED;
6037     int sched_verbose ATTRIBUTE_UNUSED;
6038     int max_ready;
6039{
6040  static int initialized = 0;
6041
6042  if (! initialized)
6043    {
6044      int b1, b2, i;
6045
6046      initialized = 1;
6047
6048      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
6049	{
6050	  const struct bundle *t1 = bundle + b1;
6051	  for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
6052	    {
6053	      const struct bundle *t2 = bundle + b2;
6054
6055	      packets[i].t1 = t1;
6056	      packets[i].t2 = t2;
6057	    }
6058	}
6059      for (i = 0; i < NR_PACKETS; i++)
6060	{
6061	  int j;
6062	  for (j = 0; j < 3; j++)
6063	    packets[i].t[j] = packets[i].t1->t[j];
6064	  for (j = 0; j < 3; j++)
6065	    packets[i].t[j + 3] = packets[i].t2->t[j];
6066	  packets[i].first_split = itanium_split_issue (packets + i, 0);
6067	}
6068
6069    }
6070
6071  init_insn_group_barriers ();
6072
6073  memset (&sched_data, 0, sizeof sched_data);
6074  sched_types = (enum attr_type *) xmalloc (max_ready
6075					    * sizeof (enum attr_type));
6076  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
6077}
6078
6079/* See if the packet P can match the insns we have already scheduled.  Return
6080   nonzero if so.  In *PSLOT, we store the first slot that is available for
6081   more instructions if we choose this packet.
6082   SPLIT holds the last slot we can use, there's a split issue after it so
6083   scheduling beyond it would cause us to use more than one cycle.  */
6084
6085static int
6086packet_matches_p (p, split, pslot)
6087     const struct ia64_packet *p;
6088     int split;
6089     int *pslot;
6090{
6091  int filled = sched_data.cur;
6092  int first = sched_data.first_slot;
6093  int i, slot;
6094
6095  /* First, check if the first of the two bundles must be a specific one (due
6096     to stop bits).  */
6097  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
6098    return 0;
6099  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
6100    return 0;
6101
6102  for (i = 0; i < first; i++)
6103    if (! insn_matches_slot (p, sched_data.types[i], i,
6104			     sched_data.insns[i]))
6105      return 0;
6106  for (i = slot = first; i < filled; i++)
6107    {
6108      while (slot < split)
6109	{
6110	  if (insn_matches_slot (p, sched_data.types[i], slot,
6111				 sched_data.insns[i]))
6112	    break;
6113	  slot++;
6114	}
6115      if (slot == split)
6116	return 0;
6117      slot++;
6118    }
6119
6120  if (pslot)
6121    *pslot = slot;
6122  return 1;
6123}
6124
6125/* A frontend for itanium_split_issue.  For a packet P and a slot
6126   number FIRST that describes the start of the current clock cycle,
6127   return the slot number of the first split issue.  This function
6128   uses the cached number found in P if possible.  */
6129
6130static int
6131get_split (p, first)
6132     const struct ia64_packet *p;
6133     int first;
6134{
6135  if (first == 0)
6136    return p->first_split;
6137  return itanium_split_issue (p, first);
6138}
6139
6140/* Given N_READY insns in the array READY, whose types are found in the
6141   corresponding array TYPES, return the insn that is best suited to be
6142   scheduled in slot SLOT of packet P.  */
6143
6144static int
6145find_best_insn (ready, types, n_ready, p, slot)
6146     rtx *ready;
6147     enum attr_type *types;
6148     int n_ready;
6149     const struct ia64_packet *p;
6150     int slot;
6151{
6152  int best = -1;
6153  int best_pri = 0;
6154  while (n_ready-- > 0)
6155    {
6156      rtx insn = ready[n_ready];
6157      if (! insn)
6158	continue;
6159      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
6160	break;
6161      /* If we have equally good insns, one of which has a stricter
6162	 slot requirement, prefer the one with the stricter requirement.  */
6163      if (best >= 0 && types[n_ready] == TYPE_A)
6164	continue;
6165      if (insn_matches_slot (p, types[n_ready], slot, insn))
6166	{
6167	  best = n_ready;
6168	  best_pri = INSN_PRIORITY (ready[best]);
6169
6170	  /* If there's no way we could get a stricter requirement, stop
6171	     looking now.  */
6172	  if (types[n_ready] != TYPE_A
6173	      && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
6174	    break;
6175	  break;
6176	}
6177    }
6178  return best;
6179}
6180
6181/* Select the best packet to use given the current scheduler state and the
6182   current ready list.
6183   READY is an array holding N_READY ready insns; TYPES is a corresponding
6184   array that holds their types.  Store the best packet in *PPACKET and the
6185   number of insns that can be scheduled in the current cycle in *PBEST.  */
6186
6187static void
6188find_best_packet (pbest, ppacket, ready, types, n_ready)
6189     int *pbest;
6190     const struct ia64_packet **ppacket;
6191     rtx *ready;
6192     enum attr_type *types;
6193     int n_ready;
6194{
6195  int first = sched_data.first_slot;
6196  int best = 0;
6197  int lowest_end = 6;
6198  const struct ia64_packet *best_packet = NULL;
6199  int i;
6200
6201  for (i = 0; i < NR_PACKETS; i++)
6202    {
6203      const struct ia64_packet *p = packets + i;
6204      int slot;
6205      int split = get_split (p, first);
6206      int win = 0;
6207      int first_slot, last_slot;
6208      int b_nops = 0;
6209
6210      if (! packet_matches_p (p, split, &first_slot))
6211	continue;
6212
6213      memcpy (sched_ready, ready, n_ready * sizeof (rtx));
6214
6215      win = 0;
6216      last_slot = 6;
6217      for (slot = first_slot; slot < split; slot++)
6218	{
6219	  int insn_nr;
6220
6221	  /* Disallow a degenerate case where the first bundle doesn't
6222	     contain anything but NOPs!  */
6223	  if (first_slot == 0 && win == 0 && slot == 3)
6224	    {
6225	      win = -1;
6226	      break;
6227	    }
6228
6229	  insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
6230	  if (insn_nr >= 0)
6231	    {
6232	      sched_ready[insn_nr] = 0;
6233	      last_slot = slot;
6234	      win++;
6235	    }
6236	  else if (p->t[slot] == TYPE_B)
6237	    b_nops++;
6238	}
6239      /* We must disallow MBB/BBB packets if any of their B slots would be
6240	 filled with nops.  */
6241      if (last_slot < 3)
6242	{
6243	  if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
6244	    win = -1;
6245	}
6246      else
6247	{
6248	  if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
6249	    win = -1;
6250	}
6251
6252      if (win > best
6253	  || (win == best && last_slot < lowest_end))
6254	{
6255	  best = win;
6256	  lowest_end = last_slot;
6257	  best_packet = p;
6258	}
6259    }
6260  *pbest = best;
6261  *ppacket = best_packet;
6262}
6263
6264/* Reorder the ready list so that the insns that can be issued in this cycle
6265   are found in the correct order at the end of the list.
6266   DUMP is the scheduling dump file, or NULL.  READY points to the start,
6267   E_READY to the end of the ready list.  MAY_FAIL determines what should be
6268   done if no insns can be scheduled in this cycle: if it is zero, we abort,
6269   otherwise we return 0.
6270   Return 1 if any insns can be scheduled in this cycle.  */
6271
6272static int
6273itanium_reorder (dump, ready, e_ready, may_fail)
6274     FILE *dump;
6275     rtx *ready;
6276     rtx *e_ready;
6277     int may_fail;
6278{
6279  const struct ia64_packet *best_packet;
6280  int n_ready = e_ready - ready;
6281  int first = sched_data.first_slot;
6282  int i, best, best_split, filled;
6283
6284  for (i = 0; i < n_ready; i++)
6285    sched_types[i] = ia64_safe_type (ready[i]);
6286
6287  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
6288
6289  if (best == 0)
6290    {
6291      if (may_fail)
6292	return 0;
6293      abort ();
6294    }
6295
6296  if (dump)
6297    {
6298      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
6299	       best_packet->t1->name,
6300	       best_packet->t2 ? best_packet->t2->name : NULL, best);
6301    }
6302
6303  best_split = itanium_split_issue (best_packet, first);
6304  packet_matches_p (best_packet, best_split, &filled);
6305
6306  for (i = filled; i < best_split; i++)
6307    {
6308      int insn_nr;
6309
6310      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
6311      if (insn_nr >= 0)
6312	{
6313	  rtx insn = ready[insn_nr];
6314	  memmove (ready + insn_nr, ready + insn_nr + 1,
6315		   (n_ready - insn_nr - 1) * sizeof (rtx));
6316	  memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
6317		   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
6318	  ready[--n_ready] = insn;
6319	}
6320    }
6321
6322  sched_data.packet = best_packet;
6323  sched_data.split = best_split;
6324  return 1;
6325}
6326
6327/* Dump information about the current scheduling state to file DUMP.  */
6328
6329static void
6330dump_current_packet (dump)
6331     FILE *dump;
6332{
6333  int i;
6334  fprintf (dump, "//    %d slots filled:", sched_data.cur);
6335  for (i = 0; i < sched_data.first_slot; i++)
6336    {
6337      rtx insn = sched_data.insns[i];
6338      fprintf (dump, " %s", type_names[sched_data.types[i]]);
6339      if (insn)
6340	fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
6341      if (sched_data.stopbit[i])
6342	fprintf (dump, " ;;");
6343    }
6344  fprintf (dump, " :::");
6345  for (i = sched_data.first_slot; i < sched_data.cur; i++)
6346    {
6347      rtx insn = sched_data.insns[i];
6348      enum attr_type t = ia64_safe_type (insn);
6349      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
6350    }
6351  fprintf (dump, "\n");
6352}
6353
6354/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
6355   NULL.  */
6356
6357static void
6358schedule_stop (dump)
6359     FILE *dump;
6360{
6361  const struct ia64_packet *best = sched_data.packet;
6362  int i;
6363  int best_stop = 6;
6364
6365  if (dump)
6366    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
6367
6368  if (sched_data.cur == 0)
6369    {
6370      if (dump)
6371	fprintf (dump, "//   At start of bundle, so nothing to do.\n");
6372
6373      rotate_two_bundles (NULL);
6374      return;
6375    }
6376
6377  for (i = -1; i < NR_PACKETS; i++)
6378    {
6379      /* This is a slight hack to give the current packet the first chance.
6380	 This is done to avoid e.g. switching from MIB to MBB bundles.  */
6381      const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet);
6382      int split = get_split (p, sched_data.first_slot);
6383      const struct bundle *compare;
6384      int next, stoppos;
6385
6386      if (! packet_matches_p (p, split, &next))
6387	continue;
6388
6389      compare = next > 3 ? p->t2 : p->t1;
6390
6391      stoppos = 3;
6392      if (compare->possible_stop)
6393	stoppos = compare->possible_stop;
6394      if (next > 3)
6395	stoppos += 3;
6396
6397      if (stoppos < next || stoppos >= best_stop)
6398	{
6399	  if (compare->possible_stop == 0)
6400	    continue;
6401	  stoppos = (next > 3 ? 6 : 3);
6402	}
6403      if (stoppos < next || stoppos >= best_stop)
6404	continue;
6405
6406      if (dump)
6407	fprintf (dump, "//   switching from %s %s to %s %s (stop at %d)\n",
6408		 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
6409		 stoppos);
6410
6411      best_stop = stoppos;
6412      best = p;
6413    }
6414
6415  sched_data.packet = best;
6416  cycle_end_fill_slots (dump);
6417  while (sched_data.cur < best_stop)
6418    {
6419      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
6420      sched_data.insns[sched_data.cur] = 0;
6421      sched_data.stopbit[sched_data.cur] = 0;
6422      sched_data.cur++;
6423    }
6424  sched_data.stopbit[sched_data.cur - 1] = 1;
6425  sched_data.first_slot = best_stop;
6426
6427  if (dump)
6428    dump_current_packet (dump);
6429}
6430
6431/* If necessary, perform one or two rotations on the scheduling state.
6432   This should only be called if we are starting a new cycle.  */
6433
6434static void
6435maybe_rotate (dump)
6436     FILE *dump;
6437{
6438  cycle_end_fill_slots (dump);
6439  if (sched_data.cur == 6)
6440    rotate_two_bundles (dump);
6441  else if (sched_data.cur >= 3)
6442    rotate_one_bundle (dump);
6443  sched_data.first_slot = sched_data.cur;
6444}
6445
6446/* The clock cycle when ia64_sched_reorder was last called.  */
6447static int prev_cycle;
6448
6449/* The first insn scheduled in the previous cycle.  This is the saved
6450   value of sched_data.first_slot.  */
6451static int prev_first;
6452
6453/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
6454   pad out the delay between MM (shifts, etc.) and integer operations.  */
6455
6456static void
6457nop_cycles_until (clock_var, dump)
6458     int clock_var;
6459     FILE *dump;
6460{
6461  int prev_clock = prev_cycle;
6462  int cycles_left = clock_var - prev_clock;
6463  bool did_stop = false;
6464
6465  /* Finish the previous cycle; pad it out with NOPs.  */
6466  if (sched_data.cur == 3)
6467    {
6468      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6469      did_stop = true;
6470      maybe_rotate (dump);
6471    }
6472  else if (sched_data.cur > 0)
6473    {
6474      int need_stop = 0;
6475      int split = itanium_split_issue (sched_data.packet, prev_first);
6476
6477      if (sched_data.cur < 3 && split > 3)
6478	{
6479	  split = 3;
6480	  need_stop = 1;
6481	}
6482
6483      if (split > sched_data.cur)
6484	{
6485	  int i;
6486	  for (i = sched_data.cur; i < split; i++)
6487	    {
6488	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6489	      sched_data.types[i] = sched_data.packet->t[i];
6490	      sched_data.insns[i] = t;
6491	      sched_data.stopbit[i] = 0;
6492	    }
6493	  sched_data.cur = split;
6494	}
6495
6496      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
6497	  && cycles_left > 1)
6498	{
6499	  int i;
6500	  for (i = sched_data.cur; i < 6; i++)
6501	    {
6502	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
6503	      sched_data.types[i] = sched_data.packet->t[i];
6504	      sched_data.insns[i] = t;
6505	      sched_data.stopbit[i] = 0;
6506	    }
6507	  sched_data.cur = 6;
6508	  cycles_left--;
6509	  need_stop = 1;
6510	}
6511
6512      if (need_stop || sched_data.cur == 6)
6513	{
6514	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6515	  did_stop = true;
6516	}
6517      maybe_rotate (dump);
6518    }
6519
6520  cycles_left--;
6521  while (cycles_left > 0)
6522    {
6523      sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
6524      sched_emit_insn (gen_nop_type (TYPE_M));
6525      sched_emit_insn (gen_nop_type (TYPE_I));
6526      if (cycles_left > 1)
6527	{
6528	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
6529	  cycles_left--;
6530	}
6531      sched_emit_insn (gen_nop_type (TYPE_I));
6532      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
6533      did_stop = true;
6534      cycles_left--;
6535    }
6536
6537  if (did_stop)
6538    init_insn_group_barriers ();
6539}
6540
6541/* We are about to being issuing insns for this clock cycle.
6542   Override the default sort algorithm to better slot instructions.  */
6543
6544static int
6545ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
6546		    reorder_type, clock_var)
6547     FILE *dump ATTRIBUTE_UNUSED;
6548     int sched_verbose ATTRIBUTE_UNUSED;
6549     rtx *ready;
6550     int *pn_ready;
6551     int reorder_type, clock_var;
6552{
6553  int n_asms;
6554  int n_ready = *pn_ready;
6555  rtx *e_ready = ready + n_ready;
6556  rtx *insnp;
6557
6558  if (sched_verbose)
6559    {
6560      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
6561      dump_current_packet (dump);
6562    }
6563
6564  /* Work around the pipeline flush that will occurr if the results of
6565     an MM instruction are accessed before the result is ready.  Intel
6566     documentation says this only happens with IALU, ISHF, ILOG, LD,
6567     and ST consumers, but experimental evidence shows that *any* non-MM
6568     type instruction will incurr the flush.  */
6569  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
6570    {
6571      for (insnp = ready; insnp < e_ready; insnp++)
6572	{
6573	  rtx insn = *insnp, link;
6574	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
6575
6576	  if (t == ITANIUM_CLASS_MMMUL
6577	      || t == ITANIUM_CLASS_MMSHF
6578	      || t == ITANIUM_CLASS_MMSHFI)
6579	    continue;
6580
6581	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6582	    if (REG_NOTE_KIND (link) == 0)
6583	      {
6584		rtx other = XEXP (link, 0);
6585		enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
6586		if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
6587		  {
6588		    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
6589		    goto out;
6590		  }
6591	      }
6592	}
6593    }
6594 out:
6595
6596  prev_first = sched_data.first_slot;
6597  prev_cycle = clock_var;
6598
6599  if (reorder_type == 0)
6600    maybe_rotate (sched_verbose ? dump : NULL);
6601
6602  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6603  n_asms = 0;
6604  for (insnp = ready; insnp < e_ready; insnp++)
6605    if (insnp < e_ready)
6606      {
6607	rtx insn = *insnp;
6608	enum attr_type t = ia64_safe_type (insn);
6609	if (t == TYPE_UNKNOWN)
6610	  {
6611	    if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6612		|| asm_noperands (PATTERN (insn)) >= 0)
6613	      {
6614		rtx lowest = ready[n_asms];
6615		ready[n_asms] = insn;
6616		*insnp = lowest;
6617		n_asms++;
6618	      }
6619	    else
6620	      {
6621		rtx highest = ready[n_ready - 1];
6622		ready[n_ready - 1] = insn;
6623		*insnp = highest;
6624		if (ia64_final_schedule && group_barrier_needed_p (insn))
6625		  {
6626		    schedule_stop (sched_verbose ? dump : NULL);
6627		    sched_data.last_was_stop = 1;
6628		    maybe_rotate (sched_verbose ? dump : NULL);
6629		  }
6630
6631		return 1;
6632	      }
6633	  }
6634      }
6635  if (n_asms < n_ready)
6636    {
6637      /* Some normal insns to process.  Skip the asms.  */
6638      ready += n_asms;
6639      n_ready -= n_asms;
6640    }
6641  else if (n_ready > 0)
6642    {
6643      /* Only asm insns left.  */
6644      if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1]))
6645	{
6646	  schedule_stop (sched_verbose ? dump : NULL);
6647	  sched_data.last_was_stop = 1;
6648	  maybe_rotate (sched_verbose ? dump : NULL);
6649	}
6650      cycle_end_fill_slots (sched_verbose ? dump : NULL);
6651      return 1;
6652    }
6653
6654  if (ia64_final_schedule)
6655    {
6656      int nr_need_stop = 0;
6657
6658      for (insnp = ready; insnp < e_ready; insnp++)
6659	if (safe_group_barrier_needed_p (*insnp))
6660	  nr_need_stop++;
6661
6662      /* Schedule a stop bit if
6663          - all insns require a stop bit, or
6664          - we are starting a new cycle and _any_ insns require a stop bit.
6665         The reason for the latter is that if our schedule is accurate, then
6666         the additional stop won't decrease performance at this point (since
6667	 there's a split issue at this point anyway), but it gives us more
6668         freedom when scheduling the currently ready insns.  */
6669      if ((reorder_type == 0 && nr_need_stop)
6670	  || (reorder_type == 1 && n_ready == nr_need_stop))
6671	{
6672	  schedule_stop (sched_verbose ? dump : NULL);
6673	  sched_data.last_was_stop = 1;
6674	  maybe_rotate (sched_verbose ? dump : NULL);
6675	  if (reorder_type == 1)
6676	    return 0;
6677	}
6678      else
6679	{
6680	  int deleted = 0;
6681	  insnp = e_ready;
6682	  /* Move down everything that needs a stop bit, preserving relative
6683	     order.  */
6684	  while (insnp-- > ready + deleted)
6685	    while (insnp >= ready + deleted)
6686	      {
6687		rtx insn = *insnp;
6688		if (! safe_group_barrier_needed_p (insn))
6689		  break;
6690		memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6691		*ready = insn;
6692		deleted++;
6693	      }
6694	  n_ready -= deleted;
6695	  ready += deleted;
6696	  if (deleted != nr_need_stop)
6697	    abort ();
6698	}
6699    }
6700
6701  return itanium_reorder (sched_verbose ? dump : NULL,
6702			  ready, e_ready, reorder_type == 1);
6703}
6704
6705static int
6706ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var)
6707     FILE *dump;
6708     int sched_verbose;
6709     rtx *ready;
6710     int *pn_ready;
6711     int clock_var;
6712{
6713  return ia64_internal_sched_reorder (dump, sched_verbose, ready,
6714				      pn_ready, 0, clock_var);
6715}
6716
6717/* Like ia64_sched_reorder, but called after issuing each insn.
6718   Override the default sort algorithm to better slot instructions.  */
6719
6720static int
6721ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
6722     FILE *dump ATTRIBUTE_UNUSED;
6723     int sched_verbose ATTRIBUTE_UNUSED;
6724     rtx *ready;
6725     int *pn_ready;
6726     int clock_var;
6727{
6728  if (sched_data.last_was_stop)
6729    return 0;
6730
6731  /* Detect one special case and try to optimize it.
6732     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
6733     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
6734  if (sched_data.first_slot == 1
6735      && sched_data.stopbit[0]
6736      && ((sched_data.cur == 4
6737	   && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
6738	   && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
6739	   && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
6740	  || (sched_data.cur == 3
6741	      && (sched_data.types[1] == TYPE_M
6742		  || sched_data.types[1] == TYPE_A)
6743	      && (sched_data.types[2] != TYPE_M
6744		  && sched_data.types[2] != TYPE_I
6745		  && sched_data.types[2] != TYPE_A))))
6746
6747    {
6748      int i, best;
6749      rtx stop = sched_data.insns[1];
6750
6751      /* Search backward for the stop bit that must be there.  */
6752      while (1)
6753	{
6754	  int insn_code;
6755
6756	  stop = PREV_INSN (stop);
6757	  if (GET_CODE (stop) != INSN)
6758	    abort ();
6759	  insn_code = recog_memoized (stop);
6760
6761	  /* Ignore .pred.rel.mutex.
6762
6763	     ??? Update this to ignore cycle display notes too
6764	     ??? once those are implemented  */
6765	  if (insn_code == CODE_FOR_pred_rel_mutex
6766	      || insn_code == CODE_FOR_prologue_use)
6767	    continue;
6768
6769	  if (insn_code == CODE_FOR_insn_group_barrier)
6770	    break;
6771	  abort ();
6772	}
6773
6774      /* Adjust the stop bit's slot selector.  */
6775      if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1)
6776	abort ();
6777      XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3);
6778
6779      sched_data.stopbit[0] = 0;
6780      sched_data.stopbit[2] = 1;
6781
6782      sched_data.types[5] = sched_data.types[3];
6783      sched_data.types[4] = sched_data.types[2];
6784      sched_data.types[3] = sched_data.types[1];
6785      sched_data.insns[5] = sched_data.insns[3];
6786      sched_data.insns[4] = sched_data.insns[2];
6787      sched_data.insns[3] = sched_data.insns[1];
6788      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
6789      sched_data.cur += 2;
6790      sched_data.first_slot = 3;
6791      for (i = 0; i < NR_PACKETS; i++)
6792	{
6793	  const struct ia64_packet *p = packets + i;
6794	  if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
6795	    {
6796	      sched_data.packet = p;
6797	      break;
6798	    }
6799	}
6800      rotate_one_bundle (sched_verbose ? dump : NULL);
6801
6802      best = 6;
6803      for (i = 0; i < NR_PACKETS; i++)
6804	{
6805	  const struct ia64_packet *p = packets + i;
6806	  int split = get_split (p, sched_data.first_slot);
6807	  int next;
6808
6809	  /* Disallow multiway branches here.  */
6810	  if (p->t[1] == TYPE_B)
6811	    continue;
6812
6813	  if (packet_matches_p (p, split, &next) && next < best)
6814	    {
6815	      best = next;
6816	      sched_data.packet = p;
6817	      sched_data.split = split;
6818	    }
6819	}
6820      if (best == 6)
6821	abort ();
6822    }
6823
6824  if (*pn_ready > 0)
6825    {
6826      int more = ia64_internal_sched_reorder (dump, sched_verbose,
6827					      ready, pn_ready, 1,
6828					      clock_var);
6829      if (more)
6830	return more;
6831      /* Did we schedule a stop?  If so, finish this cycle.  */
6832      if (sched_data.cur == sched_data.first_slot)
6833	return 0;
6834    }
6835
6836  if (sched_verbose)
6837    fprintf (dump, "//   Can't issue more this cycle; updating type array.\n");
6838
6839  cycle_end_fill_slots (sched_verbose ? dump : NULL);
6840  if (sched_verbose)
6841    dump_current_packet (dump);
6842  return 0;
6843}
6844
6845/* We are about to issue INSN.  Return the number of insns left on the
6846   ready queue that can be issued this cycle.  */
6847
6848static int
6849ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
6850     FILE *dump;
6851     int sched_verbose;
6852     rtx insn;
6853     int can_issue_more ATTRIBUTE_UNUSED;
6854{
6855  enum attr_type t = ia64_safe_type (insn);
6856
6857  if (sched_data.last_was_stop)
6858    {
6859      int t = sched_data.first_slot;
6860      if (t == 0)
6861	t = 3;
6862      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
6863      init_insn_group_barriers ();
6864      sched_data.last_was_stop = 0;
6865    }
6866
6867  if (t == TYPE_UNKNOWN)
6868    {
6869      if (sched_verbose)
6870	fprintf (dump, "// Ignoring type %s\n", type_names[t]);
6871      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6872	  || asm_noperands (PATTERN (insn)) >= 0)
6873	{
6874	  /* This must be some kind of asm.  Clear the scheduling state.  */
6875	  rotate_two_bundles (sched_verbose ? dump : NULL);
6876	  if (ia64_final_schedule)
6877	    group_barrier_needed_p (insn);
6878	}
6879      return 1;
6880    }
6881
6882  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
6883     important state info.  Don't delete this test.  */
6884  if (ia64_final_schedule
6885      && group_barrier_needed_p (insn))
6886    abort ();
6887
6888  sched_data.stopbit[sched_data.cur] = 0;
6889  sched_data.insns[sched_data.cur] = insn;
6890  sched_data.types[sched_data.cur] = t;
6891
6892  sched_data.cur++;
6893  if (sched_verbose)
6894    fprintf (dump, "// Scheduling insn %d of type %s\n",
6895	     INSN_UID (insn), type_names[t]);
6896
6897  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
6898    {
6899      schedule_stop (sched_verbose ? dump : NULL);
6900      sched_data.last_was_stop = 1;
6901    }
6902
6903  return 1;
6904}
6905
6906/* Free data allocated by ia64_sched_init.  */
6907
6908static void
6909ia64_sched_finish (dump, sched_verbose)
6910     FILE *dump;
6911     int sched_verbose;
6912{
6913  if (sched_verbose)
6914    fprintf (dump, "// Finishing schedule.\n");
6915  rotate_two_bundles (NULL);
6916  free (sched_types);
6917  free (sched_ready);
6918}
6919
6920/* Emit pseudo-ops for the assembler to describe predicate relations.
6921   At present this assumes that we only consider predicate pairs to
6922   be mutex, and that the assembler can deduce proper values from
6923   straight-line code.  */
6924
6925static void
6926emit_predicate_relation_info ()
6927{
6928  basic_block bb;
6929
6930  FOR_EACH_BB_REVERSE (bb)
6931    {
6932      int r;
6933      rtx head = bb->head;
6934
6935      /* We only need such notes at code labels.  */
6936      if (GET_CODE (head) != CODE_LABEL)
6937	continue;
6938      if (GET_CODE (NEXT_INSN (head)) == NOTE
6939	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
6940	head = NEXT_INSN (head);
6941
6942      for (r = PR_REG (0); r < PR_REG (64); r += 2)
6943	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
6944	  {
6945	    rtx p = gen_rtx_REG (BImode, r);
6946	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
6947	    if (head == bb->end)
6948	      bb->end = n;
6949	    head = n;
6950	  }
6951    }
6952
6953  /* Look for conditional calls that do not return, and protect predicate
6954     relations around them.  Otherwise the assembler will assume the call
6955     returns, and complain about uses of call-clobbered predicates after
6956     the call.  */
6957  FOR_EACH_BB_REVERSE (bb)
6958    {
6959      rtx insn = bb->head;
6960
6961      while (1)
6962	{
6963	  if (GET_CODE (insn) == CALL_INSN
6964	      && GET_CODE (PATTERN (insn)) == COND_EXEC
6965	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
6966	    {
6967	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
6968	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
6969	      if (bb->head == insn)
6970		bb->head = b;
6971	      if (bb->end == insn)
6972		bb->end = a;
6973	    }
6974
6975	  if (insn == bb->end)
6976	    break;
6977	  insn = NEXT_INSN (insn);
6978	}
6979    }
6980}
6981
6982/* Generate a NOP instruction of type T.  We will never generate L type
6983   nops.  */
6984
6985static rtx
6986gen_nop_type (t)
6987     enum attr_type t;
6988{
6989  switch (t)
6990    {
6991    case TYPE_M:
6992      return gen_nop_m ();
6993    case TYPE_I:
6994      return gen_nop_i ();
6995    case TYPE_B:
6996      return gen_nop_b ();
6997    case TYPE_F:
6998      return gen_nop_f ();
6999    case TYPE_X:
7000      return gen_nop_x ();
7001    default:
7002      abort ();
7003    }
7004}
7005
7006/* After the last scheduling pass, fill in NOPs.  It's easier to do this
7007   here than while scheduling.  */
7008
7009static void
7010ia64_emit_nops ()
7011{
7012  rtx insn;
7013  const struct bundle *b = 0;
7014  int bundle_pos = 0;
7015
7016  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7017    {
7018      rtx pat;
7019      enum attr_type t;
7020      pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx;
7021      if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
7022	continue;
7023      if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR)
7024	  || GET_CODE (insn) == CODE_LABEL)
7025	{
7026	  if (b)
7027	    while (bundle_pos < 3)
7028	      {
7029		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7030		bundle_pos++;
7031	      }
7032	  if (GET_CODE (insn) != CODE_LABEL)
7033	    b = bundle + INTVAL (XVECEXP (pat, 0, 0));
7034	  else
7035	    b = 0;
7036	  bundle_pos = 0;
7037	  continue;
7038	}
7039      else if (GET_CODE (pat) == UNSPEC_VOLATILE
7040	       && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER)
7041	{
7042	  int t = INTVAL (XVECEXP (pat, 0, 0));
7043	  if (b)
7044	    while (bundle_pos < t)
7045	      {
7046		emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7047		bundle_pos++;
7048	      }
7049	  continue;
7050	}
7051
7052      if (bundle_pos == 3)
7053	b = 0;
7054
7055      if (b && INSN_P (insn))
7056	{
7057	  t = ia64_safe_type (insn);
7058	  if (asm_noperands (PATTERN (insn)) >= 0
7059	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
7060	    {
7061	      while (bundle_pos < 3)
7062		{
7063		  emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7064		  bundle_pos++;
7065		}
7066	      continue;
7067	    }
7068
7069	  if (t == TYPE_UNKNOWN)
7070	    continue;
7071	  while (bundle_pos < 3)
7072	    {
7073	      if (t == b->t[bundle_pos]
7074		  || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M
7075				      || b->t[bundle_pos] == TYPE_I)))
7076		break;
7077
7078	      emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn);
7079	      bundle_pos++;
7080	    }
7081	  if (bundle_pos < 3)
7082	    bundle_pos++;
7083	}
7084    }
7085}
7086
7087/* Perform machine dependent operations on the rtl chain INSNS.  */
7088
7089void
7090ia64_reorg (insns)
7091     rtx insns;
7092{
7093  /* We are freeing block_for_insn in the toplev to keep compatibility
7094     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7095  compute_bb_for_insn ();
7096
7097  /* If optimizing, we'll have split before scheduling.  */
7098  if (optimize == 0)
7099    split_all_insns (0);
7100
7101  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7102     non-optimizing bootstrap.  */
7103  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7104
7105  if (ia64_flag_schedule_insns2)
7106    {
7107      timevar_push (TV_SCHED2);
7108      ia64_final_schedule = 1;
7109      schedule_ebbs (rtl_dump_file);
7110      ia64_final_schedule = 0;
7111      timevar_pop (TV_SCHED2);
7112
7113      /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
7114	 place as they were during scheduling.  */
7115      emit_insn_group_barriers (rtl_dump_file, insns);
7116      ia64_emit_nops ();
7117    }
7118  else
7119    emit_all_insn_group_barriers (rtl_dump_file, insns);
7120
7121  /* A call must not be the last instruction in a function, so that the
7122     return address is still within the function, so that unwinding works
7123     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7124  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7125    {
7126      rtx insn;
7127      int saw_stop = 0;
7128
7129      insn = get_last_insn ();
7130      if (! INSN_P (insn))
7131        insn = prev_active_insn (insn);
7132      if (GET_CODE (insn) == INSN
7133	  && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7134	  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7135	{
7136	  saw_stop = 1;
7137	  insn = prev_active_insn (insn);
7138	}
7139      if (GET_CODE (insn) == CALL_INSN)
7140	{
7141	  if (! saw_stop)
7142	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7143	  emit_insn (gen_break_f ());
7144	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7145	}
7146    }
7147
7148  fixup_errata ();
7149  emit_predicate_relation_info ();
7150}
7151
7152/* Return true if REGNO is used by the epilogue.  */
7153
7154int
7155ia64_epilogue_uses (regno)
7156     int regno;
7157{
7158  switch (regno)
7159    {
7160    case R_GR (1):
7161      /* With a call to a function in another module, we will write a new
7162	 value to "gp".  After returning from such a call, we need to make
7163	 sure the function restores the original gp-value, even if the
7164	 function itself does not use the gp anymore.  */
7165      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7166
7167    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7168    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7169      /* For functions defined with the syscall_linkage attribute, all
7170	 input registers are marked as live at all function exits.  This
7171	 prevents the register allocator from using the input registers,
7172	 which in turn makes it possible to restart a system call after
7173	 an interrupt without having to save/restore the input registers.
7174	 This also prevents kernel data from leaking to application code.  */
7175      return lookup_attribute ("syscall_linkage",
7176	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7177
7178    case R_BR (0):
7179      /* Conditional return patterns can't represent the use of `b0' as
7180         the return address, so we force the value live this way.  */
7181      return 1;
7182
7183    case AR_PFS_REGNUM:
7184      /* Likewise for ar.pfs, which is used by br.ret.  */
7185      return 1;
7186
7187    default:
7188      return 0;
7189    }
7190}
7191
7192/* Return true if REGNO is used by the frame unwinder.  */
7193
7194int
7195ia64_eh_uses (regno)
7196     int regno;
7197{
7198  if (! reload_completed)
7199    return 0;
7200
7201  if (current_frame_info.reg_save_b0
7202      && regno == current_frame_info.reg_save_b0)
7203    return 1;
7204  if (current_frame_info.reg_save_pr
7205      && regno == current_frame_info.reg_save_pr)
7206    return 1;
7207  if (current_frame_info.reg_save_ar_pfs
7208      && regno == current_frame_info.reg_save_ar_pfs)
7209    return 1;
7210  if (current_frame_info.reg_save_ar_unat
7211      && regno == current_frame_info.reg_save_ar_unat)
7212    return 1;
7213  if (current_frame_info.reg_save_ar_lc
7214      && regno == current_frame_info.reg_save_ar_lc)
7215    return 1;
7216
7217  return 0;
7218}
7219
7220/* For ia64, SYMBOL_REF_FLAG set means that it is a function.
7221
7222   We add @ to the name if this goes in small data/bss.  We can only put
7223   a variable in small data/bss if it is defined in this module or a module
7224   that we are statically linked with.  We can't check the second condition,
7225   but TREE_STATIC gives us the first one.  */
7226
7227/* ??? If we had IPA, we could check the second condition.  We could support
7228   programmer added section attributes if the variable is not defined in this
7229   module.  */
7230
7231/* ??? See the v850 port for a cleaner way to do this.  */
7232
7233/* ??? We could also support own long data here.  Generating movl/add/ld8
7234   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
7235   code faster because there is one less load.  This also includes incomplete
7236   types which can't go in sdata/sbss.  */
7237
7238static bool
7239ia64_in_small_data_p (exp)
7240     tree exp;
7241{
7242  if (TARGET_NO_SDATA)
7243    return false;
7244
7245  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7246    {
7247      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7248      if (strcmp (section, ".sdata") == 0
7249	  || strcmp (section, ".sbss") == 0)
7250	return true;
7251    }
7252  else
7253    {
7254      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7255
7256      /* If this is an incomplete type with size 0, then we can't put it
7257	 in sdata because it might be too big when completed.  */
7258      if (size > 0 && size <= ia64_section_threshold)
7259	return true;
7260    }
7261
7262  return false;
7263}
7264
7265static void
7266ia64_encode_section_info (decl, first)
7267     tree decl;
7268     int first ATTRIBUTE_UNUSED;
7269{
7270  const char *symbol_str;
7271  bool is_local;
7272  rtx symbol;
7273  char encoding = 0;
7274
7275  if (TREE_CODE (decl) == FUNCTION_DECL)
7276    {
7277      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
7278      return;
7279    }
7280
7281  /* Careful not to prod global register variables.  */
7282  if (TREE_CODE (decl) != VAR_DECL
7283      || GET_CODE (DECL_RTL (decl)) != MEM
7284      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
7285    return;
7286
7287  symbol = XEXP (DECL_RTL (decl), 0);
7288  symbol_str = XSTR (symbol, 0);
7289
7290  is_local = (*targetm.binds_local_p) (decl);
7291
7292  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
7293    encoding = " GLil"[decl_tls_model (decl)];
7294  /* Determine if DECL will wind up in .sdata/.sbss.  */
7295  else if (is_local && ia64_in_small_data_p (decl))
7296    encoding = 's';
7297
7298  /* Finally, encode this into the symbol string.  */
7299  if (encoding)
7300    {
7301      char *newstr;
7302      size_t len;
7303
7304      if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7305	{
7306	  if (encoding == symbol_str[1])
7307	    return;
7308	  /* ??? Sdata became thread or thread becaome not thread.  Lose.  */
7309	  abort ();
7310	}
7311
7312      len = strlen (symbol_str);
7313      newstr = alloca (len + 3);
7314      newstr[0] = ENCODE_SECTION_INFO_CHAR;
7315      newstr[1] = encoding;
7316      memcpy (newstr + 2, symbol_str, len + 1);
7317
7318      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
7319    }
7320
7321  /* This decl is marked as being in small data/bss but it shouldn't be;
7322     one likely explanation for this is that the decl has been moved into
7323     a different section from the one it was in when encode_section_info
7324     was first called.  Remove the encoding.  */
7325  else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
7326    XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
7327}
7328
7329static const char *
7330ia64_strip_name_encoding (str)
7331     const char *str;
7332{
7333  if (str[0] == ENCODE_SECTION_INFO_CHAR)
7334    str += 2;
7335  if (str[0] == '*')
7336    str++;
7337  return str;
7338}
7339
7340/* True if it is OK to do sibling call optimization for the specified
7341   call expression EXP.  DECL will be the called function, or NULL if
7342   this is an indirect call.  */
7343bool
7344ia64_function_ok_for_sibcall (decl)
7345     tree decl;
7346{
7347  /* We must always return with our current GP.  This means we can
7348     only sibcall to functions defined in the current module.  */
7349  return decl && (*targetm.binds_local_p) (decl);
7350}
7351
7352/* Output assembly directives for prologue regions.  */
7353
7354/* The current basic block number.  */
7355
7356static bool last_block;
7357
7358/* True if we need a copy_state command at the start of the next block.  */
7359
7360static bool need_copy_state;
7361
7362/* The function emits unwind directives for the start of an epilogue.  */
7363
7364static void
7365process_epilogue ()
7366{
7367  /* If this isn't the last block of the function, then we need to label the
7368     current state, and copy it back in at the start of the next block.  */
7369
7370  if (!last_block)
7371    {
7372      fprintf (asm_out_file, "\t.label_state 1\n");
7373      need_copy_state = true;
7374    }
7375
7376  fprintf (asm_out_file, "\t.restore sp\n");
7377}
7378
7379/* This function processes a SET pattern looking for specific patterns
7380   which result in emitting an assembly directive required for unwinding.  */
7381
7382static int
7383process_set (asm_out_file, pat)
7384     FILE *asm_out_file;
7385     rtx pat;
7386{
7387  rtx src = SET_SRC (pat);
7388  rtx dest = SET_DEST (pat);
7389  int src_regno, dest_regno;
7390
7391  /* Look for the ALLOC insn.  */
7392  if (GET_CODE (src) == UNSPEC_VOLATILE
7393      && XINT (src, 1) == UNSPECV_ALLOC
7394      && GET_CODE (dest) == REG)
7395    {
7396      dest_regno = REGNO (dest);
7397
7398      /* If this isn't the final destination for ar.pfs, the alloc
7399	 shouldn't have been marked frame related.  */
7400      if (dest_regno != current_frame_info.reg_save_ar_pfs)
7401	abort ();
7402
7403      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7404	       ia64_dbx_register_number (dest_regno));
7405      return 1;
7406    }
7407
7408  /* Look for SP = ....  */
7409  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7410    {
7411      if (GET_CODE (src) == PLUS)
7412        {
7413	  rtx op0 = XEXP (src, 0);
7414	  rtx op1 = XEXP (src, 1);
7415	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7416	    {
7417	      if (INTVAL (op1) < 0)
7418		{
7419		  fputs ("\t.fframe ", asm_out_file);
7420		  fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC,
7421			   -INTVAL (op1));
7422		  fputc ('\n', asm_out_file);
7423		}
7424	      else
7425		process_epilogue ();
7426	    }
7427	  else
7428	    abort ();
7429	}
7430      else if (GET_CODE (src) == REG
7431	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7432	process_epilogue ();
7433      else
7434	abort ();
7435
7436      return 1;
7437    }
7438
7439  /* Register move we need to look at.  */
7440  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7441    {
7442      src_regno = REGNO (src);
7443      dest_regno = REGNO (dest);
7444
7445      switch (src_regno)
7446	{
7447	case BR_REG (0):
7448	  /* Saving return address pointer.  */
7449	  if (dest_regno != current_frame_info.reg_save_b0)
7450	    abort ();
7451	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7452		   ia64_dbx_register_number (dest_regno));
7453	  return 1;
7454
7455	case PR_REG (0):
7456	  if (dest_regno != current_frame_info.reg_save_pr)
7457	    abort ();
7458	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7459		   ia64_dbx_register_number (dest_regno));
7460	  return 1;
7461
7462	case AR_UNAT_REGNUM:
7463	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7464	    abort ();
7465	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7466		   ia64_dbx_register_number (dest_regno));
7467	  return 1;
7468
7469	case AR_LC_REGNUM:
7470	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7471	    abort ();
7472	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7473		   ia64_dbx_register_number (dest_regno));
7474	  return 1;
7475
7476	case STACK_POINTER_REGNUM:
7477	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7478	      || ! frame_pointer_needed)
7479	    abort ();
7480	  fprintf (asm_out_file, "\t.vframe r%d\n",
7481		   ia64_dbx_register_number (dest_regno));
7482	  return 1;
7483
7484	default:
7485	  /* Everything else should indicate being stored to memory.  */
7486	  abort ();
7487	}
7488    }
7489
7490  /* Memory store we need to look at.  */
7491  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7492    {
7493      long off;
7494      rtx base;
7495      const char *saveop;
7496
7497      if (GET_CODE (XEXP (dest, 0)) == REG)
7498	{
7499	  base = XEXP (dest, 0);
7500	  off = 0;
7501	}
7502      else if (GET_CODE (XEXP (dest, 0)) == PLUS
7503	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7504	{
7505	  base = XEXP (XEXP (dest, 0), 0);
7506	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7507	}
7508      else
7509	abort ();
7510
7511      if (base == hard_frame_pointer_rtx)
7512	{
7513	  saveop = ".savepsp";
7514	  off = - off;
7515	}
7516      else if (base == stack_pointer_rtx)
7517	saveop = ".savesp";
7518      else
7519	abort ();
7520
7521      src_regno = REGNO (src);
7522      switch (src_regno)
7523	{
7524	case BR_REG (0):
7525	  if (current_frame_info.reg_save_b0 != 0)
7526	    abort ();
7527	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7528	  return 1;
7529
7530	case PR_REG (0):
7531	  if (current_frame_info.reg_save_pr != 0)
7532	    abort ();
7533	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7534	  return 1;
7535
7536	case AR_LC_REGNUM:
7537	  if (current_frame_info.reg_save_ar_lc != 0)
7538	    abort ();
7539	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7540	  return 1;
7541
7542	case AR_PFS_REGNUM:
7543	  if (current_frame_info.reg_save_ar_pfs != 0)
7544	    abort ();
7545	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7546	  return 1;
7547
7548	case AR_UNAT_REGNUM:
7549	  if (current_frame_info.reg_save_ar_unat != 0)
7550	    abort ();
7551	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7552	  return 1;
7553
7554	case GR_REG (4):
7555	case GR_REG (5):
7556	case GR_REG (6):
7557	case GR_REG (7):
7558	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7559		   1 << (src_regno - GR_REG (4)));
7560	  return 1;
7561
7562	case BR_REG (1):
7563	case BR_REG (2):
7564	case BR_REG (3):
7565	case BR_REG (4):
7566	case BR_REG (5):
7567	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
7568		   1 << (src_regno - BR_REG (1)));
7569	  return 1;
7570
7571	case FR_REG (2):
7572	case FR_REG (3):
7573	case FR_REG (4):
7574	case FR_REG (5):
7575	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
7576		   1 << (src_regno - FR_REG (2)));
7577	  return 1;
7578
7579	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7580	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7581	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7582	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7583	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7584		   1 << (src_regno - FR_REG (12)));
7585	  return 1;
7586
7587	default:
7588	  return 0;
7589	}
7590    }
7591
7592  return 0;
7593}
7594
7595
7596/* This function looks at a single insn and emits any directives
7597   required to unwind this insn.  */
7598void
7599process_for_unwind_directive (asm_out_file, insn)
7600     FILE *asm_out_file;
7601     rtx insn;
7602{
7603  if (flag_unwind_tables
7604      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7605    {
7606      rtx pat;
7607
7608      if (GET_CODE (insn) == NOTE
7609	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7610	{
7611	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7612
7613	  /* Restore unwind state from immediately before the epilogue.  */
7614	  if (need_copy_state)
7615	    {
7616	      fprintf (asm_out_file, "\t.body\n");
7617	      fprintf (asm_out_file, "\t.copy_state 1\n");
7618	      need_copy_state = false;
7619	    }
7620	}
7621
7622      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
7623	return;
7624
7625      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
7626      if (pat)
7627	pat = XEXP (pat, 0);
7628      else
7629	pat = PATTERN (insn);
7630
7631      switch (GET_CODE (pat))
7632        {
7633	case SET:
7634	  process_set (asm_out_file, pat);
7635	  break;
7636
7637	case PARALLEL:
7638	  {
7639	    int par_index;
7640	    int limit = XVECLEN (pat, 0);
7641	    for (par_index = 0; par_index < limit; par_index++)
7642	      {
7643		rtx x = XVECEXP (pat, 0, par_index);
7644		if (GET_CODE (x) == SET)
7645		  process_set (asm_out_file, x);
7646	      }
7647	    break;
7648	  }
7649
7650	default:
7651	  abort ();
7652	}
7653    }
7654}
7655
7656
7657void
7658ia64_init_builtins ()
7659{
7660  tree psi_type_node = build_pointer_type (integer_type_node);
7661  tree pdi_type_node = build_pointer_type (long_integer_type_node);
7662
7663  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
7664  tree si_ftype_psi_si_si
7665    = build_function_type_list (integer_type_node,
7666				psi_type_node, integer_type_node,
7667				integer_type_node, NULL_TREE);
7668
7669  /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */
7670  tree di_ftype_pdi_di_di
7671    = build_function_type_list (long_integer_type_node,
7672				pdi_type_node, long_integer_type_node,
7673				long_integer_type_node, NULL_TREE);
7674  /* __sync_synchronize */
7675  tree void_ftype_void
7676    = build_function_type (void_type_node, void_list_node);
7677
7678  /* __sync_lock_test_and_set_si */
7679  tree si_ftype_psi_si
7680    = build_function_type_list (integer_type_node,
7681				psi_type_node, integer_type_node, NULL_TREE);
7682
7683  /* __sync_lock_test_and_set_di */
7684  tree di_ftype_pdi_di
7685    = build_function_type_list (long_integer_type_node,
7686				pdi_type_node, long_integer_type_node,
7687				NULL_TREE);
7688
7689  /* __sync_lock_release_si */
7690  tree void_ftype_psi
7691    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
7692
7693  /* __sync_lock_release_di */
7694  tree void_ftype_pdi
7695    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
7696
7697#define def_builtin(name, type, code) \
7698  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
7699
7700  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
7701	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
7702  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
7703	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
7704  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
7705	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
7706  def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di,
7707	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
7708
7709  def_builtin ("__sync_synchronize", void_ftype_void,
7710	       IA64_BUILTIN_SYNCHRONIZE);
7711
7712  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
7713	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
7714  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
7715	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
7716  def_builtin ("__sync_lock_release_si", void_ftype_psi,
7717	       IA64_BUILTIN_LOCK_RELEASE_SI);
7718  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
7719	       IA64_BUILTIN_LOCK_RELEASE_DI);
7720
7721  def_builtin ("__builtin_ia64_bsp",
7722	       build_function_type (ptr_type_node, void_list_node),
7723	       IA64_BUILTIN_BSP);
7724
7725  def_builtin ("__builtin_ia64_flushrs",
7726	       build_function_type (void_type_node, void_list_node),
7727	       IA64_BUILTIN_FLUSHRS);
7728
7729  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
7730	       IA64_BUILTIN_FETCH_AND_ADD_SI);
7731  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
7732	       IA64_BUILTIN_FETCH_AND_SUB_SI);
7733  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
7734	       IA64_BUILTIN_FETCH_AND_OR_SI);
7735  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
7736	       IA64_BUILTIN_FETCH_AND_AND_SI);
7737  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
7738	       IA64_BUILTIN_FETCH_AND_XOR_SI);
7739  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
7740	       IA64_BUILTIN_FETCH_AND_NAND_SI);
7741
7742  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
7743	       IA64_BUILTIN_ADD_AND_FETCH_SI);
7744  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
7745	       IA64_BUILTIN_SUB_AND_FETCH_SI);
7746  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
7747	       IA64_BUILTIN_OR_AND_FETCH_SI);
7748  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
7749	       IA64_BUILTIN_AND_AND_FETCH_SI);
7750  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
7751	       IA64_BUILTIN_XOR_AND_FETCH_SI);
7752  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
7753	       IA64_BUILTIN_NAND_AND_FETCH_SI);
7754
7755  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
7756	       IA64_BUILTIN_FETCH_AND_ADD_DI);
7757  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
7758	       IA64_BUILTIN_FETCH_AND_SUB_DI);
7759  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
7760	       IA64_BUILTIN_FETCH_AND_OR_DI);
7761  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
7762	       IA64_BUILTIN_FETCH_AND_AND_DI);
7763  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
7764	       IA64_BUILTIN_FETCH_AND_XOR_DI);
7765  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
7766	       IA64_BUILTIN_FETCH_AND_NAND_DI);
7767
7768  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
7769	       IA64_BUILTIN_ADD_AND_FETCH_DI);
7770  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
7771	       IA64_BUILTIN_SUB_AND_FETCH_DI);
7772  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
7773	       IA64_BUILTIN_OR_AND_FETCH_DI);
7774  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
7775	       IA64_BUILTIN_AND_AND_FETCH_DI);
7776  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
7777	       IA64_BUILTIN_XOR_AND_FETCH_DI);
7778  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
7779	       IA64_BUILTIN_NAND_AND_FETCH_DI);
7780
7781#undef def_builtin
7782}
7783
7784/* Expand fetch_and_op intrinsics.  The basic code sequence is:
7785
7786     mf
7787     tmp = [ptr];
7788     do {
7789       ret = tmp;
7790       ar.ccv = tmp;
7791       tmp <op>= value;
7792       cmpxchgsz.acq tmp = [ptr], tmp
7793     } while (tmp != ret)
7794*/
7795
7796static rtx
7797ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
7798     optab binoptab;
7799     enum machine_mode mode;
7800     tree arglist;
7801     rtx target;
7802{
7803  rtx ret, label, tmp, ccv, insn, mem, value;
7804  tree arg0, arg1;
7805
7806  arg0 = TREE_VALUE (arglist);
7807  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7808  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7809#ifdef POINTERS_EXTEND_UNSIGNED
7810  if (GET_MODE(mem) != Pmode)
7811    mem = convert_memory_address (Pmode, mem);
7812#endif
7813  value = expand_expr (arg1, NULL_RTX, mode, 0);
7814
7815  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7816  MEM_VOLATILE_P (mem) = 1;
7817
7818  if (target && register_operand (target, mode))
7819    ret = target;
7820  else
7821    ret = gen_reg_rtx (mode);
7822
7823  emit_insn (gen_mf ());
7824
7825  /* Special case for fetchadd instructions.  */
7826  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
7827    {
7828      if (mode == SImode)
7829        insn = gen_fetchadd_acq_si (ret, mem, value);
7830      else
7831        insn = gen_fetchadd_acq_di (ret, mem, value);
7832      emit_insn (insn);
7833      return ret;
7834    }
7835
7836  tmp = gen_reg_rtx (mode);
7837  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7838  emit_move_insn (tmp, mem);
7839
7840  label = gen_label_rtx ();
7841  emit_label (label);
7842  emit_move_insn (ret, tmp);
7843  emit_move_insn (ccv, tmp);
7844
7845  /* Perform the specific operation.  Special case NAND by noticing
7846     one_cmpl_optab instead.  */
7847  if (binoptab == one_cmpl_optab)
7848    {
7849      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7850      binoptab = and_optab;
7851    }
7852  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
7853
7854  if (mode == SImode)
7855    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
7856  else
7857    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
7858  emit_insn (insn);
7859
7860  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
7861
7862  return ret;
7863}
7864
7865/* Expand op_and_fetch intrinsics.  The basic code sequence is:
7866
7867     mf
7868     tmp = [ptr];
7869     do {
7870       old = tmp;
7871       ar.ccv = tmp;
7872       ret = tmp <op> value;
7873       cmpxchgsz.acq tmp = [ptr], ret
7874     } while (tmp != old)
7875*/
7876
7877static rtx
7878ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
7879     optab binoptab;
7880     enum machine_mode mode;
7881     tree arglist;
7882     rtx target;
7883{
7884  rtx old, label, tmp, ret, ccv, insn, mem, value;
7885  tree arg0, arg1;
7886
7887  arg0 = TREE_VALUE (arglist);
7888  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7889  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
7890#ifdef POINTERS_EXTEND_UNSIGNED
7891  if (GET_MODE(mem) != Pmode)
7892    mem = convert_memory_address (Pmode, mem);
7893#endif
7894
7895  value = expand_expr (arg1, NULL_RTX, mode, 0);
7896
7897  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
7898  MEM_VOLATILE_P (mem) = 1;
7899
7900  if (target && ! register_operand (target, mode))
7901    target = NULL_RTX;
7902
7903  emit_insn (gen_mf ());
7904  tmp = gen_reg_rtx (mode);
7905  old = gen_reg_rtx (mode);
7906  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
7907
7908  emit_move_insn (tmp, mem);
7909
7910  label = gen_label_rtx ();
7911  emit_label (label);
7912  emit_move_insn (old, tmp);
7913  emit_move_insn (ccv, tmp);
7914
7915  /* Perform the specific operation.  Special case NAND by noticing
7916     one_cmpl_optab instead.  */
7917  if (binoptab == one_cmpl_optab)
7918    {
7919      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
7920      binoptab = and_optab;
7921    }
7922  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
7923
7924  if (mode == SImode)
7925    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
7926  else
7927    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
7928  emit_insn (insn);
7929
7930  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
7931
7932  return ret;
7933}
7934
7935/* Expand val_ and bool_compare_and_swap.  For val_ we want:
7936
7937     ar.ccv = oldval
7938     mf
7939     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
7940     return ret
7941
7942   For bool_ it's the same except return ret == oldval.
7943*/
7944
7945static rtx
7946ia64_expand_compare_and_swap (mode, boolp, arglist, target)
7947     enum machine_mode mode;
7948     int boolp;
7949     tree arglist;
7950     rtx target;
7951{
7952  tree arg0, arg1, arg2;
7953  rtx mem, old, new, ccv, tmp, insn;
7954
7955  arg0 = TREE_VALUE (arglist);
7956  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
7957  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
7958  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
7959  old = expand_expr (arg1, NULL_RTX, mode, 0);
7960  new = expand_expr (arg2, NULL_RTX, mode, 0);
7961
7962  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
7963  MEM_VOLATILE_P (mem) = 1;
7964
7965  if (! register_operand (old, mode))
7966    old = copy_to_mode_reg (mode, old);
7967  if (! register_operand (new, mode))
7968    new = copy_to_mode_reg (mode, new);
7969
7970  if (! boolp && target && register_operand (target, mode))
7971    tmp = target;
7972  else
7973    tmp = gen_reg_rtx (mode);
7974
7975  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
7976  if (mode == DImode)
7977    emit_move_insn (ccv, old);
7978  else
7979    {
7980      rtx ccvtmp = gen_reg_rtx (DImode);
7981      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
7982      emit_move_insn (ccv, ccvtmp);
7983    }
7984  emit_insn (gen_mf ());
7985  if (mode == SImode)
7986    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
7987  else
7988    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
7989  emit_insn (insn);
7990
7991  if (boolp)
7992    {
7993      if (! target)
7994	target = gen_reg_rtx (mode);
7995      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
7996    }
7997  else
7998    return tmp;
7999}
8000
8001/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
8002
8003static rtx
8004ia64_expand_lock_test_and_set (mode, arglist, target)
8005     enum machine_mode mode;
8006     tree arglist;
8007     rtx target;
8008{
8009  tree arg0, arg1;
8010  rtx mem, new, ret, insn;
8011
8012  arg0 = TREE_VALUE (arglist);
8013  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8014  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8015  new = expand_expr (arg1, NULL_RTX, mode, 0);
8016
8017  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8018  MEM_VOLATILE_P (mem) = 1;
8019  if (! register_operand (new, mode))
8020    new = copy_to_mode_reg (mode, new);
8021
8022  if (target && register_operand (target, mode))
8023    ret = target;
8024  else
8025    ret = gen_reg_rtx (mode);
8026
8027  if (mode == SImode)
8028    insn = gen_xchgsi (ret, mem, new);
8029  else
8030    insn = gen_xchgdi (ret, mem, new);
8031  emit_insn (insn);
8032
8033  return ret;
8034}
8035
8036/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8037
8038static rtx
8039ia64_expand_lock_release (mode, arglist, target)
8040     enum machine_mode mode;
8041     tree arglist;
8042     rtx target ATTRIBUTE_UNUSED;
8043{
8044  tree arg0;
8045  rtx mem;
8046
8047  arg0 = TREE_VALUE (arglist);
8048  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8049
8050  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8051  MEM_VOLATILE_P (mem) = 1;
8052
8053  emit_move_insn (mem, const0_rtx);
8054
8055  return const0_rtx;
8056}
8057
8058rtx
8059ia64_expand_builtin (exp, target, subtarget, mode, ignore)
8060     tree exp;
8061     rtx target;
8062     rtx subtarget ATTRIBUTE_UNUSED;
8063     enum machine_mode mode ATTRIBUTE_UNUSED;
8064     int ignore ATTRIBUTE_UNUSED;
8065{
8066  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8067  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8068  tree arglist = TREE_OPERAND (exp, 1);
8069
8070  switch (fcode)
8071    {
8072    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8073    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8074    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8075    case IA64_BUILTIN_LOCK_RELEASE_SI:
8076    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8077    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8078    case IA64_BUILTIN_FETCH_AND_OR_SI:
8079    case IA64_BUILTIN_FETCH_AND_AND_SI:
8080    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8081    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8082    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8083    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8084    case IA64_BUILTIN_OR_AND_FETCH_SI:
8085    case IA64_BUILTIN_AND_AND_FETCH_SI:
8086    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8087    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8088      mode = SImode;
8089      break;
8090
8091    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8092    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8093    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8094    case IA64_BUILTIN_LOCK_RELEASE_DI:
8095    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8096    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8097    case IA64_BUILTIN_FETCH_AND_OR_DI:
8098    case IA64_BUILTIN_FETCH_AND_AND_DI:
8099    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8100    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8101    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8102    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8103    case IA64_BUILTIN_OR_AND_FETCH_DI:
8104    case IA64_BUILTIN_AND_AND_FETCH_DI:
8105    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8106    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8107      mode = DImode;
8108      break;
8109
8110    default:
8111      break;
8112    }
8113
8114  switch (fcode)
8115    {
8116    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8117    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8118      return ia64_expand_compare_and_swap (mode, 1, arglist, target);
8119
8120    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8121    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8122      return ia64_expand_compare_and_swap (mode, 0, arglist, target);
8123
8124    case IA64_BUILTIN_SYNCHRONIZE:
8125      emit_insn (gen_mf ());
8126      return const0_rtx;
8127
8128    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8129    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8130      return ia64_expand_lock_test_and_set (mode, arglist, target);
8131
8132    case IA64_BUILTIN_LOCK_RELEASE_SI:
8133    case IA64_BUILTIN_LOCK_RELEASE_DI:
8134      return ia64_expand_lock_release (mode, arglist, target);
8135
8136    case IA64_BUILTIN_BSP:
8137      if (! target || ! register_operand (target, DImode))
8138	target = gen_reg_rtx (DImode);
8139      emit_insn (gen_bsp_value (target));
8140      return target;
8141
8142    case IA64_BUILTIN_FLUSHRS:
8143      emit_insn (gen_flushrs ());
8144      return const0_rtx;
8145
8146    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8147    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8148      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8149
8150    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8151    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8152      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8153
8154    case IA64_BUILTIN_FETCH_AND_OR_SI:
8155    case IA64_BUILTIN_FETCH_AND_OR_DI:
8156      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8157
8158    case IA64_BUILTIN_FETCH_AND_AND_SI:
8159    case IA64_BUILTIN_FETCH_AND_AND_DI:
8160      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8161
8162    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8163    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8164      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8165
8166    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8167    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8168      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8169
8170    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8171    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8172      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8173
8174    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8175    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8176      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8177
8178    case IA64_BUILTIN_OR_AND_FETCH_SI:
8179    case IA64_BUILTIN_OR_AND_FETCH_DI:
8180      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8181
8182    case IA64_BUILTIN_AND_AND_FETCH_SI:
8183    case IA64_BUILTIN_AND_AND_FETCH_DI:
8184      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8185
8186    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8187    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8188      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8189
8190    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8191    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8192      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8193
8194    default:
8195      break;
8196    }
8197
8198  return NULL_RTX;
8199}
8200
8201/* For the HP-UX IA64 aggregate parameters are passed stored in the
8202   most significant bits of the stack slot.  */
8203
8204enum direction
8205ia64_hpux_function_arg_padding (mode, type)
8206     enum machine_mode mode;
8207     tree type;
8208{
8209   /* Exception to normal case for structures/unions/etc.  */
8210
8211   if (type && AGGREGATE_TYPE_P (type)
8212       && int_size_in_bytes (type) < UNITS_PER_WORD)
8213     return upward;
8214
8215   /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
8216      hardwired to be true.  */
8217
8218   return((mode == BLKmode
8219       ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
8220          && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
8221       : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
8222      ? downward : upward);
8223}
8224
8225/* Linked list of all external functions that are to be emitted by GCC.
8226   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8227   order to avoid putting out names that are never really used.  */
8228
8229struct extern_func_list
8230{
8231  struct extern_func_list *next; /* next external */
8232  char *name;                    /* name of the external */
8233} *extern_func_head = 0;
8234
8235static void
8236ia64_hpux_add_extern_decl (name)
8237        const char *name;
8238{
8239  struct extern_func_list *p;
8240
8241  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
8242  p->name = xmalloc (strlen (name) + 1);
8243  strcpy(p->name, name);
8244  p->next = extern_func_head;
8245  extern_func_head = p;
8246}
8247
8248/* Print out the list of used global functions.  */
8249
8250void
8251ia64_hpux_asm_file_end (file)
8252	FILE *file;
8253{
8254  while (extern_func_head)
8255    {
8256      const char *real_name;
8257      tree decl;
8258
8259      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
8260      decl = maybe_get_identifier (real_name);
8261
8262      if (!decl
8263	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
8264        {
8265	  if (decl)
8266	    TREE_ASM_WRITTEN (decl) = 1;
8267	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
8268	  fprintf (file, "%s", TYPE_ASM_OP);
8269	  assemble_name (file, extern_func_head->name);
8270	  putc (',', file);
8271	  fprintf (file, TYPE_OPERAND_FMT, "function");
8272	  putc ('\n', file);
8273        }
8274      extern_func_head = extern_func_head->next;
8275    }
8276}
8277
8278
8279/* Switch to the section to which we should output X.  The only thing
8280   special we do here is to honor small data.  */
8281
8282static void
8283ia64_select_rtx_section (mode, x, align)
8284     enum machine_mode mode;
8285     rtx x;
8286     unsigned HOST_WIDE_INT align;
8287{
8288  if (GET_MODE_SIZE (mode) > 0
8289      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8290    sdata_section ();
8291  else
8292    default_elf_select_rtx_section (mode, x, align);
8293}
8294
8295/* It is illegal to have relocations in shared segments on AIX and HPUX.
8296   Pretend flag_pic is always set.  */
8297
8298static void
8299ia64_rwreloc_select_section (exp, reloc, align)
8300     tree exp;
8301     int reloc;
8302     unsigned HOST_WIDE_INT align;
8303{
8304  default_elf_select_section_1 (exp, reloc, align, true);
8305}
8306
8307static void
8308ia64_rwreloc_unique_section (decl, reloc)
8309     tree decl;
8310     int reloc;
8311{
8312  default_unique_section_1 (decl, reloc, true);
8313}
8314
8315static void
8316ia64_rwreloc_select_rtx_section (mode, x, align)
8317     enum machine_mode mode;
8318     rtx x;
8319     unsigned HOST_WIDE_INT align;
8320{
8321  int save_pic = flag_pic;
8322  flag_pic = 1;
8323  ia64_select_rtx_section (mode, x, align);
8324  flag_pic = save_pic;
8325}
8326
8327static unsigned int
8328ia64_rwreloc_section_type_flags (decl, name, reloc)
8329     tree decl;
8330     const char *name;
8331     int reloc;
8332{
8333  return default_section_type_flags_1 (decl, name, reloc, true);
8334}
8335
8336
8337/* Output the assembler code for a thunk function.  THUNK_DECL is the
8338   declaration for the thunk function itself, FUNCTION is the decl for
8339   the target function.  DELTA is an immediate constant offset to be
8340   added to THIS.  If VCALL_OFFSET is non-zero, the word at
8341   *(*this + vcall_offset) should be added to THIS.  */
8342
8343static void
8344ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
8345     FILE *file;
8346     tree thunk ATTRIBUTE_UNUSED;
8347     HOST_WIDE_INT delta;
8348     HOST_WIDE_INT vcall_offset;
8349     tree function;
8350{
8351  rtx this, insn, funexp;
8352
8353  reload_completed = 1;
8354  no_new_pseudos = 1;
8355
8356  /* Set things up as ia64_expand_prologue might.  */
8357  last_scratch_gr_reg = 15;
8358
8359  memset (&current_frame_info, 0, sizeof (current_frame_info));
8360  current_frame_info.spill_cfa_off = -16;
8361  current_frame_info.n_input_regs = 1;
8362  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8363
8364  if (!TARGET_REG_NAMES)
8365    reg_names[IN_REG (0)] = ia64_reg_numbers[0];
8366
8367  /* Mark the end of the (empty) prologue.  */
8368  emit_note (NULL, NOTE_INSN_PROLOGUE_END);
8369
8370  this = gen_rtx_REG (Pmode, IN_REG (0));
8371
8372  /* Apply the constant offset, if required.  */
8373  if (delta)
8374    {
8375      rtx delta_rtx = GEN_INT (delta);
8376
8377      if (!CONST_OK_FOR_I (delta))
8378	{
8379	  rtx tmp = gen_rtx_REG (Pmode, 2);
8380	  emit_move_insn (tmp, delta_rtx);
8381	  delta_rtx = tmp;
8382	}
8383      emit_insn (gen_adddi3 (this, this, delta_rtx));
8384    }
8385
8386  /* Apply the offset from the vtable, if required.  */
8387  if (vcall_offset)
8388    {
8389      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8390      rtx tmp = gen_rtx_REG (Pmode, 2);
8391
8392      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8393
8394      if (!CONST_OK_FOR_J (vcall_offset))
8395	{
8396	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8397	  emit_move_insn (tmp2, vcall_offset_rtx);
8398	  vcall_offset_rtx = tmp2;
8399	}
8400      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8401
8402      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8403
8404      emit_insn (gen_adddi3 (this, this, tmp));
8405    }
8406
8407  /* Generate a tail call to the target function.  */
8408  if (! TREE_USED (function))
8409    {
8410      assemble_external (function);
8411      TREE_USED (function) = 1;
8412    }
8413  funexp = XEXP (DECL_RTL (function), 0);
8414  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8415  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8416  insn = get_last_insn ();
8417  SIBLING_CALL_P (insn) = 1;
8418
8419  /* Code generation for calls relies on splitting.  */
8420  reload_completed = 1;
8421  try_split (PATTERN (insn), insn, 0);
8422
8423  emit_barrier ();
8424
8425  /* Run just enough of rest_of_compilation to get the insns emitted.
8426     There's not really enough bulk here to make other passes such as
8427     instruction scheduling worth while.  Note that use_thunk calls
8428     assemble_start_function and assemble_end_function.  */
8429
8430  insn = get_insns ();
8431  emit_all_insn_group_barriers (NULL, insn);
8432  shorten_branches (insn);
8433  final_start_function (insn, file, 1);
8434  final (insn, file, 1, 0);
8435  final_end_function ();
8436
8437  reload_completed = 0;
8438  no_new_pseudos = 0;
8439}
8440
8441#include "gt-ia64.h"
8442