ia64.c revision 132718
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3   Free Software Foundation, Inc.
4   Contributed by James E. Wilson <wilson@cygnus.com> and
5		  David Mosberger <davidm@hpl.hp.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to
21the Free Software Foundation, 59 Temple Place - Suite 330,
22Boston, MA 02111-1307, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "toplev.h"
46#include "sched-int.h"
47#include "timevar.h"
48#include "target.h"
49#include "target-def.h"
50#include "tm_p.h"
51#include "hashtab.h"
52#include "langhooks.h"
53#include "cfglayout.h"
54
55/* This is used for communication between ASM_OUTPUT_LABEL and
56   ASM_OUTPUT_LABELREF.  */
57int ia64_asm_output_label = 0;
58
59/* Define the information needed to generate branch and scc insns.  This is
60   stored from the compare operation.  */
61struct rtx_def * ia64_compare_op0;
62struct rtx_def * ia64_compare_op1;
63
64/* Register names for ia64_expand_prologue.  */
65static const char * const ia64_reg_numbers[96] =
66{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
67  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
68  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
69  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
70  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
71  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
72  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
73  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
74  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
75  "r104","r105","r106","r107","r108","r109","r110","r111",
76  "r112","r113","r114","r115","r116","r117","r118","r119",
77  "r120","r121","r122","r123","r124","r125","r126","r127"};
78
79/* ??? These strings could be shared with REGISTER_NAMES.  */
80static const char * const ia64_input_reg_names[8] =
81{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
82
83/* ??? These strings could be shared with REGISTER_NAMES.  */
84static const char * const ia64_local_reg_names[80] =
85{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
86  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
87  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
88  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
89  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
90  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
91  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
92  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
93  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
94  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
95
96/* ??? These strings could be shared with REGISTER_NAMES.  */
97static const char * const ia64_output_reg_names[8] =
98{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
99
100/* String used with the -mfixed-range= option.  */
101const char *ia64_fixed_range_string;
102
103/* Determines whether we use adds, addl, or movl to generate our
104   TLS immediate offsets.  */
105int ia64_tls_size = 22;
106
107/* String used with the -mtls-size= option.  */
108const char *ia64_tls_size_string;
109
110/* Which cpu are we scheduling for.  */
111enum processor_type ia64_tune;
112
113/* String used with the -tune= option.  */
114const char *ia64_tune_string;
115
116/* Determines whether we run our final scheduling pass or not.  We always
117   avoid the normal second scheduling pass.  */
118static int ia64_flag_schedule_insns2;
119
120/* Variables which are this size or smaller are put in the sdata/sbss
121   sections.  */
122
123unsigned int ia64_section_threshold;
124
125/* The following variable is used by the DFA insn scheduler.  The value is
126   TRUE if we do insn bundling instead of insn scheduling.  */
127int bundling_p = 0;
128
129/* Structure to be filled in by ia64_compute_frame_size with register
130   save masks and offsets for the current function.  */
131
132struct ia64_frame_info
133{
134  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
135				   the caller's scratch area.  */
136  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
137  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
138  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
139  HARD_REG_SET mask;		/* mask of saved registers.  */
140  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
141				   registers or long-term scratches.  */
142  int n_spilled;		/* number of spilled registers.  */
143  int reg_fp;			/* register for fp.  */
144  int reg_save_b0;		/* save register for b0.  */
145  int reg_save_pr;		/* save register for prs.  */
146  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
147  int reg_save_ar_unat;		/* save register for ar.unat.  */
148  int reg_save_ar_lc;		/* save register for ar.lc.  */
149  int reg_save_gp;		/* save register for gp.  */
150  int n_input_regs;		/* number of input registers used.  */
151  int n_local_regs;		/* number of local registers used.  */
152  int n_output_regs;		/* number of output registers used.  */
153  int n_rotate_regs;		/* number of rotating registers used.  */
154
155  char need_regstk;		/* true if a .regstk directive needed.  */
156  char initialized;		/* true if the data is finalized.  */
157};
158
159/* Current frame information calculated by ia64_compute_frame_size.  */
160static struct ia64_frame_info current_frame_info;
161
162static int ia64_use_dfa_pipeline_interface (void);
163static int ia64_first_cycle_multipass_dfa_lookahead (void);
164static void ia64_dependencies_evaluation_hook (rtx, rtx);
165static void ia64_init_dfa_pre_cycle_insn (void);
166static rtx ia64_dfa_pre_cycle_insn (void);
167static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
168static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
169static rtx gen_tls_get_addr (void);
170static rtx gen_thread_pointer (void);
171static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
172static int find_gr_spill (int);
173static int next_scratch_gr_reg (void);
174static void mark_reg_gr_used_mask (rtx, void *);
175static void ia64_compute_frame_size (HOST_WIDE_INT);
176static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
177static void finish_spill_pointers (void);
178static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
179static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
180static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
181static rtx gen_movdi_x (rtx, rtx, rtx);
182static rtx gen_fr_spill_x (rtx, rtx, rtx);
183static rtx gen_fr_restore_x (rtx, rtx, rtx);
184
185static enum machine_mode hfa_element_mode (tree, int);
186static bool ia64_function_ok_for_sibcall (tree, tree);
187static bool ia64_rtx_costs (rtx, int, int, int *);
188static void fix_range (const char *);
189static struct machine_function * ia64_init_machine_status (void);
190static void emit_insn_group_barriers (FILE *);
191static void emit_all_insn_group_barriers (FILE *);
192static void final_emit_insn_group_barriers (FILE *);
193static void emit_predicate_relation_info (void);
194static void ia64_reorg (void);
195static bool ia64_in_small_data_p (tree);
196static void process_epilogue (void);
197static int process_set (FILE *, rtx);
198
199static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
200static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
201static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
202					 int, tree, rtx);
203static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
204static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
205static bool ia64_assemble_integer (rtx, unsigned int, int);
206static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
207static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
208static void ia64_output_function_end_prologue (FILE *);
209
210static int ia64_issue_rate (void);
211static int ia64_adjust_cost (rtx, rtx, rtx, int);
212static void ia64_sched_init (FILE *, int, int);
213static void ia64_sched_finish (FILE *, int);
214static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
215static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
216static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
217static int ia64_variable_issue (FILE *, int, rtx, int);
218
219static struct bundle_state *get_free_bundle_state (void);
220static void free_bundle_state (struct bundle_state *);
221static void initiate_bundle_states (void);
222static void finish_bundle_states (void);
223static unsigned bundle_state_hash (const void *);
224static int bundle_state_eq_p (const void *, const void *);
225static int insert_bundle_state (struct bundle_state *);
226static void initiate_bundle_state_table (void);
227static void finish_bundle_state_table (void);
228static int try_issue_nops (struct bundle_state *, int);
229static int try_issue_insn (struct bundle_state *, rtx);
230static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
231static int get_max_pos (state_t);
232static int get_template (state_t, int);
233
234static rtx get_next_important_insn (rtx, rtx);
235static void bundling (FILE *, int, rtx, rtx);
236
237static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
238				  HOST_WIDE_INT, tree);
239static void ia64_file_start (void);
240
241static void ia64_select_rtx_section (enum machine_mode, rtx,
242				     unsigned HOST_WIDE_INT);
243static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
244     ATTRIBUTE_UNUSED;
245static void ia64_rwreloc_unique_section (tree, int)
246     ATTRIBUTE_UNUSED;
247static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
248					     unsigned HOST_WIDE_INT)
249     ATTRIBUTE_UNUSED;
250static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
251     ATTRIBUTE_UNUSED;
252
253static void ia64_hpux_add_extern_decl (tree decl)
254     ATTRIBUTE_UNUSED;
255static void ia64_hpux_file_end (void)
256     ATTRIBUTE_UNUSED;
257static void ia64_hpux_init_libfuncs (void)
258     ATTRIBUTE_UNUSED;
259static void ia64_vms_init_libfuncs (void)
260     ATTRIBUTE_UNUSED;
261
262static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
263static void ia64_encode_section_info (tree, rtx, int);
264static rtx ia64_struct_value_rtx (tree, int);
265
266
267/* Table of valid machine attributes.  */
268static const struct attribute_spec ia64_attribute_table[] =
269{
270  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
271  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
272  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
273  { NULL,	       0, 0, false, false, false, NULL }
274};
275
276/* Initialize the GCC target structure.  */
277#undef TARGET_ATTRIBUTE_TABLE
278#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
279
280#undef TARGET_INIT_BUILTINS
281#define TARGET_INIT_BUILTINS ia64_init_builtins
282
283#undef TARGET_EXPAND_BUILTIN
284#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
285
286#undef TARGET_ASM_BYTE_OP
287#define TARGET_ASM_BYTE_OP "\tdata1\t"
288#undef TARGET_ASM_ALIGNED_HI_OP
289#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
290#undef TARGET_ASM_ALIGNED_SI_OP
291#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
292#undef TARGET_ASM_ALIGNED_DI_OP
293#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
294#undef TARGET_ASM_UNALIGNED_HI_OP
295#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
296#undef TARGET_ASM_UNALIGNED_SI_OP
297#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
298#undef TARGET_ASM_UNALIGNED_DI_OP
299#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
300#undef TARGET_ASM_INTEGER
301#define TARGET_ASM_INTEGER ia64_assemble_integer
302
303#undef TARGET_ASM_FUNCTION_PROLOGUE
304#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
305#undef TARGET_ASM_FUNCTION_END_PROLOGUE
306#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
307#undef TARGET_ASM_FUNCTION_EPILOGUE
308#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
309
310#undef TARGET_IN_SMALL_DATA_P
311#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
312
313#undef TARGET_SCHED_ADJUST_COST
314#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
315#undef TARGET_SCHED_ISSUE_RATE
316#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
317#undef TARGET_SCHED_VARIABLE_ISSUE
318#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
319#undef TARGET_SCHED_INIT
320#define TARGET_SCHED_INIT ia64_sched_init
321#undef TARGET_SCHED_FINISH
322#define TARGET_SCHED_FINISH ia64_sched_finish
323#undef TARGET_SCHED_REORDER
324#define TARGET_SCHED_REORDER ia64_sched_reorder
325#undef TARGET_SCHED_REORDER2
326#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
327
328#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
329#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
330
331#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
332#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
333
334#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
335#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
336
337#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
338#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
339#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
340#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
341
342#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
343#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
344  ia64_first_cycle_multipass_dfa_lookahead_guard
345
346#undef TARGET_SCHED_DFA_NEW_CYCLE
347#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
348
349#undef TARGET_FUNCTION_OK_FOR_SIBCALL
350#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
351
352#undef TARGET_ASM_OUTPUT_MI_THUNK
353#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
354#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
355#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
356
357#undef TARGET_ASM_FILE_START
358#define TARGET_ASM_FILE_START ia64_file_start
359
360#undef TARGET_RTX_COSTS
361#define TARGET_RTX_COSTS ia64_rtx_costs
362#undef TARGET_ADDRESS_COST
363#define TARGET_ADDRESS_COST hook_int_rtx_0
364
365#undef TARGET_MACHINE_DEPENDENT_REORG
366#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
367
368#undef TARGET_ENCODE_SECTION_INFO
369#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
370
371#undef TARGET_STRUCT_VALUE_RTX
372#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
373
374struct gcc_target targetm = TARGET_INITIALIZER;
375
376/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
377
378int
379call_operand (rtx op, enum machine_mode mode)
380{
381  if (mode != GET_MODE (op) && mode != VOIDmode)
382    return 0;
383
384  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
385	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
386}
387
388/* Return 1 if OP refers to a symbol in the sdata section.  */
389
390int
391sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
392{
393  switch (GET_CODE (op))
394    {
395    case CONST:
396      if (GET_CODE (XEXP (op, 0)) != PLUS
397	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
398	break;
399      op = XEXP (XEXP (op, 0), 0);
400      /* FALLTHRU */
401
402    case SYMBOL_REF:
403      if (CONSTANT_POOL_ADDRESS_P (op))
404	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
405      else
406	return SYMBOL_REF_LOCAL_P (op) && SYMBOL_REF_SMALL_P (op);
407
408    default:
409      break;
410    }
411
412  return 0;
413}
414
415int
416small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
417{
418  return SYMBOL_REF_SMALL_ADDR_P (op);
419}
420
421/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
422
423int
424got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
425{
426  switch (GET_CODE (op))
427    {
428    case CONST:
429      op = XEXP (op, 0);
430      if (GET_CODE (op) != PLUS)
431	return 0;
432      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
433	return 0;
434      op = XEXP (op, 1);
435      if (GET_CODE (op) != CONST_INT)
436	return 0;
437
438	return 1;
439
440      /* Ok if we're not using GOT entries at all.  */
441      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
442	return 1;
443
444      /* "Ok" while emitting rtl, since otherwise we won't be provided
445	 with the entire offset during emission, which makes it very
446	 hard to split the offset into high and low parts.  */
447      if (rtx_equal_function_value_matters)
448	return 1;
449
450      /* Force the low 14 bits of the constant to zero so that we do not
451	 use up so many GOT entries.  */
452      return (INTVAL (op) & 0x3fff) == 0;
453
454    case SYMBOL_REF:
455      if (SYMBOL_REF_SMALL_ADDR_P (op))
456	return 0;
457    case LABEL_REF:
458      return 1;
459
460    default:
461      break;
462    }
463  return 0;
464}
465
466/* Return 1 if OP refers to a symbol.  */
467
468int
469symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
470{
471  switch (GET_CODE (op))
472    {
473    case CONST:
474    case SYMBOL_REF:
475    case LABEL_REF:
476      return 1;
477
478    default:
479      break;
480    }
481  return 0;
482}
483
484/* Return tls_model if OP refers to a TLS symbol.  */
485
486int
487tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
488{
489  if (GET_CODE (op) != SYMBOL_REF)
490    return 0;
491  return SYMBOL_REF_TLS_MODEL (op);
492}
493
494
495/* Return 1 if OP refers to a function.  */
496
497int
498function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
499{
500  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
501    return 1;
502  else
503    return 0;
504}
505
506/* Return 1 if OP is setjmp or a similar function.  */
507
508/* ??? This is an unsatisfying solution.  Should rethink.  */
509
510int
511setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
512{
513  const char *name;
514  int retval = 0;
515
516  if (GET_CODE (op) != SYMBOL_REF)
517    return 0;
518
519  name = XSTR (op, 0);
520
521  /* The following code is borrowed from special_function_p in calls.c.  */
522
523  /* Disregard prefix _, __ or __x.  */
524  if (name[0] == '_')
525    {
526      if (name[1] == '_' && name[2] == 'x')
527	name += 3;
528      else if (name[1] == '_')
529	name += 2;
530      else
531	name += 1;
532    }
533
534  if (name[0] == 's')
535    {
536      retval
537	= ((name[1] == 'e'
538	    && (! strcmp (name, "setjmp")
539		|| ! strcmp (name, "setjmp_syscall")))
540	   || (name[1] == 'i'
541	       && ! strcmp (name, "sigsetjmp"))
542	   || (name[1] == 'a'
543	       && ! strcmp (name, "savectx")));
544    }
545  else if ((name[0] == 'q' && name[1] == 's'
546	    && ! strcmp (name, "qsetjmp"))
547	   || (name[0] == 'v' && name[1] == 'f'
548	       && ! strcmp (name, "vfork")))
549    retval = 1;
550
551  return retval;
552}
553
554/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */
555
556int
557move_operand (rtx op, enum machine_mode mode)
558{
559  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
560}
561
562/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
563
564int
565gr_register_operand (rtx op, enum machine_mode mode)
566{
567  if (! register_operand (op, mode))
568    return 0;
569  if (GET_CODE (op) == SUBREG)
570    op = SUBREG_REG (op);
571  if (GET_CODE (op) == REG)
572    {
573      unsigned int regno = REGNO (op);
574      if (regno < FIRST_PSEUDO_REGISTER)
575	return GENERAL_REGNO_P (regno);
576    }
577  return 1;
578}
579
580/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
581
582int
583fr_register_operand (rtx op, enum machine_mode mode)
584{
585  if (! register_operand (op, mode))
586    return 0;
587  if (GET_CODE (op) == SUBREG)
588    op = SUBREG_REG (op);
589  if (GET_CODE (op) == REG)
590    {
591      unsigned int regno = REGNO (op);
592      if (regno < FIRST_PSEUDO_REGISTER)
593	return FR_REGNO_P (regno);
594    }
595  return 1;
596}
597
598/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
599
600int
601grfr_register_operand (rtx op, enum machine_mode mode)
602{
603  if (! register_operand (op, mode))
604    return 0;
605  if (GET_CODE (op) == SUBREG)
606    op = SUBREG_REG (op);
607  if (GET_CODE (op) == REG)
608    {
609      unsigned int regno = REGNO (op);
610      if (regno < FIRST_PSEUDO_REGISTER)
611	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
612    }
613  return 1;
614}
615
616/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
617
618int
619gr_nonimmediate_operand (rtx op, enum machine_mode mode)
620{
621  if (! nonimmediate_operand (op, mode))
622    return 0;
623  if (GET_CODE (op) == SUBREG)
624    op = SUBREG_REG (op);
625  if (GET_CODE (op) == REG)
626    {
627      unsigned int regno = REGNO (op);
628      if (regno < FIRST_PSEUDO_REGISTER)
629	return GENERAL_REGNO_P (regno);
630    }
631  return 1;
632}
633
634/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
635
636int
637fr_nonimmediate_operand (rtx op, enum machine_mode mode)
638{
639  if (! nonimmediate_operand (op, mode))
640    return 0;
641  if (GET_CODE (op) == SUBREG)
642    op = SUBREG_REG (op);
643  if (GET_CODE (op) == REG)
644    {
645      unsigned int regno = REGNO (op);
646      if (regno < FIRST_PSEUDO_REGISTER)
647	return FR_REGNO_P (regno);
648    }
649  return 1;
650}
651
652/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
653
654int
655grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
656{
657  if (! nonimmediate_operand (op, mode))
658    return 0;
659  if (GET_CODE (op) == SUBREG)
660    op = SUBREG_REG (op);
661  if (GET_CODE (op) == REG)
662    {
663      unsigned int regno = REGNO (op);
664      if (regno < FIRST_PSEUDO_REGISTER)
665	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
666    }
667  return 1;
668}
669
670/* Return 1 if OP is a GR register operand, or zero.  */
671
672int
673gr_reg_or_0_operand (rtx op, enum machine_mode mode)
674{
675  return (op == const0_rtx || gr_register_operand (op, mode));
676}
677
678/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
679
680int
681gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
682{
683  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
684	  || GET_CODE (op) == CONSTANT_P_RTX
685	  || gr_register_operand (op, mode));
686}
687
688/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
689
690int
691gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
692{
693  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
694	  || GET_CODE (op) == CONSTANT_P_RTX
695	  || gr_register_operand (op, mode));
696}
697
698/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
699
700int
701gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
702{
703  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
704	  || GET_CODE (op) == CONSTANT_P_RTX
705	  || gr_register_operand (op, mode));
706}
707
708/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
709
710int
711grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
712{
713  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
714	  || GET_CODE (op) == CONSTANT_P_RTX
715	  || grfr_register_operand (op, mode));
716}
717
718/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
719   operand.  */
720
721int
722gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
723{
724  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
725	  || GET_CODE (op) == CONSTANT_P_RTX
726	  || gr_register_operand (op, mode));
727}
728
729/* Return 1 if OP is a register operand, or is valid for both an 8 bit
730   immediate and an 8 bit adjusted immediate operand.  This is necessary
731   because when we emit a compare, we don't know what the condition will be,
732   so we need the union of the immediates accepted by GT and LT.  */
733
734int
735gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
736{
737  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
738	   && CONST_OK_FOR_L (INTVAL (op)))
739	  || GET_CODE (op) == CONSTANT_P_RTX
740	  || gr_register_operand (op, mode));
741}
742
743/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
744
745int
746gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
747{
748  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
749	  || GET_CODE (op) == CONSTANT_P_RTX
750	  || gr_register_operand (op, mode));
751}
752
753/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
754
755int
756gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
757{
758  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
759	  || GET_CODE (op) == CONSTANT_P_RTX
760	  || gr_register_operand (op, mode));
761}
762
763/* Return 1 if OP is a 6 bit immediate operand.  */
764
765int
766shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
767{
768  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
769	  || GET_CODE (op) == CONSTANT_P_RTX);
770}
771
772/* Return 1 if OP is a 5 bit immediate operand.  */
773
774int
775shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
776{
777  return ((GET_CODE (op) == CONST_INT
778	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
779	  || GET_CODE (op) == CONSTANT_P_RTX);
780}
781
782/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
783
784int
785shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
786{
787  return (GET_CODE (op) == CONST_INT
788	  && (INTVAL (op) == 2 || INTVAL (op) == 4
789	      || INTVAL (op) == 8 || INTVAL (op) == 16));
790}
791
792/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
793
794int
795fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
796{
797  return (GET_CODE (op) == CONST_INT
798          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
799              INTVAL (op) == -4  || INTVAL (op) == -1 ||
800              INTVAL (op) == 1   || INTVAL (op) == 4  ||
801              INTVAL (op) == 8   || INTVAL (op) == 16));
802}
803
804/* Return 1 if OP is a floating-point constant zero, one, or a register.  */
805
806int
807fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
808{
809  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
810	  || fr_register_operand (op, mode));
811}
812
813/* Like nonimmediate_operand, but don't allow MEMs that try to use a
814   POST_MODIFY with a REG as displacement.  */
815
816int
817destination_operand (rtx op, enum machine_mode mode)
818{
819  if (! nonimmediate_operand (op, mode))
820    return 0;
821  if (GET_CODE (op) == MEM
822      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
823      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
824    return 0;
825  return 1;
826}
827
828/* Like memory_operand, but don't allow post-increments.  */
829
830int
831not_postinc_memory_operand (rtx op, enum machine_mode mode)
832{
833  return (memory_operand (op, mode)
834	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
835}
836
837/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
838   signed immediate operand.  */
839
840int
841normal_comparison_operator (register rtx op, enum machine_mode mode)
842{
843  enum rtx_code code = GET_CODE (op);
844  return ((mode == VOIDmode || GET_MODE (op) == mode)
845	  && (code == EQ || code == NE
846	      || code == GT || code == LE || code == GTU || code == LEU));
847}
848
849/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
850   signed immediate operand.  */
851
852int
853adjusted_comparison_operator (register rtx op, enum machine_mode mode)
854{
855  enum rtx_code code = GET_CODE (op);
856  return ((mode == VOIDmode || GET_MODE (op) == mode)
857	  && (code == LT || code == GE || code == LTU || code == GEU));
858}
859
860/* Return 1 if this is a signed inequality operator.  */
861
862int
863signed_inequality_operator (register rtx op, enum machine_mode mode)
864{
865  enum rtx_code code = GET_CODE (op);
866  return ((mode == VOIDmode || GET_MODE (op) == mode)
867	  && (code == GE || code == GT
868	      || code == LE || code == LT));
869}
870
871/* Return 1 if this operator is valid for predication.  */
872
873int
874predicate_operator (register rtx op, enum machine_mode mode)
875{
876  enum rtx_code code = GET_CODE (op);
877  return ((GET_MODE (op) == mode || mode == VOIDmode)
878	  && (code == EQ || code == NE));
879}
880
881/* Return 1 if this operator can be used in a conditional operation.  */
882
883int
884condop_operator (register rtx op, enum machine_mode mode)
885{
886  enum rtx_code code = GET_CODE (op);
887  return ((GET_MODE (op) == mode || mode == VOIDmode)
888	  && (code == PLUS || code == MINUS || code == AND
889	      || code == IOR || code == XOR));
890}
891
892/* Return 1 if this is the ar.lc register.  */
893
894int
895ar_lc_reg_operand (register rtx op, enum machine_mode mode)
896{
897  return (GET_MODE (op) == DImode
898	  && (mode == DImode || mode == VOIDmode)
899	  && GET_CODE (op) == REG
900	  && REGNO (op) == AR_LC_REGNUM);
901}
902
903/* Return 1 if this is the ar.ccv register.  */
904
905int
906ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
907{
908  return ((GET_MODE (op) == mode || mode == VOIDmode)
909	  && GET_CODE (op) == REG
910	  && REGNO (op) == AR_CCV_REGNUM);
911}
912
913/* Return 1 if this is the ar.pfs register.  */
914
915int
916ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
917{
918  return ((GET_MODE (op) == mode || mode == VOIDmode)
919	  && GET_CODE (op) == REG
920	  && REGNO (op) == AR_PFS_REGNUM);
921}
922
923/* Like general_operand, but don't allow (mem (addressof)).  */
924
925int
926general_xfmode_operand (rtx op, enum machine_mode mode)
927{
928  if (! general_operand (op, mode))
929    return 0;
930  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
931    return 0;
932  return 1;
933}
934
935/* Similarly.  */
936
937int
938destination_xfmode_operand (rtx op, enum machine_mode mode)
939{
940  if (! destination_operand (op, mode))
941    return 0;
942  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
943    return 0;
944  return 1;
945}
946
947/* Similarly.  */
948
949int
950xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
951{
952  if (GET_CODE (op) == SUBREG)
953    return 0;
954  return fr_reg_or_fp01_operand (op, mode);
955}
956
957/* Return 1 if OP is valid as a base register in a reg + offset address.  */
958
959int
960basereg_operand (rtx op, enum machine_mode mode)
961{
962  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
963     checks from pa.c basereg_operand as well?  Seems to be OK without them
964     in test runs.  */
965
966  return (register_operand (op, mode) &&
967	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
968}
969
970typedef enum
971  {
972    ADDR_AREA_NORMAL,	/* normal address area */
973    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
974  }
975ia64_addr_area;
976
977static GTY(()) tree small_ident1;
978static GTY(()) tree small_ident2;
979
980static void
981init_idents (void)
982{
983  if (small_ident1 == 0)
984    {
985      small_ident1 = get_identifier ("small");
986      small_ident2 = get_identifier ("__small__");
987    }
988}
989
990/* Retrieve the address area that has been chosen for the given decl.  */
991
992static ia64_addr_area
993ia64_get_addr_area (tree decl)
994{
995  tree model_attr;
996
997  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
998  if (model_attr)
999    {
1000      tree id;
1001
1002      init_idents ();
1003      id = TREE_VALUE (TREE_VALUE (model_attr));
1004      if (id == small_ident1 || id == small_ident2)
1005	return ADDR_AREA_SMALL;
1006    }
1007  return ADDR_AREA_NORMAL;
1008}
1009
1010static tree
1011ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1012{
1013  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
1014  ia64_addr_area area;
1015  tree arg, decl = *node;
1016
1017  init_idents ();
1018  arg = TREE_VALUE (args);
1019  if (arg == small_ident1 || arg == small_ident2)
1020    {
1021      addr_area = ADDR_AREA_SMALL;
1022    }
1023  else
1024    {
1025      warning ("invalid argument of `%s' attribute",
1026	       IDENTIFIER_POINTER (name));
1027      *no_add_attrs = true;
1028    }
1029
1030  switch (TREE_CODE (decl))
1031    {
1032    case VAR_DECL:
1033      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
1034	   == FUNCTION_DECL)
1035	  && !TREE_STATIC (decl))
1036	{
1037	  error ("%Jan address area attribute cannot be specified for "
1038		 "local variables", decl, decl);
1039	  *no_add_attrs = true;
1040	}
1041      area = ia64_get_addr_area (decl);
1042      if (area != ADDR_AREA_NORMAL && addr_area != area)
1043	{
1044	  error ("%Jaddress area of '%s' conflicts with previous "
1045		 "declaration", decl, decl);
1046	  *no_add_attrs = true;
1047	}
1048      break;
1049
1050    case FUNCTION_DECL:
1051      error ("%Jaddress area attribute cannot be specified for functions",
1052	     decl, decl);
1053      *no_add_attrs = true;
1054      break;
1055
1056    default:
1057      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1058      *no_add_attrs = true;
1059      break;
1060    }
1061
1062  return NULL_TREE;
1063}
1064
1065static void
1066ia64_encode_addr_area (tree decl, rtx symbol)
1067{
1068  int flags;
1069
1070  flags = SYMBOL_REF_FLAGS (symbol);
1071  switch (ia64_get_addr_area (decl))
1072    {
1073    case ADDR_AREA_NORMAL: break;
1074    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
1075    default: abort ();
1076    }
1077  SYMBOL_REF_FLAGS (symbol) = flags;
1078}
1079
1080static void
1081ia64_encode_section_info (tree decl, rtx rtl, int first)
1082{
1083  default_encode_section_info (decl, rtl, first);
1084
1085  /* Careful not to prod global register variables.  */
1086  if (TREE_CODE (decl) == VAR_DECL
1087      && GET_CODE (DECL_RTL (decl)) == MEM
1088      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
1089      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
1090    ia64_encode_addr_area (decl, XEXP (rtl, 0));
1091}
1092
1093/* Return 1 if the operands of a move are ok.  */
1094
1095int
1096ia64_move_ok (rtx dst, rtx src)
1097{
1098  /* If we're under init_recog_no_volatile, we'll not be able to use
1099     memory_operand.  So check the code directly and don't worry about
1100     the validity of the underlying address, which should have been
1101     checked elsewhere anyway.  */
1102  if (GET_CODE (dst) != MEM)
1103    return 1;
1104  if (GET_CODE (src) == MEM)
1105    return 0;
1106  if (register_operand (src, VOIDmode))
1107    return 1;
1108
1109  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
1110  if (INTEGRAL_MODE_P (GET_MODE (dst)))
1111    return src == const0_rtx;
1112  else
1113    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1114}
1115
1116int
1117addp4_optimize_ok (rtx op1, rtx op2)
1118{
1119  return (basereg_operand (op1, GET_MODE(op1)) !=
1120	  basereg_operand (op2, GET_MODE(op2)));
1121}
1122
1123/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1124   Return the length of the field, or <= 0 on failure.  */
1125
1126int
1127ia64_depz_field_mask (rtx rop, rtx rshift)
1128{
1129  unsigned HOST_WIDE_INT op = INTVAL (rop);
1130  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1131
1132  /* Get rid of the zero bits we're shifting in.  */
1133  op >>= shift;
1134
1135  /* We must now have a solid block of 1's at bit 0.  */
1136  return exact_log2 (op + 1);
1137}
1138
1139/* Expand a symbolic constant load.  */
1140
1141void
1142ia64_expand_load_address (rtx dest, rtx src)
1143{
1144  if (tls_symbolic_operand (src, VOIDmode))
1145    abort ();
1146  if (GET_CODE (dest) != REG)
1147    abort ();
1148
1149  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
1150     having to pointer-extend the value afterward.  Other forms of address
1151     computation below are also more natural to compute as 64-bit quantities.
1152     If we've been given an SImode destination register, change it.  */
1153  if (GET_MODE (dest) != Pmode)
1154    dest = gen_rtx_REG (Pmode, REGNO (dest));
1155
1156  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
1157    {
1158      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
1159      return;
1160    }
1161  else if (TARGET_AUTO_PIC)
1162    {
1163      emit_insn (gen_load_gprel64 (dest, src));
1164      return;
1165    }
1166  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1167    {
1168      emit_insn (gen_load_fptr (dest, src));
1169      return;
1170    }
1171  else if (sdata_symbolic_operand (src, VOIDmode))
1172    {
1173      emit_insn (gen_load_gprel (dest, src));
1174      return;
1175    }
1176
1177  if (GET_CODE (src) == CONST
1178      && GET_CODE (XEXP (src, 0)) == PLUS
1179      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1180      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1181    {
1182      rtx sym = XEXP (XEXP (src, 0), 0);
1183      HOST_WIDE_INT ofs, hi, lo;
1184
1185      /* Split the offset into a sign extended 14-bit low part
1186	 and a complementary high part.  */
1187      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1188      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1189      hi = ofs - lo;
1190
1191      ia64_expand_load_address (dest, plus_constant (sym, hi));
1192      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
1193    }
1194  else
1195    {
1196      rtx tmp;
1197
1198      tmp = gen_rtx_HIGH (Pmode, src);
1199      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1200      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1201
1202      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
1203      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1204    }
1205}
1206
1207static GTY(()) rtx gen_tls_tga;
1208static rtx
1209gen_tls_get_addr (void)
1210{
1211  if (!gen_tls_tga)
1212    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1213  return gen_tls_tga;
1214}
1215
1216static GTY(()) rtx thread_pointer_rtx;
1217static rtx
1218gen_thread_pointer (void)
1219{
1220  if (!thread_pointer_rtx)
1221    {
1222      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1223      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1224    }
1225  return thread_pointer_rtx;
1226}
1227
1228static rtx
1229ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
1230{
1231  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1232  rtx orig_op0 = op0;
1233
1234  switch (tls_kind)
1235    {
1236    case TLS_MODEL_GLOBAL_DYNAMIC:
1237      start_sequence ();
1238
1239      tga_op1 = gen_reg_rtx (Pmode);
1240      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1241      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1242      RTX_UNCHANGING_P (tga_op1) = 1;
1243
1244      tga_op2 = gen_reg_rtx (Pmode);
1245      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1246      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1247      RTX_UNCHANGING_P (tga_op2) = 1;
1248
1249      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1250					 LCT_CONST, Pmode, 2, tga_op1,
1251					 Pmode, tga_op2, Pmode);
1252
1253      insns = get_insns ();
1254      end_sequence ();
1255
1256      if (GET_MODE (op0) != Pmode)
1257	op0 = tga_ret;
1258      emit_libcall_block (insns, op0, tga_ret, op1);
1259      break;
1260
1261    case TLS_MODEL_LOCAL_DYNAMIC:
1262      /* ??? This isn't the completely proper way to do local-dynamic
1263	 If the call to __tls_get_addr is used only by a single symbol,
1264	 then we should (somehow) move the dtprel to the second arg
1265	 to avoid the extra add.  */
1266      start_sequence ();
1267
1268      tga_op1 = gen_reg_rtx (Pmode);
1269      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1270      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1271      RTX_UNCHANGING_P (tga_op1) = 1;
1272
1273      tga_op2 = const0_rtx;
1274
1275      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1276					 LCT_CONST, Pmode, 2, tga_op1,
1277					 Pmode, tga_op2, Pmode);
1278
1279      insns = get_insns ();
1280      end_sequence ();
1281
1282      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1283				UNSPEC_LD_BASE);
1284      tmp = gen_reg_rtx (Pmode);
1285      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1286
1287      if (!register_operand (op0, Pmode))
1288	op0 = gen_reg_rtx (Pmode);
1289      if (TARGET_TLS64)
1290	{
1291	  emit_insn (gen_load_dtprel (op0, op1));
1292	  emit_insn (gen_adddi3 (op0, tmp, op0));
1293	}
1294      else
1295	emit_insn (gen_add_dtprel (op0, tmp, op1));
1296      break;
1297
1298    case TLS_MODEL_INITIAL_EXEC:
1299      tmp = gen_reg_rtx (Pmode);
1300      emit_insn (gen_load_ltoff_tprel (tmp, op1));
1301      tmp = gen_rtx_MEM (Pmode, tmp);
1302      RTX_UNCHANGING_P (tmp) = 1;
1303      tmp = force_reg (Pmode, tmp);
1304
1305      if (!register_operand (op0, Pmode))
1306	op0 = gen_reg_rtx (Pmode);
1307      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1308      break;
1309
1310    case TLS_MODEL_LOCAL_EXEC:
1311      if (!register_operand (op0, Pmode))
1312	op0 = gen_reg_rtx (Pmode);
1313      if (TARGET_TLS64)
1314	{
1315	  emit_insn (gen_load_tprel (op0, op1));
1316	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
1317	}
1318      else
1319	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
1320      break;
1321
1322    default:
1323      abort ();
1324    }
1325
1326  if (orig_op0 == op0)
1327    return NULL_RTX;
1328  if (GET_MODE (orig_op0) == Pmode)
1329    return op0;
1330  return gen_lowpart (GET_MODE (orig_op0), op0);
1331}
1332
1333rtx
1334ia64_expand_move (rtx op0, rtx op1)
1335{
1336  enum machine_mode mode = GET_MODE (op0);
1337
1338  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1339    op1 = force_reg (mode, op1);
1340
1341  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1342    {
1343      enum tls_model tls_kind;
1344      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1345	return ia64_expand_tls_address (tls_kind, op0, op1);
1346
1347      if (!TARGET_NO_PIC && reload_completed)
1348	{
1349	  ia64_expand_load_address (op0, op1);
1350	  return NULL_RTX;
1351	}
1352    }
1353
1354  return op1;
1355}
1356
1357/* Split a move from OP1 to OP0 conditional on COND.  */
1358
1359void
1360ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1361{
1362  rtx insn, first = get_last_insn ();
1363
1364  emit_move_insn (op0, op1);
1365
1366  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1367    if (INSN_P (insn))
1368      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1369					  PATTERN (insn));
1370}
1371
1372/* Split a post-reload TImode or TFmode reference into two DImode
1373   components.  This is made extra difficult by the fact that we do
1374   not get any scratch registers to work with, because reload cannot
1375   be prevented from giving us a scratch that overlaps the register
1376   pair involved.  So instead, when addressing memory, we tweak the
1377   pointer register up and back down with POST_INCs.  Or up and not
1378   back down when we can get away with it.
1379
1380   REVERSED is true when the loads must be done in reversed order
1381   (high word first) for correctness.  DEAD is true when the pointer
1382   dies with the second insn we generate and therefore the second
1383   address must not carry a postmodify.
1384
1385   May return an insn which is to be emitted after the moves.  */
1386
1387static rtx
1388ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1389{
1390  rtx fixup = 0;
1391
1392  switch (GET_CODE (in))
1393    {
1394    case REG:
1395      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1396      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1397      break;
1398
1399    case CONST_INT:
1400    case CONST_DOUBLE:
1401      /* Cannot occur reversed.  */
1402      if (reversed) abort ();
1403
1404      if (GET_MODE (in) != TFmode)
1405	split_double (in, &out[0], &out[1]);
1406      else
1407	/* split_double does not understand how to split a TFmode
1408	   quantity into a pair of DImode constants.  */
1409	{
1410	  REAL_VALUE_TYPE r;
1411	  unsigned HOST_WIDE_INT p[2];
1412	  long l[4];  /* TFmode is 128 bits */
1413
1414	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1415	  real_to_target (l, &r, TFmode);
1416
1417	  if (FLOAT_WORDS_BIG_ENDIAN)
1418	    {
1419	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1420	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1421	    }
1422	  else
1423	    {
1424	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1425	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1426	    }
1427	  out[0] = GEN_INT (p[0]);
1428	  out[1] = GEN_INT (p[1]);
1429	}
1430      break;
1431
1432    case MEM:
1433      {
1434	rtx base = XEXP (in, 0);
1435	rtx offset;
1436
1437	switch (GET_CODE (base))
1438	  {
1439	  case REG:
1440	    if (!reversed)
1441	      {
1442		out[0] = adjust_automodify_address
1443		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1444		out[1] = adjust_automodify_address
1445		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1446	      }
1447	    else
1448	      {
1449		/* Reversal requires a pre-increment, which can only
1450		   be done as a separate insn.  */
1451		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1452		out[0] = adjust_automodify_address
1453		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1454		out[1] = adjust_address (in, DImode, 0);
1455	      }
1456	    break;
1457
1458	  case POST_INC:
1459	    if (reversed || dead) abort ();
1460	    /* Just do the increment in two steps.  */
1461	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1462	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1463	    break;
1464
1465	  case POST_DEC:
1466	    if (reversed || dead) abort ();
1467	    /* Add 8, subtract 24.  */
1468	    base = XEXP (base, 0);
1469	    out[0] = adjust_automodify_address
1470	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1471	    out[1] = adjust_automodify_address
1472	      (in, DImode,
1473	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1474	       8);
1475	    break;
1476
1477	  case POST_MODIFY:
1478	    if (reversed || dead) abort ();
1479	    /* Extract and adjust the modification.  This case is
1480	       trickier than the others, because we might have an
1481	       index register, or we might have a combined offset that
1482	       doesn't fit a signed 9-bit displacement field.  We can
1483	       assume the incoming expression is already legitimate.  */
1484	    offset = XEXP (base, 1);
1485	    base = XEXP (base, 0);
1486
1487	    out[0] = adjust_automodify_address
1488	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1489
1490	    if (GET_CODE (XEXP (offset, 1)) == REG)
1491	      {
1492		/* Can't adjust the postmodify to match.  Emit the
1493		   original, then a separate addition insn.  */
1494		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1495		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1496	      }
1497	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
1498	      abort ();
1499	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1500	      {
1501		/* Again the postmodify cannot be made to match, but
1502		   in this case it's more efficient to get rid of the
1503		   postmodify entirely and fix up with an add insn. */
1504		out[1] = adjust_automodify_address (in, DImode, base, 8);
1505		fixup = gen_adddi3 (base, base,
1506				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1507	      }
1508	    else
1509	      {
1510		/* Combined offset still fits in the displacement field.
1511		   (We cannot overflow it at the high end.)  */
1512		out[1] = adjust_automodify_address
1513		  (in, DImode,
1514		   gen_rtx_POST_MODIFY (Pmode, base,
1515		     gen_rtx_PLUS (Pmode, base,
1516				   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1517		   8);
1518	      }
1519	    break;
1520
1521	  default:
1522	    abort ();
1523	  }
1524	break;
1525      }
1526
1527    default:
1528      abort ();
1529    }
1530
1531  return fixup;
1532}
1533
1534/* Split a TImode or TFmode move instruction after reload.
1535   This is used by *movtf_internal and *movti_internal.  */
1536void
1537ia64_split_tmode_move (rtx operands[])
1538{
1539  rtx in[2], out[2], insn;
1540  rtx fixup[2];
1541  bool dead = false;
1542  bool reversed = false;
1543
1544  /* It is possible for reload to decide to overwrite a pointer with
1545     the value it points to.  In that case we have to do the loads in
1546     the appropriate order so that the pointer is not destroyed too
1547     early.  Also we must not generate a postmodify for that second
1548     load, or rws_access_regno will abort.  */
1549  if (GET_CODE (operands[1]) == MEM
1550      && reg_overlap_mentioned_p (operands[0], operands[1]))
1551    {
1552      rtx base = XEXP (operands[1], 0);
1553      while (GET_CODE (base) != REG)
1554	base = XEXP (base, 0);
1555
1556      if (REGNO (base) == REGNO (operands[0]))
1557	reversed = true;
1558      dead = true;
1559    }
1560  /* Another reason to do the moves in reversed order is if the first
1561     element of the target register pair is also the second element of
1562     the source register pair.  */
1563  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1564      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1565    reversed = true;
1566
1567  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1568  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1569
1570#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1571  if (GET_CODE (EXP) == MEM						\
1572      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1573	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1574	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1575    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1576					  XEXP (XEXP (EXP, 0), 0),	\
1577					  REG_NOTES (INSN))
1578
1579  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1580  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1581  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1582
1583  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1584  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1585  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1586
1587  if (fixup[0])
1588    emit_insn (fixup[0]);
1589  if (fixup[1])
1590    emit_insn (fixup[1]);
1591
1592#undef MAYBE_ADD_REG_INC_NOTE
1593}
1594
1595/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1596   through memory plus an extra GR scratch register.  Except that you can
1597   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1598   SECONDARY_RELOAD_CLASS, but not both.
1599
1600   We got into problems in the first place by allowing a construct like
1601   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1602   This solution attempts to prevent this situation from occurring.  When
1603   we see something like the above, we spill the inner register to memory.  */
1604
1605rtx
1606spill_xfmode_operand (rtx in, int force)
1607{
1608  if (GET_CODE (in) == SUBREG
1609      && GET_MODE (SUBREG_REG (in)) == TImode
1610      && GET_CODE (SUBREG_REG (in)) == REG)
1611    {
1612      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
1613      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
1614    }
1615  else if (force && GET_CODE (in) == REG)
1616    {
1617      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
1618      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
1619    }
1620  else if (GET_CODE (in) == MEM
1621	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1622    return change_address (in, XFmode, copy_to_reg (XEXP (in, 0)));
1623  else
1624    return in;
1625}
1626
1627/* Emit comparison instruction if necessary, returning the expression
1628   that holds the compare result in the proper mode.  */
1629
1630static GTY(()) rtx cmptf_libfunc;
1631
1632rtx
1633ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1634{
1635  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1636  rtx cmp;
1637
1638  /* If we have a BImode input, then we already have a compare result, and
1639     do not need to emit another comparison.  */
1640  if (GET_MODE (op0) == BImode)
1641    {
1642      if ((code == NE || code == EQ) && op1 == const0_rtx)
1643	cmp = op0;
1644      else
1645	abort ();
1646    }
1647  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1648     magic number as its third argument, that indicates what to do.
1649     The return value is an integer to be compared against zero.  */
1650  else if (TARGET_HPUX && GET_MODE (op0) == TFmode)
1651    {
1652      enum qfcmp_magic {
1653	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1654	QCMP_UNORD = 2,
1655	QCMP_EQ = 4,
1656	QCMP_LT = 8,
1657	QCMP_GT = 16
1658      } magic;
1659      enum rtx_code ncode;
1660      rtx ret, insns;
1661      if (GET_MODE (op1) != TFmode)
1662	abort ();
1663      switch (code)
1664	{
1665	  /* 1 = equal, 0 = not equal.  Equality operators do
1666	     not raise FP_INVALID when given an SNaN operand.  */
1667	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1668	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1669	  /* isunordered() from C99.  */
1670	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1671	  /* Relational operators raise FP_INVALID when given
1672	     an SNaN operand.  */
1673	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1674	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1675	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1676	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1677	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1678	     Expanders for buneq etc. weuld have to be added to ia64.md
1679	     for this to be useful.  */
1680	default: abort ();
1681	}
1682
1683      start_sequence ();
1684
1685      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1686				     op0, TFmode, op1, TFmode,
1687				     GEN_INT (magic), DImode);
1688      cmp = gen_reg_rtx (BImode);
1689      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1690			      gen_rtx_fmt_ee (ncode, BImode,
1691					      ret, const0_rtx)));
1692
1693      insns = get_insns ();
1694      end_sequence ();
1695
1696      emit_libcall_block (insns, cmp, cmp,
1697			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1698      code = NE;
1699    }
1700  else
1701    {
1702      cmp = gen_reg_rtx (BImode);
1703      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1704			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1705      code = NE;
1706    }
1707
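  /* CMP now holds a BImode compare result and CODE is EQ or NE, so the
     expression returned is always a simple test of CMP against zero in
     the requested MODE.  */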
1708  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1709}
1710
1711/* Emit the appropriate sequence for a call.  */
1712
1713void
1714ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1715		  int sibcall_p)
1716{
1717  rtx insn, b0;
1718
1719  addr = XEXP (addr, 0);
1720  addr = convert_memory_address (DImode, addr);
1721  b0 = gen_rtx_REG (DImode, R_BR (0));
1722
1723  /* ??? Should do this for functions known to bind local too.  */
1724  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1725    {
1726      if (sibcall_p)
1727	insn = gen_sibcall_nogp (addr);
1728      else if (! retval)
1729	insn = gen_call_nogp (addr, b0);
1730      else
1731	insn = gen_call_value_nogp (retval, addr, b0);
1732      insn = emit_call_insn (insn);
1733    }
1734  else
1735    {
1736      if (sibcall_p)
1737	insn = gen_sibcall_gp (addr);
1738      else if (! retval)
1739	insn = gen_call_gp (addr, b0);
1740      else
1741	insn = gen_call_value_gp (retval, addr, b0);
1742      insn = emit_call_insn (insn);
1743
1744      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1745    }
1746
1747  if (sibcall_p)
1748    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1749}
1750
1751void
1752ia64_reload_gp (void)
1753{
1754  rtx tmp;
1755
1756  if (current_frame_info.reg_save_gp)
1757    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1758  else
1759    {
1760      HOST_WIDE_INT offset;
1761
1762      offset = (current_frame_info.spill_cfa_off
1763	        + current_frame_info.spill_size);
1764      if (frame_pointer_needed)
1765        {
1766          tmp = hard_frame_pointer_rtx;
1767          offset = -offset;
1768        }
1769      else
1770        {
1771          tmp = stack_pointer_rtx;
1772          offset = current_frame_info.total_size - offset;
1773        }
1774
1775      if (CONST_OK_FOR_I (offset))
1776        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1777			       tmp, GEN_INT (offset)));
1778      else
1779        {
1780          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1781          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1782			         pic_offset_table_rtx, tmp));
1783        }
1784
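      /* At this point pic_offset_table_rtx holds the address of the GP
         save slot; the move at the end of the function reloads the GP
         from that slot.  */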
1785      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1786    }
1787
1788  emit_move_insn (pic_offset_table_rtx, tmp);
1789}
1790
1791void
1792ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1793		 rtx scratch_b, int noreturn_p, int sibcall_p)
1794{
1795  rtx insn;
1796  bool is_desc = false;
1797
1798  /* If we find we're calling through a register, then we're actually
1799     calling through a descriptor, so load up the values.  */
1800  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1801    {
1802      rtx tmp;
1803      bool addr_dead_p;
1804
1805      /* ??? We are currently constrained to *not* use peep2, because
1806	 we can legitimately change the global lifetime of the GP
1807	 (in the form of killing where previously live).  This is
1808	 because a call through a descriptor doesn't use the previous
1809	 value of the GP, while a direct call does, and we do not
1810	 commit to either form until the split here.
1811
1812	 That said, this means that we lack precise life info for
1813	 whether ADDR is dead after this call.  This is not terribly
1814	 important, since we can fix things up essentially for free
1815	 with the POST_DEC below, but it's nice to not use it when we
1816	 can immediately tell it's not necessary.  */
1817      addr_dead_p = ((noreturn_p || sibcall_p
1818		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1819					    REGNO (addr)))
1820		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1821
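      /* An IA-64 function descriptor is two consecutive 8-byte words:
         the entry point address followed by the callee's GP value.  The
         POST_INC load below fetches the first word and leaves ADDR
         pointing at the second.  */
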
1822      /* Load the code address into scratch_b.  */
1823      tmp = gen_rtx_POST_INC (Pmode, addr);
1824      tmp = gen_rtx_MEM (Pmode, tmp);
1825      emit_move_insn (scratch_r, tmp);
1826      emit_move_insn (scratch_b, scratch_r);
1827
1828      /* Load the GP address.  If ADDR is not dead here, then we must
1829	 revert the change made above via the POST_INCREMENT.  */
1830      if (!addr_dead_p)
1831	tmp = gen_rtx_POST_DEC (Pmode, addr);
1832      else
1833	tmp = addr;
1834      tmp = gen_rtx_MEM (Pmode, tmp);
1835      emit_move_insn (pic_offset_table_rtx, tmp);
1836
1837      is_desc = true;
1838      addr = scratch_b;
1839    }
1840
1841  if (sibcall_p)
1842    insn = gen_sibcall_nogp (addr);
1843  else if (retval)
1844    insn = gen_call_value_nogp (retval, addr, retaddr);
1845  else
1846    insn = gen_call_nogp (addr, retaddr);
1847  emit_call_insn (insn);
1848
1849  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1850    ia64_reload_gp ();
1851}
1852
1853/* Begin the assembly file.  */
1854
1855static void
1856ia64_file_start (void)
1857{
1858  default_file_start ();
1859  emit_safe_across_calls ();
1860}
1861
1862void
1863emit_safe_across_calls (void)
1864{
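  /* Scan p1..p63 for maximal runs of predicate registers that are not
     call-used and list them, as ranges, in a single
     .pred.safe_across_calls directive so the assembler knows they are
     preserved across calls.  */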
1865  unsigned int rs, re;
1866  int out_state;
1867
1868  rs = 1;
1869  out_state = 0;
1870  while (1)
1871    {
1872      while (rs < 64 && call_used_regs[PR_REG (rs)])
1873	rs++;
1874      if (rs >= 64)
1875	break;
1876      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1877	continue;
1878      if (out_state == 0)
1879	{
1880	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
1881	  out_state = 1;
1882	}
1883      else
1884	fputc (',', asm_out_file);
1885      if (re == rs + 1)
1886	fprintf (asm_out_file, "p%u", rs);
1887      else
1888	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1889      rs = re + 1;
1890    }
1891  if (out_state)
1892    fputc ('\n', asm_out_file);
1893}
1894
1895/* Helper function for ia64_compute_frame_size: find an appropriate general
1896   register to spill some special register to.  current_frame_info.gr_used_mask
1897   contains bits for GR0 to GR31 that have already been allocated by this routine.
1898   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1899
1900static int
1901find_gr_spill (int try_locals)
1902{
1903  int regno;
1904
1905  /* If this is a leaf function, first try an otherwise unused
1906     call-clobbered register.  */
1907  if (current_function_is_leaf)
1908    {
1909      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1910	if (! regs_ever_live[regno]
1911	    && call_used_regs[regno]
1912	    && ! fixed_regs[regno]
1913	    && ! global_regs[regno]
1914	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1915	  {
1916	    current_frame_info.gr_used_mask |= 1 << regno;
1917	    return regno;
1918	  }
1919    }
1920
1921  if (try_locals)
1922    {
1923      regno = current_frame_info.n_local_regs;
1924      /* If there is a frame pointer, then we can't use loc79, because
1925	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
1926	 reg_name switching code in ia64_expand_prologue.  */
1927      if (regno < (80 - frame_pointer_needed))
1928	{
1929	  current_frame_info.n_local_regs = regno + 1;
1930	  return LOC_REG (0) + regno;
1931	}
1932    }
1933
1934  /* Failed to find a general register to spill to.  Must use stack.  */
1935  return 0;
1936}
1937
1938/* In order to make for nice schedules, we try to allocate every temporary
1939   to a different register.  We must of course stay away from call-saved,
1940   fixed, and global registers.  We must also stay away from registers
1941   allocated in current_frame_info.gr_used_mask, since those include regs
1942   used all through the prologue.
1943
1944   Any register allocated here must be used immediately.  The idea is to
1945   aid scheduling, not to solve data flow problems.  */
1946
1947static int last_scratch_gr_reg;
1948
1949static int
1950next_scratch_gr_reg (void)
1951{
1952  int i, regno;
1953
1954  for (i = 0; i < 32; ++i)
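  /* Rotate through r0..r31 starting just after the register handed out
     last time, so consecutive requests get distinct registers.  */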
1955    {
1956      regno = (last_scratch_gr_reg + i + 1) & 31;
1957      if (call_used_regs[regno]
1958	  && ! fixed_regs[regno]
1959	  && ! global_regs[regno]
1960	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1961	{
1962	  last_scratch_gr_reg = regno;
1963	  return regno;
1964	}
1965    }
1966
1967  /* There must be _something_ available.  */
1968  abort ();
1969}
1970
1971/* Helper function for ia64_compute_frame_size, called through
1972   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
1973
1974static void
1975mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
1976{
1977  unsigned int regno = REGNO (reg);
1978  if (regno < 32)
1979    {
1980      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1981      for (i = 0; i < n; ++i)
1982	current_frame_info.gr_used_mask |= 1 << (regno + i);
1983    }
1984}
1985
1986/* Compute the frame layout for the current function and record the results
1987   in current_frame_info.  SIZE is the number of bytes of space needed for
1988   local variables.  */
1989
1990static void
1991ia64_compute_frame_size (HOST_WIDE_INT size)
1992{
1993  HOST_WIDE_INT total_size;
1994  HOST_WIDE_INT spill_size = 0;
1995  HOST_WIDE_INT extra_spill_size = 0;
1996  HOST_WIDE_INT pretend_args_size;
1997  HARD_REG_SET mask;
1998  int n_spilled = 0;
1999  int spilled_gr_p = 0;
2000  int spilled_fr_p = 0;
2001  unsigned int regno;
2002  int i;
2003
2004  if (current_frame_info.initialized)
2005    return;
2006
2007  memset (&current_frame_info, 0, sizeof current_frame_info);
2008  CLEAR_HARD_REG_SET (mask);
2009
2010  /* Don't allocate scratches to the return register.  */
2011  diddle_return_value (mark_reg_gr_used_mask, NULL);
2012
2013  /* Don't allocate scratches to the EH scratch registers.  */
2014  if (cfun->machine->ia64_eh_epilogue_sp)
2015    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2016  if (cfun->machine->ia64_eh_epilogue_bsp)
2017    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2018
2019  /* Find the size of the register stack frame.  We have only 80 local
2020     registers, because we reserve 8 for the inputs and 8 for the
2021     outputs.  */
2022
2023  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2024     since we'll be adjusting that down later.  */
2025  regno = LOC_REG (78) + ! frame_pointer_needed;
2026  for (; regno >= LOC_REG (0); regno--)
2027    if (regs_ever_live[regno])
2028      break;
2029  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2030
2031  /* For functions marked with the syscall_linkage attribute, we must mark
2032     all eight input registers as in use, so that locals aren't visible to
2033     the caller.  */
2034
2035  if (cfun->machine->n_varargs > 0
2036      || lookup_attribute ("syscall_linkage",
2037			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2038    current_frame_info.n_input_regs = 8;
2039  else
2040    {
2041      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2042	if (regs_ever_live[regno])
2043	  break;
2044      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2045    }
2046
2047  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2048    if (regs_ever_live[regno])
2049      break;
2050  i = regno - OUT_REG (0) + 1;
2051
2052  /* When -p profiling, we need one output register for the mcount argument.
2053     Likewise for -a profiling for the bb_init_func argument.  For -ax
2054     profiling, we need two output registers for the two bb_init_trace_func
2055     arguments.  */
2056  if (current_function_profile)
2057    i = MAX (i, 1);
2058  current_frame_info.n_output_regs = i;
2059
2060  /* ??? No rotating register support yet.  */
2061  current_frame_info.n_rotate_regs = 0;
2062
2063  /* Discover which registers need spilling, and how much room that
2064     will take.  Begin with floating point and general registers,
2065     which will always wind up on the stack.  */
2066
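  /* FR spills take 16 bytes apiece; GR and BR spills take 8.  */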
2067  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2068    if (regs_ever_live[regno] && ! call_used_regs[regno])
2069      {
2070	SET_HARD_REG_BIT (mask, regno);
2071	spill_size += 16;
2072	n_spilled += 1;
2073	spilled_fr_p = 1;
2074      }
2075
2076  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2077    if (regs_ever_live[regno] && ! call_used_regs[regno])
2078      {
2079	SET_HARD_REG_BIT (mask, regno);
2080	spill_size += 8;
2081	n_spilled += 1;
2082	spilled_gr_p = 1;
2083      }
2084
2085  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2086    if (regs_ever_live[regno] && ! call_used_regs[regno])
2087      {
2088	SET_HARD_REG_BIT (mask, regno);
2089	spill_size += 8;
2090	n_spilled += 1;
2091      }
2092
2093  /* Now come all special registers that might get saved in other
2094     general registers.  */
2095
2096  if (frame_pointer_needed)
2097    {
2098      current_frame_info.reg_fp = find_gr_spill (1);
2099      /* If we did not get a register, then we take LOC79.  This is guaranteed
2100	 to be free, even if regs_ever_live is already set, because this is
2101	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2102	 as we don't count loc79 above.  */
2103      if (current_frame_info.reg_fp == 0)
2104	{
2105	  current_frame_info.reg_fp = LOC_REG (79);
2106	  current_frame_info.n_local_regs++;
2107	}
2108    }
2109
2110  if (! current_function_is_leaf)
2111    {
2112      /* Emit a save of BR0 if we call other functions.  Do this even
2113	 if this function doesn't return, as EH depends on this to be
2114	 able to unwind the stack.  */
2115      SET_HARD_REG_BIT (mask, BR_REG (0));
2116
2117      current_frame_info.reg_save_b0 = find_gr_spill (1);
2118      if (current_frame_info.reg_save_b0 == 0)
2119	{
2120	  spill_size += 8;
2121	  n_spilled += 1;
2122	}
2123
2124      /* Similarly for ar.pfs.  */
2125      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2126      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2127      if (current_frame_info.reg_save_ar_pfs == 0)
2128	{
2129	  extra_spill_size += 8;
2130	  n_spilled += 1;
2131	}
2132
2133      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2134	 registers are clobbered, so we fall back to the stack.  */
2135      current_frame_info.reg_save_gp
2136	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2137      if (current_frame_info.reg_save_gp == 0)
2138	{
2139	  SET_HARD_REG_BIT (mask, GR_REG (1));
2140	  spill_size += 8;
2141	  n_spilled += 1;
2142	}
2143    }
2144  else
2145    {
2146      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2147	{
2148	  SET_HARD_REG_BIT (mask, BR_REG (0));
2149	  spill_size += 8;
2150	  n_spilled += 1;
2151	}
2152
2153      if (regs_ever_live[AR_PFS_REGNUM])
2154	{
2155	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2156	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2157	  if (current_frame_info.reg_save_ar_pfs == 0)
2158	    {
2159	      extra_spill_size += 8;
2160	      n_spilled += 1;
2161	    }
2162	}
2163    }
2164
2165  /* Unwind descriptor hackery: things are most efficient if we allocate
2166     consecutive GR save registers for RP, PFS, FP in that order. However,
2167     it is absolutely critical that FP get the only hard register that's
2168     guaranteed to be free, so we allocated it first.  If all three did
2169     happen to be allocated hard regs, and are consecutive, rearrange them
2170     into the preferred order now.  */
2171  if (current_frame_info.reg_fp != 0
2172      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2173      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2174    {
2175      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2176      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2177      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2178    }
2179
2180  /* See if we need to store the predicate register block.  */
2181  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2182    if (regs_ever_live[regno] && ! call_used_regs[regno])
2183      break;
2184  if (regno <= PR_REG (63))
2185    {
2186      SET_HARD_REG_BIT (mask, PR_REG (0));
2187      current_frame_info.reg_save_pr = find_gr_spill (1);
2188      if (current_frame_info.reg_save_pr == 0)
2189	{
2190	  extra_spill_size += 8;
2191	  n_spilled += 1;
2192	}
2193
2194      /* ??? Mark them all as used so that register renaming and such
2195	 are free to use them.  */
2196      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2197	regs_ever_live[regno] = 1;
2198    }
2199
2200  /* If we're forced to use st8.spill, we're forced to save and restore
2201     ar.unat as well.  The check for existing liveness allows inline asm
2202     to touch ar.unat.  */
2203  if (spilled_gr_p || cfun->machine->n_varargs
2204      || regs_ever_live[AR_UNAT_REGNUM])
2205    {
2206      regs_ever_live[AR_UNAT_REGNUM] = 1;
2207      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2208      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2209      if (current_frame_info.reg_save_ar_unat == 0)
2210	{
2211	  extra_spill_size += 8;
2212	  n_spilled += 1;
2213	}
2214    }
2215
2216  if (regs_ever_live[AR_LC_REGNUM])
2217    {
2218      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2219      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2220      if (current_frame_info.reg_save_ar_lc == 0)
2221	{
2222	  extra_spill_size += 8;
2223	  n_spilled += 1;
2224	}
2225    }
2226
2227  /* If we have an odd number of words of pretend arguments written to
2228     the stack, then the FR save area will be unaligned.  We round the
2229     size of this area up to keep things 16 byte aligned.  */
2230  if (spilled_fr_p)
2231    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2232  else
2233    pretend_args_size = current_function_pretend_args_size;
2234
2235  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2236		+ current_function_outgoing_args_size);
2237  total_size = IA64_STACK_ALIGN (total_size);
2238
2239  /* We always use the 16-byte scratch area provided by the caller, but
2240     if we are a leaf function, there's no one to which we need to provide
2241     a scratch area.  */
2242  if (current_function_is_leaf)
2243    total_size = MAX (0, total_size - 16);
2244
2245  current_frame_info.total_size = total_size;
2246  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2247  current_frame_info.spill_size = spill_size;
2248  current_frame_info.extra_spill_size = extra_spill_size;
2249  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2250  current_frame_info.n_spilled = n_spilled;
2251  current_frame_info.initialized = reload_completed;
2252}
2253
2254/* Compute the initial difference between the specified pair of registers.  */
2255
2256HOST_WIDE_INT
2257ia64_initial_elimination_offset (int from, int to)
2258{
2259  HOST_WIDE_INT offset;
2260
2261  ia64_compute_frame_size (get_frame_size ());
2262  switch (from)
2263    {
2264    case FRAME_POINTER_REGNUM:
2265      if (to == HARD_FRAME_POINTER_REGNUM)
2266	{
2267	  if (current_function_is_leaf)
2268	    offset = -current_frame_info.total_size;
2269	  else
2270	    offset = -(current_frame_info.total_size
2271		       - current_function_outgoing_args_size - 16);
2272	}
2273      else if (to == STACK_POINTER_REGNUM)
2274	{
2275	  if (current_function_is_leaf)
2276	    offset = 0;
2277	  else
2278	    offset = 16 + current_function_outgoing_args_size;
2279	}
2280      else
2281	abort ();
2282      break;
2283
2284    case ARG_POINTER_REGNUM:
2285      /* Arguments start above the 16 byte save area, unless stdarg
2286	 in which case we store through the 16 byte save area.  */
2287      if (to == HARD_FRAME_POINTER_REGNUM)
2288	offset = 16 - current_function_pretend_args_size;
2289      else if (to == STACK_POINTER_REGNUM)
2290	offset = (current_frame_info.total_size
2291		  + 16 - current_function_pretend_args_size);
2292      else
2293	abort ();
2294      break;
2295
2296    default:
2297      abort ();
2298    }
2299
2300  return offset;
2301}
2302
2303/* If there are more than a trivial number of register spills, we use
2304   two interleaved iterators so that we can get two memory references
2305   per insn group.
2306
2307   In order to simplify things in the prologue and epilogue expanders,
2308   we use helper functions to fix up the memory references after the
2309   fact with the appropriate offsets for a POST_MODIFY addressing mode.
2310   The following data structure tracks the state of the two iterators
2311   while insns are being emitted.  */
2312
2313struct spill_fill_data
2314{
2315  rtx init_after;		/* point at which to emit initializations */
2316  rtx init_reg[2];		/* initial base register */
2317  rtx iter_reg[2];		/* the iterator registers */
2318  rtx *prev_addr[2];		/* address of last memory use */
2319  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2320  HOST_WIDE_INT prev_off[2];	/* last offset */
2321  int n_iter;			/* number of iterators in use */
2322  int next_iter;		/* next iterator to use */
2323  unsigned int save_gr_used_mask;
2324};
2325
2326static struct spill_fill_data spill_fill_data;
2327
2328static void
2329setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2330{
2331  int i;
2332
2333  spill_fill_data.init_after = get_last_insn ();
2334  spill_fill_data.init_reg[0] = init_reg;
2335  spill_fill_data.init_reg[1] = init_reg;
2336  spill_fill_data.prev_addr[0] = NULL;
2337  spill_fill_data.prev_addr[1] = NULL;
2338  spill_fill_data.prev_insn[0] = NULL;
2339  spill_fill_data.prev_insn[1] = NULL;
2340  spill_fill_data.prev_off[0] = cfa_off;
2341  spill_fill_data.prev_off[1] = cfa_off;
2342  spill_fill_data.next_iter = 0;
2343  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2344
2345  spill_fill_data.n_iter = 1 + (n_spills > 2);
2346  for (i = 0; i < spill_fill_data.n_iter; ++i)
2347    {
2348      int regno = next_scratch_gr_reg ();
2349      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2350      current_frame_info.gr_used_mask |= 1 << regno;
2351    }
2352}
2353
2354static void
2355finish_spill_pointers (void)
2356{
2357  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2358}
2359
2360static rtx
2361spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2362{
2363  int iter = spill_fill_data.next_iter;
2364  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2365  rtx disp_rtx = GEN_INT (disp);
2366  rtx mem;
2367
2368  if (spill_fill_data.prev_addr[iter])
2369    {
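      /* A previous reference already went through this iterator.  If the
         displacement fits a post-modify immediate, rewrite that earlier
         address into a POST_MODIFY so the iterator advances for free;
         otherwise emit an explicit add to the iterator.  */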
2370      if (CONST_OK_FOR_N (disp))
2371	{
2372	  *spill_fill_data.prev_addr[iter]
2373	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2374				   gen_rtx_PLUS (DImode,
2375						 spill_fill_data.iter_reg[iter],
2376						 disp_rtx));
2377	  REG_NOTES (spill_fill_data.prev_insn[iter])
2378	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2379				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2380	}
2381      else
2382	{
2383	  /* ??? Could use register post_modify for loads.  */
2384	  if (! CONST_OK_FOR_I (disp))
2385	    {
2386	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2387	      emit_move_insn (tmp, disp_rtx);
2388	      disp_rtx = tmp;
2389	    }
2390	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2391				 spill_fill_data.iter_reg[iter], disp_rtx));
2392	}
2393    }
2394  /* Micro-optimization: if we've created a frame pointer, it's at
2395     CFA 0, which may allow the real iterator to be initialized lower,
2396     slightly increasing parallelism.  Also, if there are few saves
2397     it may eliminate the iterator entirely.  */
2398  else if (disp == 0
2399	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2400	   && frame_pointer_needed)
2401    {
2402      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2403      set_mem_alias_set (mem, get_varargs_alias_set ());
2404      return mem;
2405    }
2406  else
2407    {
2408      rtx seq, insn;
2409
2410      if (disp == 0)
2411	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2412			 spill_fill_data.init_reg[iter]);
2413      else
2414	{
2415	  start_sequence ();
2416
2417	  if (! CONST_OK_FOR_I (disp))
2418	    {
2419	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2420	      emit_move_insn (tmp, disp_rtx);
2421	      disp_rtx = tmp;
2422	    }
2423
2424	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2425				 spill_fill_data.init_reg[iter],
2426				 disp_rtx));
2427
2428	  seq = get_insns ();
2429	  end_sequence ();
2430	}
2431
2432      /* Be careful if this is the first insn in the sequence.  */
2433      if (spill_fill_data.init_after)
2434	insn = emit_insn_after (seq, spill_fill_data.init_after);
2435      else
2436	{
2437	  rtx first = get_insns ();
2438	  if (first)
2439	    insn = emit_insn_before (seq, first);
2440	  else
2441	    insn = emit_insn (seq);
2442	}
2443      spill_fill_data.init_after = insn;
2444
2445      /* If DISP is 0, we may or may not have a further adjustment
2446	 afterward.  If we do, then the load/store insn may be modified
2447	 to be a post-modify.  If we don't, then this copy may be
2448	 eliminated by copyprop_hardreg_forward, which makes this
2449	 insn garbage, which runs afoul of the sanity check in
2450	 propagate_one_insn.  So mark this insn as legal to delete.  */
2451      if (disp == 0)
2452	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2453					     REG_NOTES (insn));
2454    }
2455
2456  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2457
2458  /* ??? Not all of the spills are for varargs, but some of them are.
2459     The rest of the spills belong in an alias set of their own.  But
2460     it doesn't actually hurt to include them here.  */
2461  set_mem_alias_set (mem, get_varargs_alias_set ());
2462
2463  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2464  spill_fill_data.prev_off[iter] = cfa_off;
2465
2466  if (++iter >= spill_fill_data.n_iter)
2467    iter = 0;
2468  spill_fill_data.next_iter = iter;
2469
2470  return mem;
2471}
2472
2473static void
2474do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2475	  rtx frame_reg)
2476{
2477  int iter = spill_fill_data.next_iter;
2478  rtx mem, insn;
2479
2480  mem = spill_restore_mem (reg, cfa_off);
2481  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2482  spill_fill_data.prev_insn[iter] = insn;
2483
2484  if (frame_reg)
2485    {
2486      rtx base;
2487      HOST_WIDE_INT off;
2488
2489      RTX_FRAME_RELATED_P (insn) = 1;
2490
2491      /* Don't even pretend that the unwind code can intuit its way
2492	 through a pair of interleaved post_modify iterators.  Just
2493	 provide the correct answer.  */
2494
2495      if (frame_pointer_needed)
2496	{
2497	  base = hard_frame_pointer_rtx;
2498	  off = - cfa_off;
2499	}
2500      else
2501	{
2502	  base = stack_pointer_rtx;
2503	  off = current_frame_info.total_size - cfa_off;
2504	}
2505
2506      REG_NOTES (insn)
2507	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2508		gen_rtx_SET (VOIDmode,
2509			     gen_rtx_MEM (GET_MODE (reg),
2510					  plus_constant (base, off)),
2511			     frame_reg),
2512		REG_NOTES (insn));
2513    }
2514}
2515
2516static void
2517do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2518{
2519  int iter = spill_fill_data.next_iter;
2520  rtx insn;
2521
2522  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2523				GEN_INT (cfa_off)));
2524  spill_fill_data.prev_insn[iter] = insn;
2525}
2526
2527/* Wrapper functions that discard the CONST_INT spill offset.  These
2528   exist so that we can give gr_spill/gr_fill the offset they need and
2529   use a consistent function interface.  */
2530
2531static rtx
2532gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2533{
2534  return gen_movdi (dest, src);
2535}
2536
2537static rtx
2538gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2539{
2540  return gen_fr_spill (dest, src);
2541}
2542
2543static rtx
2544gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2545{
2546  return gen_fr_restore (dest, src);
2547}
2548
2549/* Called after register allocation to add any instructions needed for the
2550   prologue.  Using a prologue insn is favored compared to putting all of the
2551   instructions in output_function_prologue(), since it allows the scheduler
2552   to intermix instructions with the saves of the caller saved registers.  In
2553   some cases, it might be necessary to emit a barrier instruction as the last
2554   insn to prevent such scheduling.
2555
2556   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2557   so that the debug info generation code can handle them properly.
2558
2559   The register save area is laid out like so:
2560   cfa+16
2561	[ varargs spill area ]
2562	[ fr register spill area ]
2563	[ br register spill area ]
2564	[ ar register spill area ]
2565	[ pr register spill area ]
2566	[ gr register spill area ] */
2567
2568/* ??? We get inefficient code when the frame size is larger than can fit in an
2569   adds instruction.  */
2570
2571void
2572ia64_expand_prologue (void)
2573{
2574  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2575  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2576  rtx reg, alt_reg;
2577
2578  ia64_compute_frame_size (get_frame_size ());
2579  last_scratch_gr_reg = 15;
2580
2581  /* If there is no epilogue, then we don't need some prologue insns.
2582     We need to avoid emitting the dead prologue insns, because flow
2583     will complain about them.  */
2584  if (optimize)
2585    {
2586      edge e;
2587
2588      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2589	if ((e->flags & EDGE_FAKE) == 0
2590	    && (e->flags & EDGE_FALLTHRU) != 0)
2591	  break;
2592      epilogue_p = (e != NULL);
2593    }
2594  else
2595    epilogue_p = 1;
2596
2597  /* Set the local, input, and output register names.  We need to do this
2598     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2599     half.  If we use in/loc/out register names, then we get assembler errors
2600     in crtn.S because there is no alloc insn or regstk directive in there.  */
2601  if (! TARGET_REG_NAMES)
2602    {
2603      int inputs = current_frame_info.n_input_regs;
2604      int locals = current_frame_info.n_local_regs;
2605      int outputs = current_frame_info.n_output_regs;
2606
2607      for (i = 0; i < inputs; i++)
2608	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2609      for (i = 0; i < locals; i++)
2610	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2611      for (i = 0; i < outputs; i++)
2612	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2613    }
2614
2615  /* Set the frame pointer register name.  The regnum is logically loc79,
2616     but of course we'll not have allocated that many locals.  Rather than
2617     worrying about renumbering the existing rtxs, we adjust the name.  */
2618  /* ??? This code means that we can never use one local register when
2619     there is a frame pointer.  loc79 gets wasted in this case, as it is
2620     renamed to a register that will never be used.  See also the try_locals
2621     code in find_gr_spill.  */
2622  if (current_frame_info.reg_fp)
2623    {
2624      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2625      reg_names[HARD_FRAME_POINTER_REGNUM]
2626	= reg_names[current_frame_info.reg_fp];
2627      reg_names[current_frame_info.reg_fp] = tmp;
2628    }
2629
2630  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2631  if (current_frame_info.n_local_regs == 0
2632      && current_frame_info.n_output_regs == 0
2633      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2634      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2635    {
2636      /* If there is no alloc, but there are input registers used, then we
2637	 need a .regstk directive.  */
2638      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2639      ar_pfs_save_reg = NULL_RTX;
2640    }
2641  else
2642    {
2643      current_frame_info.need_regstk = 0;
2644
2645      if (current_frame_info.reg_save_ar_pfs)
2646	regno = current_frame_info.reg_save_ar_pfs;
2647      else
2648	regno = next_scratch_gr_reg ();
2649      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2650
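      /* The alloc insn sizes the register stack frame and, as a side
         effect, copies the previous ar.pfs into AR_PFS_SAVE_REG.  */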
2651      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2652				   GEN_INT (current_frame_info.n_input_regs),
2653				   GEN_INT (current_frame_info.n_local_regs),
2654				   GEN_INT (current_frame_info.n_output_regs),
2655				   GEN_INT (current_frame_info.n_rotate_regs)));
2656      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2657    }
2658
2659  /* Set up frame pointer, stack pointer, and spill iterators.  */
2660
2661  n_varargs = cfun->machine->n_varargs;
2662  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2663			stack_pointer_rtx, 0);
2664
2665  if (frame_pointer_needed)
2666    {
2667      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2668      RTX_FRAME_RELATED_P (insn) = 1;
2669    }
2670
2671  if (current_frame_info.total_size != 0)
2672    {
2673      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2674      rtx offset;
2675
2676      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2677	offset = frame_size_rtx;
2678      else
2679	{
2680	  regno = next_scratch_gr_reg ();
2681	  offset = gen_rtx_REG (DImode, regno);
2682	  emit_move_insn (offset, frame_size_rtx);
2683	}
2684
2685      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2686				    stack_pointer_rtx, offset));
2687
2688      if (! frame_pointer_needed)
2689	{
2690	  RTX_FRAME_RELATED_P (insn) = 1;
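	  /* If the adjustment went through a scratch register, attach a
	     note giving the simple sp = sp + size form so the unwind and
	     debug info record the true stack adjustment.  */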
2691	  if (GET_CODE (offset) != CONST_INT)
2692	    {
2693	      REG_NOTES (insn)
2694		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2695			gen_rtx_SET (VOIDmode,
2696				     stack_pointer_rtx,
2697				     gen_rtx_PLUS (DImode,
2698						   stack_pointer_rtx,
2699						   frame_size_rtx)),
2700			REG_NOTES (insn));
2701	    }
2702	}
2703
2704      /* ??? At this point we must generate a magic insn that appears to
2705	 modify the stack pointer, the frame pointer, and all spill
2706	 iterators.  This would allow the most scheduling freedom.  For
2707	 now, just hard stop.  */
2708      emit_insn (gen_blockage ());
2709    }
2710
2711  /* Must copy out ar.unat before doing any integer spills.  */
2712  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2713    {
2714      if (current_frame_info.reg_save_ar_unat)
2715	ar_unat_save_reg
2716	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2717      else
2718	{
2719	  alt_regno = next_scratch_gr_reg ();
2720	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2721	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2722	}
2723
2724      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2725      insn = emit_move_insn (ar_unat_save_reg, reg);
2726      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2727
2728      /* Even if we're not going to generate an epilogue, we still
2729	 need to save the register so that EH works.  */
2730      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2731	emit_insn (gen_prologue_use (ar_unat_save_reg));
2732    }
2733  else
2734    ar_unat_save_reg = NULL_RTX;
2735
2736  /* Spill all varargs registers.  Do this before spilling any GR registers,
2737     since we want the UNAT bits for the GR registers to override the UNAT
2738     bits from varargs, which we don't care about.  */
2739
2740  cfa_off = -16;
2741  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2742    {
2743      reg = gen_rtx_REG (DImode, regno);
2744      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2745    }
2746
2747  /* Locate the bottom of the register save area.  */
2748  cfa_off = (current_frame_info.spill_cfa_off
2749	     + current_frame_info.spill_size
2750	     + current_frame_info.extra_spill_size);
2751
2752  /* Save the predicate register block either in a register or in memory.  */
2753  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2754    {
2755      reg = gen_rtx_REG (DImode, PR_REG (0));
2756      if (current_frame_info.reg_save_pr != 0)
2757	{
2758	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2759	  insn = emit_move_insn (alt_reg, reg);
2760
2761	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2762	     64 hard registers.  */
2763	  RTX_FRAME_RELATED_P (insn) = 1;
2764	  REG_NOTES (insn)
2765	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2766			gen_rtx_SET (VOIDmode, alt_reg, reg),
2767			REG_NOTES (insn));
2768
2769	  /* Even if we're not going to generate an epilogue, we still
2770	     need to save the register so that EH works.  */
2771	  if (! epilogue_p)
2772	    emit_insn (gen_prologue_use (alt_reg));
2773	}
2774      else
2775	{
2776	  alt_regno = next_scratch_gr_reg ();
2777	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2778	  insn = emit_move_insn (alt_reg, reg);
2779	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2780	  cfa_off -= 8;
2781	}
2782    }
2783
2784  /* Handle AR regs in numerical order.  All of them get special handling.  */
2785  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2786      && current_frame_info.reg_save_ar_unat == 0)
2787    {
2788      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2789      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2790      cfa_off -= 8;
2791    }
2792
2793  /* The alloc insn already copied ar.pfs into a general register.  The
2794     only thing we have to do now is copy that register to a stack slot
2795     if we'd not allocated a local register for the job.  */
2796  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2797      && current_frame_info.reg_save_ar_pfs == 0)
2798    {
2799      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2800      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2801      cfa_off -= 8;
2802    }
2803
2804  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2805    {
2806      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2807      if (current_frame_info.reg_save_ar_lc != 0)
2808	{
2809	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2810	  insn = emit_move_insn (alt_reg, reg);
2811	  RTX_FRAME_RELATED_P (insn) = 1;
2812
2813	  /* Even if we're not going to generate an epilogue, we still
2814	     need to save the register so that EH works.  */
2815	  if (! epilogue_p)
2816	    emit_insn (gen_prologue_use (alt_reg));
2817	}
2818      else
2819	{
2820	  alt_regno = next_scratch_gr_reg ();
2821	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2822	  emit_move_insn (alt_reg, reg);
2823	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2824	  cfa_off -= 8;
2825	}
2826    }
2827
2828  if (current_frame_info.reg_save_gp)
2829    {
2830      insn = emit_move_insn (gen_rtx_REG (DImode,
2831					  current_frame_info.reg_save_gp),
2832			     pic_offset_table_rtx);
2833      /* We don't know for sure yet if this is actually needed, since
2834	 we've not split the PIC call patterns.  If all of the calls
2835	 are indirect, and not followed by any uses of the gp, then
2836	 this save is dead.  Allow it to go away.  */
2837      REG_NOTES (insn)
2838	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2839    }
2840
2841  /* We should now be at the base of the gr/br/fr spill area.  */
2842  if (cfa_off != (current_frame_info.spill_cfa_off
2843		  + current_frame_info.spill_size))
2844    abort ();
2845
2846  /* Spill all general registers.  */
2847  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2848    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2849      {
2850	reg = gen_rtx_REG (DImode, regno);
2851	do_spill (gen_gr_spill, reg, cfa_off, reg);
2852	cfa_off -= 8;
2853      }
2854
2855  /* Handle BR0 specially -- it may be getting stored permanently in
2856     some GR register.  */
2857  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2858    {
2859      reg = gen_rtx_REG (DImode, BR_REG (0));
2860      if (current_frame_info.reg_save_b0 != 0)
2861	{
2862	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2863	  insn = emit_move_insn (alt_reg, reg);
2864	  RTX_FRAME_RELATED_P (insn) = 1;
2865
2866	  /* Even if we're not going to generate an epilogue, we still
2867	     need to save the register so that EH works.  */
2868	  if (! epilogue_p)
2869	    emit_insn (gen_prologue_use (alt_reg));
2870	}
2871      else
2872	{
2873	  alt_regno = next_scratch_gr_reg ();
2874	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2875	  emit_move_insn (alt_reg, reg);
2876	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2877	  cfa_off -= 8;
2878	}
2879    }
2880
2881  /* Spill the rest of the BR registers.  */
2882  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2883    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2884      {
2885	alt_regno = next_scratch_gr_reg ();
2886	alt_reg = gen_rtx_REG (DImode, alt_regno);
2887	reg = gen_rtx_REG (DImode, regno);
2888	emit_move_insn (alt_reg, reg);
2889	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2890	cfa_off -= 8;
2891      }
2892
2893  /* Align the frame and spill all FR registers.  */
2894  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2895    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2896      {
2897        if (cfa_off & 15)
2898	  abort ();
2899	reg = gen_rtx_REG (XFmode, regno);
2900	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2901	cfa_off -= 16;
2902      }
2903
2904  if (cfa_off != current_frame_info.spill_cfa_off)
2905    abort ();
2906
2907  finish_spill_pointers ();
2908}
2909
2910/* Called after register allocation to add any instructions needed for the
2911   epilogue.  Using an epilogue insn is favored compared to putting all of the
2912   instructions in output_function_epilogue(), since it allows the scheduler
2913   to intermix instructions with the saves of the caller saved registers.  In
2914   some cases, it might be necessary to emit a barrier instruction as the last
2915   insn to prevent such scheduling.  */
2916
2917void
2918ia64_expand_epilogue (int sibcall_p)
2919{
2920  rtx insn, reg, alt_reg, ar_unat_save_reg;
2921  int regno, alt_regno, cfa_off;
2922
2923  ia64_compute_frame_size (get_frame_size ());
2924
2925  /* If there is a frame pointer, then we use it instead of the stack
2926     pointer, so that the stack pointer does not need to be valid when
2927     the epilogue starts.  See EXIT_IGNORE_STACK.  */
2928  if (frame_pointer_needed)
2929    setup_spill_pointers (current_frame_info.n_spilled,
2930			  hard_frame_pointer_rtx, 0);
2931  else
2932    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2933			  current_frame_info.total_size);
2934
2935  if (current_frame_info.total_size != 0)
2936    {
2937      /* ??? At this point we must generate a magic insn that appears to
2938         modify the spill iterators and the frame pointer.  This would
2939	 allow the most scheduling freedom.  For now, just hard stop.  */
2940      emit_insn (gen_blockage ());
2941    }
2942
2943  /* Locate the bottom of the register save area.  */
2944  cfa_off = (current_frame_info.spill_cfa_off
2945	     + current_frame_info.spill_size
2946	     + current_frame_info.extra_spill_size);
2947
2948  /* Restore the predicate registers.  */
2949  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2950    {
2951      if (current_frame_info.reg_save_pr != 0)
2952	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2953      else
2954	{
2955	  alt_regno = next_scratch_gr_reg ();
2956	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2957	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2958	  cfa_off -= 8;
2959	}
2960      reg = gen_rtx_REG (DImode, PR_REG (0));
2961      emit_move_insn (reg, alt_reg);
2962    }
2963
2964  /* Restore the application registers.  */
2965
2966  /* Load the saved unat from the stack, but do not restore it until
2967     after the GRs have been restored.  */
2968  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2969    {
2970      if (current_frame_info.reg_save_ar_unat != 0)
2971        ar_unat_save_reg
2972	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2973      else
2974	{
2975	  alt_regno = next_scratch_gr_reg ();
2976	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2977	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2978	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
2979	  cfa_off -= 8;
2980	}
2981    }
2982  else
2983    ar_unat_save_reg = NULL_RTX;
2984
2985  if (current_frame_info.reg_save_ar_pfs != 0)
2986    {
2987      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
2988      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2989      emit_move_insn (reg, alt_reg);
2990    }
2991  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2992    {
2993      alt_regno = next_scratch_gr_reg ();
2994      alt_reg = gen_rtx_REG (DImode, alt_regno);
2995      do_restore (gen_movdi_x, alt_reg, cfa_off);
2996      cfa_off -= 8;
2997      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2998      emit_move_insn (reg, alt_reg);
2999    }
3000
3001  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3002    {
3003      if (current_frame_info.reg_save_ar_lc != 0)
3004	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3005      else
3006	{
3007	  alt_regno = next_scratch_gr_reg ();
3008	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3009	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3010	  cfa_off -= 8;
3011	}
3012      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3013      emit_move_insn (reg, alt_reg);
3014    }
3015
3016  /* We should now be at the base of the gr/br/fr spill area.  */
3017  if (cfa_off != (current_frame_info.spill_cfa_off
3018		  + current_frame_info.spill_size))
3019    abort ();
3020
3021  /* The GP may be stored on the stack in the prologue, but it's
3022     never restored in the epilogue.  Skip the stack slot.  */
3023  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3024    cfa_off -= 8;
3025
3026  /* Restore all general registers.  */
3027  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3028    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3029      {
3030	reg = gen_rtx_REG (DImode, regno);
3031	do_restore (gen_gr_restore, reg, cfa_off);
3032	cfa_off -= 8;
3033      }
3034
3035  /* Restore the branch registers.  Handle B0 specially, as it may
3036     have gotten stored in some GR register.  */
3037  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3038    {
3039      if (current_frame_info.reg_save_b0 != 0)
3040	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3041      else
3042	{
3043	  alt_regno = next_scratch_gr_reg ();
3044	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3045	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3046	  cfa_off -= 8;
3047	}
3048      reg = gen_rtx_REG (DImode, BR_REG (0));
3049      emit_move_insn (reg, alt_reg);
3050    }
3051
3052  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3053    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3054      {
3055	alt_regno = next_scratch_gr_reg ();
3056	alt_reg = gen_rtx_REG (DImode, alt_regno);
3057	do_restore (gen_movdi_x, alt_reg, cfa_off);
3058	cfa_off -= 8;
3059	reg = gen_rtx_REG (DImode, regno);
3060	emit_move_insn (reg, alt_reg);
3061      }
3062
3063  /* Restore floating point registers.  */
3064  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3065    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3066      {
3067        if (cfa_off & 15)
3068	  abort ();
3069	reg = gen_rtx_REG (XFmode, regno);
3070	do_restore (gen_fr_restore_x, reg, cfa_off);
3071	cfa_off -= 16;
3072      }
3073
3074  /* Restore ar.unat for real.  */
3075  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3076    {
3077      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3078      emit_move_insn (reg, ar_unat_save_reg);
3079    }
3080
3081  if (cfa_off != current_frame_info.spill_cfa_off)
3082    abort ();
3083
3084  finish_spill_pointers ();
3085
3086  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3087    {
3088      /* ??? At this point we must generate a magic insn that appears to
3089         modify the spill iterators, the stack pointer, and the frame
3090	 pointer.  This would allow the most scheduling freedom.  For now,
3091	 just hard stop.  */
3092      emit_insn (gen_blockage ());
3093    }
3094
3095  if (cfun->machine->ia64_eh_epilogue_sp)
3096    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3097  else if (frame_pointer_needed)
3098    {
3099      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3100      RTX_FRAME_RELATED_P (insn) = 1;
3101    }
3102  else if (current_frame_info.total_size)
3103    {
3104      rtx offset, frame_size_rtx;
3105
3106      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3107      if (CONST_OK_FOR_I (current_frame_info.total_size))
3108	offset = frame_size_rtx;
3109      else
3110	{
3111	  regno = next_scratch_gr_reg ();
3112	  offset = gen_rtx_REG (DImode, regno);
3113	  emit_move_insn (offset, frame_size_rtx);
3114	}
3115
3116      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3117				    offset));
3118
3119      RTX_FRAME_RELATED_P (insn) = 1;
3120      if (GET_CODE (offset) != CONST_INT)
3121	{
3122	  REG_NOTES (insn)
3123	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3124			gen_rtx_SET (VOIDmode,
3125				     stack_pointer_rtx,
3126				     gen_rtx_PLUS (DImode,
3127						   stack_pointer_rtx,
3128						   frame_size_rtx)),
3129			REG_NOTES (insn));
3130	}
3131    }
3132
3133  if (cfun->machine->ia64_eh_epilogue_bsp)
3134    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3135
3136  if (! sibcall_p)
3137    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3138  else
3139    {
3140      int fp = GR_REG (2);
3141      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the
3142	 first available call-clobbered register.  If there was a frame pointer
3143	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3144	 so we have to make sure we're using the string "r2" when emitting
3145	 the register name for the assembler.  */
3146      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3147	fp = HARD_FRAME_POINTER_REGNUM;
3148
3149      /* We must emit an alloc to force the input registers to become output
3150	 registers.  Otherwise, if the callee tries to pass its parameters
3151	 through to another call without an intervening alloc, then these
3152	 values get lost.  */
3153      /* ??? We don't need to preserve all input registers.  We only need to
3154	 preserve those input registers used as arguments to the sibling call.
3155	 It is unclear how to compute that number here.  */
3156      if (current_frame_info.n_input_regs != 0)
3157	emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3158			      GEN_INT (0), GEN_INT (0),
3159			      GEN_INT (current_frame_info.n_input_regs),
3160			      GEN_INT (0)));
3161    }
3162}
3163
3164/* Return 1 if br.ret can do all the work required to return from a
3165   function.  */
3166
3167int
3168ia64_direct_return (void)
3169{
3170  if (reload_completed && ! frame_pointer_needed)
3171    {
3172      ia64_compute_frame_size (get_frame_size ());
3173
3174      return (current_frame_info.total_size == 0
3175	      && current_frame_info.n_spilled == 0
3176	      && current_frame_info.reg_save_b0 == 0
3177	      && current_frame_info.reg_save_pr == 0
3178	      && current_frame_info.reg_save_ar_pfs == 0
3179	      && current_frame_info.reg_save_ar_unat == 0
3180	      && current_frame_info.reg_save_ar_lc == 0);
3181    }
3182  return 0;
3183}
3184
3185/* Return the magic cookie that we use to hold the return address
3186   during early compilation.  */
3187
3188rtx
3189ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3190{
3191  if (count != 0)
3192    return NULL;
3193  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3194}
3195
3196/* Split this value after reload, now that we know where the return
3197   address is saved.  */
3198
3199void
3200ia64_split_return_addr_rtx (rtx dest)
3201{
3202  rtx src;
3203
3204  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3205    {
3206      if (current_frame_info.reg_save_b0 != 0)
3207	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3208      else
3209	{
3210	  HOST_WIDE_INT off;
3211	  unsigned int regno;
3212
3213	  /* Compute offset from CFA for BR0.  */
3214	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3215	  off = (current_frame_info.spill_cfa_off
3216		 + current_frame_info.spill_size);
3217	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3218	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3219	      off -= 8;
3220
3221	  /* Convert CFA offset to a register based offset.  */
3222	  if (frame_pointer_needed)
3223	    src = hard_frame_pointer_rtx;
3224	  else
3225	    {
3226	      src = stack_pointer_rtx;
3227	      off += current_frame_info.total_size;
3228	    }
3229
3230	  /* Load address into scratch register.  */
3231	  if (CONST_OK_FOR_I (off))
3232	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3233	  else
3234	    {
3235	      emit_move_insn (dest, GEN_INT (off));
3236	      emit_insn (gen_adddi3 (dest, src, dest));
3237	    }
3238
3239	  src = gen_rtx_MEM (Pmode, dest);
3240	}
3241    }
3242  else
3243    src = gen_rtx_REG (DImode, BR_REG (0));
3244
3245  emit_move_insn (dest, src);
3246}
3247
3248int
3249ia64_hard_regno_rename_ok (int from, int to)
3250{
3251  /* Don't clobber any of the registers we reserved for the prologue.  */
3252  if (to == current_frame_info.reg_fp
3253      || to == current_frame_info.reg_save_b0
3254      || to == current_frame_info.reg_save_pr
3255      || to == current_frame_info.reg_save_ar_pfs
3256      || to == current_frame_info.reg_save_ar_unat
3257      || to == current_frame_info.reg_save_ar_lc)
3258    return 0;
3259
3260  if (from == current_frame_info.reg_fp
3261      || from == current_frame_info.reg_save_b0
3262      || from == current_frame_info.reg_save_pr
3263      || from == current_frame_info.reg_save_ar_pfs
3264      || from == current_frame_info.reg_save_ar_unat
3265      || from == current_frame_info.reg_save_ar_lc)
3266    return 0;
3267
3268  /* Don't use output registers outside the register frame.  */
3269  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3270    return 0;
3271
3272  /* Retain even/oddness on predicate register pairs.  */
3273  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3274    return (from & 1) == (to & 1);
3275
3276  return 1;
3277}
3278
3279/* Target hook for assembling integer objects.  Handle word-sized
3280   aligned objects and detect the cases when @fptr is needed.  */
3281
3282static bool
3283ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3284{
3285  if (size == POINTER_SIZE / BITS_PER_UNIT
3286      && aligned_p
3287      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3288      && GET_CODE (x) == SYMBOL_REF
3289      && SYMBOL_REF_FUNCTION_P (x))
3290    {
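      /* A function symbol stored in data must go through @fptr so that
         the linker emits the address of an official function descriptor
         rather than the raw code address.  */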
3291      if (POINTER_SIZE == 32)
3292	fputs ("\tdata4\t@fptr(", asm_out_file);
3293      else
3294	fputs ("\tdata8\t@fptr(", asm_out_file);
3295      output_addr_const (asm_out_file, x);
3296      fputs (")\n", asm_out_file);
3297      return true;
3298    }
3299  return default_assemble_integer (x, size, aligned_p);
3300}
3301
3302/* Emit the function prologue.  */
3303
3304static void
3305ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3306{
3307  int mask, grsave, grsave_prev;
3308
3309  if (current_frame_info.need_regstk)
3310    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3311	     current_frame_info.n_input_regs,
3312	     current_frame_info.n_local_regs,
3313	     current_frame_info.n_output_regs,
3314	     current_frame_info.n_rotate_regs);
3315
3316  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3317    return;
3318
3319  /* Emit the .prologue directive.  */
3320
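  /* The mask passed to .prologue uses bit 3 for b0 (rp), bit 2 for
     ar.pfs, bit 1 for the frame pointer, and bit 0 for pr; GRSAVE names
     the first of the consecutive GRs that hold them.  */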
3321  mask = 0;
3322  grsave = grsave_prev = 0;
3323  if (current_frame_info.reg_save_b0 != 0)
3324    {
3325      mask |= 8;
3326      grsave = grsave_prev = current_frame_info.reg_save_b0;
3327    }
3328  if (current_frame_info.reg_save_ar_pfs != 0
3329      && (grsave_prev == 0
3330	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3331    {
3332      mask |= 4;
3333      if (grsave_prev == 0)
3334	grsave = current_frame_info.reg_save_ar_pfs;
3335      grsave_prev = current_frame_info.reg_save_ar_pfs;
3336    }
3337  if (current_frame_info.reg_fp != 0
3338      && (grsave_prev == 0
3339	  || current_frame_info.reg_fp == grsave_prev + 1))
3340    {
3341      mask |= 2;
3342      if (grsave_prev == 0)
3343	grsave = HARD_FRAME_POINTER_REGNUM;
3344      grsave_prev = current_frame_info.reg_fp;
3345    }
3346  if (current_frame_info.reg_save_pr != 0
3347      && (grsave_prev == 0
3348	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3349    {
3350      mask |= 1;
3351      if (grsave_prev == 0)
3352	grsave = current_frame_info.reg_save_pr;
3353    }
3354
3355  if (mask && TARGET_GNU_AS)
3356    fprintf (file, "\t.prologue %d, %d\n", mask,
3357	     ia64_dbx_register_number (grsave));
3358  else
3359    fputs ("\t.prologue\n", file);
3360
3361  /* Emit a .spill directive, if necessary, to relocate the base of
3362     the register spill area.  */
3363  if (current_frame_info.spill_cfa_off != -16)
3364    fprintf (file, "\t.spill %ld\n",
3365	     (long) (current_frame_info.spill_cfa_off
3366		     + current_frame_info.spill_size));
3367}
3368
3369/* Emit the .body directive at the scheduled end of the prologue.  */
3370
3371static void
3372ia64_output_function_end_prologue (FILE *file)
3373{
3374  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3375    return;
3376
3377  fputs ("\t.body\n", file);
3378}
3379
3380/* Emit the function epilogue.  */
3381
3382static void
3383ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3384			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3385{
3386  int i;
3387
3388  if (current_frame_info.reg_fp)
3389    {
3390      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3391      reg_names[HARD_FRAME_POINTER_REGNUM]
3392	= reg_names[current_frame_info.reg_fp];
3393      reg_names[current_frame_info.reg_fp] = tmp;
3394    }
3395  if (! TARGET_REG_NAMES)
3396    {
3397      for (i = 0; i < current_frame_info.n_input_regs; i++)
3398	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3399      for (i = 0; i < current_frame_info.n_local_regs; i++)
3400	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3401      for (i = 0; i < current_frame_info.n_output_regs; i++)
3402	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3403    }
3404
3405  current_frame_info.initialized = 0;
3406}
3407
3408int
3409ia64_dbx_register_number (int regno)
3410{
3411  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3412     from its home at loc79 to something inside the register frame.  We
3413     must perform the same renumbering here for the debug info.  */
3414  if (current_frame_info.reg_fp)
3415    {
3416      if (regno == HARD_FRAME_POINTER_REGNUM)
3417	regno = current_frame_info.reg_fp;
3418      else if (regno == current_frame_info.reg_fp)
3419	regno = HARD_FRAME_POINTER_REGNUM;
3420    }
3421
3422  if (IN_REGNO_P (regno))
3423    return 32 + regno - IN_REG (0);
3424  else if (LOC_REGNO_P (regno))
3425    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3426  else if (OUT_REGNO_P (regno))
3427    return (32 + current_frame_info.n_input_regs
3428	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3429  else
3430    return regno;
3431}
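
/* For example, with 2 input and 3 local registers the mapping above gives
   in0/in1 -> 32/33, loc0..loc2 -> 34..36 and out0 -> 37, i.e. a dense
   numbering of the register frame for the debugger regardless of how the
   physical stacked registers were allocated.  */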
3432
3433void
3434ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3435{
3436  rtx addr_reg, eight = GEN_INT (8);
3437
3438  /* The Intel assembler requires that the global __ia64_trampoline symbol
3439     be declared explicitly.  */

3440  if (!TARGET_GNU_AS)
3441    {
3442      static bool declared_ia64_trampoline = false;
3443
3444      if (!declared_ia64_trampoline)
3445	{
3446	  declared_ia64_trampoline = true;
3447	  (*targetm.asm_out.globalize_label) (asm_out_file,
3448					      "__ia64_trampoline");
3449	}
3450    }
3451
3452  /* Load up our iterator.  */
3453  addr_reg = gen_reg_rtx (Pmode);
3454  emit_move_insn (addr_reg, addr);
3455
3456  /* The first two words are the fake descriptor:
3457     __ia64_trampoline, ADDR+16.  */
3458  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3459		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3460  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3461
3462  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3463		  copy_to_reg (plus_constant (addr, 16)));
3464  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3465
3466  /* The third word is the target descriptor.  */
3467  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3468  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3469
3470  /* The fourth word is the static chain.  */
3471  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3472}
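
/* For reference, the 32-byte trampoline built above has the following
   layout (illustrative; each slot is 8 bytes):

	addr+ 0:  __ia64_trampoline	(fake descriptor: entry point)
	addr+ 8:  addr+16		(fake descriptor: gp)
	addr+16:  fnaddr		(real target descriptor)
	addr+24:  static_chain

   Calling through the fake descriptor enters __ia64_trampoline with gp
   pointing at addr+16, from which the support routine can presumably pick
   up the real target descriptor and the static chain.  */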
3473
3474/* Do any needed setup for a variadic function.  CUM has not been updated
3475   for the last named argument which has type TYPE and mode MODE.
3476
3477   We generate the actual spill instructions during prologue generation.  */
3478
3479void
3480ia64_setup_incoming_varargs (CUMULATIVE_ARGS cum, int int_mode, tree type,
3481			     int * pretend_size,
3482			     int second_time ATTRIBUTE_UNUSED)
3483{
3484  /* Skip the current argument.  */
3485  ia64_function_arg_advance (&cum, int_mode, type, 1);
3486
3487  if (cum.words < MAX_ARGUMENT_SLOTS)
3488    {
3489      int n = MAX_ARGUMENT_SLOTS - cum.words;
3490      *pretend_size = n * UNITS_PER_WORD;
3491      cfun->machine->n_varargs = n;
3492    }
3493}
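
/* Illustrative example (assuming MAX_ARGUMENT_SLOTS == 8 and
   UNITS_PER_WORD == 8): for a function such as f (char *fmt, ...) the
   single named argument occupies one slot, so n == 7 and *pretend_size
   becomes 56 bytes, i.e. room for the seven remaining argument registers
   that the prologue spills for va_arg to find.  */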
3494
3495/* Check whether TYPE is a homogeneous floating point aggregate.  If
3496   it is, return the mode of the floating point type that appears
3497   in all leafs.  If it is not, return VOIDmode.
3498
3499   An aggregate is a homogeneous floating point aggregate if all
3500   fields/elements in it have the same floating point type (e.g.,
3501   SFmode).  128-bit quad-precision floats are excluded.  */
3502
3503static enum machine_mode
3504hfa_element_mode (tree type, int nested)
3505{
3506  enum machine_mode element_mode = VOIDmode;
3507  enum machine_mode mode;
3508  enum tree_code code = TREE_CODE (type);
3509  int know_element_mode = 0;
3510  tree t;
3511
3512  switch (code)
3513    {
3514    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3515    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3516    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3517    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3518    case FUNCTION_TYPE:
3519      return VOIDmode;
3520
3521      /* Fortran complex types are supposed to be HFAs, so we need to handle
3522	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3523	 types though.  */
3524    case COMPLEX_TYPE:
3525      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3526	  && TYPE_MODE (type) != TCmode)
3527	return GET_MODE_INNER (TYPE_MODE (type));
3528      else
3529	return VOIDmode;
3530
3531    case REAL_TYPE:
3532      /* We want to return VOIDmode for raw REAL_TYPEs, but return the
3533	 actual mode if this is contained within an aggregate.  */
3534      if (nested && TYPE_MODE (type) != TFmode)
3535	return TYPE_MODE (type);
3536      else
3537	return VOIDmode;
3538
3539    case ARRAY_TYPE:
3540      return hfa_element_mode (TREE_TYPE (type), 1);
3541
3542    case RECORD_TYPE:
3543    case UNION_TYPE:
3544    case QUAL_UNION_TYPE:
3545      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3546	{
3547	  if (TREE_CODE (t) != FIELD_DECL)
3548	    continue;
3549
3550	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3551	  if (know_element_mode)
3552	    {
3553	      if (mode != element_mode)
3554		return VOIDmode;
3555	    }
3556	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3557	    return VOIDmode;
3558	  else
3559	    {
3560	      know_element_mode = 1;
3561	      element_mode = mode;
3562	    }
3563	}
3564      return element_mode;
3565
3566    default:
3567      /* If we reach here, we probably have some front-end specific type
3568	 that the backend doesn't know about.  This can happen via the
3569	 aggregate_value_p call in init_function_start.  All we can do is
3570	 ignore unknown tree types.  */
3571      return VOIDmode;
3572    }
3573
3574  return VOIDmode;
3575}
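
/* Illustrative examples: struct { float x, y, z; } is an HFA and yields
   SFmode; struct { double d[4]; } yields DFmode; but
   struct { float f; double d; } mixes element modes and yields VOIDmode,
   as does a long double (TFmode) field, since quad-precision is excluded.  */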
3576
3577/* Return the number of words required to hold a quantity of TYPE and MODE
3578   when passed as an argument.  */
3579static int
3580ia64_function_arg_words (tree type, enum machine_mode mode)
3581{
3582  int words;
3583
3584  if (mode == BLKmode)
3585    words = int_size_in_bytes (type);
3586  else
3587    words = GET_MODE_SIZE (mode);
3588
3589  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3590}
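
/* For example, with UNITS_PER_WORD == 8 a 12-byte BLKmode aggregate
   occupies (12 + 7) / 8 == 2 argument words, while a DImode value
   occupies exactly one.  */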
3591
3592/* Return the number of registers that should be skipped so the current
3593   argument (described by TYPE and WORDS) will be properly aligned.
3594
3595   Integer and float arguments larger than 8 bytes start at the next
3596   even boundary.  Aggregates larger than 8 bytes start at the next
3597   even boundary if the aggregate has 16 byte alignment.  Note that
3598   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3599   but are still to be aligned in registers.
3600
3601   ??? The ABI does not specify how to handle aggregates with
3602   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3603   all as if they had 16 byte alignment.  Such aggregates can occur
3604   only if gcc extensions are used.  */
3605static int
3606ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3607{
3608  if ((cum->words & 1) == 0)
3609    return 0;
3610
3611  if (type
3612      && TREE_CODE (type) != INTEGER_TYPE
3613      && TREE_CODE (type) != REAL_TYPE)
3614    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3615  else
3616    return words > 1;
3617}
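
/* For example, if cum->words is odd (the next free slot is odd) and the
   argument is a 16-byte-aligned aggregate, or a scalar wider than one
   word, this returns 1 and a slot is skipped so that the argument starts
   on an even boundary.  */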
3618
3619/* Return rtx for register where argument is passed, or zero if it is passed
3620   on the stack.  */
3621/* ??? 128-bit quad-precision floats are always passed in general
3622   registers.  */
3623
3624rtx
3625ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3626		   int named, int incoming)
3627{
3628  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3629  int words = ia64_function_arg_words (type, mode);
3630  int offset = ia64_function_arg_offset (cum, type, words);
3631  enum machine_mode hfa_mode = VOIDmode;
3632
3633  /* If all argument slots are used, then it must go on the stack.  */
3634  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3635    return 0;
3636
3637  /* Check for and handle homogeneous FP aggregates.  */
3638  if (type)
3639    hfa_mode = hfa_element_mode (type, 0);
3640
3641  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3642     and unprototyped hfas are passed specially.  */
3643  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3644    {
3645      rtx loc[16];
3646      int i = 0;
3647      int fp_regs = cum->fp_regs;
3648      int int_regs = cum->words + offset;
3649      int hfa_size = GET_MODE_SIZE (hfa_mode);
3650      int byte_size;
3651      int args_byte_size;
3652
3653      /* If prototyped, pass it in FR regs then GR regs.
3654	 If not prototyped, pass it in both FR and GR regs.
3655
3656	 If this is an SFmode aggregate, then it is possible to run out of
3657	 FR regs while GR regs are still left.  In that case, we pass the
3658	 remaining part in the GR regs.  */
3659
3660      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3661	 of the argument, the last FP register, or the last argument slot.  */
3662
3663      byte_size = ((mode == BLKmode)
3664		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3665      args_byte_size = int_regs * UNITS_PER_WORD;
3666      offset = 0;
3667      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3668	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3669	{
3670	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3671				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3672							      + fp_regs)),
3673				      GEN_INT (offset));
3674	  offset += hfa_size;
3675	  args_byte_size += hfa_size;
3676	  fp_regs++;
3677	}
3678
3679      /* If no prototype, then the whole thing must go in GR regs.  */
3680      if (! cum->prototype)
3681	offset = 0;
3682      /* If this is an SFmode aggregate, then we might have some left over
3683	 that needs to go in GR regs.  */
3684      else if (byte_size != offset)
3685	int_regs += offset / UNITS_PER_WORD;
3686
3687      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3688
3689      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3690	{
3691	  enum machine_mode gr_mode = DImode;
3692	  unsigned int gr_size;
3693
3694	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3695	     then this goes in a GR reg left adjusted/little endian, right
3696	     adjusted/big endian.  */
3697	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3698	     always right adjusted/little endian.  */
3699	  if (offset & 0x4)
3700	    gr_mode = SImode;
3701	  /* If we have an even 4 byte hunk because the aggregate is a
3702	     multiple of 4 bytes in size, then this goes in a GR reg right
3703	     adjusted/little endian.  */
3704	  else if (byte_size - offset == 4)
3705	    gr_mode = SImode;
3706
3707	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3708				      gen_rtx_REG (gr_mode, (basereg
3709							     + int_regs)),
3710				      GEN_INT (offset));
3711
3712	  gr_size = GET_MODE_SIZE (gr_mode);
3713	  offset += gr_size;
3714	  if (gr_size == UNITS_PER_WORD
3715	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3716	    int_regs++;
3717	  else if (gr_size > UNITS_PER_WORD)
3718	    int_regs += gr_size / UNITS_PER_WORD;
3719	}
3720
3721      /* If we ended up using just one location, just return that one loc, but
3722	 change the mode back to the argument mode.  */
3723      if (i == 1)
3724	return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3725      else
3726	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3727    }
3728
3729  /* Integral and aggregates go in general registers.  If we have run out of
3730     FR registers, then FP values must also go in general registers.  This can
3731     happen when we have a SFmode HFA.  */
3732  else if (mode == TFmode || mode == TCmode
3733	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3734    {
3735      int byte_size = ((mode == BLKmode)
3736                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3737      if (BYTES_BIG_ENDIAN
3738	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3739	&& byte_size < UNITS_PER_WORD
3740	&& byte_size > 0)
3741	{
3742	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3743					  gen_rtx_REG (DImode,
3744						       (basereg + cum->words
3745							+ offset)),
3746					  const0_rtx);
3747	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3748	}
3749      else
3750	return gen_rtx_REG (mode, basereg + cum->words + offset);
3751
3752    }
3753
3754  /* If there is a prototype, then FP values go in a FR register when
3755     named, and in a GR register when unnamed.  */
3756  else if (cum->prototype)
3757    {
3758      if (named)
3759	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3760      /* In big-endian mode, an anonymous SFmode value must be represented
3761         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3762	 the value into the high half of the general register.  */
3763      else if (BYTES_BIG_ENDIAN && mode == SFmode)
3764	return gen_rtx_PARALLEL (mode,
3765		 gen_rtvec (1,
3766                   gen_rtx_EXPR_LIST (VOIDmode,
3767		     gen_rtx_REG (DImode, basereg + cum->words + offset),
3768				      const0_rtx)));
3769      else
3770	return gen_rtx_REG (mode, basereg + cum->words + offset);
3771    }
3772  /* If there is no prototype, then FP values go in both FR and GR
3773     registers.  */
3774  else
3775    {
3776      /* See comment above.  */
3777      enum machine_mode inner_mode =
3778	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3779
3780      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3781				      gen_rtx_REG (mode, (FR_ARG_FIRST
3782							  + cum->fp_regs)),
3783				      const0_rtx);
3784      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3785				      gen_rtx_REG (inner_mode,
3786						   (basereg + cum->words
3787						    + offset)),
3788				      const0_rtx);
3789
3790      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3791    }
3792}
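
/* Illustrative example (assuming FR_ARG_FIRST is f8 and no FP argument
   registers have been consumed yet): a named, prototyped HFA such as
   struct { double a, b, c; } comes back from the code above roughly as

     (parallel:BLK [(expr_list (reg:DF f8)  (const_int 0))
		    (expr_list (reg:DF f9)  (const_int 8))
		    (expr_list (reg:DF f10) (const_int 16))])

   i.e. one FP argument register per element, each annotated with its
   byte offset within the aggregate.  */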
3793
3794/* Return number of words, at the beginning of the argument, that must be
3795   put in registers.  0 is the argument is entirely in registers or entirely
3796   in memory.  */
3797
3798int
3799ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3800				 tree type, int named ATTRIBUTE_UNUSED)
3801{
3802  int words = ia64_function_arg_words (type, mode);
3803  int offset = ia64_function_arg_offset (cum, type, words);
3804
3805  /* If all argument slots are used, then it must go on the stack.  */
3806  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3807    return 0;
3808
3809  /* It doesn't matter whether the argument goes in FR or GR regs.  If
3810     it fits within the 8 argument slots, then it goes entirely in
3811     registers.  If it extends past the last argument slot, then the rest
3812     goes on the stack.  */
3813
3814  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3815    return 0;
3816
3817  return MAX_ARGUMENT_SLOTS - cum->words - offset;
3818}
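
/* For example, an ordinary (8-byte-aligned) 40-byte aggregate arriving
   when cum->words == 5 needs 5 words but only 3 argument slots remain, so
   the code above returns 3: three words travel in registers and the
   remaining two on the stack.  */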
3819
3820/* Update CUM to point after this argument.  This is patterned after
3821   ia64_function_arg.  */
3822
3823void
3824ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3825			   tree type, int named)
3826{
3827  int words = ia64_function_arg_words (type, mode);
3828  int offset = ia64_function_arg_offset (cum, type, words);
3829  enum machine_mode hfa_mode = VOIDmode;
3830
3831  /* If all arg slots are already full, then there is nothing to do.  */
3832  if (cum->words >= MAX_ARGUMENT_SLOTS)
3833    return;
3834
3835  cum->words += words + offset;
3836
3837  /* Check for and handle homogeneous FP aggregates.  */
3838  if (type)
3839    hfa_mode = hfa_element_mode (type, 0);
3840
3841  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3842     and unprototyped hfas are passed specially.  */
3843  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3844    {
3845      int fp_regs = cum->fp_regs;
3846      /* This is the original value of cum->words + offset.  */
3847      int int_regs = cum->words - words;
3848      int hfa_size = GET_MODE_SIZE (hfa_mode);
3849      int byte_size;
3850      int args_byte_size;
3851
3852      /* If prototyped, pass it in FR regs then GR regs.
3853	 If not prototyped, pass it in both FR and GR regs.
3854
3855	 If this is an SFmode aggregate, then it is possible to run out of
3856	 FR regs while GR regs are still left.  In that case, we pass the
3857	 remaining part in the GR regs.  */
3858
3859      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3860	 of the argument, the last FP register, or the last argument slot.  */
3861
3862      byte_size = ((mode == BLKmode)
3863		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3864      args_byte_size = int_regs * UNITS_PER_WORD;
3865      offset = 0;
3866      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3867	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3868	{
3869	  offset += hfa_size;
3870	  args_byte_size += hfa_size;
3871	  fp_regs++;
3872	}
3873
3874      cum->fp_regs = fp_regs;
3875    }
3876
3877  /* Integral and aggregates go in general registers.  If we have run out of
3878     FR registers, then FP values must also go in general registers.  This can
3879     happen when we have a SFmode HFA.  */
3880  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3881    cum->int_regs = cum->words;
3882
3883  /* If there is a prototype, then FP values go in a FR register when
3884     named, and in a GR register when unnamed.  */
3885  else if (cum->prototype)
3886    {
3887      if (! named)
3888	cum->int_regs = cum->words;
3889      else
3890	/* ??? Complex types should not reach here.  */
3891	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3892    }
3893  /* If there is no prototype, then FP values go in both FR and GR
3894     registers.  */
3895  else
3896    {
3897      /* ??? Complex types should not reach here.  */
3898      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3899      cum->int_regs = cum->words;
3900    }
3901}
3902
3903/* Variable sized types are passed by reference.  */
3904/* ??? At present this is a GCC extension to the IA-64 ABI.  */
3905
3906int
3907ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3908				     enum machine_mode mode ATTRIBUTE_UNUSED,
3909				     tree type, int named ATTRIBUTE_UNUSED)
3910{
3911  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3912}
3913
3914/* True if it is OK to do sibling call optimization for the specified
3915   call expression EXP.  DECL will be the called function, or NULL if
3916   this is an indirect call.  */
3917static bool
3918ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3919{
3920  /* We must always return with our current GP.  This means we can
3921     only sibcall to functions defined in the current module.  */
3922  return decl && (*targetm.binds_local_p) (decl);
3923}
3924
3925
3926/* Implement va_arg.  */
3927
3928rtx
3929ia64_va_arg (tree valist, tree type)
3930{
3931  tree t;
3932
3933  /* Variable sized types are passed by reference.  */
3934  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3935    {
3936      rtx addr = force_reg (ptr_mode,
3937	    std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3938#ifdef POINTERS_EXTEND_UNSIGNED
3939      addr = convert_memory_address (Pmode, addr);
3940#endif
3941      return gen_rtx_MEM (ptr_mode, addr);
3942    }
3943
3944  /* Aggregate arguments with alignment larger than 8 bytes start at
3945     the next even boundary.  Integer and floating point arguments
3946     do so if they are larger than 8 bytes, whether or not they are
3947     also aligned larger than 8 bytes.  */
3948  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3949      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3950    {
3951      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3952		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3953      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3954		 build_int_2 (-2 * UNITS_PER_WORD, -1));
3955      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
3956      TREE_SIDE_EFFECTS (t) = 1;
3957      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3958    }
3959
3960  return std_expand_builtin_va_arg (valist, type);
3961}
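
/* The two tree operations built above implement the usual round-up idiom;
   in C it would read (sketch, assuming UNITS_PER_WORD == 8)

	valist = (valist + 15) & -16;

   which advances the va_list pointer to the next 16-byte boundary before
   the normal va_arg expansion runs.  */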
3962
3963/* Return 1 if the function return value is returned in memory.  Return 0 if it is
3964   in a register.  */
3965
3966int
3967ia64_return_in_memory (tree valtype)
3968{
3969  enum machine_mode mode;
3970  enum machine_mode hfa_mode;
3971  HOST_WIDE_INT byte_size;
3972
3973  mode = TYPE_MODE (valtype);
3974  byte_size = GET_MODE_SIZE (mode);
3975  if (mode == BLKmode)
3976    {
3977      byte_size = int_size_in_bytes (valtype);
3978      if (byte_size < 0)
3979	return 1;
3980    }
3981
3982  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
3983
3984  hfa_mode = hfa_element_mode (valtype, 0);
3985  if (hfa_mode != VOIDmode)
3986    {
3987      int hfa_size = GET_MODE_SIZE (hfa_mode);
3988
3989      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
3990	return 1;
3991      else
3992	return 0;
3993    }
3994  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
3995    return 1;
3996  else
3997    return 0;
3998}
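
/* Illustrative thresholds (assuming MAX_ARGUMENT_SLOTS == 8 and
   MAX_INT_RETURN_SLOTS == 4): an HFA of eight floats is returned in FP
   registers, while nine floats overflow the eight slots and go to memory;
   a plain 32-byte aggregate is returned in r8-r11, while anything larger
   is returned in memory.  */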
3999
4000/* Return rtx for register that holds the function return value.  */
4001
4002rtx
4003ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4004{
4005  enum machine_mode mode;
4006  enum machine_mode hfa_mode;
4007
4008  mode = TYPE_MODE (valtype);
4009  hfa_mode = hfa_element_mode (valtype, 0);
4010
4011  if (hfa_mode != VOIDmode)
4012    {
4013      rtx loc[8];
4014      int i;
4015      int hfa_size;
4016      int byte_size;
4017      int offset;
4018
4019      hfa_size = GET_MODE_SIZE (hfa_mode);
4020      byte_size = ((mode == BLKmode)
4021		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4022      offset = 0;
4023      for (i = 0; offset < byte_size; i++)
4024	{
4025	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4026				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4027				      GEN_INT (offset));
4028	  offset += hfa_size;
4029	}
4030
4031      if (i == 1)
4032	return XEXP (loc[0], 0);
4033      else
4034	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4035    }
4036  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4037    return gen_rtx_REG (mode, FR_ARG_FIRST);
4038  else
4039    {
4040      if (BYTES_BIG_ENDIAN
4041	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4042	{
4043	  rtx loc[8];
4044	  int offset;
4045	  int bytesize;
4046	  int i;
4047
4048	  offset = 0;
4049	  bytesize = int_size_in_bytes (valtype);
4050	  for (i = 0; offset < bytesize; i++)
4051	    {
4052	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4053					  gen_rtx_REG (DImode,
4054						       GR_RET_FIRST + i),
4055					  GEN_INT (offset));
4056	      offset += UNITS_PER_WORD;
4057	    }
4058	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4059	}
4060      else
4061	return gen_rtx_REG (mode, GR_RET_FIRST);
4062    }
4063}
4064
4065/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4066   We need to emit DTP-relative relocations.  */
4067
4068void
4069ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4070{
4071  if (size != 8)
4072    abort ();
4073  fputs ("\tdata8.ua\t@dtprel(", file);
4074  output_addr_const (file, x);
4075  fputs (")", file);
4076}
4077
4078/* Print a memory address as an operand to reference that memory location.  */
4079
4080/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4081   also call this from ia64_print_operand for memory addresses.  */
4082
4083void
4084ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4085			    rtx address ATTRIBUTE_UNUSED)
4086{
4087}
4088
4089/* Print an operand to an assembler instruction.
4090   C	Swap and print a comparison operator.
4091   D	Print an FP comparison operator.
4092   E    Print 32 - constant, for SImode shifts as extract.
4093   e    Print 64 - constant, for DImode rotates.
4094   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4095        a floating point register emitted normally.
4096   I	Invert a predicate register by adding 1.
4097   J    Select the proper predicate register for a condition.
4098   j    Select the inverse predicate register for a condition.
4099   O	Append .acq for volatile load.
4100   P	Postincrement of a MEM.
4101   Q	Append .rel for volatile store.
4102   S	Shift amount for shladd instruction.
4103   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4104	for Intel assembler.
4105   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4106	for Intel assembler.
4107   r	Print register name, or constant 0 as r0.  HP compatibility for
4108	Linux kernel.  */
4109void
4110ia64_print_operand (FILE * file, rtx x, int code)
4111{
4112  const char *str;
4113
4114  switch (code)
4115    {
4116    case 0:
4117      /* Handled below.  */
4118      break;
4119
4120    case 'C':
4121      {
4122	enum rtx_code c = swap_condition (GET_CODE (x));
4123	fputs (GET_RTX_NAME (c), file);
4124	return;
4125      }
4126
4127    case 'D':
4128      switch (GET_CODE (x))
4129	{
4130	case NE:
4131	  str = "neq";
4132	  break;
4133	case UNORDERED:
4134	  str = "unord";
4135	  break;
4136	case ORDERED:
4137	  str = "ord";
4138	  break;
4139	default:
4140	  str = GET_RTX_NAME (GET_CODE (x));
4141	  break;
4142	}
4143      fputs (str, file);
4144      return;
4145
4146    case 'E':
4147      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4148      return;
4149
4150    case 'e':
4151      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4152      return;
4153
4154    case 'F':
4155      if (x == CONST0_RTX (GET_MODE (x)))
4156	str = reg_names [FR_REG (0)];
4157      else if (x == CONST1_RTX (GET_MODE (x)))
4158	str = reg_names [FR_REG (1)];
4159      else if (GET_CODE (x) == REG)
4160	str = reg_names [REGNO (x)];
4161      else
4162	abort ();
4163      fputs (str, file);
4164      return;
4165
4166    case 'I':
4167      fputs (reg_names [REGNO (x) + 1], file);
4168      return;
4169
4170    case 'J':
4171    case 'j':
4172      {
4173	unsigned int regno = REGNO (XEXP (x, 0));
4174	if (GET_CODE (x) == EQ)
4175	  regno += 1;
4176	if (code == 'j')
4177	  regno ^= 1;
4178        fputs (reg_names [regno], file);
4179      }
4180      return;
4181
4182    case 'O':
4183      if (MEM_VOLATILE_P (x))
4184	fputs(".acq", file);
4185      return;
4186
4187    case 'P':
4188      {
4189	HOST_WIDE_INT value;
4190
4191	switch (GET_CODE (XEXP (x, 0)))
4192	  {
4193	  default:
4194	    return;
4195
4196	  case POST_MODIFY:
4197	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4198	    if (GET_CODE (x) == CONST_INT)
4199	      value = INTVAL (x);
4200	    else if (GET_CODE (x) == REG)
4201	      {
4202		fprintf (file, ", %s", reg_names[REGNO (x)]);
4203		return;
4204	      }
4205	    else
4206	      abort ();
4207	    break;
4208
4209	  case POST_INC:
4210	    value = GET_MODE_SIZE (GET_MODE (x));
4211	    break;
4212
4213	  case POST_DEC:
4214	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4215	    break;
4216	  }
4217
4218	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4219	return;
4220      }
4221
4222    case 'Q':
4223      if (MEM_VOLATILE_P (x))
4224	fputs(".rel", file);
4225      return;
4226
4227    case 'S':
4228      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4229      return;
4230
4231    case 'T':
4232      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4233	{
4234	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4235	  return;
4236	}
4237      break;
4238
4239    case 'U':
4240      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4241	{
4242	  const char *prefix = "0x";
4243	  if (INTVAL (x) & 0x80000000)
4244	    {
4245	      fprintf (file, "0xffffffff");
4246	      prefix = "";
4247	    }
4248	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4249	  return;
4250	}
4251      break;
4252
4253    case 'r':
4254      /* If this operand is the constant zero, write it as register zero.
4255	 Any register, zero, or CONST_INT value is OK here.  */
4256      if (GET_CODE (x) == REG)
4257	fputs (reg_names[REGNO (x)], file);
4258      else if (x == CONST0_RTX (GET_MODE (x)))
4259	fputs ("r0", file);
4260      else if (GET_CODE (x) == CONST_INT)
4261	output_addr_const (file, x);
4262      else
4263	output_operand_lossage ("invalid %%r value");
4264      return;
4265
4266    case '+':
4267      {
4268	const char *which;
4269
4270	/* For conditional branches, returns or calls, substitute
4271	   sptk, dptk, dpnt, or spnt for %s.  */
4272	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4273	if (x)
4274	  {
4275	    int pred_val = INTVAL (XEXP (x, 0));
4276
4277	    /* Guess top and bottom 10% statically predicted.  */
4278	    if (pred_val < REG_BR_PROB_BASE / 50)
4279	      which = ".spnt";
4280	    else if (pred_val < REG_BR_PROB_BASE / 2)
4281	      which = ".dpnt";
4282	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4283	      which = ".dptk";
4284	    else
4285	      which = ".sptk";
4286	  }
4287	else if (GET_CODE (current_output_insn) == CALL_INSN)
4288	  which = ".sptk";
4289	else
4290	  which = ".dptk";
4291
4292	fputs (which, file);
4293	return;
4294      }
4295
4296    case ',':
4297      x = current_insn_predicate;
4298      if (x)
4299	{
4300	  unsigned int regno = REGNO (XEXP (x, 0));
4301	  if (GET_CODE (x) == EQ)
4302	    regno += 1;
4303          fprintf (file, "(%s) ", reg_names [regno]);
4304	}
4305      return;
4306
4307    default:
4308      output_operand_lossage ("ia64_print_operand: unknown code");
4309      return;
4310    }
4311
4312  switch (GET_CODE (x))
4313    {
4314      /* This happens for the spill/restore instructions.  */
4315    case POST_INC:
4316    case POST_DEC:
4317    case POST_MODIFY:
4318      x = XEXP (x, 0);
4319      /* ... fall through ...  */
4320
4321    case REG:
4322      fputs (reg_names [REGNO (x)], file);
4323      break;
4324
4325    case MEM:
4326      {
4327	rtx addr = XEXP (x, 0);
4328	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4329	  addr = XEXP (addr, 0);
4330	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4331	break;
4332      }
4333
4334    default:
4335      output_addr_const (file, x);
4336      break;
4337    }
4338
4339  return;
4340}
4341
4342/* Compute a (partial) cost for rtx X.  Return true if the complete
4343   cost has been computed, and false if subexpressions should be
4344   scanned.  In either case, *TOTAL contains the cost result.  */
4345/* ??? This is incomplete.  */
4346
4347static bool
4348ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4349{
4350  switch (code)
4351    {
4352    case CONST_INT:
4353      switch (outer_code)
4354        {
4355        case SET:
4356	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4357	  return true;
4358        case PLUS:
4359	  if (CONST_OK_FOR_I (INTVAL (x)))
4360	    *total = 0;
4361	  else if (CONST_OK_FOR_J (INTVAL (x)))
4362	    *total = 1;
4363	  else
4364	    *total = COSTS_N_INSNS (1);
4365	  return true;
4366        default:
4367	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4368	    *total = 0;
4369	  else
4370	    *total = COSTS_N_INSNS (1);
4371	  return true;
4372	}
4373
4374    case CONST_DOUBLE:
4375      *total = COSTS_N_INSNS (1);
4376      return true;
4377
4378    case CONST:
4379    case SYMBOL_REF:
4380    case LABEL_REF:
4381      *total = COSTS_N_INSNS (3);
4382      return true;
4383
4384    case MULT:
4385      /* For multiplies wider than HImode, we have to go to the FPU,
4386         which normally involves copies.  Plus there's the latency
4387         of the multiply itself, and the latency of the instructions to
4388         transfer integer regs to FP regs.  */
4389      /* ??? Check for FP mode.  */
4390      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4391        *total = COSTS_N_INSNS (10);
4392      else
4393	*total = COSTS_N_INSNS (2);
4394      return true;
4395
4396    case PLUS:
4397    case MINUS:
4398    case ASHIFT:
4399    case ASHIFTRT:
4400    case LSHIFTRT:
4401      *total = COSTS_N_INSNS (1);
4402      return true;
4403
4404    case DIV:
4405    case UDIV:
4406    case MOD:
4407    case UMOD:
4408      /* We make divide expensive, so that divide-by-constant will be
4409         optimized to a multiply.  */
4410      *total = COSTS_N_INSNS (60);
4411      return true;
4412
4413    default:
4414      return false;
4415    }
4416}
4417
4418/* Calculate the cost of moving data from a register in class FROM to
4419   one in class TO, using MODE.  */
4420
4421int
4422ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4423			 enum reg_class to)
4424{
4425  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4426  if (to == ADDL_REGS)
4427    to = GR_REGS;
4428  if (from == ADDL_REGS)
4429    from = GR_REGS;
4430
4431  /* All costs are symmetric, so reduce cases by putting the
4432     lower-numbered class as the destination.  */
4433  if (from < to)
4434    {
4435      enum reg_class tmp = to;
4436      to = from, from = tmp;
4437    }
4438
4439  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4440     so that we get secondary memory reloads.  Between FR_REGS,
4441     we have to make this at least as expensive as MEMORY_MOVE_COST
4442     to avoid spectacularly poor register class preferencing.  */
4443  if (mode == XFmode)
4444    {
4445      if (to != GR_REGS || from != GR_REGS)
4446        return MEMORY_MOVE_COST (mode, to, 0);
4447      else
4448	return 3;
4449    }
4450
4451  switch (to)
4452    {
4453    case PR_REGS:
4454      /* Moving between PR registers takes two insns.  */
4455      if (from == PR_REGS)
4456	return 3;
4457      /* Moving between PR and anything but GR is impossible.  */
4458      if (from != GR_REGS)
4459	return MEMORY_MOVE_COST (mode, to, 0);
4460      break;
4461
4462    case BR_REGS:
4463      /* Moving between BR and anything but GR is impossible.  */
4464      if (from != GR_REGS && from != GR_AND_BR_REGS)
4465	return MEMORY_MOVE_COST (mode, to, 0);
4466      break;
4467
4468    case AR_I_REGS:
4469    case AR_M_REGS:
4470      /* Moving between AR and anything but GR is impossible.  */
4471      if (from != GR_REGS)
4472	return MEMORY_MOVE_COST (mode, to, 0);
4473      break;
4474
4475    case GR_REGS:
4476    case FR_REGS:
4477    case GR_AND_FR_REGS:
4478    case GR_AND_BR_REGS:
4479    case ALL_REGS:
4480      break;
4481
4482    default:
4483      abort ();
4484    }
4485
4486  return 2;
4487}
4488
4489/* This function returns the register class required for a secondary
4490   register when copying between one of the registers in CLASS, and X,
4491   using MODE.  A return value of NO_REGS means that no secondary register
4492   is required.  */
4493
4494enum reg_class
4495ia64_secondary_reload_class (enum reg_class class,
4496			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4497{
4498  int regno = -1;
4499
4500  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4501    regno = true_regnum (x);
4502
4503  switch (class)
4504    {
4505    case BR_REGS:
4506    case AR_M_REGS:
4507    case AR_I_REGS:
4508      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4509	 interaction.  We end up with two pseudos with overlapping lifetimes
4510	 both of which are equiv to the same constant, and both which need
4511	 both of which are equiv to the same constant, and both of which need
4512	 changes depending on the path length, which means the qty_first_reg
4513	 check in make_regs_eqv can give different answers at different times.
4514	 At some point I'll probably need a reload_indi pattern to handle
4515	 this.
4516
4517	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4518	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4519	 non-general registers for good measure.  */
4520      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4521	return GR_REGS;
4522
4523      /* This is needed if a pseudo used as a call_operand gets spilled to a
4524	 stack slot.  */
4525      if (GET_CODE (x) == MEM)
4526	return GR_REGS;
4527      break;
4528
4529    case FR_REGS:
4530      /* Need to go through general registers to get to other class regs.  */
4531      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4532	return GR_REGS;
4533
4534      /* This can happen when a paradoxical subreg is an operand to the
4535	 muldi3 pattern.  */
4536      /* ??? This shouldn't be necessary after instruction scheduling is
4537	 enabled, because paradoxical subregs are not accepted by
4538	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4539	 stop the paradoxical subreg stupidity in the *_operand functions
4540	 in recog.c.  */
4541      if (GET_CODE (x) == MEM
4542	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4543	      || GET_MODE (x) == QImode))
4544	return GR_REGS;
4545
4546      /* This can happen because of the ior/and/etc patterns that accept FP
4547	 registers as operands.  If the third operand is a constant, then it
4548	 needs to be reloaded into a FP register.  */
4549      if (GET_CODE (x) == CONST_INT)
4550	return GR_REGS;
4551
4552      /* This can happen because of register elimination in a muldi3 insn.
4553	 E.g. `26107 * (unsigned long)&u'.  */
4554      if (GET_CODE (x) == PLUS)
4555	return GR_REGS;
4556      break;
4557
4558    case PR_REGS:
4559      /* ??? This happens if we cse/gcse a BImode value across a call,
4560	 and the function has a nonlocal goto.  This is because global
4561	 does not allocate call crossing pseudos to hard registers when
4562	 current_function_has_nonlocal_goto is true.  This is relatively
4563	 common for C++ programs that use exceptions.  To reproduce,
4564	 return NO_REGS and compile libstdc++.  */
4565      if (GET_CODE (x) == MEM)
4566	return GR_REGS;
4567
4568      /* This can happen when we take a BImode subreg of a DImode value,
4569	 and that DImode value winds up in some non-GR register.  */
4570      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4571	return GR_REGS;
4572      break;
4573
4574    default:
4575      break;
4576    }
4577
4578  return NO_REGS;
4579}
4580
4581
4582/* Emit text to declare externally defined variables and functions, because
4583   the Intel assembler does not support undefined externals.  */
4584
4585void
4586ia64_asm_output_external (FILE *file, tree decl, const char *name)
4587{
4588  int save_referenced;
4589
4590  /* GNU as does not need anything here, but the HP linker does need
4591     something for external functions.  */
4592
4593  if (TARGET_GNU_AS
4594      && (!TARGET_HPUX_LD
4595	  || TREE_CODE (decl) != FUNCTION_DECL
4596	  || strstr (name, "__builtin_") == name))
4597    return;
4598
4599  /* ??? The Intel assembler creates a reference that needs to be satisfied by
4600     the linker when we do this, so we need to be careful not to do this for
4601     builtin functions which have no library equivalent.  Unfortunately, we
4602     can't tell here whether or not a function will actually be called by
4603     expand_expr, so we pull in library functions even if we may not need
4604     them later.  */
4605  if (! strcmp (name, "__builtin_next_arg")
4606      || ! strcmp (name, "alloca")
4607      || ! strcmp (name, "__builtin_constant_p")
4608      || ! strcmp (name, "__builtin_args_info"))
4609    return;
4610
4611  if (TARGET_HPUX_LD)
4612    ia64_hpux_add_extern_decl (decl);
4613  else
4614    {
4615      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4616         restore it.  */
4617      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4618      if (TREE_CODE (decl) == FUNCTION_DECL)
4619        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4620      (*targetm.asm_out.globalize_label) (file, name);
4621      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4622    }
4623}
4624
4625/* Parse the -mfixed-range= option string.  */
4626
4627static void
4628fix_range (const char *const_str)
4629{
4630  int i, first, last;
4631  char *str, *dash, *comma;
4632
4633  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4634     REG2 are either register names or register numbers.  The effect
4635     of this option is to mark the registers in the range from REG1 to
4636     REG2 as ``fixed'' so they won't be used by the compiler.  This is
4637     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4638
4639  i = strlen (const_str);
4640  str = (char *) alloca (i + 1);
4641  memcpy (str, const_str, i + 1);
4642
4643  while (1)
4644    {
4645      dash = strchr (str, '-');
4646      if (!dash)
4647	{
4648	  warning ("value of -mfixed-range must have form REG1-REG2");
4649	  return;
4650	}
4651      *dash = '\0';
4652
4653      comma = strchr (dash + 1, ',');
4654      if (comma)
4655	*comma = '\0';
4656
4657      first = decode_reg_name (str);
4658      if (first < 0)
4659	{
4660	  warning ("unknown register name: %s", str);
4661	  return;
4662	}
4663
4664      last = decode_reg_name (dash + 1);
4665      if (last < 0)
4666	{
4667	  warning ("unknown register name: %s", dash + 1);
4668	  return;
4669	}
4670
4671      *dash = '-';
4672
4673      if (first > last)
4674	{
4675	  warning ("%s-%s is an empty range", str, dash + 1);
4676	  return;
4677	}
4678
4679      for (i = first; i <= last; ++i)
4680	fixed_regs[i] = call_used_regs[i] = 1;
4681
4682      if (!comma)
4683	break;
4684
4685      *comma = ',';
4686      str = comma + 1;
4687    }
4688}
4689
4690static struct machine_function *
4691ia64_init_machine_status (void)
4692{
4693  return ggc_alloc_cleared (sizeof (struct machine_function));
4694}
4695
4696/* Handle TARGET_OPTIONS switches.  */
4697
4698void
4699ia64_override_options (void)
4700{
4701  static struct pta
4702    {
4703      const char *const name;		/* processor name or nickname.  */
4704      const enum processor_type processor;
4705    }
4706  const processor_alias_table[] =
4707    {
4708      {"itanium", PROCESSOR_ITANIUM},
4709      {"itanium1", PROCESSOR_ITANIUM},
4710      {"merced", PROCESSOR_ITANIUM},
4711      {"itanium2", PROCESSOR_ITANIUM2},
4712      {"mckinley", PROCESSOR_ITANIUM2},
4713    };
4714
4715  int const pta_size = ARRAY_SIZE (processor_alias_table);
4716  int i;
4717
4718  if (TARGET_AUTO_PIC)
4719    target_flags |= MASK_CONST_GP;
4720
4721  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4722    {
4723      warning ("cannot optimize floating point division for both latency and throughput");
4724      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4725    }
4726
4727  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4728    {
4729      warning ("cannot optimize integer division for both latency and throughput");
4730      target_flags &= ~MASK_INLINE_INT_DIV_THR;
4731    }
4732
4733  if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4734    {
4735      warning ("cannot optimize square root for both latency and throughput");
4736      target_flags &= ~MASK_INLINE_SQRT_THR;
4737    }
4738
4739  if (TARGET_INLINE_SQRT_LAT)
4740    {
4741      warning ("not yet implemented: latency-optimized inline square root");
4742      target_flags &= ~MASK_INLINE_SQRT_LAT;
4743    }
4744
4745  if (ia64_fixed_range_string)
4746    fix_range (ia64_fixed_range_string);
4747
4748  if (ia64_tls_size_string)
4749    {
4750      char *end;
4751      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4752      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4753	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4754      else
4755	ia64_tls_size = tmp;
4756    }
4757
4758  if (!ia64_tune_string)
4759    ia64_tune_string = "itanium2";
4760
4761  for (i = 0; i < pta_size; i++)
4762    if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4763      {
4764	ia64_tune = processor_alias_table[i].processor;
4765	break;
4766      }
4767
4768  if (i == pta_size)
4769    error ("bad value (%s) for -tune= switch", ia64_tune_string);
4770
4771  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4772  flag_schedule_insns_after_reload = 0;
4773
4774  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4775
4776  init_machine_status = ia64_init_machine_status;
4777}
4778
4779static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4780static enum attr_type ia64_safe_type (rtx);
4781
4782static enum attr_itanium_class
4783ia64_safe_itanium_class (rtx insn)
4784{
4785  if (recog_memoized (insn) >= 0)
4786    return get_attr_itanium_class (insn);
4787  else
4788    return ITANIUM_CLASS_UNKNOWN;
4789}
4790
4791static enum attr_type
4792ia64_safe_type (rtx insn)
4793{
4794  if (recog_memoized (insn) >= 0)
4795    return get_attr_type (insn);
4796  else
4797    return TYPE_UNKNOWN;
4798}
4799
4800/* The following collection of routines emit instruction group stop bits as
4801   necessary to avoid dependencies.  */
4802
4803/* Need to track some additional registers as far as serialization is
4804   concerned so we can properly handle br.call and br.ret.  We could
4805   make these registers visible to gcc, but since these registers are
4806   never explicitly used in gcc generated code, it seems wasteful to
4807   do so (plus it would make the call and return patterns needlessly
4808   complex).  */
4809#define REG_GP		(GR_REG (1))
4810#define REG_RP		(BR_REG (0))
4811#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4812/* This is used for volatile asms which may require a stop bit immediately
4813   before and after them.  */
4814#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4815#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4816#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4817
4818/* For each register, we keep track of how it has been written in the
4819   current instruction group.
4820
4821   If a register is written unconditionally (no qualifying predicate),
4822   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4823
4824   If a register is written if its qualifying predicate P is true, we
4825   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4826   may be written again by the complement of P (P^1) and when this happens,
4827   WRITE_COUNT gets set to 2.
4828
4829   The result of this is that whenever an insn attempts to write a register
4830   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4831
4832   If a predicate register is written by a floating-point insn, we set
4833   WRITTEN_BY_FP to true.
4834
4835   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4836   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4837
4838struct reg_write_state
4839{
4840  unsigned int write_count : 2;
4841  unsigned int first_pred : 16;
4842  unsigned int written_by_fp : 1;
4843  unsigned int written_by_and : 1;
4844  unsigned int written_by_or : 1;
4845};
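
/* Worked example of the WRITE_COUNT rules above: a first write to r14
   under predicate p6 records write_count == 1 with first_pred == p6; a
   second write under the complementary p7 in the same group is harmless
   and bumps write_count to 2; any further write to r14 (or an
   unconditional write once write_count is already 2) forces an insn
   group barrier.  */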
4846
4847/* Cumulative info for the current instruction group.  */
4848struct reg_write_state rws_sum[NUM_REGS];
4849/* Info for the current instruction.  This gets copied to rws_sum after a
4850   stop bit is emitted.  */
4851struct reg_write_state rws_insn[NUM_REGS];
4852
4853/* Indicates whether this is the first instruction after a stop bit,
4854   in which case we don't need another stop bit.  Without this, we hit
4855   the abort in ia64_variable_issue when scheduling an alloc.  */
4856static int first_instruction;
4857
4858/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4859   RTL for one instruction.  */
4860struct reg_flags
4861{
4862  unsigned int is_write : 1;	/* Is register being written?  */
4863  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4864  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4865  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4866  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4867  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4868};
4869
4870static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4871static int rws_access_regno (int, struct reg_flags, int);
4872static int rws_access_reg (rtx, struct reg_flags, int);
4873static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4874static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4875static int rtx_needs_barrier (rtx, struct reg_flags, int);
4876static void init_insn_group_barriers (void);
4877static int group_barrier_needed_p (rtx);
4878static int safe_group_barrier_needed_p (rtx);
4879
4880/* Update *RWS for REGNO, which is being written by the current instruction,
4881   with predicate PRED, and associated register flags in FLAGS.  */
4882
4883static void
4884rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4885{
4886  if (pred)
4887    rws[regno].write_count++;
4888  else
4889    rws[regno].write_count = 2;
4890  rws[regno].written_by_fp |= flags.is_fp;
4891  /* ??? Not tracking and/or across differing predicates.  */
4892  rws[regno].written_by_and = flags.is_and;
4893  rws[regno].written_by_or = flags.is_or;
4894  rws[regno].first_pred = pred;
4895}
4896
4897/* Handle an access to register REGNO of type FLAGS using predicate register
4898   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4899   a dependency with an earlier instruction in the same group.  */
4900
4901static int
4902rws_access_regno (int regno, struct reg_flags flags, int pred)
4903{
4904  int need_barrier = 0;
4905
4906  if (regno >= NUM_REGS)
4907    abort ();
4908
4909  if (! PR_REGNO_P (regno))
4910    flags.is_and = flags.is_or = 0;
4911
4912  if (flags.is_write)
4913    {
4914      int write_count;
4915
4916      /* One insn writes same reg multiple times?  */
4917      if (rws_insn[regno].write_count > 0)
4918	abort ();
4919
4920      /* Update info for current instruction.  */
4921      rws_update (rws_insn, regno, flags, pred);
4922      write_count = rws_sum[regno].write_count;
4923
4924      switch (write_count)
4925	{
4926	case 0:
4927	  /* The register has not been written yet.  */
4928	  rws_update (rws_sum, regno, flags, pred);
4929	  break;
4930
4931	case 1:
4932	  /* The register has been written via a predicate.  If this is
4933	     not a complementary predicate, then we need a barrier.  */
4934	  /* ??? This assumes that P and P+1 are always complementary
4935	     predicates for P even.  */
4936	  if (flags.is_and && rws_sum[regno].written_by_and)
4937	    ;
4938	  else if (flags.is_or && rws_sum[regno].written_by_or)
4939	    ;
4940	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4941	    need_barrier = 1;
4942	  rws_update (rws_sum, regno, flags, pred);
4943	  break;
4944
4945	case 2:
4946	  /* The register has been unconditionally written already.  We
4947	     need a barrier.  */
4948	  if (flags.is_and && rws_sum[regno].written_by_and)
4949	    ;
4950	  else if (flags.is_or && rws_sum[regno].written_by_or)
4951	    ;
4952	  else
4953	    need_barrier = 1;
4954	  rws_sum[regno].written_by_and = flags.is_and;
4955	  rws_sum[regno].written_by_or = flags.is_or;
4956	  break;
4957
4958	default:
4959	  abort ();
4960	}
4961    }
4962  else
4963    {
4964      if (flags.is_branch)
4965	{
4966	  /* Branches have several RAW exceptions that allow us to avoid
4967	     barriers.  */
4968
4969	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
4970	    /* RAW dependencies on branch regs are permissible as long
4971	       as the writer is a non-branch instruction.  Since we
4972	       never generate code that uses a branch register written
4973	       by a branch instruction, handling this case is
4974	       easy.  */
4975	    return 0;
4976
4977	  if (REGNO_REG_CLASS (regno) == PR_REGS
4978	      && ! rws_sum[regno].written_by_fp)
4979	    /* The predicates of a branch are available within the
4980	       same insn group as long as the predicate was written by
4981	       something other than a floating-point instruction.  */
4982	    return 0;
4983	}
4984
4985      if (flags.is_and && rws_sum[regno].written_by_and)
4986	return 0;
4987      if (flags.is_or && rws_sum[regno].written_by_or)
4988	return 0;
4989
4990      switch (rws_sum[regno].write_count)
4991	{
4992	case 0:
4993	  /* The register has not been written yet.  */
4994	  break;
4995
4996	case 1:
4997	  /* The register has been written via a predicate.  If this is
4998	     not a complementary predicate, then we need a barrier.  */
4999	  /* ??? This assumes that P and P+1 are always complementary
5000	     predicates for P even.  */
5001	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5002	    need_barrier = 1;
5003	  break;
5004
5005	case 2:
5006	  /* The register has been unconditionally written already.  We
5007	     need a barrier.  */
5008	  need_barrier = 1;
5009	  break;
5010
5011	default:
5012	  abort ();
5013	}
5014    }
5015
5016  return need_barrier;
5017}
5018
5019static int
5020rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5021{
5022  int regno = REGNO (reg);
5023  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5024
5025  if (n == 1)
5026    return rws_access_regno (regno, flags, pred);
5027  else
5028    {
5029      int need_barrier = 0;
5030      while (--n >= 0)
5031	need_barrier |= rws_access_regno (regno + n, flags, pred);
5032      return need_barrier;
5033    }
5034}
5035
5036/* Examine X, which is a SET rtx, and update the flags, the predicate, and
5037   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
5038
5039static void
5040update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
5041{
5042  rtx src = SET_SRC (x);
5043
5044  *pcond = 0;
5045
5046  switch (GET_CODE (src))
5047    {
5048    case CALL:
5049      return;
5050
5051    case IF_THEN_ELSE:
5052      if (SET_DEST (x) == pc_rtx)
5053	/* X is a conditional branch.  */
5054	return;
5055      else
5056	{
5057	  int is_complemented = 0;
5058
5059	  /* X is a conditional move.  */
5060	  rtx cond = XEXP (src, 0);
5061	  if (GET_CODE (cond) == EQ)
5062	    is_complemented = 1;
5063	  cond = XEXP (cond, 0);
5064	  if (GET_CODE (cond) != REG
5065	      || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5066	    abort ();
5067	  *pcond = cond;
5068	  if (XEXP (src, 1) == SET_DEST (x)
5069	      || XEXP (src, 2) == SET_DEST (x))
5070	    {
5071	      /* X is a conditional move that conditionally writes the
5072		 destination.  */
5073
5074	      /* We need another complement in this case.  */
5075	      if (XEXP (src, 1) == SET_DEST (x))
5076		is_complemented = ! is_complemented;
5077
5078	      *ppred = REGNO (cond);
5079	      if (is_complemented)
5080		++*ppred;
5081	    }
5082
5083	  /* ??? If this is a conditional write to the dest, then this
5084	     instruction does not actually read one source.  This probably
5085	     doesn't matter, because that source is also the dest.  */
5086	  /* ??? Multiple writes to predicate registers are allowed
5087	     if they are all AND type compares, or if they are all OR
5088	     type compares.  We do not generate such instructions
5089	     currently.  */
5090	}
5091      /* ... fall through ...  */
5092
5093    default:
5094      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
5095	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5096	/* Set pflags->is_fp to 1 so that we know we're dealing
5097	   with a floating point comparison when processing the
5098	   destination of the SET.  */
5099	pflags->is_fp = 1;
5100
5101      /* Discover if this is a parallel comparison.  We only handle
5102	 and.orcm and or.andcm at present, since we must retain a
5103	 strict inverse on the predicate pair.  */
5104      else if (GET_CODE (src) == AND)
5105	pflags->is_and = 1;
5106      else if (GET_CODE (src) == IOR)
5107	pflags->is_or = 1;
5108
5109      break;
5110    }
5111}
5112
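/* A hedged sketch of how update_set_flags above selects the predicate for a
   conditional move.  The helper and its arguments are invented for
   illustration: COND_IS_EQ stands for GET_CODE (cond) == EQ, DEST_IS_ARM1 for
   XEXP (src, 1) == SET_DEST (x), and PR_REGNO for REGNO of the predicate.  */
#if 0
static int
cmove_write_predicate (int pr_regno, int cond_is_eq, int dest_is_arm1)
{
  int complemented = cond_is_eq;

  /* If the destination is the "then" arm, the real store happens on the
     opposite predicate, so flip once more.  */
  if (dest_is_arm1)
    complemented = ! complemented;

  /* The complement of an even predicate P is assumed to be P + 1.  */
  return pr_regno + (complemented ? 1 : 0);
}

/* E.g. for (set r4 (if_then_else (ne p6 0) r5 r4)) the destination is the
   "else" arm and the code is NE, so the write is treated as predicated on
   p6; with EQ it would be treated as predicated on p7.  */
#endif
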
5113/* Subroutine of rtx_needs_barrier; this function determines whether the
5114   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5115   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
5116   for this insn.  */
5117
5118static int
5119set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5120{
5121  int need_barrier = 0;
5122  rtx dst;
5123  rtx src = SET_SRC (x);
5124
5125  if (GET_CODE (src) == CALL)
5126    /* We don't need to worry about the result registers that
5127	       get written by a subroutine call.  */
5128    return rtx_needs_barrier (src, flags, pred);
5129  else if (SET_DEST (x) == pc_rtx)
5130    {
5131      /* X is a conditional branch.  */
5132      /* ??? This seems redundant, as the caller sets this bit for
5133	 all JUMP_INSNs.  */
5134      flags.is_branch = 1;
5135      return rtx_needs_barrier (src, flags, pred);
5136    }
5137
5138  need_barrier = rtx_needs_barrier (src, flags, pred);
5139
5140  /* This instruction unconditionally uses a predicate register.  */
5141  if (cond)
5142    need_barrier |= rws_access_reg (cond, flags, 0);
5143
5144  dst = SET_DEST (x);
5145  if (GET_CODE (dst) == ZERO_EXTRACT)
5146    {
5147      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5148      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5149      dst = XEXP (dst, 0);
5150    }
5151  return need_barrier;
5152}
5153
5154/* Handle an access to rtx X of type FLAGS using predicate register
5155   PRED.  Return 1 if this access creates a dependency with an earlier
5156   instruction in the same group.  */
5157
5158static int
5159rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5160{
5161  int i, j;
5162  int is_complemented = 0;
5163  int need_barrier = 0;
5164  const char *format_ptr;
5165  struct reg_flags new_flags;
5166  rtx cond = 0;
5167
5168  if (! x)
5169    return 0;
5170
5171  new_flags = flags;
5172
5173  switch (GET_CODE (x))
5174    {
5175    case SET:
5176      update_set_flags (x, &new_flags, &pred, &cond);
5177      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5178      if (GET_CODE (SET_SRC (x)) != CALL)
5179	{
5180	  new_flags.is_write = 1;
5181	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5182	}
5183      break;
5184
5185    case CALL:
5186      new_flags.is_write = 0;
5187      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5188
5189      /* Avoid multiple register writes, in case this is a pattern with
5190	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
5191      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5192	{
5193	  new_flags.is_write = 1;
5194	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5195	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5196	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5197	}
5198      break;
5199
5200    case COND_EXEC:
5201      /* X is a predicated instruction.  */
5202
5203      cond = COND_EXEC_TEST (x);
5204      if (pred)
5205	abort ();
5206      need_barrier = rtx_needs_barrier (cond, flags, 0);
5207
5208      if (GET_CODE (cond) == EQ)
5209	is_complemented = 1;
5210      cond = XEXP (cond, 0);
5211      if (GET_CODE (cond) != REG
5212	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5213	abort ();
5214      pred = REGNO (cond);
5215      if (is_complemented)
5216	++pred;
5217
5218      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5219      return need_barrier;
5220
5221    case CLOBBER:
5222    case USE:
5223      /* Clobber & use are for earlier compiler phases only.  */
5224      break;
5225
5226    case ASM_OPERANDS:
5227    case ASM_INPUT:
5228      /* We always emit stop bits for traditional asms.  We emit stop bits
5229	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5230      if (GET_CODE (x) != ASM_OPERANDS
5231	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5232	{
5233	  /* Avoid writing the register multiple times if we have multiple
5234	     asm outputs.  This avoids an abort in rws_access_reg.  */
5235	  if (! rws_insn[REG_VOLATILE].write_count)
5236	    {
5237	      new_flags.is_write = 1;
5238	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5239	    }
5240	  return 1;
5241	}
5242
5243      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5244	 We cannot just fall through here, since we would then be confused
5245	 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
5246	 traditional asms, unlike their normal usage.  */
5247
5248      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5249	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5250	  need_barrier = 1;
5251      break;
5252
5253    case PARALLEL:
5254      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5255	{
5256	  rtx pat = XVECEXP (x, 0, i);
5257	  if (GET_CODE (pat) == SET)
5258	    {
5259	      update_set_flags (pat, &new_flags, &pred, &cond);
5260	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5261	    }
5262	  else if (GET_CODE (pat) == USE
5263		   || GET_CODE (pat) == CALL
5264		   || GET_CODE (pat) == ASM_OPERANDS)
5265	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5266	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5267	    abort ();
5268	}
5269      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5270	{
5271	  rtx pat = XVECEXP (x, 0, i);
5272	  if (GET_CODE (pat) == SET)
5273	    {
5274	      if (GET_CODE (SET_SRC (pat)) != CALL)
5275		{
5276		  new_flags.is_write = 1;
5277		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5278						     pred);
5279		}
5280	    }
5281	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5282	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5283	}
5284      break;
5285
5286    case SUBREG:
5287      x = SUBREG_REG (x);
5288      /* FALLTHRU */
5289    case REG:
5290      if (REGNO (x) == AR_UNAT_REGNUM)
5291	{
5292	  for (i = 0; i < 64; ++i)
5293	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5294	}
5295      else
5296	need_barrier = rws_access_reg (x, flags, pred);
5297      break;
5298
5299    case MEM:
5300      /* Find the regs used in memory address computation.  */
5301      new_flags.is_write = 0;
5302      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5303      break;
5304
5305    case CONST_INT:   case CONST_DOUBLE:
5306    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5307      break;
5308
5309      /* Operators with side-effects.  */
5310    case POST_INC:    case POST_DEC:
5311      if (GET_CODE (XEXP (x, 0)) != REG)
5312	abort ();
5313
5314      new_flags.is_write = 0;
5315      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5316      new_flags.is_write = 1;
5317      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5318      break;
5319
5320    case POST_MODIFY:
5321      if (GET_CODE (XEXP (x, 0)) != REG)
5322	abort ();
5323
5324      new_flags.is_write = 0;
5325      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5326      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5327      new_flags.is_write = 1;
5328      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5329      break;
5330
5331      /* Handle common unary and binary ops for efficiency.  */
5332    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5333    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5334    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5335    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5336    case NE:       case EQ:      case GE:      case GT:        case LE:
5337    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5338      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5339      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5340      break;
5341
5342    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5343    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5344    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5345    case SQRT:     case FFS:		case POPCOUNT:
5346      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5347      break;
5348
5349    case UNSPEC:
5350      switch (XINT (x, 1))
5351	{
5352	case UNSPEC_LTOFF_DTPMOD:
5353	case UNSPEC_LTOFF_DTPREL:
5354	case UNSPEC_DTPREL:
5355	case UNSPEC_LTOFF_TPREL:
5356	case UNSPEC_TPREL:
5357	case UNSPEC_PRED_REL_MUTEX:
5358	case UNSPEC_PIC_CALL:
5359        case UNSPEC_MF:
5360        case UNSPEC_FETCHADD_ACQ:
5361	case UNSPEC_BSP_VALUE:
5362	case UNSPEC_FLUSHRS:
5363	case UNSPEC_BUNDLE_SELECTOR:
5364          break;
5365
5366	case UNSPEC_GR_SPILL:
5367	case UNSPEC_GR_RESTORE:
5368	  {
5369	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5370	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5371
5372	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5373	    new_flags.is_write = (XINT (x, 1) == 1);
5374	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5375					      new_flags, pred);
5376	    break;
5377	  }
5378
5379	case UNSPEC_FR_SPILL:
5380	case UNSPEC_FR_RESTORE:
5381	case UNSPEC_GETF_EXP:
5382	case UNSPEC_SETF_EXP:
5383        case UNSPEC_ADDP4:
5384	case UNSPEC_FR_SQRT_RECIP_APPROX:
5385	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5386	  break;
5387
5388	case UNSPEC_FR_RECIP_APPROX:
5389	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5390	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5391	  break;
5392
5393        case UNSPEC_CMPXCHG_ACQ:
5394	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5395	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5396	  break;
5397
5398	default:
5399	  abort ();
5400	}
5401      break;
5402
5403    case UNSPEC_VOLATILE:
5404      switch (XINT (x, 1))
5405	{
5406	case UNSPECV_ALLOC:
5407	  /* Alloc must always be the first instruction of a group.
5408	     We force this by always returning true.  */
5409	  /* ??? We might get better scheduling if we explicitly check for
5410	     input/local/output register dependencies, and modify the
5411	     scheduler so that alloc is always reordered to the start of
5412	     the current group.  We could then eliminate all of the
5413	     first_instruction code.  */
5414	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5415
5416	  new_flags.is_write = 1;
5417	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5418	  return 1;
5419
5420	case UNSPECV_SET_BSP:
5421	  need_barrier = 1;
5422          break;
5423
5424	case UNSPECV_BLOCKAGE:
5425	case UNSPECV_INSN_GROUP_BARRIER:
5426	case UNSPECV_BREAK:
5427	case UNSPECV_PSAC_ALL:
5428	case UNSPECV_PSAC_NORMAL:
5429	  return 0;
5430
5431	default:
5432	  abort ();
5433	}
5434      break;
5435
5436    case RETURN:
5437      new_flags.is_write = 0;
5438      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5439      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5440
5441      new_flags.is_write = 1;
5442      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5443      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5444      break;
5445
5446    default:
5447      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5448      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5449	switch (format_ptr[i])
5450	  {
5451	  case '0':	/* unused field */
5452	  case 'i':	/* integer */
5453	  case 'n':	/* note */
5454	  case 'w':	/* wide integer */
5455	  case 's':	/* pointer to string */
5456	  case 'S':	/* optional pointer to string */
5457	    break;
5458
5459	  case 'e':
5460	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5461	      need_barrier = 1;
5462	    break;
5463
5464	  case 'E':
5465	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5466	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5467		need_barrier = 1;
5468	    break;
5469
5470	  default:
5471	    abort ();
5472	  }
5473      break;
5474    }
5475  return need_barrier;
5476}
5477
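/* The default case of rtx_needs_barrier above walks X generically, driven by
   its RTX format string.  The following self-contained sketch shows the same
   driving idea with invented types (it is not GCC's rtx representation): one
   letter per operand, 'e' for a subexpression, 'E' for a vector of
   subexpressions, anything else is a scalar and is skipped.  */
#if 0
struct expr
{
  const char *fmt;      /* e.g. "ee" for a binary operator */
  struct expr *op[4];   /* operands for 'e' slots */
  struct expr **vec;    /* operand vector for an 'E' slot */
  int veclen;
};

static int
count_exprs (struct expr *x)
{
  int i, j, n = 1;

  if (x == 0)
    return 0;
  if (x->fmt == 0)
    return 1;                            /* leaf with no operands */
  for (i = 0; x->fmt[i] != '\0'; i++)
    switch (x->fmt[i])
      {
      case 'e':                          /* single subexpression */
	n += count_exprs (x->op[i]);
	break;
      case 'E':                          /* vector of subexpressions */
	for (j = 0; j < x->veclen; j++)
	  n += count_exprs (x->vec[j]);
	break;
      default:                           /* ints, strings, ... */
	break;
      }
  return n;
}
#endif
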
5478/* Clear out the state for group_barrier_needed_p at the start of a
5479   sequence of insns.  */
5480
5481static void
5482init_insn_group_barriers (void)
5483{
5484  memset (rws_sum, 0, sizeof (rws_sum));
5485  first_instruction = 1;
5486}
5487
5488/* Given the current state, recorded by previous calls to this function,
5489   determine whether a group barrier (a stop bit) is necessary before INSN.
5490   Return nonzero if so.  */
5491
5492static int
5493group_barrier_needed_p (rtx insn)
5494{
5495  rtx pat;
5496  int need_barrier = 0;
5497  struct reg_flags flags;
5498
5499  memset (&flags, 0, sizeof (flags));
5500  switch (GET_CODE (insn))
5501    {
5502    case NOTE:
5503      break;
5504
5505    case BARRIER:
5506      /* A barrier doesn't imply an instruction group boundary.  */
5507      break;
5508
5509    case CODE_LABEL:
5510      memset (rws_insn, 0, sizeof (rws_insn));
5511      return 1;
5512
5513    case CALL_INSN:
5514      flags.is_branch = 1;
5515      flags.is_sibcall = SIBLING_CALL_P (insn);
5516      memset (rws_insn, 0, sizeof (rws_insn));
5517
5518      /* Don't bundle a call following another call.  */
5519      if ((pat = prev_active_insn (insn))
5520	  && GET_CODE (pat) == CALL_INSN)
5521	{
5522	  need_barrier = 1;
5523	  break;
5524	}
5525
5526      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5527      break;
5528
5529    case JUMP_INSN:
5530      flags.is_branch = 1;
5531
5532      /* Don't bundle a jump following a call.  */
5533      if ((pat = prev_active_insn (insn))
5534	  && GET_CODE (pat) == CALL_INSN)
5535	{
5536	  need_barrier = 1;
5537	  break;
5538	}
5539      /* FALLTHRU */
5540
5541    case INSN:
5542      if (GET_CODE (PATTERN (insn)) == USE
5543	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5544	/* Don't care about USE and CLOBBER "insns"---those are used to
5545	   indicate to the optimizer that it shouldn't get rid of
5546	   certain operations.  */
5547	break;
5548
5549      pat = PATTERN (insn);
5550
5551      /* Ug.  Hack hacks hacked elsewhere.  */
5552      switch (recog_memoized (insn))
5553	{
5554	  /* We play dependency tricks with the epilogue in order
5555	     to get proper schedules.  Undo this for dv analysis.  */
5556	case CODE_FOR_epilogue_deallocate_stack:
5557	case CODE_FOR_prologue_allocate_stack:
5558	  pat = XVECEXP (pat, 0, 0);
5559	  break;
5560
5561	  /* The pattern we use for br.cloop confuses the code above.
5562	     The second element of the vector is representative.  */
5563	case CODE_FOR_doloop_end_internal:
5564	  pat = XVECEXP (pat, 0, 1);
5565	  break;
5566
5567	  /* Doesn't generate code.  */
5568	case CODE_FOR_pred_rel_mutex:
5569	case CODE_FOR_prologue_use:
5570	  return 0;
5571
5572	default:
5573	  break;
5574	}
5575
5576      memset (rws_insn, 0, sizeof (rws_insn));
5577      need_barrier = rtx_needs_barrier (pat, flags, 0);
5578
5579      /* Check to see if the previous instruction was a volatile
5580	 asm.  */
5581      if (! need_barrier)
5582	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5583      break;
5584
5585    default:
5586      abort ();
5587    }
5588
5589  if (first_instruction && INSN_P (insn)
5590      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5591      && GET_CODE (PATTERN (insn)) != USE
5592      && GET_CODE (PATTERN (insn)) != CLOBBER)
5593    {
5594      need_barrier = 0;
5595      first_instruction = 0;
5596    }
5597
5598  return need_barrier;
5599}
5600
5601/* Like group_barrier_needed_p, but do not clobber the current state.  */
5602
5603static int
5604safe_group_barrier_needed_p (rtx insn)
5605{
5606  struct reg_write_state rws_saved[NUM_REGS];
5607  int saved_first_instruction;
5608  int t;
5609
5610  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5611  saved_first_instruction = first_instruction;
5612
5613  t = group_barrier_needed_p (insn);
5614
5615  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5616  first_instruction = saved_first_instruction;
5617
5618  return t;
5619}
5620
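/* Shape of the non-destructive query above, as a self-contained sketch (the
   names and the single global are invented for illustration): snapshot the
   global bookkeeping, run the destructive query, then restore the snapshot so
   the caller sees no side effects.  */
#if 0
static int barrier_bookkeeping;     /* stands in for rws_sum and
				       first_instruction */

static int
destructive_query (int x)
{
  barrier_bookkeeping += x;         /* like group_barrier_needed_p, this
				       updates the global state */
  return barrier_bookkeeping > 10;
}

static int
safe_query (int x)
{
  int saved = barrier_bookkeeping; /* memcpy of rws_sum in the real code */
  int result = destructive_query (x);

  barrier_bookkeeping = saved;     /* undo any updates */
  return result;
}
#endif
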
5621/* Scan the current function and insert stop bits as necessary to
5622   eliminate dependencies.  This function assumes that a final
5623   instruction scheduling pass has been run which has already
5624   inserted most of the necessary stop bits.  This function only
5625   inserts new ones at basic block boundaries, since these are
5626   invisible to the scheduler.  */
5627
5628static void
5629emit_insn_group_barriers (FILE *dump)
5630{
5631  rtx insn;
5632  rtx last_label = 0;
5633  int insns_since_last_label = 0;
5634
5635  init_insn_group_barriers ();
5636
5637  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5638    {
5639      if (GET_CODE (insn) == CODE_LABEL)
5640	{
5641	  if (insns_since_last_label)
5642	    last_label = insn;
5643	  insns_since_last_label = 0;
5644	}
5645      else if (GET_CODE (insn) == NOTE
5646	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5647	{
5648	  if (insns_since_last_label)
5649	    last_label = insn;
5650	  insns_since_last_label = 0;
5651	}
5652      else if (GET_CODE (insn) == INSN
5653	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5654	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5655	{
5656	  init_insn_group_barriers ();
5657	  last_label = 0;
5658	}
5659      else if (INSN_P (insn))
5660	{
5661	  insns_since_last_label = 1;
5662
5663	  if (group_barrier_needed_p (insn))
5664	    {
5665	      if (last_label)
5666		{
5667		  if (dump)
5668		    fprintf (dump, "Emitting stop before label %d\n",
5669			     INSN_UID (last_label));
5670		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5671		  insn = last_label;
5672
5673		  init_insn_group_barriers ();
5674		  last_label = 0;
5675		}
5676	    }
5677	}
5678    }
5679}
5680
5681/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5682   This function has to emit all necessary group barriers.  */
5683
5684static void
5685emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5686{
5687  rtx insn;
5688
5689  init_insn_group_barriers ();
5690
5691  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5692    {
5693      if (GET_CODE (insn) == BARRIER)
5694	{
5695	  rtx last = prev_active_insn (insn);
5696
5697	  if (! last)
5698	    continue;
5699	  if (GET_CODE (last) == JUMP_INSN
5700	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5701	    last = prev_active_insn (last);
5702	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5703	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5704
5705	  init_insn_group_barriers ();
5706	}
5707      else if (INSN_P (insn))
5708	{
5709	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5710	    init_insn_group_barriers ();
5711	  else if (group_barrier_needed_p (insn))
5712	    {
5713	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5714	      init_insn_group_barriers ();
5715	      group_barrier_needed_p (insn);
5716	    }
5717	}
5718    }
5719}
5720
5721
5722static int errata_find_address_regs (rtx *, void *);
5723static void errata_emit_nops (rtx);
5724static void fixup_errata (void);
5725
5726/* This structure is used to track some details about the previous insn
5727   groups so we can determine if it may be necessary to insert NOPs to
5728   work around hardware errata.  */
5729static struct group
5730{
5731  HARD_REG_SET p_reg_set;
5732  HARD_REG_SET gr_reg_conditionally_set;
5733} last_group[2];
5734
5735/* Index into the last_group array.  */
5736static int group_idx;
5737
5738/* Called through for_each_rtx; determines if a hard register that was
5739   conditionally set in the previous group is used as an address register.
5740   It ensures that for_each_rtx returns 1 in that case.  */
5741static int
5742errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5743{
5744  rtx x = *xp;
5745  if (GET_CODE (x) != MEM)
5746    return 0;
5747  x = XEXP (x, 0);
5748  if (GET_CODE (x) == POST_MODIFY)
5749    x = XEXP (x, 0);
5750  if (GET_CODE (x) == REG)
5751    {
5752      struct group *prev_group = last_group + (group_idx ^ 1);
5753      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5754			     REGNO (x)))
5755	return 1;
5756      return -1;
5757    }
5758  return 0;
5759}
5760
5761/* Called for each insn; this function keeps track of the state in
5762   last_group and emits additional NOPs if necessary to work around
5763   an Itanium A/B step erratum.  */
5764static void
5765errata_emit_nops (rtx insn)
5766{
5767  struct group *this_group = last_group + group_idx;
5768  struct group *prev_group = last_group + (group_idx ^ 1);
5769  rtx pat = PATTERN (insn);
5770  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5771  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5772  enum attr_type type;
5773  rtx set = real_pat;
5774
5775  if (GET_CODE (real_pat) == USE
5776      || GET_CODE (real_pat) == CLOBBER
5777      || GET_CODE (real_pat) == ASM_INPUT
5778      || GET_CODE (real_pat) == ADDR_VEC
5779      || GET_CODE (real_pat) == ADDR_DIFF_VEC
5780      || asm_noperands (PATTERN (insn)) >= 0)
5781    return;
5782
5783  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5784     parts of it.  */
5785
5786  if (GET_CODE (set) == PARALLEL)
5787    {
5788      int i;
5789      set = XVECEXP (real_pat, 0, 0);
5790      for (i = 1; i < XVECLEN (real_pat, 0); i++)
5791	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5792	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5793	  {
5794	    set = 0;
5795	    break;
5796	  }
5797    }
5798
5799  if (set && GET_CODE (set) != SET)
5800    set = 0;
5801
5802  type  = get_attr_type (insn);
5803
5804  if (type == TYPE_F
5805      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5806    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5807
5808  if ((type == TYPE_M || type == TYPE_A) && cond && set
5809      && REG_P (SET_DEST (set))
5810      && GET_CODE (SET_SRC (set)) != PLUS
5811      && GET_CODE (SET_SRC (set)) != MINUS
5812      && (GET_CODE (SET_SRC (set)) != ASHIFT
5813	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5814      && (GET_CODE (SET_SRC (set)) != MEM
5815	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5816      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5817    {
5818      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5819	  || ! REG_P (XEXP (cond, 0)))
5820	abort ();
5821
5822      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5823	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5824    }
5825  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5826    {
5827      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5828      emit_insn_before (gen_nop (), insn);
5829      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5830      group_idx = 0;
5831      memset (last_group, 0, sizeof last_group);
5832    }
5833}
5834
5835/* Emit extra nops if they are required to work around hardware errata.  */
5836
5837static void
5838fixup_errata (void)
5839{
5840  rtx insn;
5841
5842  if (! TARGET_B_STEP)
5843    return;
5844
5845  group_idx = 0;
5846  memset (last_group, 0, sizeof last_group);
5847
5848  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5849    {
5850      if (!INSN_P (insn))
5851	continue;
5852
5853      if (ia64_safe_type (insn) == TYPE_S)
5854	{
5855	  group_idx ^= 1;
5856	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5857	}
5858      else
5859	errata_emit_nops (insn);
5860    }
5861}
5862
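/* An illustrative sketch (the assembly mnemonics are examples only, nothing
   is emitted by this comment) of the workaround applied by errata_emit_nops
   above when, under TARGET_B_STEP, a general register that was conditionally
   set in the previous insn group is used as an address register: the
   offending memory insn is isolated by a stop bit, a nop and a second stop
   bit.

     before:                        after:
       (p6) mov r14 = r15             (p6) mov r14 = r15
            ld8 r16 = [r14]                ;;             <- stop bit
                                           nop 0
                                           ;;             <- stop bit
                                           ld8 r16 = [r14]              */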
5863
5864/* Instruction scheduling support.  */
5865
5866#define NR_BUNDLES 10
5867
5868/* A list of names of all available bundles.  */
5869
5870static const char *bundle_name [NR_BUNDLES] =
5871{
5872  ".mii",
5873  ".mmi",
5874  ".mfi",
5875  ".mmf",
5876#if NR_BUNDLES == 10
5877  ".bbb",
5878  ".mbb",
5879#endif
5880  ".mib",
5881  ".mmb",
5882  ".mfb",
5883  ".mlx"
5884};
5885
5886/* Nonzero if we should insert stop bits into the schedule.  */
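/* Illustrative only: the issue slots implied by each template name above,
   indexed the same way as bundle_name[] (the .mlx template holds a single
   long L+X instruction in its last two slots).  */
#if 0
static const char *const bundle_slots[NR_BUNDLES] =
{ "M I I", "M M I", "M F I", "M M F",
#if NR_BUNDLES == 10
  "B B B", "M B B",
#endif
  "M I B", "M M B", "M F B", "M L X" };
#endif
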
5887
5888int ia64_final_schedule = 0;
5889
5890/* Codes of the corresponding queried units: */
5891
5892static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5893static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5894
5895static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5896static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5897
5898static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5899
5900/* The following variable value is an insn group barrier.  */
5901
5902static rtx dfa_stop_insn;
5903
5904/* The following variable value is the last issued insn.  */
5905
5906static rtx last_scheduled_insn;
5907
5908/* The following variable value is the size of the DFA state.  */
5909
5910static size_t dfa_state_size;
5911
5912/* The following variable value is a pointer to a DFA state used as
5913   a temporary variable.  */
5914
5915static state_t temp_dfa_state = NULL;
5916
5917/* The following variable value is the DFA state after issuing the last
5918   insn.  */
5919
5920static state_t prev_cycle_state = NULL;
5921
5922/* The following array element values are TRUE if the corresponding
5923   insn requires stop bits to be added before it.  */
5924
5925static char *stops_p;
5926
5927/* The following variable is used to set up the array mentioned above.  */
5928
5929static int stop_before_p = 0;
5930
5931/* The following variable value is the length of the arrays `clocks' and
5932   `add_cycles'. */
5933
5934static int clocks_length;
5935
5936/* The following array element values are cycles on which the
5937   corresponding insn will be issued.  The array is used only for
5938   Itanium1.  */
5939
5940static int *clocks;
5941
5942/* The following array element values are the numbers of cycles that should
5943   be added to improve insn scheduling of MM_insns for Itanium1.  */
5944
5945static int *add_cycles;
5946
5947static rtx ia64_single_set (rtx);
5948static void ia64_emit_insn_before (rtx, rtx);
5949
5950/* Map a bundle number to its pseudo-op.  */
5951
5952const char *
5953get_bundle_name (int b)
5954{
5955  return bundle_name[b];
5956}
5957
5958
5959/* Return the maximum number of instructions a cpu can issue.  */
5960
5961static int
5962ia64_issue_rate (void)
5963{
5964  return 6;
5965}
5966
5967/* Helper function - like single_set, but look inside COND_EXEC.  */
5968
5969static rtx
5970ia64_single_set (rtx insn)
5971{
5972  rtx x = PATTERN (insn), ret;
5973  if (GET_CODE (x) == COND_EXEC)
5974    x = COND_EXEC_CODE (x);
5975  if (GET_CODE (x) == SET)
5976    return x;
5977
5978  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
5979     Although they are not classical single sets, the second set is there just
5980     to protect the first from moving past FP-relative stack accesses.  */
5981  switch (recog_memoized (insn))
5982    {
5983    case CODE_FOR_prologue_allocate_stack:
5984    case CODE_FOR_epilogue_deallocate_stack:
5985      ret = XVECEXP (x, 0, 0);
5986      break;
5987
5988    default:
5989      ret = single_set_2 (insn, x);
5990      break;
5991    }
5992
5993  return ret;
5994}
5995
5996/* Adjust the cost of a scheduling dependency.  Return the new cost of
5997   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
5998
5999static int
6000ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6001{
6002  enum attr_itanium_class dep_class;
6003  enum attr_itanium_class insn_class;
6004
6005  if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6006    return cost;
6007
6008  insn_class = ia64_safe_itanium_class (insn);
6009  dep_class = ia64_safe_itanium_class (dep_insn);
6010  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6011      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6012    return 0;
6013
6014  return cost;
6015}
6016
6017/* Like emit_insn_before, but skip cycle_display notes.
6018   ??? When cycle display notes are implemented, update this.  */
6019
6020static void
6021ia64_emit_insn_before (rtx insn, rtx before)
6022{
6023  emit_insn_before (insn, before);
6024}
6025
6026/* The following function marks insns that produce addresses for load
6027   and store insns.  Such insns will be placed into M slots because that
6028   decreases latency time for Itanium1 (see function
6029   `ia64_produce_address_p' and the DFA descriptions).  */
6030
6031static void
6032ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6033{
6034  rtx insn, link, next, next_tail;
6035
6036  next_tail = NEXT_INSN (tail);
6037  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6038    if (INSN_P (insn))
6039      insn->call = 0;
6040  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6041    if (INSN_P (insn)
6042	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6043      {
6044	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6045	  {
6046	    next = XEXP (link, 0);
6047	    if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6048		 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6049		&& ia64_st_address_bypass_p (insn, next))
6050	      break;
6051	    else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6052		      || ia64_safe_itanium_class (next)
6053		      == ITANIUM_CLASS_FLD)
6054		     && ia64_ld_address_bypass_p (insn, next))
6055	      break;
6056	  }
6057	insn->call = link != 0;
6058      }
6059}
6060
6061/* We're beginning a new block.  Initialize data structures as necessary.  */
6062
6063static void
6064ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6065		 int sched_verbose ATTRIBUTE_UNUSED,
6066		 int max_ready ATTRIBUTE_UNUSED)
6067{
6068#ifdef ENABLE_CHECKING
6069  rtx insn;
6070
6071  if (reload_completed)
6072    for (insn = NEXT_INSN (current_sched_info->prev_head);
6073	 insn != current_sched_info->next_tail;
6074	 insn = NEXT_INSN (insn))
6075      if (SCHED_GROUP_P (insn))
6076	abort ();
6077#endif
6078  last_scheduled_insn = NULL_RTX;
6079  init_insn_group_barriers ();
6080}
6081
6082/* We are about to begin issuing insns for this clock cycle.
6083   Override the default sort algorithm to better slot instructions.  */
6084
6085static int
6086ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6087			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6088			int reorder_type)
6089{
6090  int n_asms;
6091  int n_ready = *pn_ready;
6092  rtx *e_ready = ready + n_ready;
6093  rtx *insnp;
6094
6095  if (sched_verbose)
6096    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6097
6098  if (reorder_type == 0)
6099    {
6100      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6101      n_asms = 0;
6102      for (insnp = ready; insnp < e_ready; insnp++)
6103	if (insnp < e_ready)
6104	  {
6105	    rtx insn = *insnp;
6106	    enum attr_type t = ia64_safe_type (insn);
6107	    if (t == TYPE_UNKNOWN)
6108	      {
6109		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6110		    || asm_noperands (PATTERN (insn)) >= 0)
6111		  {
6112		    rtx lowest = ready[n_asms];
6113		    ready[n_asms] = insn;
6114		    *insnp = lowest;
6115		    n_asms++;
6116		  }
6117		else
6118		  {
6119		    rtx highest = ready[n_ready - 1];
6120		    ready[n_ready - 1] = insn;
6121		    *insnp = highest;
6122		    return 1;
6123		  }
6124	      }
6125	  }
6126
6127      if (n_asms < n_ready)
6128	{
6129	  /* Some normal insns to process.  Skip the asms.  */
6130	  ready += n_asms;
6131	  n_ready -= n_asms;
6132	}
6133      else if (n_ready > 0)
6134	return 1;
6135    }
6136
6137  if (ia64_final_schedule)
6138    {
6139      int deleted = 0;
6140      int nr_need_stop = 0;
6141
6142      for (insnp = ready; insnp < e_ready; insnp++)
6143	if (safe_group_barrier_needed_p (*insnp))
6144	  nr_need_stop++;
6145
6146      if (reorder_type == 1 && n_ready == nr_need_stop)
6147	return 0;
6148      if (reorder_type == 0)
6149	return 1;
6150      insnp = e_ready;
6151      /* Move down everything that needs a stop bit, preserving
6152	 relative order.  */
6153      while (insnp-- > ready + deleted)
6154	while (insnp >= ready + deleted)
6155	  {
6156	    rtx insn = *insnp;
6157	    if (! safe_group_barrier_needed_p (insn))
6158	      break;
6159	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6160	    *ready = insn;
6161	    deleted++;
6162	  }
6163      n_ready -= deleted;
6164      ready += deleted;
6165    }
6166
6167  return 1;
6168}
6169
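/* The reordering loop above moves every insn that needs a stop bit toward the
   low end of the ready array while preserving the relative order of the rest.
   A self-contained sketch of that stable partition on plain integers (the
   predicate and element type are invented for illustration):  */
#if 0
#include <string.h>

static int
needs_stop (int x)
{
  return x < 0;              /* stands in for safe_group_barrier_needed_p */
}

static void
partition_ready (int *ready, int n)
{
  int deleted = 0;
  int *insnp = ready + n;

  while (insnp-- > ready + deleted)
    while (insnp >= ready + deleted)
      {
	int insn = *insnp;

	if (! needs_stop (insn))
	  break;
	/* Shift the kept prefix up by one and park INSN at the bottom.  */
	memmove (ready + 1, ready, (insnp - ready) * sizeof (int));
	*ready = insn;
	deleted++;
      }
}
#endif
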
6170/* We are about to begin issuing insns for this clock cycle.  Override
6171   the default sort algorithm to better slot instructions.  */
6172
6173static int
6174ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6175		    int clock_var)
6176{
6177  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6178				 pn_ready, clock_var, 0);
6179}
6180
6181/* Like ia64_sched_reorder, but called after issuing each insn.
6182   Override the default sort algorithm to better slot instructions.  */
6183
6184static int
6185ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6186		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6187		     int *pn_ready, int clock_var)
6188{
6189  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6190    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6191  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6192				 clock_var, 1);
6193}
6194
6195/* We are about to issue INSN.  Return the number of insns left on the
6196   ready queue that can be issued this cycle.  */
6197
6198static int
6199ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6200		     int sched_verbose ATTRIBUTE_UNUSED,
6201		     rtx insn ATTRIBUTE_UNUSED,
6202		     int can_issue_more ATTRIBUTE_UNUSED)
6203{
6204  last_scheduled_insn = insn;
6205  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6206  if (reload_completed)
6207    {
6208      if (group_barrier_needed_p (insn))
6209	abort ();
6210      if (GET_CODE (insn) == CALL_INSN)
6211	init_insn_group_barriers ();
6212      stops_p [INSN_UID (insn)] = stop_before_p;
6213      stop_before_p = 0;
6214    }
6215  return 1;
6216}
6217
6218/* We are choosing an insn from the ready queue.  Return nonzero if INSN
6219   can be chosen.  */
6220
6221static int
6222ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6223{
6224  if (insn == NULL_RTX || !INSN_P (insn))
6225    abort ();
6226  return (!reload_completed
6227	  || !safe_group_barrier_needed_p (insn));
6228}
6229
6230/* The following variable value is a pseudo-insn used by the DFA insn
6231   scheduler to change the DFA state when the simulated clock is
6232   increased.  */
6233
6234static rtx dfa_pre_cycle_insn;
6235
6236/* We are about to begin issuing INSN.  Return nonzero if we cannot
6237   issue it on the given cycle CLOCK, and return zero if we should not sort
6238   the ready queue on the next clock start.  */
6239
6240static int
6241ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6242		    int clock, int *sort_p)
6243{
6244  int setup_clocks_p = FALSE;
6245
6246  if (insn == NULL_RTX || !INSN_P (insn))
6247    abort ();
6248  if ((reload_completed && safe_group_barrier_needed_p (insn))
6249      || (last_scheduled_insn
6250	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6251	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6252	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6253    {
6254      init_insn_group_barriers ();
6255      if (verbose && dump)
6256	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6257		 last_clock == clock ? " + cycle advance" : "");
6258      stop_before_p = 1;
6259      if (last_clock == clock)
6260	{
6261	  state_transition (curr_state, dfa_stop_insn);
6262	  if (TARGET_EARLY_STOP_BITS)
6263	    *sort_p = (last_scheduled_insn == NULL_RTX
6264		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6265	  else
6266	    *sort_p = 0;
6267	  return 1;
6268	}
6269      else if (reload_completed)
6270	setup_clocks_p = TRUE;
6271      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6272	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6273	state_reset (curr_state);
6274      else
6275	{
6276	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6277	  state_transition (curr_state, dfa_stop_insn);
6278	  state_transition (curr_state, dfa_pre_cycle_insn);
6279	  state_transition (curr_state, NULL);
6280	}
6281    }
6282  else if (reload_completed)
6283    setup_clocks_p = TRUE;
6284  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6285      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6286      && asm_noperands (PATTERN (insn)) < 0)
6287    {
6288      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6289
6290      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6291	{
6292	  rtx link;
6293	  int d = -1;
6294
6295	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6296	    if (REG_NOTE_KIND (link) == 0)
6297	      {
6298		enum attr_itanium_class dep_class;
6299		rtx dep_insn = XEXP (link, 0);
6300
6301		dep_class = ia64_safe_itanium_class (dep_insn);
6302		if ((dep_class == ITANIUM_CLASS_MMMUL
6303		     || dep_class == ITANIUM_CLASS_MMSHF)
6304		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6305		    && (d < 0
6306			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6307		  d = last_clock - clocks [INSN_UID (dep_insn)];
6308	      }
6309	  if (d >= 0)
6310	    add_cycles [INSN_UID (insn)] = 3 - d;
6311	}
6312    }
6313  return 0;
6314}
6315
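/* A worked example of the add_cycles bookkeeping above, with illustrative
   numbers only: if an MMSHF producer was issued at cycle 10 (its clocks[]
   value) and LAST_CLOCK is 12, then d = 12 - 10 = 2, which is under the 4
   cycles the MM bypass needs, so add_cycles[INSN_UID (insn)] = 3 - d = 1
   extra cycle is recorded for the extra Itanium1 nop-insertion pass.  */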
6316
6317
6318/* The following page contains abstract data `bundle states' which are
6319   used for bundling insns (inserting nops and template generation).  */
6320
6321/* The following describes state of insn bundling.  */
6322
6323struct bundle_state
6324{
6325  /* Unique bundle state number to identify them in the debugging
6326     output  */
6327  int unique_num;
6328  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
6329  /* number of nops before and after the insn  */
6330  short before_nops_num, after_nops_num;
6331  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
6332                   insn)  */
6333  int cost;     /* cost of the state in cycles */
6334  int accumulated_insns_num; /* number of all previous insns including
6335				nops.  L is considered as 2 insns */
6336  int branch_deviation; /* deviation of previous branches from 3rd slots  */
6337  struct bundle_state *next;  /* next state with the same insn_num  */
6338  struct bundle_state *originator; /* originator (previous insn state)  */
6339  /* All bundle states are in the following chain.  */
6340  struct bundle_state *allocated_states_chain;
6341  /* The DFA State after issuing the insn and the nops.  */
6342  state_t dfa_state;
6343};
6344
6345/* The following maps an insn number to the corresponding bundle state.  */
6346
6347static struct bundle_state **index_to_bundle_states;
6348
6349/* The unique number of the next bundle state.  */
6350
6351static int bundle_states_num;
6352
6353/* All allocated bundle states are in the following chain.  */
6354
6355static struct bundle_state *allocated_bundle_states_chain;
6356
6357/* All allocated but not used bundle states are in the following
6358   chain.  */
6359
6360static struct bundle_state *free_bundle_state_chain;
6361
6362
6363/* The following function returns a free bundle state.  */
6364
6365static struct bundle_state *
6366get_free_bundle_state (void)
6367{
6368  struct bundle_state *result;
6369
6370  if (free_bundle_state_chain != NULL)
6371    {
6372      result = free_bundle_state_chain;
6373      free_bundle_state_chain = result->next;
6374    }
6375  else
6376    {
6377      result = xmalloc (sizeof (struct bundle_state));
6378      result->dfa_state = xmalloc (dfa_state_size);
6379      result->allocated_states_chain = allocated_bundle_states_chain;
6380      allocated_bundle_states_chain = result;
6381    }
6382  result->unique_num = bundle_states_num++;
6383  return result;
6384
6385}
6386
6387/* The following function frees the given bundle state.  */
6388
6389static void
6390free_bundle_state (struct bundle_state *state)
6391{
6392  state->next = free_bundle_state_chain;
6393  free_bundle_state_chain = state;
6394}
6395
6396/* Start work with abstract data `bundle states'.  */
6397
6398static void
6399initiate_bundle_states (void)
6400{
6401  bundle_states_num = 0;
6402  free_bundle_state_chain = NULL;
6403  allocated_bundle_states_chain = NULL;
6404}
6405
6406/* Finish work with abstract data `bundle states'.  */
6407
6408static void
6409finish_bundle_states (void)
6410{
6411  struct bundle_state *curr_state, *next_state;
6412
6413  for (curr_state = allocated_bundle_states_chain;
6414       curr_state != NULL;
6415       curr_state = next_state)
6416    {
6417      next_state = curr_state->allocated_states_chain;
6418      free (curr_state->dfa_state);
6419      free (curr_state);
6420    }
6421}
6422
6423/* Hash table of the bundle states.  The key is dfa_state and insn_num
6424   of the bundle states.  */
6425
6426static htab_t bundle_state_table;
6427
6428/* The function returns the hash of BUNDLE_STATE.  */
6429
6430static unsigned
6431bundle_state_hash (const void *bundle_state)
6432{
6433  const struct bundle_state *state = (struct bundle_state *) bundle_state;
6434  unsigned result, i;
6435
6436  for (result = i = 0; i < dfa_state_size; i++)
6437    result += (((unsigned char *) state->dfa_state) [i]
6438	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6439  return result + state->insn_num;
6440}
6441
6442/* The function returns nonzero if the bundle state keys are equal.  */
6443
6444static int
6445bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6446{
6447  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6448  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6449
6450  return (state1->insn_num == state2->insn_num
6451	  && memcmp (state1->dfa_state, state2->dfa_state,
6452		     dfa_state_size) == 0);
6453}
6454
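/* A self-contained sketch of the hashing scheme used by bundle_state_hash
   above: every byte of an opaque state buffer is folded in at a shift that
   depends on its index, and a small integer key is added at the end.  The
   function below is an illustrative analogue, not the exact GCC code.  */
#if 0
#include <limits.h>
#include <stddef.h>

static unsigned
hash_state_bytes (const unsigned char *buf, size_t len, int extra_key)
{
  unsigned result = 0;
  size_t i;

  for (i = 0; i < len; i++)
    result += (unsigned) buf[i] << ((i % CHAR_BIT) * 3 + CHAR_BIT);
  return result + (unsigned) extra_key;
}
#endif
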
6455/* The function inserts the BUNDLE_STATE into the hash table.  The
6456   function returns nonzero if the bundle has been inserted into the
6457   table.  The table contains the best bundle state with a given key.  */
6458
6459static int
6460insert_bundle_state (struct bundle_state *bundle_state)
6461{
6462  void **entry_ptr;
6463
6464  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6465  if (*entry_ptr == NULL)
6466    {
6467      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6468      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6469      *entry_ptr = (void *) bundle_state;
6470      return TRUE;
6471    }
6472  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6473	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6474	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6475		   > bundle_state->accumulated_insns_num
6476		   || (((struct bundle_state *)
6477			*entry_ptr)->accumulated_insns_num
6478		       == bundle_state->accumulated_insns_num
6479		       && ((struct bundle_state *)
6480			   *entry_ptr)->branch_deviation
6481		       > bundle_state->branch_deviation))))
6482
6483    {
6484      struct bundle_state temp;
6485
6486      temp = *(struct bundle_state *) *entry_ptr;
6487      *(struct bundle_state *) *entry_ptr = *bundle_state;
6488      ((struct bundle_state *) *entry_ptr)->next = temp.next;
6489      *bundle_state = temp;
6490    }
6491  return FALSE;
6492}
6493
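/* The replacement test inside insert_bundle_state above is a lexicographic
   "is the new state better?" comparison.  A self-contained sketch of that
   ordering on a reduced struct (the field names mirror the real ones, but the
   struct itself is invented for illustration):  */
#if 0
struct state_key
{
  int cost;                    /* primary: fewer cycles is better      */
  int accumulated_insns_num;   /* secondary: fewer insns (incl. nops)  */
  int branch_deviation;        /* tertiary: branches closer to slot 3  */
};

static int
new_state_is_better (const struct state_key *new_st,
		     const struct state_key *old_st)
{
  if (new_st->cost != old_st->cost)
    return new_st->cost < old_st->cost;
  if (new_st->accumulated_insns_num != old_st->accumulated_insns_num)
    return new_st->accumulated_insns_num < old_st->accumulated_insns_num;
  return new_st->branch_deviation < old_st->branch_deviation;
}
#endif
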
6494/* Start work with the hash table.  */
6495
6496static void
6497initiate_bundle_state_table (void)
6498{
6499  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6500				    (htab_del) 0);
6501}
6502
6503/* Finish work with the hash table.  */
6504
6505static void
6506finish_bundle_state_table (void)
6507{
6508  htab_delete (bundle_state_table);
6509}
6510
6511
6512
6513/* The following variable is an insn `nop' used to check bundle states
6514   with different numbers of inserted nops.  */
6515
6516static rtx ia64_nop;
6517
6518/* The following function tries to issue NOPS_NUM nops for the current
6519   state without advancing the processor cycle.  If that fails, the
6520   function returns FALSE and frees the current state.  */
6521
6522static int
6523try_issue_nops (struct bundle_state *curr_state, int nops_num)
6524{
6525  int i;
6526
6527  for (i = 0; i < nops_num; i++)
6528    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6529      {
6530	free_bundle_state (curr_state);
6531	return FALSE;
6532      }
6533  return TRUE;
6534}
6535
6536/* The following function tries to issue INSN for the current
6537   state without advancing the processor cycle.  If that fails, the
6538   function returns FALSE and frees the current state.  */
6539
6540static int
6541try_issue_insn (struct bundle_state *curr_state, rtx insn)
6542{
6543  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6544    {
6545      free_bundle_state (curr_state);
6546      return FALSE;
6547    }
6548  return TRUE;
6549}
6550
6551/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6552   starting from state ORIGINATOR without advancing the processor cycle.
6553   If TRY_BUNDLE_END_P is TRUE, the function also/only (if
6554   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6555   If successful, the function creates a new bundle state and inserts it
6556   into the hash table and into `index_to_bundle_states'.  */
6557
6558static void
6559issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6560		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6561{
6562  struct bundle_state *curr_state;
6563
6564  curr_state = get_free_bundle_state ();
6565  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6566  curr_state->insn = insn;
6567  curr_state->insn_num = originator->insn_num + 1;
6568  curr_state->cost = originator->cost;
6569  curr_state->originator = originator;
6570  curr_state->before_nops_num = before_nops_num;
6571  curr_state->after_nops_num = 0;
6572  curr_state->accumulated_insns_num
6573    = originator->accumulated_insns_num + before_nops_num;
6574  curr_state->branch_deviation = originator->branch_deviation;
6575  if (insn == NULL_RTX)
6576    abort ();
6577  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6578    {
6579      if (GET_MODE (insn) == TImode)
6580	abort ();
6581      if (!try_issue_nops (curr_state, before_nops_num))
6582	return;
6583      if (!try_issue_insn (curr_state, insn))
6584	return;
6585      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6586      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6587	  && curr_state->accumulated_insns_num % 3 != 0)
6588	{
6589	  free_bundle_state (curr_state);
6590	  return;
6591	}
6592    }
6593  else if (GET_MODE (insn) != TImode)
6594    {
6595      if (!try_issue_nops (curr_state, before_nops_num))
6596	return;
6597      if (!try_issue_insn (curr_state, insn))
6598	return;
6599      curr_state->accumulated_insns_num++;
6600      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6601	  || asm_noperands (PATTERN (insn)) >= 0)
6602	abort ();
6603      if (ia64_safe_type (insn) == TYPE_L)
6604	curr_state->accumulated_insns_num++;
6605    }
6606  else
6607    {
6608      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6609      state_transition (curr_state->dfa_state, NULL);
6610      curr_state->cost++;
6611      if (!try_issue_nops (curr_state, before_nops_num))
6612	return;
6613      if (!try_issue_insn (curr_state, insn))
6614	return;
6615      curr_state->accumulated_insns_num++;
6616      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6617	  || asm_noperands (PATTERN (insn)) >= 0)
6618	{
6619	  /* Finish bundle containing asm insn.  */
6620	  curr_state->after_nops_num
6621	    = 3 - curr_state->accumulated_insns_num % 3;
6622	  curr_state->accumulated_insns_num
6623	    += 3 - curr_state->accumulated_insns_num % 3;
6624	}
6625      else if (ia64_safe_type (insn) == TYPE_L)
6626	curr_state->accumulated_insns_num++;
6627    }
6628  if (ia64_safe_type (insn) == TYPE_B)
6629    curr_state->branch_deviation
6630      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6631  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6632    {
6633      if (!only_bundle_end_p && insert_bundle_state (curr_state))
6634	{
6635	  state_t dfa_state;
6636	  struct bundle_state *curr_state1;
6637	  struct bundle_state *allocated_states_chain;
6638
6639	  curr_state1 = get_free_bundle_state ();
6640	  dfa_state = curr_state1->dfa_state;
6641	  allocated_states_chain = curr_state1->allocated_states_chain;
6642	  *curr_state1 = *curr_state;
6643	  curr_state1->dfa_state = dfa_state;
6644	  curr_state1->allocated_states_chain = allocated_states_chain;
6645	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6646		  dfa_state_size);
6647	  curr_state = curr_state1;
6648	}
6649      if (!try_issue_nops (curr_state,
6650			   3 - curr_state->accumulated_insns_num % 3))
6651	return;
6652      curr_state->after_nops_num
6653	= 3 - curr_state->accumulated_insns_num % 3;
6654      curr_state->accumulated_insns_num
6655	+= 3 - curr_state->accumulated_insns_num % 3;
6656    }
6657  if (!insert_bundle_state (curr_state))
6658    free_bundle_state (curr_state);
6659  return;
6660}
6661
6662/* The following function returns the position in the two-bundle window
6663   for the given STATE.  */
6664
6665static int
6666get_max_pos (state_t state)
6667{
6668  if (cpu_unit_reservation_p (state, pos_6))
6669    return 6;
6670  else if (cpu_unit_reservation_p (state, pos_5))
6671    return 5;
6672  else if (cpu_unit_reservation_p (state, pos_4))
6673    return 4;
6674  else if (cpu_unit_reservation_p (state, pos_3))
6675    return 3;
6676  else if (cpu_unit_reservation_p (state, pos_2))
6677    return 2;
6678  else if (cpu_unit_reservation_p (state, pos_1))
6679    return 1;
6680  else
6681    return 0;
6682}
6683
6684/* The function returns the code of a possible template for the given
6685   position and state.  It should be called only with the two position
6686   values 3 or 6.  */
6687
6688static int
6689get_template (state_t state, int pos)
6690{
6691  switch (pos)
6692    {
6693    case 3:
6694      if (cpu_unit_reservation_p (state, _0mii_))
6695	return 0;
6696      else if (cpu_unit_reservation_p (state, _0mmi_))
6697	return 1;
6698      else if (cpu_unit_reservation_p (state, _0mfi_))
6699	return 2;
6700      else if (cpu_unit_reservation_p (state, _0mmf_))
6701	return 3;
6702      else if (cpu_unit_reservation_p (state, _0bbb_))
6703	return 4;
6704      else if (cpu_unit_reservation_p (state, _0mbb_))
6705	return 5;
6706      else if (cpu_unit_reservation_p (state, _0mib_))
6707	return 6;
6708      else if (cpu_unit_reservation_p (state, _0mmb_))
6709	return 7;
6710      else if (cpu_unit_reservation_p (state, _0mfb_))
6711	return 8;
6712      else if (cpu_unit_reservation_p (state, _0mlx_))
6713	return 9;
6714      else
6715	abort ();
6716    case 6:
6717      if (cpu_unit_reservation_p (state, _1mii_))
6718	return 0;
6719      else if (cpu_unit_reservation_p (state, _1mmi_))
6720	return 1;
6721      else if (cpu_unit_reservation_p (state, _1mfi_))
6722	return 2;
6723      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6724	return 3;
6725      else if (cpu_unit_reservation_p (state, _1bbb_))
6726	return 4;
6727      else if (cpu_unit_reservation_p (state, _1mbb_))
6728	return 5;
6729      else if (cpu_unit_reservation_p (state, _1mib_))
6730	return 6;
6731      else if (cpu_unit_reservation_p (state, _1mmb_))
6732	return 7;
6733      else if (cpu_unit_reservation_p (state, _1mfb_))
6734	return 8;
6735      else if (cpu_unit_reservation_p (state, _1mlx_))
6736	return 9;
6737      else
6738	abort ();
6739    default:
6740      abort ();
6741    }
6742}
6743
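/* A hedged usage sketch: the code returned by get_template above indexes the
   same table as get_bundle_name, so a caller can turn a filled-in DFA state
   into the bundle pseudo-op to emit.  STATE is assumed to describe the first
   bundle of the window; the ".mfi" value is only an example outcome, and the
   real bundling code records the code in a bundle_selector insn instead of
   printing it directly.  */
#if 0
static void
emit_bundle_pseudo_op_sketch (state_t state)
{
  int template0 = get_template (state, 3);              /* e.g. 2       */
  const char *pseudo_op = get_bundle_name (template0);  /* e.g. ".mfi"  */

  (void) pseudo_op;
}
#endif
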
6744/* The following function returns the first insn important for insn
6745   bundling, starting at INSN and before TAIL.  */
6746
6747static rtx
6748get_next_important_insn (rtx insn, rtx tail)
6749{
6750  for (; insn && insn != tail; insn = NEXT_INSN (insn))
6751    if (INSN_P (insn)
6752	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6753	&& GET_CODE (PATTERN (insn)) != USE
6754	&& GET_CODE (PATTERN (insn)) != CLOBBER)
6755      return insn;
6756  return NULL_RTX;
6757}
6758
6759/* The following function does insn bundling.  Bundling means
6760   inserting templates and nop insns to fit insn groups into permitted
6761   templates.  Instruction scheduling uses an NDFA (non-deterministic
6762   finite automaton) encoding information about the templates and the
6763   inserted nops.  The nondeterminism of the automaton permits following
6764   all possible insn sequences very quickly.
6765
6766   Unfortunately it is not possible to get information about inserting
6767   nop insns and used templates from the automaton states.  The
6768   automaton only says that we can issue an insn, possibly inserting
6769   some nops before it and using some template.  Therefore insn
6770   bundling in this function is implemented by using a DFA
6771   (deterministic finite automaton).  We follow all possible insn
6772   sequences by inserting 0-2 nops (that is what the NDFA describes for
6773   insn scheduling) before/after each insn being bundled.  We know the
6774   start of a simulated processor cycle from insn scheduling (an insn
6775   starting a new cycle has TImode).
6776
6777   A simple implementation of insn bundling would create an enormous
6778   number of possible insn sequences satisfying the information about new
6779   cycle ticks taken from the insn scheduling.  To make the algorithm
6780   practical we use dynamic programming.  Each decision (about
6781   inserting nops and implicitly about previous decisions) is described
6782   by structure bundle_state (see above).  If we generate the same
6783   bundle state (the key is the automaton state after issuing the insns
6784   and nops for it), we reuse the already generated one.  As a consequence
6785   we reject decisions which can not improve the solution and
6786   reduce the memory used by the algorithm.
6787
6788   When we reach the end of the EBB (extended basic block), we choose the
6789   best sequence and then, moving back through the EBB, insert templates
6790   for the best alternative.  The templates are taken by querying the
6791   automaton state for each insn in the chosen bundle states.
6792
6793   So the algorithm makes two (forward and backward) passes through the
6794   EBB.  There is an additional forward pass through the EBB for the
6795   Itanium1 processor.  This pass inserts more nops to make the dependency
6796   between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
6797
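/* A rough sketch of the forward pass below (illustrative pseudo-code,
   not a literal excerpt of the implementation):

     for each important insn I in the EBB
       for each bundle state S recorded just before I
         for N in {2, 1, 0}
           try issuing N nops and then I starting from S;
           if an equivalent state (same automaton state) was already
             generated, keep only the cheaper one, otherwise record
             the new state as a candidate for the next insn.

   The backward pass then walks the originator chain of the best final
   state and materializes the recorded nops and bundle selector
   templates.  */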
6798static void
6799bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6800{
6801  struct bundle_state *curr_state, *next_state, *best_state;
6802  rtx insn, next_insn;
6803  int insn_num;
6804  int i, bundle_end_p, only_bundle_end_p, asm_p;
6805  int pos = 0, max_pos, template0, template1;
6806  rtx b;
6807  rtx nop;
6808  enum attr_type type;
6809
6810  insn_num = 0;
6811  /* Count insns in the EBB.  */
6812  for (insn = NEXT_INSN (prev_head_insn);
6813       insn && insn != tail;
6814       insn = NEXT_INSN (insn))
6815    if (INSN_P (insn))
6816      insn_num++;
6817  if (insn_num == 0)
6818    return;
6819  bundling_p = 1;
6820  dfa_clean_insn_cache ();
6821  initiate_bundle_state_table ();
6822  index_to_bundle_states = xmalloc ((insn_num + 2)
6823				    * sizeof (struct bundle_state *));
6824  /* First (forward) pass -- generation of bundle states. */
6825  curr_state = get_free_bundle_state ();
6826  curr_state->insn = NULL;
6827  curr_state->before_nops_num = 0;
6828  curr_state->after_nops_num = 0;
6829  curr_state->insn_num = 0;
6830  curr_state->cost = 0;
6831  curr_state->accumulated_insns_num = 0;
6832  curr_state->branch_deviation = 0;
6833  curr_state->next = NULL;
6834  curr_state->originator = NULL;
6835  state_reset (curr_state->dfa_state);
6836  index_to_bundle_states [0] = curr_state;
6837  insn_num = 0;
6838  /* Shift the cycle mark if it is on an insn which could be ignored.  */
6839  for (insn = NEXT_INSN (prev_head_insn);
6840       insn != tail;
6841       insn = NEXT_INSN (insn))
6842    if (INSN_P (insn)
6843	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6844	    || GET_CODE (PATTERN (insn)) == USE
6845	    || GET_CODE (PATTERN (insn)) == CLOBBER)
6846	&& GET_MODE (insn) == TImode)
6847      {
6848	PUT_MODE (insn, VOIDmode);
6849	for (next_insn = NEXT_INSN (insn);
6850	     next_insn != tail;
6851	     next_insn = NEXT_INSN (next_insn))
6852	  if (INSN_P (next_insn)
6853	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6854	      && GET_CODE (PATTERN (next_insn)) != USE
6855	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6856	    {
6857	      PUT_MODE (next_insn, TImode);
6858	      break;
6859	    }
6860      }
6861  /* Forward pass: generation of bundle states.  */
6862  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6863       insn != NULL_RTX;
6864       insn = next_insn)
6865    {
6866      if (!INSN_P (insn)
6867	  || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6868	  || GET_CODE (PATTERN (insn)) == USE
6869	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6870	abort ();
6871      type = ia64_safe_type (insn);
6872      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6873      insn_num++;
6874      index_to_bundle_states [insn_num] = NULL;
6875      for (curr_state = index_to_bundle_states [insn_num - 1];
6876	   curr_state != NULL;
6877	   curr_state = next_state)
6878	{
6879	  pos = curr_state->accumulated_insns_num % 3;
6880	  next_state = curr_state->next;
6881	  /* We must fill up the current bundle in order to start a
6882	     subsequent asm insn in a new bundle.  An asm insn is always
6883	     placed in a separate bundle.  */
6884	  only_bundle_end_p
6885	    = (next_insn != NULL_RTX
6886	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6887	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6888	  /* We may fill up the current bundle if it is the cycle end
6889	     without a group barrier.  */
6890	  bundle_end_p
6891	    = (only_bundle_end_p || next_insn == NULL_RTX
6892	       || (GET_MODE (next_insn) == TImode
6893		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6894	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6895	      || type == TYPE_S
6896	      /* We need to insert 2 nops for cases like M_MII.  To
6897		 guarantee issuing all insns on the same cycle for
6898		 Itanium 1, we need to issue 2 nops after the first M
6899		 insn (MnnMII where n is a nop insn).  */
6900	      || ((type == TYPE_M || type == TYPE_A)
6901		  && ia64_tune == PROCESSOR_ITANIUM
6902		  && !bundle_end_p && pos == 1))
6903	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6904				 only_bundle_end_p);
6905	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6906			       only_bundle_end_p);
6907	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6908			       only_bundle_end_p);
6909	}
6910      if (index_to_bundle_states [insn_num] == NULL)
6911	abort ();
6912      for (curr_state = index_to_bundle_states [insn_num];
6913	   curr_state != NULL;
6914	   curr_state = curr_state->next)
6915	if (verbose >= 2 && dump)
6916	  {
6917	    /* This structure is taken from the generated code of the
6918	       pipeline hazard recognizer (see file insn-attrtab.c).
6919	       Please don't forget to change the structure if a new
6920	       automaton is added to the .md file.  */
6921	    struct DFA_chip
6922	    {
6923	      unsigned short one_automaton_state;
6924	      unsigned short oneb_automaton_state;
6925	      unsigned short two_automaton_state;
6926	      unsigned short twob_automaton_state;
6927	    };
6928
6929	    fprintf
6930	      (dump,
6931	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6932	       curr_state->unique_num,
6933	       (curr_state->originator == NULL
6934		? -1 : curr_state->originator->unique_num),
6935	       curr_state->cost,
6936	       curr_state->before_nops_num, curr_state->after_nops_num,
6937	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
6938	       (ia64_tune == PROCESSOR_ITANIUM
6939		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6940		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6941	       INSN_UID (insn));
6942	  }
6943    }
6944  if (index_to_bundle_states [insn_num] == NULL)
6945    /* We should find a solution because the 2nd insn scheduling has
6946       found one.  */
6947    abort ();
6948  /* Find a state corresponding to the best insn sequence.  */
6949  best_state = NULL;
6950  for (curr_state = index_to_bundle_states [insn_num];
6951       curr_state != NULL;
6952       curr_state = curr_state->next)
6953    /* We are only looking at states with a fully filled up last
6954       bundle.  First we prefer insn sequences with minimal cost,
6955       then ones with the fewest inserted nops, and finally ones with
6956       branch insns placed in the 3rd slots.  */
6957    if (curr_state->accumulated_insns_num % 3 == 0
6958	&& (best_state == NULL || best_state->cost > curr_state->cost
6959	    || (best_state->cost == curr_state->cost
6960		&& (curr_state->accumulated_insns_num
6961		    < best_state->accumulated_insns_num
6962		    || (curr_state->accumulated_insns_num
6963			== best_state->accumulated_insns_num
6964			&& curr_state->branch_deviation
6965			< best_state->branch_deviation)))))
6966      best_state = curr_state;
6967  /* Second (backward) pass: adding nops and templates.  */
6968  insn_num = best_state->before_nops_num;
6969  template0 = template1 = -1;
6970  for (curr_state = best_state;
6971       curr_state->originator != NULL;
6972       curr_state = curr_state->originator)
6973    {
6974      insn = curr_state->insn;
6975      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
6976	       || asm_noperands (PATTERN (insn)) >= 0);
6977      insn_num++;
6978      if (verbose >= 2 && dump)
6979	{
6980	  struct DFA_chip
6981	  {
6982	    unsigned short one_automaton_state;
6983	    unsigned short oneb_automaton_state;
6984	    unsigned short two_automaton_state;
6985	    unsigned short twob_automaton_state;
6986	  };
6987
6988	  fprintf
6989	    (dump,
6990	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6991	     curr_state->unique_num,
6992	     (curr_state->originator == NULL
6993	      ? -1 : curr_state->originator->unique_num),
6994	     curr_state->cost,
6995	     curr_state->before_nops_num, curr_state->after_nops_num,
6996	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
6997	     (ia64_tune == PROCESSOR_ITANIUM
6998	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6999	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7000	     INSN_UID (insn));
7001	}
7002      /* Find the position in the current bundle window.  The window can
7003	 contain at most two bundles.  A two-bundle window means that
7004	 the processor will make two bundle rotations.  */
7005      max_pos = get_max_pos (curr_state->dfa_state);
7006      if (max_pos == 6
7007	  /* The following (negative template number) means that the
7008	     processor did one bundle rotation.  */
7009	  || (max_pos == 3 && template0 < 0))
7010	{
7011	  /* We are at the end of the window -- find template(s) for
7012	     its bundle(s).  */
7013	  pos = max_pos;
7014	  if (max_pos == 3)
7015	    template0 = get_template (curr_state->dfa_state, 3);
7016	  else
7017	    {
7018	      template1 = get_template (curr_state->dfa_state, 3);
7019	      template0 = get_template (curr_state->dfa_state, 6);
7020	    }
7021	}
7022      if (max_pos > 3 && template1 < 0)
7023	/* This may happen when we have a stop inside a bundle.  */
7024	{
7025	  if (pos > 3)
7026	    abort ();
7027	  template1 = get_template (curr_state->dfa_state, 3);
7028	  pos += 3;
7029	}
7030      if (!asm_p)
7031	/* Emit nops after the current insn.  */
7032	for (i = 0; i < curr_state->after_nops_num; i++)
7033	  {
7034	    nop = gen_nop ();
7035	    emit_insn_after (nop, insn);
7036	    pos--;
7037	    if (pos < 0)
7038	      abort ();
7039	    if (pos % 3 == 0)
7040	      {
7041		/* We are at the start of a bundle: emit the template
7042		   (it should be defined).  */
7043		if (template0 < 0)
7044		  abort ();
7045		b = gen_bundle_selector (GEN_INT (template0));
7046		ia64_emit_insn_before (b, nop);
7047		/* If we have a two-bundle window, we make one bundle
7048		   rotation.  Otherwise template0 will be undefined
7049		   (a negative value).  */
7050		template0 = template1;
7051		template1 = -1;
7052	      }
7053	  }
7054      /* Move the position backward in the window.  A group barrier has
7055	 no slot.  An asm insn takes a whole bundle.  */
7056      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7057	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7058	  && asm_noperands (PATTERN (insn)) < 0)
7059	pos--;
7060      /* Long insn takes 2 slots.  */
7061      if (ia64_safe_type (insn) == TYPE_L)
7062	pos--;
7063      if (pos < 0)
7064	abort ();
7065      if (pos % 3 == 0
7066	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7067	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7068	  && asm_noperands (PATTERN (insn)) < 0)
7069	{
7070	  /* The current insn is at the bundle start: emit the
7071	     template.  */
7072	  if (template0 < 0)
7073	    abort ();
7074	  b = gen_bundle_selector (GEN_INT (template0));
7075	  ia64_emit_insn_before (b, insn);
7076	  b = PREV_INSN (insn);
7077	  insn = b;
7078	  /* See comment above in analogous place for emitting nops
7079	     after the insn.  */
7080	  template0 = template1;
7081	  template1 = -1;
7082	}
7083      /* Emit nops before the current insn.  */
7084      for (i = 0; i < curr_state->before_nops_num; i++)
7085	{
7086	  nop = gen_nop ();
7087	  ia64_emit_insn_before (nop, insn);
7088	  nop = PREV_INSN (insn);
7089	  insn = nop;
7090	  pos--;
7091	  if (pos < 0)
7092	    abort ();
7093	  if (pos % 3 == 0)
7094	    {
7095	      /* See comment above in analogous place for emitting nops
7096		 after the insn.  */
7097	      if (template0 < 0)
7098		abort ();
7099	      b = gen_bundle_selector (GEN_INT (template0));
7100	      ia64_emit_insn_before (b, insn);
7101	      b = PREV_INSN (insn);
7102	      insn = b;
7103	      template0 = template1;
7104	      template1 = -1;
7105	    }
7106	}
7107    }
7108  if (ia64_tune == PROCESSOR_ITANIUM)
7109    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
7110       Itanium1 has a strange design: if the distance between an insn
7111       and a dependent MM-insn is less than 4 cycles, we get an
7112       additional 6-cycle stall.  So we make the distance equal to 4
7113       cycles if it is less.  */
7114    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7115	 insn != NULL_RTX;
7116	 insn = next_insn)
7117      {
7118	if (!INSN_P (insn)
7119	    || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7120	    || GET_CODE (PATTERN (insn)) == USE
7121	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7122	  abort ();
7123	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7124	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7125	  /* We found an MM-insn which needs additional cycles.  */
7126	  {
7127	    rtx last;
7128	    int i, j, n;
7129	    int pred_stop_p;
7130
7131	    /* Now we are searching for the template of the bundle in
7132	       which the MM-insn is placed and for the position of the
7133	       insn in the bundle (0, 1, 2).  We also check whether
7134	       there is a stop before the insn.  */
7135	    last = prev_active_insn (insn);
7136	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7137	    if (pred_stop_p)
7138	      last = prev_active_insn (last);
7139	    n = 0;
7140	    for (;; last = prev_active_insn (last))
7141	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
7142		{
7143		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7144		  if (template0 == 9)
7145		    /* The insn is in an MLX bundle.  Change the template
7146		       to MFI because we will add nops before the insn.
7147		       It simplifies the subsequent code a lot.  */
7148		    PATTERN (last)
7149		      = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7150		  break;
7151		}
7152	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7153		       && (ia64_safe_itanium_class (last)
7154			   != ITANIUM_CLASS_IGNORE))
7155		n++;
7156	    /* Some correctness checks: the stop is not at the bundle
7157	       start, there are no more than 3 insns in the bundle,
7158	       and the MM-insn is not at the start of a bundle with
7159	       template MLX.  */
7160	    if ((pred_stop_p && n == 0) || n > 2
7161		|| (template0 == 9 && n != 0))
7162	      abort ();
7163	    /* Put nops after the insn in the bundle.  */
7164	    for (j = 3 - n; j > 0; j --)
7165	      ia64_emit_insn_before (gen_nop (), insn);
7166	    /* This takes into account that we will add N more nops
7167	       before the insn later -- please see the code below.  */
7168	    add_cycles [INSN_UID (insn)]--;
7169	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7170	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7171				     insn);
7172	    if (pred_stop_p)
7173	      add_cycles [INSN_UID (insn)]--;
7174	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7175	      {
7176		/* Insert "MII;" template.  */
7177		ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7178				       insn);
7179		ia64_emit_insn_before (gen_nop (), insn);
7180		ia64_emit_insn_before (gen_nop (), insn);
7181		if (i > 1)
7182		  {
7183		    /* To decrease code size, we use the "MI;I;"
7184		       template.  */
7185		    ia64_emit_insn_before
7186		      (gen_insn_group_barrier (GEN_INT (3)), insn);
7187		    i--;
7188		  }
7189		ia64_emit_insn_before (gen_nop (), insn);
7190		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7191				       insn);
7192	      }
7193	    /* Put the MM-insn in the same slot of a bundle with the
7194	       same template as the original one.  */
7195	    ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7196				   insn);
7197	    /* To put the insn in the same slot, add the necessary
7198	       number of nops.  */
7199	    for (j = n; j > 0; j --)
7200	      ia64_emit_insn_before (gen_nop (), insn);
7201	    /* Put the stop if the original bundle had it.  */
7202	    if (pred_stop_p)
7203	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7204				     insn);
7205	  }
7206      }
7207  free (index_to_bundle_states);
7208  finish_bundle_state_table ();
7209  bundling_p = 0;
7210  dfa_clean_insn_cache ();
7211}
7212
7213/* The following function is called at the end of scheduling BB or
7214   EBB.  After reload, it inserts stop bits and does insn bundling.  */
7215
7216static void
7217ia64_sched_finish (FILE *dump, int sched_verbose)
7218{
7219  if (sched_verbose)
7220    fprintf (dump, "// Finishing schedule.\n");
7221  if (!reload_completed)
7222    return;
7223  if (reload_completed)
7224    {
7225      final_emit_insn_group_barriers (dump);
7226      bundling (dump, sched_verbose, current_sched_info->prev_head,
7227		current_sched_info->next_tail);
7228      if (sched_verbose && dump)
7229	fprintf (dump, "//    finishing %d-%d\n",
7230		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
7231		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7232
7233      return;
7234    }
7235}
7236
7237/* The following function inserts stop bits in scheduled BB or EBB.  */
7238
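/* In the emitted assembly, such a barrier appears as a stop bit
   (";;"), which tells the hardware that insns before and after it
   must not be placed in the same instruction group.  */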
7239static void
7240final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7241{
7242  rtx insn;
7243  int need_barrier_p = 0;
7244  rtx prev_insn = NULL_RTX;
7245
7246  init_insn_group_barriers ();
7247
7248  for (insn = NEXT_INSN (current_sched_info->prev_head);
7249       insn != current_sched_info->next_tail;
7250       insn = NEXT_INSN (insn))
7251    {
7252      if (GET_CODE (insn) == BARRIER)
7253	{
7254	  rtx last = prev_active_insn (insn);
7255
7256	  if (! last)
7257	    continue;
7258	  if (GET_CODE (last) == JUMP_INSN
7259	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7260	    last = prev_active_insn (last);
7261	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7262	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7263
7264	  init_insn_group_barriers ();
7265	  need_barrier_p = 0;
7266	  prev_insn = NULL_RTX;
7267	}
7268      else if (INSN_P (insn))
7269	{
7270	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7271	    {
7272	      init_insn_group_barriers ();
7273	      need_barrier_p = 0;
7274	      prev_insn = NULL_RTX;
7275	    }
7276	  else if (need_barrier_p || group_barrier_needed_p (insn))
7277	    {
7278	      if (TARGET_EARLY_STOP_BITS)
7279		{
7280		  rtx last;
7281
7282		  for (last = insn;
7283		       last != current_sched_info->prev_head;
7284		       last = PREV_INSN (last))
7285		    if (INSN_P (last) && GET_MODE (last) == TImode
7286			&& stops_p [INSN_UID (last)])
7287		      break;
7288		  if (last == current_sched_info->prev_head)
7289		    last = insn;
7290		  last = prev_active_insn (last);
7291		  if (last
7292		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7293		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7294				     last);
7295		  init_insn_group_barriers ();
7296		  for (last = NEXT_INSN (last);
7297		       last != insn;
7298		       last = NEXT_INSN (last))
7299		    if (INSN_P (last))
7300		      group_barrier_needed_p (last);
7301		}
7302	      else
7303		{
7304		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7305				    insn);
7306		  init_insn_group_barriers ();
7307		}
7308	      group_barrier_needed_p (insn);
7309	      prev_insn = NULL_RTX;
7310	    }
7311	  else if (recog_memoized (insn) >= 0)
7312	    prev_insn = insn;
7313	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
7314			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
7315			    || asm_noperands (PATTERN (insn)) >= 0);
7316	}
7317    }
7318}
7319
7320
7321
7322/* If the following function returns TRUE, we will use the DFA
7323   insn scheduler.  */
7324
7325static int
7326ia64_use_dfa_pipeline_interface (void)
7327{
7328  return 1;
7329}
7330
7331/* The following function returns the number of insns the DFA insn
7332   scheduler may look ahead when choosing the next insn to issue.  */
7333
7334static int
7335ia64_first_cycle_multipass_dfa_lookahead (void)
7336{
7337  return (reload_completed ? 6 : 4);
7338}
7339
7340/* The following function initializes the variable `dfa_pre_cycle_insn'.  */
7341
7342static void
7343ia64_init_dfa_pre_cycle_insn (void)
7344{
7345  if (temp_dfa_state == NULL)
7346    {
7347      dfa_state_size = state_size ();
7348      temp_dfa_state = xmalloc (dfa_state_size);
7349      prev_cycle_state = xmalloc (dfa_state_size);
7350    }
7351  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7352  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7353  recog_memoized (dfa_pre_cycle_insn);
7354  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7355  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7356  recog_memoized (dfa_stop_insn);
7357}
7358
7359/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7360   used by the DFA insn scheduler.  */
7361
7362static rtx
7363ia64_dfa_pre_cycle_insn (void)
7364{
7365  return dfa_pre_cycle_insn;
7366}
7367
7368/* The following function returns TRUE if PRODUCER (of type ilog or
7369   ld) produces the address for CONSUMER (of type st or stf).  */
7370
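/* For example (illustrative assembly only):

     ld8 r14 = [r15]	// PRODUCER loads a pointer
     ;;
     st8 [r14] = r16	// CONSUMER stores through it

   The bypass applies when the producer's destination register is
   used in the consumer's memory address.  */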
7371int
7372ia64_st_address_bypass_p (rtx producer, rtx consumer)
7373{
7374  rtx dest, reg, mem;
7375
7376  if (producer == NULL_RTX || consumer == NULL_RTX)
7377    abort ();
7378  dest = ia64_single_set (producer);
7379  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7380      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7381    abort ();
7382  if (GET_CODE (reg) == SUBREG)
7383    reg = SUBREG_REG (reg);
7384  dest = ia64_single_set (consumer);
7385  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7386      || GET_CODE (mem) != MEM)
7387    abort ();
7388  return reg_mentioned_p (reg, mem);
7389}
7390
7391/* The following function returns TRUE if PRODUCER (of type ilog or
7392   ld) produces the address for CONSUMER (of type ld or fld).  */
7393
7394int
7395ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7396{
7397  rtx dest, src, reg, mem;
7398
7399  if (producer == NULL_RTX || consumer == NULL_RTX)
7400    abort ();
7401  dest = ia64_single_set (producer);
7402  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7403      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7404    abort ();
7405  if (GET_CODE (reg) == SUBREG)
7406    reg = SUBREG_REG (reg);
7407  src = ia64_single_set (consumer);
7408  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7409    abort ();
7410  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7411    mem = XVECEXP (mem, 0, 0);
7412  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7413    mem = XEXP (mem, 0);
7414
7415  /* Note that LO_SUM is used for GOT loads.  */
7416  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7417    abort ();
7418
7419  return reg_mentioned_p (reg, mem);
7420}
7421
7422/* The following function returns TRUE if INSN produces an address
7423   for a load/store insn.  We will place such insns into an M slot
7424   because that decreases their latency time.  */
7425
7426int
7427ia64_produce_address_p (rtx insn)
7428{
7429  return insn->call;
7430}
7431
7432
7433/* Emit pseudo-ops for the assembler to describe predicate relations.
7434   At present this assumes that we only consider predicate pairs to
7435   be mutex, and that the assembler can deduce proper values from
7436   straight-line code.  */
7437
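/* For reference, the directives involved are roughly of the form
   ".pred.rel.mutex p6, p7" (from the pred_rel_mutex pattern) and
   ".pred.safe_across_calls p1-p5,p16-p63" (from the
   safe_across_calls_* patterns); the exact spelling comes from the
   corresponding patterns in ia64.md, and the predicate numbers here
   are only illustrative.  */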
7438static void
7439emit_predicate_relation_info (void)
7440{
7441  basic_block bb;
7442
7443  FOR_EACH_BB_REVERSE (bb)
7444    {
7445      int r;
7446      rtx head = BB_HEAD (bb);
7447
7448      /* We only need such notes at code labels.  */
7449      if (GET_CODE (head) != CODE_LABEL)
7450	continue;
7451      if (GET_CODE (NEXT_INSN (head)) == NOTE
7452	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7453	head = NEXT_INSN (head);
7454
7455      for (r = PR_REG (0); r < PR_REG (64); r += 2)
7456	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7457	  {
7458	    rtx p = gen_rtx_REG (BImode, r);
7459	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7460	    if (head == BB_END (bb))
7461	      BB_END (bb) = n;
7462	    head = n;
7463	  }
7464    }
7465
7466  /* Look for conditional calls that do not return, and protect predicate
7467     relations around them.  Otherwise the assembler will assume the call
7468     returns, and complain about uses of call-clobbered predicates after
7469     the call.  */
7470  FOR_EACH_BB_REVERSE (bb)
7471    {
7472      rtx insn = BB_HEAD (bb);
7473
7474      while (1)
7475	{
7476	  if (GET_CODE (insn) == CALL_INSN
7477	      && GET_CODE (PATTERN (insn)) == COND_EXEC
7478	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7479	    {
7480	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7481	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7482	      if (BB_HEAD (bb) == insn)
7483		BB_HEAD (bb) = b;
7484	      if (BB_END (bb) == insn)
7485		BB_END (bb) = a;
7486	    }
7487
7488	  if (insn == BB_END (bb))
7489	    break;
7490	  insn = NEXT_INSN (insn);
7491	}
7492    }
7493}
7494
7495/* Perform machine dependent operations on the rtl chain INSNS.  */
7496
7497static void
7498ia64_reorg (void)
7499{
7500  /* We are freeing block_for_insn in the toplev to keep compatibility
7501     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7502  compute_bb_for_insn ();
7503
7504  /* If optimizing, we'll have split before scheduling.  */
7505  if (optimize == 0)
7506    split_all_insns (0);
7507
7508  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7509     non-optimizing bootstrap.  */
7510  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7511
7512  if (ia64_flag_schedule_insns2)
7513    {
7514      timevar_push (TV_SCHED2);
7515      ia64_final_schedule = 1;
7516
7517      initiate_bundle_states ();
7518      ia64_nop = make_insn_raw (gen_nop ());
7519      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7520      recog_memoized (ia64_nop);
7521      clocks_length = get_max_uid () + 1;
7522      stops_p = xcalloc (1, clocks_length);
7523      if (ia64_tune == PROCESSOR_ITANIUM)
7524	{
7525	  clocks = xcalloc (clocks_length, sizeof (int));
7526	  add_cycles = xcalloc (clocks_length, sizeof (int));
7527	}
7528      if (ia64_tune == PROCESSOR_ITANIUM2)
7529	{
7530	  pos_1 = get_cpu_unit_code ("2_1");
7531	  pos_2 = get_cpu_unit_code ("2_2");
7532	  pos_3 = get_cpu_unit_code ("2_3");
7533	  pos_4 = get_cpu_unit_code ("2_4");
7534	  pos_5 = get_cpu_unit_code ("2_5");
7535	  pos_6 = get_cpu_unit_code ("2_6");
7536	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
7537	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7538	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7539	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7540	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7541	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7542	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
7543	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7544	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7545	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7546	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
7547	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7548	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7549	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7550	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7551	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7552	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
7553	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7554	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7555	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7556	}
7557      else
7558	{
7559	  pos_1 = get_cpu_unit_code ("1_1");
7560	  pos_2 = get_cpu_unit_code ("1_2");
7561	  pos_3 = get_cpu_unit_code ("1_3");
7562	  pos_4 = get_cpu_unit_code ("1_4");
7563	  pos_5 = get_cpu_unit_code ("1_5");
7564	  pos_6 = get_cpu_unit_code ("1_6");
7565	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
7566	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7567	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7568	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7569	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7570	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7571	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
7572	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7573	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7574	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7575	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
7576	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7577	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7578	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7579	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7580	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7581	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
7582	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7583	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7584	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7585	}
7586      schedule_ebbs (rtl_dump_file);
7587      finish_bundle_states ();
7588      if (ia64_tune == PROCESSOR_ITANIUM)
7589	{
7590	  free (add_cycles);
7591	  free (clocks);
7592	}
7593      free (stops_p);
7594      emit_insn_group_barriers (rtl_dump_file);
7595
7596      ia64_final_schedule = 0;
7597      timevar_pop (TV_SCHED2);
7598    }
7599  else
7600    emit_all_insn_group_barriers (rtl_dump_file);
7601
7602  /* A call must not be the last instruction in a function, so that the
7603     return address is still within the function and unwinding works
7604     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7605  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7606    {
7607      rtx insn;
7608      int saw_stop = 0;
7609
7610      insn = get_last_insn ();
7611      if (! INSN_P (insn))
7612        insn = prev_active_insn (insn);
7613      /* Skip over insns that expand to nothing.  */
7614      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7615        {
7616	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7617	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7618	    saw_stop = 1;
7619	  insn = prev_active_insn (insn);
7620	}
7621      if (GET_CODE (insn) == CALL_INSN)
7622	{
7623	  if (! saw_stop)
7624	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7625	  emit_insn (gen_break_f ());
7626	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7627	}
7628    }
7629
7630  fixup_errata ();
7631  emit_predicate_relation_info ();
7632}
7633
7634/* Return true if REGNO is used by the epilogue.  */
7635
7636int
7637ia64_epilogue_uses (int regno)
7638{
7639  switch (regno)
7640    {
7641    case R_GR (1):
7642      /* With a call to a function in another module, we will write a new
7643	 value to "gp".  After returning from such a call, we need to make
7644	 sure the function restores the original gp-value, even if the
7645	 function itself does not use the gp anymore.  */
7646      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7647
7648    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7649    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7650      /* For functions defined with the syscall_linkage attribute, all
7651	 input registers are marked as live at all function exits.  This
7652	 prevents the register allocator from using the input registers,
7653	 which in turn makes it possible to restart a system call after
7654	 an interrupt without having to save/restore the input registers.
7655	 This also prevents kernel data from leaking to application code.  */
7656      return lookup_attribute ("syscall_linkage",
7657	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7658
7659    case R_BR (0):
7660      /* Conditional return patterns can't represent the use of `b0' as
7661         the return address, so we force the value live this way.  */
7662      return 1;
7663
7664    case AR_PFS_REGNUM:
7665      /* Likewise for ar.pfs, which is used by br.ret.  */
7666      return 1;
7667
7668    default:
7669      return 0;
7670    }
7671}
7672
7673/* Return true if REGNO is used by the frame unwinder.  */
7674
7675int
7676ia64_eh_uses (int regno)
7677{
7678  if (! reload_completed)
7679    return 0;
7680
7681  if (current_frame_info.reg_save_b0
7682      && regno == current_frame_info.reg_save_b0)
7683    return 1;
7684  if (current_frame_info.reg_save_pr
7685      && regno == current_frame_info.reg_save_pr)
7686    return 1;
7687  if (current_frame_info.reg_save_ar_pfs
7688      && regno == current_frame_info.reg_save_ar_pfs)
7689    return 1;
7690  if (current_frame_info.reg_save_ar_unat
7691      && regno == current_frame_info.reg_save_ar_unat)
7692    return 1;
7693  if (current_frame_info.reg_save_ar_lc
7694      && regno == current_frame_info.reg_save_ar_lc)
7695    return 1;
7696
7697  return 0;
7698}
7699
7700/* Return true if this goes in small data/bss.  */
7701
7702/* ??? We could also support our own long data here, generating
7703   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger,
7704   but should make the code faster because there is one less load.
7705   This also includes incomplete types which can't go in sdata/sbss.  */
7706
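/* For example (assuming the default small data threshold), a small
   object such as "static int counter;" would normally end up in
   .sbss, while a large array would not; objects whose section
   attribute already names .sdata or .sbss are accepted regardless
   of size.  */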
7707static bool
7708ia64_in_small_data_p (tree exp)
7709{
7710  if (TARGET_NO_SDATA)
7711    return false;
7712
7713  /* We want to merge strings, so we never consider them small data.  */
7714  if (TREE_CODE (exp) == STRING_CST)
7715    return false;
7716
7717  /* Functions are never small data.  */
7718  if (TREE_CODE (exp) == FUNCTION_DECL)
7719    return false;
7720
7721  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7722    {
7723      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7724      if (strcmp (section, ".sdata") == 0
7725	  || strcmp (section, ".sbss") == 0)
7726	return true;
7727    }
7728  else
7729    {
7730      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7731
7732      /* If this is an incomplete type with size 0, then we can't put it
7733	 in sdata because it might be too big when completed.  */
7734      if (size > 0 && size <= ia64_section_threshold)
7735	return true;
7736    }
7737
7738  return false;
7739}
7740
7741/* Output assembly directives for prologue regions.  */
7742
7743/* True if the current basic block is the last block of the function.  */
7744
7745static bool last_block;
7746
7747/* True if we need a copy_state command at the start of the next block.  */
7748
7749static bool need_copy_state;
7750
7751/* The function emits unwind directives for the start of an epilogue.  */
7752
7753static void
7754process_epilogue (void)
7755{
7756  /* If this isn't the last block of the function, then we need to label the
7757     current state, and copy it back in at the start of the next block.  */
7758
7759  if (!last_block)
7760    {
7761      fprintf (asm_out_file, "\t.label_state 1\n");
7762      need_copy_state = true;
7763    }
7764
7765  fprintf (asm_out_file, "\t.restore sp\n");
7766}
7767
7768/* This function processes a SET pattern looking for specific patterns
7769   which result in emitting an assembly directive required for unwinding.  */
7770
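/* For a typical frame-related prologue insn this results in unwind
   directives such as ".save ar.pfs, r35", ".fframe 192",
   ".save rp, r34" or ".vframe r33" (the register numbers and frame
   size here are only illustrative; the directive spellings are the
   ones used in the fprintf calls below).  */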
7771static int
7772process_set (FILE *asm_out_file, rtx pat)
7773{
7774  rtx src = SET_SRC (pat);
7775  rtx dest = SET_DEST (pat);
7776  int src_regno, dest_regno;
7777
7778  /* Look for the ALLOC insn.  */
7779  if (GET_CODE (src) == UNSPEC_VOLATILE
7780      && XINT (src, 1) == UNSPECV_ALLOC
7781      && GET_CODE (dest) == REG)
7782    {
7783      dest_regno = REGNO (dest);
7784
7785      /* If this isn't the final destination for ar.pfs, the alloc
7786	 shouldn't have been marked frame related.  */
7787      if (dest_regno != current_frame_info.reg_save_ar_pfs)
7788	abort ();
7789
7790      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7791	       ia64_dbx_register_number (dest_regno));
7792      return 1;
7793    }
7794
7795  /* Look for SP = ....  */
7796  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7797    {
7798      if (GET_CODE (src) == PLUS)
7799        {
7800	  rtx op0 = XEXP (src, 0);
7801	  rtx op1 = XEXP (src, 1);
7802	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7803	    {
7804	      if (INTVAL (op1) < 0)
7805		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7806			 -INTVAL (op1));
7807	      else
7808		process_epilogue ();
7809	    }
7810	  else
7811	    abort ();
7812	}
7813      else if (GET_CODE (src) == REG
7814	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7815	process_epilogue ();
7816      else
7817	abort ();
7818
7819      return 1;
7820    }
7821
7822  /* Register move we need to look at.  */
7823  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7824    {
7825      src_regno = REGNO (src);
7826      dest_regno = REGNO (dest);
7827
7828      switch (src_regno)
7829	{
7830	case BR_REG (0):
7831	  /* Saving return address pointer.  */
7832	  if (dest_regno != current_frame_info.reg_save_b0)
7833	    abort ();
7834	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7835		   ia64_dbx_register_number (dest_regno));
7836	  return 1;
7837
7838	case PR_REG (0):
7839	  if (dest_regno != current_frame_info.reg_save_pr)
7840	    abort ();
7841	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7842		   ia64_dbx_register_number (dest_regno));
7843	  return 1;
7844
7845	case AR_UNAT_REGNUM:
7846	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7847	    abort ();
7848	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7849		   ia64_dbx_register_number (dest_regno));
7850	  return 1;
7851
7852	case AR_LC_REGNUM:
7853	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7854	    abort ();
7855	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7856		   ia64_dbx_register_number (dest_regno));
7857	  return 1;
7858
7859	case STACK_POINTER_REGNUM:
7860	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7861	      || ! frame_pointer_needed)
7862	    abort ();
7863	  fprintf (asm_out_file, "\t.vframe r%d\n",
7864		   ia64_dbx_register_number (dest_regno));
7865	  return 1;
7866
7867	default:
7868	  /* Everything else should indicate being stored to memory.  */
7869	  abort ();
7870	}
7871    }
7872
7873  /* Memory store we need to look at.  */
7874  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7875    {
7876      long off;
7877      rtx base;
7878      const char *saveop;
7879
7880      if (GET_CODE (XEXP (dest, 0)) == REG)
7881	{
7882	  base = XEXP (dest, 0);
7883	  off = 0;
7884	}
7885      else if (GET_CODE (XEXP (dest, 0)) == PLUS
7886	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7887	{
7888	  base = XEXP (XEXP (dest, 0), 0);
7889	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7890	}
7891      else
7892	abort ();
7893
7894      if (base == hard_frame_pointer_rtx)
7895	{
7896	  saveop = ".savepsp";
7897	  off = - off;
7898	}
7899      else if (base == stack_pointer_rtx)
7900	saveop = ".savesp";
7901      else
7902	abort ();
7903
7904      src_regno = REGNO (src);
7905      switch (src_regno)
7906	{
7907	case BR_REG (0):
7908	  if (current_frame_info.reg_save_b0 != 0)
7909	    abort ();
7910	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7911	  return 1;
7912
7913	case PR_REG (0):
7914	  if (current_frame_info.reg_save_pr != 0)
7915	    abort ();
7916	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7917	  return 1;
7918
7919	case AR_LC_REGNUM:
7920	  if (current_frame_info.reg_save_ar_lc != 0)
7921	    abort ();
7922	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7923	  return 1;
7924
7925	case AR_PFS_REGNUM:
7926	  if (current_frame_info.reg_save_ar_pfs != 0)
7927	    abort ();
7928	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7929	  return 1;
7930
7931	case AR_UNAT_REGNUM:
7932	  if (current_frame_info.reg_save_ar_unat != 0)
7933	    abort ();
7934	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7935	  return 1;
7936
7937	case GR_REG (4):
7938	case GR_REG (5):
7939	case GR_REG (6):
7940	case GR_REG (7):
7941	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7942		   1 << (src_regno - GR_REG (4)));
7943	  return 1;
7944
7945	case BR_REG (1):
7946	case BR_REG (2):
7947	case BR_REG (3):
7948	case BR_REG (4):
7949	case BR_REG (5):
7950	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
7951		   1 << (src_regno - BR_REG (1)));
7952	  return 1;
7953
7954	case FR_REG (2):
7955	case FR_REG (3):
7956	case FR_REG (4):
7957	case FR_REG (5):
7958	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
7959		   1 << (src_regno - FR_REG (2)));
7960	  return 1;
7961
7962	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
7963	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
7964	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
7965	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
7966	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
7967		   1 << (src_regno - FR_REG (12)));
7968	  return 1;
7969
7970	default:
7971	  return 0;
7972	}
7973    }
7974
7975  return 0;
7976}
7977
7978
7979/* This function looks at a single insn and emits any directives
7980   required to unwind this insn.  */
7981void
7982process_for_unwind_directive (FILE *asm_out_file, rtx insn)
7983{
7984  if (flag_unwind_tables
7985      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7986    {
7987      rtx pat;
7988
7989      if (GET_CODE (insn) == NOTE
7990	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
7991	{
7992	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
7993
7994	  /* Restore unwind state from immediately before the epilogue.  */
7995	  if (need_copy_state)
7996	    {
7997	      fprintf (asm_out_file, "\t.body\n");
7998	      fprintf (asm_out_file, "\t.copy_state 1\n");
7999	      need_copy_state = false;
8000	    }
8001	}
8002
8003      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8004	return;
8005
8006      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8007      if (pat)
8008	pat = XEXP (pat, 0);
8009      else
8010	pat = PATTERN (insn);
8011
8012      switch (GET_CODE (pat))
8013        {
8014	case SET:
8015	  process_set (asm_out_file, pat);
8016	  break;
8017
8018	case PARALLEL:
8019	  {
8020	    int par_index;
8021	    int limit = XVECLEN (pat, 0);
8022	    for (par_index = 0; par_index < limit; par_index++)
8023	      {
8024		rtx x = XVECEXP (pat, 0, par_index);
8025		if (GET_CODE (x) == SET)
8026		  process_set (asm_out_file, x);
8027	      }
8028	    break;
8029	  }
8030
8031	default:
8032	  abort ();
8033	}
8034    }
8035}
8036
8037
8038void
8039ia64_init_builtins (void)
8040{
8041  tree psi_type_node = build_pointer_type (integer_type_node);
8042  tree pdi_type_node = build_pointer_type (long_integer_type_node);
8043
8044  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
8045  tree si_ftype_psi_si_si
8046    = build_function_type_list (integer_type_node,
8047				psi_type_node, integer_type_node,
8048				integer_type_node, NULL_TREE);
8049
8050  /* __sync_val_compare_and_swap_di */
8051  tree di_ftype_pdi_di_di
8052    = build_function_type_list (long_integer_type_node,
8053				pdi_type_node, long_integer_type_node,
8054				long_integer_type_node, NULL_TREE);
8055  /* __sync_bool_compare_and_swap_di */
8056  tree si_ftype_pdi_di_di
8057    = build_function_type_list (integer_type_node,
8058				pdi_type_node, long_integer_type_node,
8059				long_integer_type_node, NULL_TREE);
8060  /* __sync_synchronize */
8061  tree void_ftype_void
8062    = build_function_type (void_type_node, void_list_node);
8063
8064  /* __sync_lock_test_and_set_si */
8065  tree si_ftype_psi_si
8066    = build_function_type_list (integer_type_node,
8067				psi_type_node, integer_type_node, NULL_TREE);
8068
8069  /* __sync_lock_test_and_set_di */
8070  tree di_ftype_pdi_di
8071    = build_function_type_list (long_integer_type_node,
8072				pdi_type_node, long_integer_type_node,
8073				NULL_TREE);
8074
8075  /* __sync_lock_release_si */
8076  tree void_ftype_psi
8077    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
8078
8079  /* __sync_lock_release_di */
8080  tree void_ftype_pdi
8081    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
8082
8083  tree fpreg_type;
8084  tree float80_type;
8085
8086  /* The __fpreg type.  */
8087  fpreg_type = make_node (REAL_TYPE);
8088  /* ??? The back end should know to load/save __fpreg variables using
8089     the ldf.fill and stf.spill instructions.  */
8090  TYPE_PRECISION (fpreg_type) = 96;
8091  layout_type (fpreg_type);
8092  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8093
8094  /* The __float80 type.  */
8095  float80_type = make_node (REAL_TYPE);
8096  TYPE_PRECISION (float80_type) = 96;
8097  layout_type (float80_type);
8098  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8099
8100  /* The __float128 type.  */
8101  if (!TARGET_HPUX)
8102    {
8103      tree float128_type = make_node (REAL_TYPE);
8104      TYPE_PRECISION (float128_type) = 128;
8105      layout_type (float128_type);
8106      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8107    }
8108  else
8109    /* Under HPUX, this is a synonym for "long double".  */
8110    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8111					       "__float128");
8112
8113#define def_builtin(name, type, code) \
8114  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
8115
8116  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
8117	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
8118  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
8119	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
8120  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
8121	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
8122  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
8123	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
8124
8125  def_builtin ("__sync_synchronize", void_ftype_void,
8126	       IA64_BUILTIN_SYNCHRONIZE);
8127
8128  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8129	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8130  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8131	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8132  def_builtin ("__sync_lock_release_si", void_ftype_psi,
8133	       IA64_BUILTIN_LOCK_RELEASE_SI);
8134  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8135	       IA64_BUILTIN_LOCK_RELEASE_DI);
8136
8137  def_builtin ("__builtin_ia64_bsp",
8138	       build_function_type (ptr_type_node, void_list_node),
8139	       IA64_BUILTIN_BSP);
8140
8141  def_builtin ("__builtin_ia64_flushrs",
8142	       build_function_type (void_type_node, void_list_node),
8143	       IA64_BUILTIN_FLUSHRS);
8144
8145  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8146	       IA64_BUILTIN_FETCH_AND_ADD_SI);
8147  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8148	       IA64_BUILTIN_FETCH_AND_SUB_SI);
8149  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8150	       IA64_BUILTIN_FETCH_AND_OR_SI);
8151  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8152	       IA64_BUILTIN_FETCH_AND_AND_SI);
8153  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8154	       IA64_BUILTIN_FETCH_AND_XOR_SI);
8155  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8156	       IA64_BUILTIN_FETCH_AND_NAND_SI);
8157
8158  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8159	       IA64_BUILTIN_ADD_AND_FETCH_SI);
8160  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8161	       IA64_BUILTIN_SUB_AND_FETCH_SI);
8162  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8163	       IA64_BUILTIN_OR_AND_FETCH_SI);
8164  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8165	       IA64_BUILTIN_AND_AND_FETCH_SI);
8166  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8167	       IA64_BUILTIN_XOR_AND_FETCH_SI);
8168  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8169	       IA64_BUILTIN_NAND_AND_FETCH_SI);
8170
8171  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8172	       IA64_BUILTIN_FETCH_AND_ADD_DI);
8173  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8174	       IA64_BUILTIN_FETCH_AND_SUB_DI);
8175  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8176	       IA64_BUILTIN_FETCH_AND_OR_DI);
8177  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8178	       IA64_BUILTIN_FETCH_AND_AND_DI);
8179  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8180	       IA64_BUILTIN_FETCH_AND_XOR_DI);
8181  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8182	       IA64_BUILTIN_FETCH_AND_NAND_DI);
8183
8184  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8185	       IA64_BUILTIN_ADD_AND_FETCH_DI);
8186  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8187	       IA64_BUILTIN_SUB_AND_FETCH_DI);
8188  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8189	       IA64_BUILTIN_OR_AND_FETCH_DI);
8190  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8191	       IA64_BUILTIN_AND_AND_FETCH_DI);
8192  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8193	       IA64_BUILTIN_XOR_AND_FETCH_DI);
8194  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8195	       IA64_BUILTIN_NAND_AND_FETCH_DI);
8196
8197#undef def_builtin
8198}
8199
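/* As an illustrative example (not part of the compiler itself), user
   code can use the builtins registered above like this:

     static int lock;

     void enter (void)
     {
       while (__sync_lock_test_and_set_si (&lock, 1))
         continue;
     }

     void leave (void)
     {
       __sync_lock_release_si (&lock);
     }
*/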
8200/* Expand fetch_and_op intrinsics.  The basic code sequence is:
8201
8202     mf
8203     tmp = [ptr];
8204     do {
8205       ret = tmp;
8206       ar.ccv = tmp;
8207       tmp <op>= value;
8208       cmpxchgsz.acq tmp = [ptr], tmp
8209     } while (tmp != ret)
8210*/
8211
8212static rtx
8213ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8214			  tree arglist, rtx target)
8215{
8216  rtx ret, label, tmp, ccv, insn, mem, value;
8217  tree arg0, arg1;
8218
8219  arg0 = TREE_VALUE (arglist);
8220  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8221  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8222#ifdef POINTERS_EXTEND_UNSIGNED
8223  if (GET_MODE(mem) != Pmode)
8224    mem = convert_memory_address (Pmode, mem);
8225#endif
8226  value = expand_expr (arg1, NULL_RTX, mode, 0);
8227
8228  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8229  MEM_VOLATILE_P (mem) = 1;
8230
8231  if (target && register_operand (target, mode))
8232    ret = target;
8233  else
8234    ret = gen_reg_rtx (mode);
8235
8236  emit_insn (gen_mf ());
8237
8238  /* Special case for fetchadd instructions.  */
8239  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8240    {
8241      if (mode == SImode)
8242        insn = gen_fetchadd_acq_si (ret, mem, value);
8243      else
8244        insn = gen_fetchadd_acq_di (ret, mem, value);
8245      emit_insn (insn);
8246      return ret;
8247    }
8248
8249  tmp = gen_reg_rtx (mode);
8250  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
8251  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8252  emit_move_insn (tmp, mem);
8253
8254  label = gen_label_rtx ();
8255  emit_label (label);
8256  emit_move_insn (ret, tmp);
8257  convert_move (ccv, tmp, /*unsignedp=*/1);
8258
8259  /* Perform the specific operation.  NAND is special-cased: it is
8260     indicated by one_cmpl_optab, so we complement first and then AND.  */
8261  if (binoptab == one_cmpl_optab)
8262    {
8263      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8264      binoptab = and_optab;
8265    }
8266  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8267
8268  if (mode == SImode)
8269    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8270  else
8271    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8272  emit_insn (insn);
8273
8274  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8275
8276  return ret;
8277}
8278
8279/* Expand op_and_fetch intrinsics.  The basic code sequence is:
8280
8281     mf
8282     tmp = [ptr];
8283     do {
8284       old = tmp;
8285       ar.ccv = tmp;
8286       ret = tmp <op> value;
8287       cmpxchgsz.acq tmp = [ptr], ret
8288     } while (tmp != old)
8289*/
8290
8291static rtx
8292ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8293			  tree arglist, rtx target)
8294{
8295  rtx old, label, tmp, ret, ccv, insn, mem, value;
8296  tree arg0, arg1;
8297
8298  arg0 = TREE_VALUE (arglist);
8299  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8300  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8301#ifdef POINTERS_EXTEND_UNSIGNED
8302  if (GET_MODE(mem) != Pmode)
8303    mem = convert_memory_address (Pmode, mem);
8304#endif
8305
8306  value = expand_expr (arg1, NULL_RTX, mode, 0);
8307
8308  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8309  MEM_VOLATILE_P (mem) = 1;
8310
8311  if (target && ! register_operand (target, mode))
8312    target = NULL_RTX;
8313
8314  emit_insn (gen_mf ());
8315  tmp = gen_reg_rtx (mode);
8316  old = gen_reg_rtx (mode);
8317  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
8318  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8319
8320  emit_move_insn (tmp, mem);
8321
8322  label = gen_label_rtx ();
8323  emit_label (label);
8324  emit_move_insn (old, tmp);
8325  convert_move (ccv, tmp, /*unsignedp=*/1);
8326
8327  /* Perform the specific operation.  NAND is special-cased: it is
8328     indicated by one_cmpl_optab, so we complement first and then AND.  */
8329  if (binoptab == one_cmpl_optab)
8330    {
8331      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8332      binoptab = and_optab;
8333    }
8334  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8335
8336  if (mode == SImode)
8337    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8338  else
8339    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8340  emit_insn (insn);
8341
8342  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8343
8344  return ret;
8345}
8346
8347/* Expand val_ and bool_compare_and_swap.  For val_ we want:
8348
8349     ar.ccv = oldval
8350     mf
8351     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8352     return ret
8353
8354   For bool_ it's the same except return ret == oldval.
8355*/
8356
8357static rtx
8358ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8359			      int boolp, tree arglist, rtx target)
8360{
8361  tree arg0, arg1, arg2;
8362  rtx mem, old, new, ccv, tmp, insn;
8363
8364  arg0 = TREE_VALUE (arglist);
8365  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8366  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8367  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8368  old = expand_expr (arg1, NULL_RTX, mode, 0);
8369  new = expand_expr (arg2, NULL_RTX, mode, 0);
8370
8371  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8372  MEM_VOLATILE_P (mem) = 1;
8373
8374  if (GET_MODE (old) != mode)
8375    old = convert_to_mode (mode, old, /*unsignedp=*/1);
8376  if (GET_MODE (new) != mode)
8377    new = convert_to_mode (mode, new, /*unsignedp=*/1);
8378
8379  if (! register_operand (old, mode))
8380    old = copy_to_mode_reg (mode, old);
8381  if (! register_operand (new, mode))
8382    new = copy_to_mode_reg (mode, new);
8383
8384  if (! boolp && target && register_operand (target, mode))
8385    tmp = target;
8386  else
8387    tmp = gen_reg_rtx (mode);
8388
8389  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8390  convert_move (ccv, old, /*unsignedp=*/1);
8391  emit_insn (gen_mf ());
8392  if (mode == SImode)
8393    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8394  else
8395    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8396  emit_insn (insn);
8397
8398  if (boolp)
8399    {
8400      if (! target)
8401	target = gen_reg_rtx (rmode);
8402      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8403    }
8404  else
8405    return tmp;
8406}
8407
8408/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
8409
8410static rtx
8411ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8412			       rtx target)
8413{
8414  tree arg0, arg1;
8415  rtx mem, new, ret, insn;
8416
8417  arg0 = TREE_VALUE (arglist);
8418  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8419  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8420  new = expand_expr (arg1, NULL_RTX, mode, 0);
8421
8422  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8423  MEM_VOLATILE_P (mem) = 1;
8424  if (! register_operand (new, mode))
8425    new = copy_to_mode_reg (mode, new);
8426
8427  if (target && register_operand (target, mode))
8428    ret = target;
8429  else
8430    ret = gen_reg_rtx (mode);
8431
8432  if (mode == SImode)
8433    insn = gen_xchgsi (ret, mem, new);
8434  else
8435    insn = gen_xchgdi (ret, mem, new);
8436  emit_insn (insn);
8437
8438  return ret;
8439}
8440
8441/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8442
8443static rtx
8444ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8445			  rtx target ATTRIBUTE_UNUSED)
8446{
8447  tree arg0;
8448  rtx mem;
8449
8450  arg0 = TREE_VALUE (arglist);
8451  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8452
8453  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8454  MEM_VOLATILE_P (mem) = 1;
8455
8456  emit_move_insn (mem, const0_rtx);
8457
8458  return const0_rtx;
8459}
8460
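/* As the comment above says, the volatile store of zero is meant to
   come out as `stsz.rel [ptr] = r0'.  The release semantics of st.rel
   make stores performed inside the critical section visible before
   the lock word is cleared, which is what makes the xchg.acq /
   st.rel pair usable as a spin lock.  */
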
8461rtx
8462ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8463		     enum machine_mode mode ATTRIBUTE_UNUSED,
8464		     int ignore ATTRIBUTE_UNUSED)
8465{
8466  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8467  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8468  tree arglist = TREE_OPERAND (exp, 1);
8469  enum machine_mode rmode = VOIDmode;
8470
8471  switch (fcode)
8472    {
8473    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8474    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8475      mode = SImode;
8476      rmode = SImode;
8477      break;
8478
8479    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8480    case IA64_BUILTIN_LOCK_RELEASE_SI:
8481    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8482    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8483    case IA64_BUILTIN_FETCH_AND_OR_SI:
8484    case IA64_BUILTIN_FETCH_AND_AND_SI:
8485    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8486    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8487    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8488    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8489    case IA64_BUILTIN_OR_AND_FETCH_SI:
8490    case IA64_BUILTIN_AND_AND_FETCH_SI:
8491    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8492    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8493      mode = SImode;
8494      break;
8495
8496    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8497      mode = DImode;
8498      rmode = SImode;
8499      break;
8500
8501    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8502      mode = DImode;
8503      rmode = DImode;
8504      break;
8505
8506    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8507    case IA64_BUILTIN_LOCK_RELEASE_DI:
8508    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8509    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8510    case IA64_BUILTIN_FETCH_AND_OR_DI:
8511    case IA64_BUILTIN_FETCH_AND_AND_DI:
8512    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8513    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8514    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8515    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8516    case IA64_BUILTIN_OR_AND_FETCH_DI:
8517    case IA64_BUILTIN_AND_AND_FETCH_DI:
8518    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8519    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8520      mode = DImode;
8521      break;
8522
8523    default:
8524      break;
8525    }
8526
8527  switch (fcode)
8528    {
8529    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8530    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8531      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8532					   target);
8533
8534    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8535    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8536      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8537					   target);
8538
8539    case IA64_BUILTIN_SYNCHRONIZE:
8540      emit_insn (gen_mf ());
8541      return const0_rtx;
8542
8543    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8544    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8545      return ia64_expand_lock_test_and_set (mode, arglist, target);
8546
8547    case IA64_BUILTIN_LOCK_RELEASE_SI:
8548    case IA64_BUILTIN_LOCK_RELEASE_DI:
8549      return ia64_expand_lock_release (mode, arglist, target);
8550
8551    case IA64_BUILTIN_BSP:
8552      if (! target || ! register_operand (target, DImode))
8553	target = gen_reg_rtx (DImode);
8554      emit_insn (gen_bsp_value (target));
8555#ifdef POINTERS_EXTEND_UNSIGNED
8556      target = convert_memory_address (ptr_mode, target);
8557#endif
8558      return target;
8559
8560    case IA64_BUILTIN_FLUSHRS:
8561      emit_insn (gen_flushrs ());
8562      return const0_rtx;
8563
8564    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8565    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8566      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8567
8568    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8569    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8570      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8571
8572    case IA64_BUILTIN_FETCH_AND_OR_SI:
8573    case IA64_BUILTIN_FETCH_AND_OR_DI:
8574      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8575
8576    case IA64_BUILTIN_FETCH_AND_AND_SI:
8577    case IA64_BUILTIN_FETCH_AND_AND_DI:
8578      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8579
8580    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8581    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8582      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8583
8584    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8585    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8586      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8587
8588    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8589    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8590      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8591
8592    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8593    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8594      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8595
8596    case IA64_BUILTIN_OR_AND_FETCH_SI:
8597    case IA64_BUILTIN_OR_AND_FETCH_DI:
8598      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8599
8600    case IA64_BUILTIN_AND_AND_FETCH_SI:
8601    case IA64_BUILTIN_AND_AND_FETCH_DI:
8602      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8603
8604    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8605    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8606      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8607
8608    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8609    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8610      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8611
8612    default:
8613      break;
8614    }
8615
8616  return NULL_RTX;
8617}
8618
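/* A note on the two families dispatched above (builtin names assumed
   to follow the _si/_di registration pattern): the FETCH_AND_<op>
   builtins return the value the memory word held before the update,
   while the <op>_AND_FETCH builtins return the updated value, e.g.

     old = __sync_fetch_and_add_si (&counter, 1);   -- previous counter
     new = __sync_add_and_fetch_si (&counter, 1);   -- counter after +1

   The distinction is purely in which of the two values is returned to
   the caller.  */
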
8619/* On HP-UX IA64, aggregate parameters are passed in the most
8620   significant bits of the stack slot.  */
8621
8622enum direction
8623ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8624{
8625   /* Exception to the normal case for aggregates smaller than a word.  */
8626
8627   if (type && AGGREGATE_TYPE_P (type)
8628       && int_size_in_bytes (type) < UNITS_PER_WORD)
8629     return upward;
8630
8631   /* Fall back to the default.  */
8632   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8633}
8634
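/* Worked example (a sketch): HP-UX IA64 is big-endian, so returning
   `upward' puts a small aggregate at the low addresses of its slot,
   i.e. in the most significant bytes.  A 3-byte struct passed on the
   stack would occupy bytes 0-2 of its 8-byte slot with the padding in
   bytes 3-7, whereas the big-endian default would have padded it
   downward.  */
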
8635/* Linked list of all external functions that are to be emitted by GCC.
8636   We output the name if and only if TREE_SYMBOL_REFERENCED is set,
8637   in order to avoid emitting names that are never actually used.  */
8638
8639struct extern_func_list GTY(())
8640{
8641  struct extern_func_list *next;
8642  tree decl;
8643};
8644
8645static GTY(()) struct extern_func_list *extern_func_head;
8646
8647static void
8648ia64_hpux_add_extern_decl (tree decl)
8649{
8650  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8651
8652  p->decl = decl;
8653  p->next = extern_func_head;
8654  extern_func_head = p;
8655}
8656
8657/* Print out the list of used global functions.  */
8658
8659static void
8660ia64_hpux_file_end (void)
8661{
8662  struct extern_func_list *p;
8663
8664  for (p = extern_func_head; p; p = p->next)
8665    {
8666      tree decl = p->decl;
8667      tree id = DECL_ASSEMBLER_NAME (decl);
8668
8669      if (!id)
8670	abort ();
8671
8672      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8673        {
8674	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8675
8676	  TREE_ASM_WRITTEN (decl) = 1;
8677	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
8678	  fputs (TYPE_ASM_OP, asm_out_file);
8679	  assemble_name (asm_out_file, name);
8680	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8681        }
8682    }
8683
8684  extern_func_head = 0;
8685}
8686
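/* For each extern function that was referenced but not emitted in
   this unit, the loop above produces roughly (the exact spelling
   depends on the globalize_label hook and TYPE_ASM_OP):

	.global foo
	.type	foo,@function

   with foo replaced by the assembler name of the declaration.  */
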
8687/* Rename all the TFmode libfuncs using the HPUX conventions.  */
8688
8689static void
8690ia64_hpux_init_libfuncs (void)
8691{
8692  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8693  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8694  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8695  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8696  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8697  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8698  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8699  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8700
8701  /* ia64_expand_compare uses this.  */
8702  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8703
8704  /* These should never be used.  */
8705  set_optab_libfunc (eq_optab, TFmode, 0);
8706  set_optab_libfunc (ne_optab, TFmode, 0);
8707  set_optab_libfunc (gt_optab, TFmode, 0);
8708  set_optab_libfunc (ge_optab, TFmode, 0);
8709  set_optab_libfunc (lt_optab, TFmode, 0);
8710  set_optab_libfunc (le_optab, TFmode, 0);
8711
8712  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8713  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8714  set_conv_libfunc (sext_optab,   TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8715  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8716  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8717  set_conv_libfunc (trunc_optab,  XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8718
8719  set_conv_libfunc (sfix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8720  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8721  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8722  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8723
8724  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8725  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8726}
8727
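/* Effect in brief: after this hook runs, a quad-precision (TFmode)
   operation such as

     long double c = a * b;    -- on HP-UX, long double is 128-bit quad

   is emitted as a call to _U_Qfmpy instead of the default libgcc
   name, and every TFmode comparison funnels through the single
   _U_Qfcmp entry point recorded in cmptf_libfunc above.  */
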
8728/* Rename the division and modulus functions to use the VMS OTS$ routines.  */
8729
8730static void
8731ia64_vms_init_libfuncs (void)
8732{
8733  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8734  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8735  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8736  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8737  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8738  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8739  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8740  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8741}
8742
8743/* Switch to the section to which we should output X.  The only thing
8744   special we do here is to honor small data.  */
8745
8746static void
8747ia64_select_rtx_section (enum machine_mode mode, rtx x,
8748			 unsigned HOST_WIDE_INT align)
8749{
8750  if (GET_MODE_SIZE (mode) > 0
8751      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8752    sdata_section ();
8753  else
8754    default_elf_select_rtx_section (mode, x, align);
8755}
8756
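/* E.g. an 8-byte constant whose mode size does not exceed
   ia64_section_threshold is placed in .sdata, so it can be reached
   with the cheaper gp-relative addressing used for small data;
   larger constants fall back to the generic ELF section choice.  */
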
8757/* It is illegal to have relocations in shared segments on AIX and HPUX.
8758   Pretend flag_pic is always set.  */
8759
8760static void
8761ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8762{
8763  default_elf_select_section_1 (exp, reloc, align, true);
8764}
8765
8766static void
8767ia64_rwreloc_unique_section (tree decl, int reloc)
8768{
8769  default_unique_section_1 (decl, reloc, true);
8770}
8771
8772static void
8773ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8774				 unsigned HOST_WIDE_INT align)
8775{
8776  int save_pic = flag_pic;
8777  flag_pic = 1;
8778  ia64_select_rtx_section (mode, x, align);
8779  flag_pic = save_pic;
8780}
8781
8782static unsigned int
8783ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8784{
8785  return default_section_type_flags_1 (decl, name, reloc, true);
8786}
8787
8788/* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8789   structure type and the address of the returned object should be
8790   passed in out0, rather than in r8.  */
8791
8792static bool
8793ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8794{
8795  tree ret_type = TREE_TYPE (fntype);
8796
8797  /* The Itanium C++ ABI requires that out0, rather than r8, be used
8798     as the structure return address parameter, if the return value
8799     type has a non-trivial copy constructor or destructor.  It is not
8800     clear if this same convention should be used for other
8801     programming languages.  Until G++ 3.4, we incorrectly used r8 for
8802     these return values.  */
8803  return (abi_version_at_least (2)
8804	  && ret_type
8805	  && TYPE_MODE (ret_type) == BLKmode
8806	  && TREE_ADDRESSABLE (ret_type)
8807	  && strcmp (lang_hooks.name, "GNU C++") == 0);
8808}
8809
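/* Example of the distinction (in C++ terms, following the comment in
   the function above):

     struct A { ~A ();  char buf[32]; };   A f ();
     struct B { char buf[32]; };           B g ();

   A has a non-trivial destructor, so under the Itanium C++ ABI the
   address for f's return value goes in out0; B is plain data, so g
   keeps the usual convention of passing the address in r8.  */
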
8810/* Output the assembler code for a thunk function.  THUNK_DECL is the
8811   declaration for the thunk function itself, FUNCTION is the decl for
8812   the target function.  DELTA is an immediate constant offset to be
8813   added to THIS.  If VCALL_OFFSET is nonzero, the word at
8814   *(*this + vcall_offset) should be added to THIS.  */
8815
8816static void
8817ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8818		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8819		      tree function)
8820{
8821  rtx this, insn, funexp;
8822  unsigned int this_parmno;
8823  unsigned int this_regno;
8824
8825  reload_completed = 1;
8826  epilogue_completed = 1;
8827  no_new_pseudos = 1;
8828
8829  /* Set things up as ia64_expand_prologue might.  */
8830  last_scratch_gr_reg = 15;
8831
8832  memset (&current_frame_info, 0, sizeof (current_frame_info));
8833  current_frame_info.spill_cfa_off = -16;
8834  current_frame_info.n_input_regs = 1;
8835  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8836
8837  /* Mark the end of the (empty) prologue.  */
8838  emit_note (NOTE_INSN_PROLOGUE_END);
8839
8840  /* Figure out whether "this" will be the first parameter (the
8841     typical case) or the second parameter (as happens when the
8842     virtual function returns certain class objects).  */
8843  this_parmno
8844    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8845       ? 1 : 0);
8846  this_regno = IN_REG (this_parmno);
8847  if (!TARGET_REG_NAMES)
8848    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8849
8850  this = gen_rtx_REG (Pmode, this_regno);
8851  if (TARGET_ILP32)
8852    {
8853      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8854      REG_POINTER (tmp) = 1;
8855      if (delta && CONST_OK_FOR_I (delta))
8856	{
8857	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8858	  delta = 0;
8859	}
8860      else
8861	emit_insn (gen_ptr_extend (this, tmp));
8862    }
8863
8864  /* Apply the constant offset, if required.  */
8865  if (delta)
8866    {
8867      rtx delta_rtx = GEN_INT (delta);
8868
8869      if (!CONST_OK_FOR_I (delta))
8870	{
8871	  rtx tmp = gen_rtx_REG (Pmode, 2);
8872	  emit_move_insn (tmp, delta_rtx);
8873	  delta_rtx = tmp;
8874	}
8875      emit_insn (gen_adddi3 (this, this, delta_rtx));
8876    }
8877
8878  /* Apply the offset from the vtable, if required.  */
8879  if (vcall_offset)
8880    {
8881      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8882      rtx tmp = gen_rtx_REG (Pmode, 2);
8883
8884      if (TARGET_ILP32)
8885	{
8886	  rtx t = gen_rtx_REG (ptr_mode, 2);
8887	  REG_POINTER (t) = 1;
8888	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8889	  if (CONST_OK_FOR_I (vcall_offset))
8890	    {
8891	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8892						  vcall_offset_rtx));
8893	      vcall_offset = 0;
8894	    }
8895	  else
8896	    emit_insn (gen_ptr_extend (tmp, t));
8897	}
8898      else
8899	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8900
8901      if (vcall_offset)
8902	{
8903	  if (!CONST_OK_FOR_J (vcall_offset))
8904	    {
8905	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8906	      emit_move_insn (tmp2, vcall_offset_rtx);
8907	      vcall_offset_rtx = tmp2;
8908	    }
8909	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8910	}
8911
8912      if (TARGET_ILP32)
8913	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8914			gen_rtx_MEM (ptr_mode, tmp));
8915      else
8916	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8917
8918      emit_insn (gen_adddi3 (this, this, tmp));
8919    }
8920
8921  /* Generate a tail call to the target function.  */
8922  if (! TREE_USED (function))
8923    {
8924      assemble_external (function);
8925      TREE_USED (function) = 1;
8926    }
8927  funexp = XEXP (DECL_RTL (function), 0);
8928  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8929  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8930  insn = get_last_insn ();
8931  SIBLING_CALL_P (insn) = 1;
8932
8933  /* Code generation for calls relies on splitting.  */
8934  reload_completed = 1;
8935  epilogue_completed = 1;
8936  try_split (PATTERN (insn), insn, 0);
8937
8938  emit_barrier ();
8939
8940  /* Run just enough of rest_of_compilation to get the insns emitted.
8941     There's not really enough bulk here to make other passes such as
8942     instruction scheduling worthwhile.  Note that use_thunk calls
8943     assemble_start_function and assemble_end_function.  */
8944
8945  insn_locators_initialize ();
8946  emit_all_insn_group_barriers (NULL);
8947  insn = get_insns ();
8948  shorten_branches (insn);
8949  final_start_function (insn, file, 1);
8950  final (insn, file, 1, 0);
8951  final_end_function ();
8952
8953  reload_completed = 0;
8954  epilogue_completed = 0;
8955  no_new_pseudos = 0;
8956}
8957
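/* C-level sketch of what the emitted thunk does (capitalized names
   refer to this function's parameters, not to real identifiers):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(long *) (*(char **) this + VCALL_OFFSET);
     tail-call FUNCTION with the adjusted `this';

   all of which is generated as RTL above and then run through just
   enough of the final pass to be written out.  */
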
8958/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
8959
8960static rtx
8961ia64_struct_value_rtx (tree fntype,
8962		       int incoming ATTRIBUTE_UNUSED)
8963{
8964  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8965    return NULL_RTX;
8966  return gen_rtx_REG (Pmode, GR_REG (8));
8967}
8968
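/* Returning NULL_RTX above tells the middle end not to use a fixed
   register for the aggregate-return address; the address is then
   passed as an invisible first argument, which is the out0 convention
   tested by ia64_struct_retval_addr_is_first_parm_p.  */
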
8969#include "gt-ia64.h"
8970