ia64.c revision 146895
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
3   Free Software Foundation, Inc.
4   Contributed by James E. Wilson <wilson@cygnus.com> and
5		  David Mosberger <davidm@hpl.hp.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to
21the Free Software Foundation, 59 Temple Place - Suite 330,
22Boston, MA 02111-1307, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "toplev.h"
46#include "sched-int.h"
47#include "timevar.h"
48#include "target.h"
49#include "target-def.h"
50#include "tm_p.h"
51#include "hashtab.h"
52#include "langhooks.h"
53#include "cfglayout.h"
54
55/* This is used for communication between ASM_OUTPUT_LABEL and
56   ASM_OUTPUT_LABELREF.  */
57int ia64_asm_output_label = 0;
58
59/* Define the information needed to generate branch and scc insns.  This is
60   stored from the compare operation.  */
61struct rtx_def * ia64_compare_op0;
62struct rtx_def * ia64_compare_op1;
63
64/* Register names for ia64_expand_prologue.  */
65static const char * const ia64_reg_numbers[96] =
66{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
67  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
68  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
69  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
70  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
71  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
72  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
73  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
74  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
75  "r104","r105","r106","r107","r108","r109","r110","r111",
76  "r112","r113","r114","r115","r116","r117","r118","r119",
77  "r120","r121","r122","r123","r124","r125","r126","r127"};
78
79/* ??? These strings could be shared with REGISTER_NAMES.  */
80static const char * const ia64_input_reg_names[8] =
81{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
82
83/* ??? These strings could be shared with REGISTER_NAMES.  */
84static const char * const ia64_local_reg_names[80] =
85{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
86  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
87  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
88  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
89  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
90  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
91  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
92  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
93  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
94  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
95
96/* ??? These strings could be shared with REGISTER_NAMES.  */
97static const char * const ia64_output_reg_names[8] =
98{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
99
100/* String used with the -mfixed-range= option.  */
101const char *ia64_fixed_range_string;
102
103/* Determines whether we use adds, addl, or movl to generate our
104   TLS immediate offsets.  */
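/* The valid values are 14, 22 and 64, matching the immediate ranges of
   the "adds", "addl" and "movl" instructions respectively; 22 is the
   default.  */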
105int ia64_tls_size = 22;
106
107/* String used with the -mtls-size= option.  */
108const char *ia64_tls_size_string;
109
110/* Which cpu are we scheduling for.  */
111enum processor_type ia64_tune;
112
113/* String used with the -mtune= option.  */
114const char *ia64_tune_string;
115
116/* Determines whether we run our final scheduling pass or not.  We always
117   avoid the normal second scheduling pass.  */
118static int ia64_flag_schedule_insns2;
119
120/* Variables which are this size or smaller are put in the sdata/sbss
121   sections.  */
122
123unsigned int ia64_section_threshold;
124
125/* The following variable is used by the DFA insn scheduler.  The value is
126   TRUE if we do insn bundling instead of insn scheduling.  */
127int bundling_p = 0;
128
129/* Structure to be filled in by ia64_compute_frame_size with register
130   save masks and offsets for the current function.  */
131
132struct ia64_frame_info
133{
134  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
135				   the caller's scratch area.  */
136  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
137  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
138  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
139  HARD_REG_SET mask;		/* mask of saved registers.  */
140  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
141				   registers or long-term scratches.  */
142  int n_spilled;		/* number of spilled registers.  */
143  int reg_fp;			/* register for fp.  */
144  int reg_save_b0;		/* save register for b0.  */
145  int reg_save_pr;		/* save register for prs.  */
146  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
147  int reg_save_ar_unat;		/* save register for ar.unat.  */
148  int reg_save_ar_lc;		/* save register for ar.lc.  */
149  int reg_save_gp;		/* save register for gp.  */
150  int n_input_regs;		/* number of input registers used.  */
151  int n_local_regs;		/* number of local registers used.  */
152  int n_output_regs;		/* number of output registers used.  */
153  int n_rotate_regs;		/* number of rotating registers used.  */
154
155  char need_regstk;		/* true if a .regstk directive needed.  */
156  char initialized;		/* true if the data is finalized.  */
157};
158
159/* Current frame information calculated by ia64_compute_frame_size.  */
160static struct ia64_frame_info current_frame_info;
161
162static int ia64_use_dfa_pipeline_interface (void);
163static int ia64_first_cycle_multipass_dfa_lookahead (void);
164static void ia64_dependencies_evaluation_hook (rtx, rtx);
165static void ia64_init_dfa_pre_cycle_insn (void);
166static rtx ia64_dfa_pre_cycle_insn (void);
167static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
168static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
169static rtx gen_tls_get_addr (void);
170static rtx gen_thread_pointer (void);
171static rtx ia64_expand_tls_address (enum tls_model, rtx, rtx);
172static int find_gr_spill (int);
173static int next_scratch_gr_reg (void);
174static void mark_reg_gr_used_mask (rtx, void *);
175static void ia64_compute_frame_size (HOST_WIDE_INT);
176static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
177static void finish_spill_pointers (void);
178static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
179static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
180static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
181static rtx gen_movdi_x (rtx, rtx, rtx);
182static rtx gen_fr_spill_x (rtx, rtx, rtx);
183static rtx gen_fr_restore_x (rtx, rtx, rtx);
184
185static enum machine_mode hfa_element_mode (tree, int);
186static bool ia64_function_ok_for_sibcall (tree, tree);
187static bool ia64_rtx_costs (rtx, int, int, int *);
188static void fix_range (const char *);
189static struct machine_function * ia64_init_machine_status (void);
190static void emit_insn_group_barriers (FILE *);
191static void emit_all_insn_group_barriers (FILE *);
192static void final_emit_insn_group_barriers (FILE *);
193static void emit_predicate_relation_info (void);
194static void ia64_reorg (void);
195static bool ia64_in_small_data_p (tree);
196static void process_epilogue (void);
197static int process_set (FILE *, rtx);
198
199static rtx ia64_expand_fetch_and_op (optab, enum machine_mode, tree, rtx);
200static rtx ia64_expand_op_and_fetch (optab, enum machine_mode, tree, rtx);
201static rtx ia64_expand_compare_and_swap (enum machine_mode, enum machine_mode,
202					 int, tree, rtx);
203static rtx ia64_expand_lock_test_and_set (enum machine_mode, tree, rtx);
204static rtx ia64_expand_lock_release (enum machine_mode, tree, rtx);
205static bool ia64_assemble_integer (rtx, unsigned int, int);
206static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
207static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
208static void ia64_output_function_end_prologue (FILE *);
209
210static int ia64_issue_rate (void);
211static int ia64_adjust_cost (rtx, rtx, rtx, int);
212static void ia64_sched_init (FILE *, int, int);
213static void ia64_sched_finish (FILE *, int);
214static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
215static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
216static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
217static int ia64_variable_issue (FILE *, int, rtx, int);
218
219static struct bundle_state *get_free_bundle_state (void);
220static void free_bundle_state (struct bundle_state *);
221static void initiate_bundle_states (void);
222static void finish_bundle_states (void);
223static unsigned bundle_state_hash (const void *);
224static int bundle_state_eq_p (const void *, const void *);
225static int insert_bundle_state (struct bundle_state *);
226static void initiate_bundle_state_table (void);
227static void finish_bundle_state_table (void);
228static int try_issue_nops (struct bundle_state *, int);
229static int try_issue_insn (struct bundle_state *, rtx);
230static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
231static int get_max_pos (state_t);
232static int get_template (state_t, int);
233
234static rtx get_next_important_insn (rtx, rtx);
235static void bundling (FILE *, int, rtx, rtx);
236
237static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
238				  HOST_WIDE_INT, tree);
239static void ia64_file_start (void);
240
241static void ia64_select_rtx_section (enum machine_mode, rtx,
242				     unsigned HOST_WIDE_INT);
243static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
244     ATTRIBUTE_UNUSED;
245static void ia64_rwreloc_unique_section (tree, int)
246     ATTRIBUTE_UNUSED;
247static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
248					     unsigned HOST_WIDE_INT)
249     ATTRIBUTE_UNUSED;
250static unsigned int ia64_rwreloc_section_type_flags (tree, const char *, int)
251     ATTRIBUTE_UNUSED;
252
253static void ia64_hpux_add_extern_decl (tree decl)
254     ATTRIBUTE_UNUSED;
255static void ia64_hpux_file_end (void)
256     ATTRIBUTE_UNUSED;
257static void ia64_hpux_init_libfuncs (void)
258     ATTRIBUTE_UNUSED;
259static void ia64_vms_init_libfuncs (void)
260     ATTRIBUTE_UNUSED;
261
262static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
263static void ia64_encode_section_info (tree, rtx, int);
264static rtx ia64_struct_value_rtx (tree, int);
265
266
267/* Table of valid machine attributes.  */
268static const struct attribute_spec ia64_attribute_table[] =
269{
270  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
271  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
272  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
273  { NULL,	       0, 0, false, false, false, NULL }
274};
275
276/* Initialize the GCC target structure.  */
277#undef TARGET_ATTRIBUTE_TABLE
278#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
279
280#undef TARGET_INIT_BUILTINS
281#define TARGET_INIT_BUILTINS ia64_init_builtins
282
283#undef TARGET_EXPAND_BUILTIN
284#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
285
286#undef TARGET_ASM_BYTE_OP
287#define TARGET_ASM_BYTE_OP "\tdata1\t"
288#undef TARGET_ASM_ALIGNED_HI_OP
289#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
290#undef TARGET_ASM_ALIGNED_SI_OP
291#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
292#undef TARGET_ASM_ALIGNED_DI_OP
293#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
294#undef TARGET_ASM_UNALIGNED_HI_OP
295#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
296#undef TARGET_ASM_UNALIGNED_SI_OP
297#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
298#undef TARGET_ASM_UNALIGNED_DI_OP
299#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
300#undef TARGET_ASM_INTEGER
301#define TARGET_ASM_INTEGER ia64_assemble_integer
302
303#undef TARGET_ASM_FUNCTION_PROLOGUE
304#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
305#undef TARGET_ASM_FUNCTION_END_PROLOGUE
306#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
307#undef TARGET_ASM_FUNCTION_EPILOGUE
308#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
309
310#undef TARGET_IN_SMALL_DATA_P
311#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
312
313#undef TARGET_SCHED_ADJUST_COST
314#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
315#undef TARGET_SCHED_ISSUE_RATE
316#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
317#undef TARGET_SCHED_VARIABLE_ISSUE
318#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
319#undef TARGET_SCHED_INIT
320#define TARGET_SCHED_INIT ia64_sched_init
321#undef TARGET_SCHED_FINISH
322#define TARGET_SCHED_FINISH ia64_sched_finish
323#undef TARGET_SCHED_REORDER
324#define TARGET_SCHED_REORDER ia64_sched_reorder
325#undef TARGET_SCHED_REORDER2
326#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
327
328#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
329#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
330
331#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
332#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE ia64_use_dfa_pipeline_interface
333
334#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
335#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
336
337#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
338#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
339#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
340#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
341
342#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
343#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
344  ia64_first_cycle_multipass_dfa_lookahead_guard
345
346#undef TARGET_SCHED_DFA_NEW_CYCLE
347#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
348
349#undef TARGET_FUNCTION_OK_FOR_SIBCALL
350#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
351
352#undef TARGET_ASM_OUTPUT_MI_THUNK
353#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
354#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
355#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
356
357#undef TARGET_ASM_FILE_START
358#define TARGET_ASM_FILE_START ia64_file_start
359
360#undef TARGET_RTX_COSTS
361#define TARGET_RTX_COSTS ia64_rtx_costs
362#undef TARGET_ADDRESS_COST
363#define TARGET_ADDRESS_COST hook_int_rtx_0
364
365#undef TARGET_MACHINE_DEPENDENT_REORG
366#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
367
368#undef TARGET_ENCODE_SECTION_INFO
369#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
370
371#undef TARGET_STRUCT_VALUE_RTX
372#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
373
374struct gcc_target targetm = TARGET_INITIALIZER;
375
376/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */
377
378int
379call_operand (rtx op, enum machine_mode mode)
380{
381  if (mode != GET_MODE (op) && mode != VOIDmode)
382    return 0;
383
384  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
385	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
386}
387
388/* Return 1 if OP refers to a symbol in the sdata section.  */
389
390int
391sdata_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
392{
393  HOST_WIDE_INT offset = 0, size = 0;
394
395  switch (GET_CODE (op))
396    {
397    case CONST:
398      op = XEXP (op, 0);
399      if (GET_CODE (op) != PLUS
400	  || GET_CODE (XEXP (op, 0)) != SYMBOL_REF
401	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
402	break;
403      offset = INTVAL (XEXP (op, 1));
404      op = XEXP (op, 0);
405      /* FALLTHRU */
406
407    case SYMBOL_REF:
408      if (CONSTANT_POOL_ADDRESS_P (op))
409	{
410	  size = GET_MODE_SIZE (get_pool_mode (op));
411	  if (size > ia64_section_threshold)
412	    return false;
413	}
414      else
415	{
416	  tree t;
417
418	  if (!SYMBOL_REF_LOCAL_P (op) || !SYMBOL_REF_SMALL_P (op))
419	    return false;
420
421	  /* Note that in addition to DECLs, we can get various forms
422	     of constants here.  */
423	  t = SYMBOL_REF_DECL (op);
424	  if (DECL_P (t))
425	    t = DECL_SIZE_UNIT (t);
426	  else
427	    t = TYPE_SIZE_UNIT (TREE_TYPE (t));
428	  if (t && host_integerp (t, 0))
429	    {
430	      size = tree_low_cst (t, 0);
431	      if (size < 0)
432		size = 0;
433	    }
434	}
435
436      /* Deny the stupid user trick of addressing outside the object.  Such
437	 things quickly result in GPREL22 relocation overflows.  Of course,
438	 they're also highly undefined.  From a pure pedant's point of view
439	 they deserve a slap on the wrist (such as provided by a relocation
440	 overflow), but that just leads to bugzilla noise.  */
441      return (offset >= 0 && offset <= size);
442
443    default:
444      break;
445    }
446
447  return 0;
448}
449
450int
451small_addr_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
452{
453  return SYMBOL_REF_SMALL_ADDR_P (op);
454}
455
456/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */
457
458int
459got_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
460{
461  switch (GET_CODE (op))
462    {
463    case CONST:
464      op = XEXP (op, 0);
465      if (GET_CODE (op) != PLUS)
466	return 0;
467      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
468	return 0;
469      op = XEXP (op, 1);
470      if (GET_CODE (op) != CONST_INT)
471	return 0;
472
473      return 1;
474
475      /* Ok if we're not using GOT entries at all.  */
476      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
477	return 1;
478
479      /* "Ok" while emitting rtl, since otherwise we won't be provided
480	 with the entire offset during emission, which makes it very
481	 hard to split the offset into high and low parts.  */
482      if (rtx_equal_function_value_matters)
483	return 1;
484
485      /* Force the low 14 bits of the constant to zero so that we do not
486	 use up so many GOT entries.  */
487      return (INTVAL (op) & 0x3fff) == 0;
488
489    case SYMBOL_REF:
490      if (SYMBOL_REF_SMALL_ADDR_P (op))
491	return 0;
492    case LABEL_REF:
493      return 1;
494
495    default:
496      break;
497    }
498  return 0;
499}
500
501/* Return 1 if OP refers to a symbol.  */
502
503int
504symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
505{
506  switch (GET_CODE (op))
507    {
508    case CONST:
509    case SYMBOL_REF:
510    case LABEL_REF:
511      return 1;
512
513    default:
514      break;
515    }
516  return 0;
517}
518
519/* Return tls_model if OP refers to a TLS symbol.  */
520
521int
522tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
523{
524  if (GET_CODE (op) != SYMBOL_REF)
525    return 0;
526  return SYMBOL_REF_TLS_MODEL (op);
527}
528
529
530/* Return 1 if OP refers to a function.  */
531
532int
533function_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
534{
535  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (op))
536    return 1;
537  else
538    return 0;
539}
540
541/* Return 1 if OP is setjmp or a similar function.  */
542
543/* ??? This is an unsatisfying solution.  Should rethink.  */
544
545int
546setjmp_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
547{
548  const char *name;
549  int retval = 0;
550
551  if (GET_CODE (op) != SYMBOL_REF)
552    return 0;
553
554  name = XSTR (op, 0);
555
556  /* The following code is borrowed from special_function_p in calls.c.  */
557
558  /* Disregard prefix _, __ or __x.  */
559  if (name[0] == '_')
560    {
561      if (name[1] == '_' && name[2] == 'x')
562	name += 3;
563      else if (name[1] == '_')
564	name += 2;
565      else
566	name += 1;
567    }
568
569  if (name[0] == 's')
570    {
571      retval
572	= ((name[1] == 'e'
573	    && (! strcmp (name, "setjmp")
574		|| ! strcmp (name, "setjmp_syscall")))
575	   || (name[1] == 'i'
576	       && ! strcmp (name, "sigsetjmp"))
577	   || (name[1] == 'a'
578	       && ! strcmp (name, "savectx")));
579    }
580  else if ((name[0] == 'q' && name[1] == 's'
581	    && ! strcmp (name, "qsetjmp"))
582	   || (name[0] == 'v' && name[1] == 'f'
583	       && ! strcmp (name, "vfork")))
584    retval = 1;
585
586  return retval;
587}
588
589/* Return 1 if OP is a general operand, excluding tls symbolic operands.  */
590
591int
592move_operand (rtx op, enum machine_mode mode)
593{
594  return general_operand (op, mode) && !tls_symbolic_operand (op, mode);
595}
596
597/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */
598
599int
600gr_register_operand (rtx op, enum machine_mode mode)
601{
602  if (! register_operand (op, mode))
603    return 0;
604  if (GET_CODE (op) == SUBREG)
605    op = SUBREG_REG (op);
606  if (GET_CODE (op) == REG)
607    {
608      unsigned int regno = REGNO (op);
609      if (regno < FIRST_PSEUDO_REGISTER)
610	return GENERAL_REGNO_P (regno);
611    }
612  return 1;
613}
614
615/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */
616
617int
618fr_register_operand (rtx op, enum machine_mode mode)
619{
620  if (! register_operand (op, mode))
621    return 0;
622  if (GET_CODE (op) == SUBREG)
623    op = SUBREG_REG (op);
624  if (GET_CODE (op) == REG)
625    {
626      unsigned int regno = REGNO (op);
627      if (regno < FIRST_PSEUDO_REGISTER)
628	return FR_REGNO_P (regno);
629    }
630  return 1;
631}
632
633/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */
634
635int
636grfr_register_operand (rtx op, enum machine_mode mode)
637{
638  if (! register_operand (op, mode))
639    return 0;
640  if (GET_CODE (op) == SUBREG)
641    op = SUBREG_REG (op);
642  if (GET_CODE (op) == REG)
643    {
644      unsigned int regno = REGNO (op);
645      if (regno < FIRST_PSEUDO_REGISTER)
646	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
647    }
648  return 1;
649}
650
651/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */
652
653int
654gr_nonimmediate_operand (rtx op, enum machine_mode mode)
655{
656  if (! nonimmediate_operand (op, mode))
657    return 0;
658  if (GET_CODE (op) == SUBREG)
659    op = SUBREG_REG (op);
660  if (GET_CODE (op) == REG)
661    {
662      unsigned int regno = REGNO (op);
663      if (regno < FIRST_PSEUDO_REGISTER)
664	return GENERAL_REGNO_P (regno);
665    }
666  return 1;
667}
668
669/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */
670
671int
672fr_nonimmediate_operand (rtx op, enum machine_mode mode)
673{
674  if (! nonimmediate_operand (op, mode))
675    return 0;
676  if (GET_CODE (op) == SUBREG)
677    op = SUBREG_REG (op);
678  if (GET_CODE (op) == REG)
679    {
680      unsigned int regno = REGNO (op);
681      if (regno < FIRST_PSEUDO_REGISTER)
682	return FR_REGNO_P (regno);
683    }
684  return 1;
685}
686
687/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */
688
689int
690grfr_nonimmediate_operand (rtx op, enum machine_mode mode)
691{
692  if (! nonimmediate_operand (op, mode))
693    return 0;
694  if (GET_CODE (op) == SUBREG)
695    op = SUBREG_REG (op);
696  if (GET_CODE (op) == REG)
697    {
698      unsigned int regno = REGNO (op);
699      if (regno < FIRST_PSEUDO_REGISTER)
700	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
701    }
702  return 1;
703}
704
705/* Return 1 if OP is a GR register operand, or zero.  */
706
707int
708gr_reg_or_0_operand (rtx op, enum machine_mode mode)
709{
710  return (op == const0_rtx || gr_register_operand (op, mode));
711}
712
713/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */
714
715int
716gr_reg_or_5bit_operand (rtx op, enum machine_mode mode)
717{
718  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
719	  || GET_CODE (op) == CONSTANT_P_RTX
720	  || gr_register_operand (op, mode));
721}
722
723/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */
724
725int
726gr_reg_or_6bit_operand (rtx op, enum machine_mode mode)
727{
728  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
729	  || GET_CODE (op) == CONSTANT_P_RTX
730	  || gr_register_operand (op, mode));
731}
732
733/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */
734
735int
736gr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
737{
738  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
739	  || GET_CODE (op) == CONSTANT_P_RTX
740	  || gr_register_operand (op, mode));
741}
742
743/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */
744
745int
746grfr_reg_or_8bit_operand (rtx op, enum machine_mode mode)
747{
748  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
749	  || GET_CODE (op) == CONSTANT_P_RTX
750	  || grfr_register_operand (op, mode));
751}
752
753/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
754   operand.  */
755
756int
757gr_reg_or_8bit_adjusted_operand (rtx op, enum machine_mode mode)
758{
759  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
760	  || GET_CODE (op) == CONSTANT_P_RTX
761	  || gr_register_operand (op, mode));
762}
763
764/* Return 1 if OP is a register operand, or is valid for both an 8 bit
765   immediate and an 8 bit adjusted immediate operand.  This is necessary
766   because when we emit a compare, we don't know what the condition will be,
767   so we need the intersection of the immediates accepted by GT and LT.  */
768
769int
770gr_reg_or_8bit_and_adjusted_operand (rtx op, enum machine_mode mode)
771{
772  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
773	   && CONST_OK_FOR_L (INTVAL (op)))
774	  || GET_CODE (op) == CONSTANT_P_RTX
775	  || gr_register_operand (op, mode));
776}
777
778/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */
779
780int
781gr_reg_or_14bit_operand (rtx op, enum machine_mode mode)
782{
783  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
784	  || GET_CODE (op) == CONSTANT_P_RTX
785	  || gr_register_operand (op, mode));
786}
787
788/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */
789
790int
791gr_reg_or_22bit_operand (rtx op, enum machine_mode mode)
792{
793  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
794	  || GET_CODE (op) == CONSTANT_P_RTX
795	  || gr_register_operand (op, mode));
796}
797
798/* Return 1 if OP is a 6 bit immediate operand.  */
799
800int
801shift_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
802{
803  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
804	  || GET_CODE (op) == CONSTANT_P_RTX);
805}
806
807/* Return 1 if OP is a 5 bit immediate operand.  */
808
809int
810shift_32bit_count_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
811{
812  return ((GET_CODE (op) == CONST_INT
813	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
814	  || GET_CODE (op) == CONSTANT_P_RTX);
815}
816
817/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */
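/* (These are the scale factors that correspond to the shladd
   instruction's shift counts of 1 through 4.)  */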
818
819int
820shladd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
821{
822  return (GET_CODE (op) == CONST_INT
823	  && (INTVAL (op) == 2 || INTVAL (op) == 4
824	      || INTVAL (op) == 8 || INTVAL (op) == 16));
825}
826
827/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */
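/* (These are the only increment values the fetchadd instruction
   accepts.)  */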
828
829int
830fetchadd_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
831{
832  return (GET_CODE (op) == CONST_INT
833          && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
834              INTVAL (op) == -4  || INTVAL (op) == -1 ||
835              INTVAL (op) == 1   || INTVAL (op) == 4  ||
836              INTVAL (op) == 8   || INTVAL (op) == 16));
837}
838
839/* Return 1 if OP is a floating-point constant zero, one, or a register.  */
840
841int
842fr_reg_or_fp01_operand (rtx op, enum machine_mode mode)
843{
844  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
845	  || fr_register_operand (op, mode));
846}
847
848/* Like nonimmediate_operand, but don't allow MEMs that try to use a
849   POST_MODIFY with a REG as displacement.  */
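/* (IA-64 stores support only an immediate post-increment; the register
   post-increment form exists only for loads.)  */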
850
851int
852destination_operand (rtx op, enum machine_mode mode)
853{
854  if (! nonimmediate_operand (op, mode))
855    return 0;
856  if (GET_CODE (op) == MEM
857      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
858      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
859    return 0;
860  return 1;
861}
862
863/* Like memory_operand, but don't allow post-increments.  */
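/* ('a' is the rtx class character for autoincrement addresses such as
   POST_INC and POST_DEC.)  */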
864
865int
866not_postinc_memory_operand (rtx op, enum machine_mode mode)
867{
868  return (memory_operand (op, mode)
869	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
870}
871
872/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
873   signed immediate operand.  */
874
875int
876normal_comparison_operator (register rtx op, enum machine_mode mode)
877{
878  enum rtx_code code = GET_CODE (op);
879  return ((mode == VOIDmode || GET_MODE (op) == mode)
880	  && (code == EQ || code == NE
881	      || code == GT || code == LE || code == GTU || code == LEU));
882}
883
884/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
885   signed immediate operand.  */
886
887int
888adjusted_comparison_operator (register rtx op, enum machine_mode mode)
889{
890  enum rtx_code code = GET_CODE (op);
891  return ((mode == VOIDmode || GET_MODE (op) == mode)
892	  && (code == LT || code == GE || code == LTU || code == GEU));
893}
894
895/* Return 1 if this is a signed inequality operator.  */
896
897int
898signed_inequality_operator (register rtx op, enum machine_mode mode)
899{
900  enum rtx_code code = GET_CODE (op);
901  return ((mode == VOIDmode || GET_MODE (op) == mode)
902	  && (code == GE || code == GT
903	      || code == LE || code == LT));
904}
905
906/* Return 1 if this operator is valid for predication.  */
907
908int
909predicate_operator (register rtx op, enum machine_mode mode)
910{
911  enum rtx_code code = GET_CODE (op);
912  return ((GET_MODE (op) == mode || mode == VOIDmode)
913	  && (code == EQ || code == NE));
914}
915
916/* Return 1 if this operator can be used in a conditional operation.  */
917
918int
919condop_operator (register rtx op, enum machine_mode mode)
920{
921  enum rtx_code code = GET_CODE (op);
922  return ((GET_MODE (op) == mode || mode == VOIDmode)
923	  && (code == PLUS || code == MINUS || code == AND
924	      || code == IOR || code == XOR));
925}
926
927/* Return 1 if this is the ar.lc register.  */
928
929int
930ar_lc_reg_operand (register rtx op, enum machine_mode mode)
931{
932  return (GET_MODE (op) == DImode
933	  && (mode == DImode || mode == VOIDmode)
934	  && GET_CODE (op) == REG
935	  && REGNO (op) == AR_LC_REGNUM);
936}
937
938/* Return 1 if this is the ar.ccv register.  */
939
940int
941ar_ccv_reg_operand (register rtx op, enum machine_mode mode)
942{
943  return ((GET_MODE (op) == mode || mode == VOIDmode)
944	  && GET_CODE (op) == REG
945	  && REGNO (op) == AR_CCV_REGNUM);
946}
947
948/* Return 1 if this is the ar.pfs register.  */
949
950int
951ar_pfs_reg_operand (register rtx op, enum machine_mode mode)
952{
953  return ((GET_MODE (op) == mode || mode == VOIDmode)
954	  && GET_CODE (op) == REG
955	  && REGNO (op) == AR_PFS_REGNUM);
956}
957
958/* Like general_operand, but don't allow (mem (addressof)).  */
959
960int
961general_xfmode_operand (rtx op, enum machine_mode mode)
962{
963  if (! general_operand (op, mode))
964    return 0;
965  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
966    return 0;
967  return 1;
968}
969
970/* Similarly.  */
971
972int
973destination_xfmode_operand (rtx op, enum machine_mode mode)
974{
975  if (! destination_operand (op, mode))
976    return 0;
977  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
978    return 0;
979  return 1;
980}
981
982/* Similarly.  */
983
984int
985xfreg_or_fp01_operand (rtx op, enum machine_mode mode)
986{
987  if (GET_CODE (op) == SUBREG)
988    return 0;
989  return fr_reg_or_fp01_operand (op, mode);
990}
991
992/* Return 1 if OP is valid as a base register in a reg + offset address.  */
993
994int
995basereg_operand (rtx op, enum machine_mode mode)
996{
997  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
998     checks from pa.c basereg_operand as well?  Seems to be OK without them
999     in test runs.  */
1000
1001  return (register_operand (op, mode) &&
1002	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
1003}
1004
1005typedef enum
1006  {
1007    ADDR_AREA_NORMAL,	/* normal address area */
1008    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
1009  }
1010ia64_addr_area;
1011
1012static GTY(()) tree small_ident1;
1013static GTY(()) tree small_ident2;
1014
1015static void
1016init_idents (void)
1017{
1018  if (small_ident1 == 0)
1019    {
1020      small_ident1 = get_identifier ("small");
1021      small_ident2 = get_identifier ("__small__");
1022    }
1023}
1024
1025/* Retrieve the address area that has been chosen for the given decl.  */
1026
1027static ia64_addr_area
1028ia64_get_addr_area (tree decl)
1029{
1030  tree model_attr;
1031
1032  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
1033  if (model_attr)
1034    {
1035      tree id;
1036
1037      init_idents ();
1038      id = TREE_VALUE (TREE_VALUE (model_attr));
1039      if (id == small_ident1 || id == small_ident2)
1040	return ADDR_AREA_SMALL;
1041    }
1042  return ADDR_AREA_NORMAL;
1043}
1044
1045static tree
1046ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1047{
1048  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
1049  ia64_addr_area area;
1050  tree arg, decl = *node;
1051
1052  init_idents ();
1053  arg = TREE_VALUE (args);
1054  if (arg == small_ident1 || arg == small_ident2)
1055    {
1056      addr_area = ADDR_AREA_SMALL;
1057    }
1058  else
1059    {
1060      warning ("invalid argument of `%s' attribute",
1061	       IDENTIFIER_POINTER (name));
1062      *no_add_attrs = true;
1063    }
1064
1065  switch (TREE_CODE (decl))
1066    {
1067    case VAR_DECL:
1068      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
1069	   == FUNCTION_DECL)
1070	  && !TREE_STATIC (decl))
1071	{
1072	  error ("%Jan address area attribute cannot be specified for "
1073		 "local variables", decl, decl);
1074	  *no_add_attrs = true;
1075	}
1076      area = ia64_get_addr_area (decl);
1077      if (area != ADDR_AREA_NORMAL && addr_area != area)
1078	{
1079	  error ("%Jaddress area of '%s' conflicts with previous "
1080		 "declaration", decl, decl);
1081	  *no_add_attrs = true;
1082	}
1083      break;
1084
1085    case FUNCTION_DECL:
1086      error ("%Jaddress area attribute cannot be specified for functions",
1087	     decl, decl);
1088      *no_add_attrs = true;
1089      break;
1090
1091    default:
1092      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1093      *no_add_attrs = true;
1094      break;
1095    }
1096
1097  return NULL_TREE;
1098}
1099
1100static void
1101ia64_encode_addr_area (tree decl, rtx symbol)
1102{
1103  int flags;
1104
1105  flags = SYMBOL_REF_FLAGS (symbol);
1106  switch (ia64_get_addr_area (decl))
1107    {
1108    case ADDR_AREA_NORMAL: break;
1109    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
1110    default: abort ();
1111    }
1112  SYMBOL_REF_FLAGS (symbol) = flags;
1113}
1114
1115static void
1116ia64_encode_section_info (tree decl, rtx rtl, int first)
1117{
1118  default_encode_section_info (decl, rtl, first);
1119
1120  /* Careful not to prod global register variables.  */
1121  if (TREE_CODE (decl) == VAR_DECL
1122      && GET_CODE (DECL_RTL (decl)) == MEM
1123      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
1124      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
1125    ia64_encode_addr_area (decl, XEXP (rtl, 0));
1126}
1127
1128/* Return 1 if the operands of a move are ok.  */
1129
1130int
1131ia64_move_ok (rtx dst, rtx src)
1132{
1133  /* If we're under init_recog_no_volatile, we'll not be able to use
1134     memory_operand.  So check the code directly and don't worry about
1135     the validity of the underlying address, which should have been
1136     checked elsewhere anyway.  */
1137  if (GET_CODE (dst) != MEM)
1138    return 1;
1139  if (GET_CODE (src) == MEM)
1140    return 0;
1141  if (register_operand (src, VOIDmode))
1142    return 1;
1143
1144  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
1145  if (INTEGRAL_MODE_P (GET_MODE (dst)))
1146    return src == const0_rtx;
1147  else
1148    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
1149}
1150
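/* Return nonzero if exactly one of OP1 and OP2 is a base register,
   i.e. their basereg_operand results differ.  */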
1151int
1152addp4_optimize_ok (rtx op1, rtx op2)
1153{
1154  return (basereg_operand (op1, GET_MODE(op1)) !=
1155	  basereg_operand (op2, GET_MODE(op2)));
1156}
1157
1158/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
1159   Return the length of the field, or <= 0 on failure.  */
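/* For example, rop = 0x1fe with rshift = 1 shifts down to 0xff, and
   exact_log2 (0x100) gives a field length of 8.  If the shifted value
   is not a solid block of 1's at bit 0, exact_log2 returns -1.  */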
1160
1161int
1162ia64_depz_field_mask (rtx rop, rtx rshift)
1163{
1164  unsigned HOST_WIDE_INT op = INTVAL (rop);
1165  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
1166
1167  /* Get rid of the zero bits we're shifting in.  */
1168  op >>= shift;
1169
1170  /* We must now have a solid block of 1's at bit 0.  */
1171  return exact_log2 (op + 1);
1172}
1173
1174/* Expand a symbolic constant load.  */
1175
1176void
1177ia64_expand_load_address (rtx dest, rtx src)
1178{
1179  if (tls_symbolic_operand (src, VOIDmode))
1180    abort ();
1181  if (GET_CODE (dest) != REG)
1182    abort ();
1183
1184  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
1185     having to pointer-extend the value afterward.  Other forms of address
1186     computation below are also more natural to compute as 64-bit quantities.
1187     If we've been given an SImode destination register, change it.  */
1188  if (GET_MODE (dest) != Pmode)
1189    dest = gen_rtx_REG (Pmode, REGNO (dest));
1190
1191  if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (src))
1192    {
1193      emit_insn (gen_rtx_SET (VOIDmode, dest, src));
1194      return;
1195    }
1196  else if (TARGET_AUTO_PIC)
1197    {
1198      emit_insn (gen_load_gprel64 (dest, src));
1199      return;
1200    }
1201  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1202    {
1203      emit_insn (gen_load_fptr (dest, src));
1204      return;
1205    }
1206  else if (sdata_symbolic_operand (src, VOIDmode))
1207    {
1208      emit_insn (gen_load_gprel (dest, src));
1209      return;
1210    }
1211
1212  if (GET_CODE (src) == CONST
1213      && GET_CODE (XEXP (src, 0)) == PLUS
1214      && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
1215      && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
1216    {
1217      rtx sym = XEXP (XEXP (src, 0), 0);
1218      HOST_WIDE_INT ofs, hi, lo;
1219
1220      /* Split the offset into a sign extended 14-bit low part
1221	 and a complementary high part.  */
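      /* For instance, an offset of 0x6000 splits into lo = -0x2000 and
         hi = 0x8000: hi + lo reconstructs the offset, while lo stays in
         the signed 14-bit range of an "adds" immediate.  */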
1222      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
1223      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
1224      hi = ofs - lo;
1225
1226      ia64_expand_load_address (dest, plus_constant (sym, hi));
1227      emit_insn (gen_adddi3 (dest, dest, GEN_INT (lo)));
1228    }
1229  else
1230    {
1231      rtx tmp;
1232
1233      tmp = gen_rtx_HIGH (Pmode, src);
1234      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1235      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1236
1237      tmp = gen_rtx_LO_SUM (GET_MODE (dest), dest, src);
1238      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1239    }
1240}
1241
1242static GTY(()) rtx gen_tls_tga;
1243static rtx
1244gen_tls_get_addr (void)
1245{
1246  if (!gen_tls_tga)
1247    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1248  return gen_tls_tga;
1249}
1250
1251static GTY(()) rtx thread_pointer_rtx;
1252static rtx
1253gen_thread_pointer (void)
1254{
1255  if (!thread_pointer_rtx)
1256    {
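      /* Register r13 holds the thread pointer in the IA-64 ABI.  */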
1257      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1258      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
1259    }
1260  return thread_pointer_rtx;
1261}
1262
1263static rtx
1264ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1)
1265{
1266  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1267  rtx orig_op0 = op0;
1268
1269  switch (tls_kind)
1270    {
1271    case TLS_MODEL_GLOBAL_DYNAMIC:
1272      start_sequence ();
1273
1274      tga_op1 = gen_reg_rtx (Pmode);
1275      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1276      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1277      RTX_UNCHANGING_P (tga_op1) = 1;
1278
1279      tga_op2 = gen_reg_rtx (Pmode);
1280      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
1281      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
1282      RTX_UNCHANGING_P (tga_op2) = 1;
1283
1284      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1285					 LCT_CONST, Pmode, 2, tga_op1,
1286					 Pmode, tga_op2, Pmode);
1287
1288      insns = get_insns ();
1289      end_sequence ();
1290
1291      if (GET_MODE (op0) != Pmode)
1292	op0 = tga_ret;
1293      emit_libcall_block (insns, op0, tga_ret, op1);
1294      break;
1295
1296    case TLS_MODEL_LOCAL_DYNAMIC:
1297      /* ??? This isn't the completely proper way to do local-dynamic.
1298	 If the call to __tls_get_addr is used only by a single symbol,
1299	 then we should (somehow) move the dtprel to the second arg
1300	 to avoid the extra add.  */
1301      start_sequence ();
1302
1303      tga_op1 = gen_reg_rtx (Pmode);
1304      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
1305      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
1306      RTX_UNCHANGING_P (tga_op1) = 1;
1307
1308      tga_op2 = const0_rtx;
1309
1310      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1311					 LCT_CONST, Pmode, 2, tga_op1,
1312					 Pmode, tga_op2, Pmode);
1313
1314      insns = get_insns ();
1315      end_sequence ();
1316
1317      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1318				UNSPEC_LD_BASE);
1319      tmp = gen_reg_rtx (Pmode);
1320      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1321
1322      if (!register_operand (op0, Pmode))
1323	op0 = gen_reg_rtx (Pmode);
1324      if (TARGET_TLS64)
1325	{
1326	  emit_insn (gen_load_dtprel (op0, op1));
1327	  emit_insn (gen_adddi3 (op0, tmp, op0));
1328	}
1329      else
1330	emit_insn (gen_add_dtprel (op0, tmp, op1));
1331      break;
1332
1333    case TLS_MODEL_INITIAL_EXEC:
1334      tmp = gen_reg_rtx (Pmode);
1335      emit_insn (gen_load_ltoff_tprel (tmp, op1));
1336      tmp = gen_rtx_MEM (Pmode, tmp);
1337      RTX_UNCHANGING_P (tmp) = 1;
1338      tmp = force_reg (Pmode, tmp);
1339
1340      if (!register_operand (op0, Pmode))
1341	op0 = gen_reg_rtx (Pmode);
1342      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1343      break;
1344
1345    case TLS_MODEL_LOCAL_EXEC:
1346      if (!register_operand (op0, Pmode))
1347	op0 = gen_reg_rtx (Pmode);
1348      if (TARGET_TLS64)
1349	{
1350	  emit_insn (gen_load_tprel (op0, op1));
1351	  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
1352	}
1353      else
1354	emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
1355      break;
1356
1357    default:
1358      abort ();
1359    }
1360
1361  if (orig_op0 == op0)
1362    return NULL_RTX;
1363  if (GET_MODE (orig_op0) == Pmode)
1364    return op0;
1365  return gen_lowpart (GET_MODE (orig_op0), op0);
1366}
1367
1368rtx
1369ia64_expand_move (rtx op0, rtx op1)
1370{
1371  enum machine_mode mode = GET_MODE (op0);
1372
1373  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1374    op1 = force_reg (mode, op1);
1375
1376  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1377    {
1378      enum tls_model tls_kind;
1379      if ((tls_kind = tls_symbolic_operand (op1, VOIDmode)))
1380	return ia64_expand_tls_address (tls_kind, op0, op1);
1381
1382      if (!TARGET_NO_PIC && reload_completed)
1383	{
1384	  ia64_expand_load_address (op0, op1);
1385	  return NULL_RTX;
1386	}
1387    }
1388
1389  return op1;
1390}
1391
1392/* Split a move from OP1 to OP0 conditional on COND.  */
1393
1394void
1395ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1396{
1397  rtx insn, first = get_last_insn ();
1398
1399  emit_move_insn (op0, op1);
1400
1401  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1402    if (INSN_P (insn))
1403      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1404					  PATTERN (insn));
1405}
1406
1407/* Split a post-reload TImode or TFmode reference into two DImode
1408   components.  This is made extra difficult by the fact that we do
1409   not get any scratch registers to work with, because reload cannot
1410   be prevented from giving us a scratch that overlaps the register
1411   pair involved.  So instead, when addressing memory, we tweak the
1412   pointer register up and back down with POST_INCs.  Or up and not
1413   back down when we can get away with it.
1414
1415   REVERSED is true when the loads must be done in reversed order
1416   (high word first) for correctness.  DEAD is true when the pointer
1417   dies with the second insn we generate and therefore the second
1418   address must not carry a postmodify.
1419
1420   May return an insn which is to be emitted after the moves.  */
1421
1422static rtx
1423ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1424{
1425  rtx fixup = 0;
1426
1427  switch (GET_CODE (in))
1428    {
1429    case REG:
1430      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1431      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1432      break;
1433
1434    case CONST_INT:
1435    case CONST_DOUBLE:
1436      /* Cannot occur reversed.  */
1437      if (reversed) abort ();
1438
1439      if (GET_MODE (in) != TFmode)
1440	split_double (in, &out[0], &out[1]);
1441      else
1442	/* split_double does not understand how to split a TFmode
1443	   quantity into a pair of DImode constants.  */
1444	{
1445	  REAL_VALUE_TYPE r;
1446	  unsigned HOST_WIDE_INT p[2];
1447	  long l[4];  /* TFmode is 128 bits */
1448
1449	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1450	  real_to_target (l, &r, TFmode);
1451
1452	  if (FLOAT_WORDS_BIG_ENDIAN)
1453	    {
1454	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1455	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1456	    }
1457	  else
1458	    {
1459	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1460	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1461	    }
1462	  out[0] = GEN_INT (p[0]);
1463	  out[1] = GEN_INT (p[1]);
1464	}
1465      break;
1466
1467    case MEM:
1468      {
1469	rtx base = XEXP (in, 0);
1470	rtx offset;
1471
1472	switch (GET_CODE (base))
1473	  {
1474	  case REG:
1475	    if (!reversed)
1476	      {
1477		out[0] = adjust_automodify_address
1478		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1479		out[1] = adjust_automodify_address
1480		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1481	      }
1482	    else
1483	      {
1484		/* Reversal requires a pre-increment, which can only
1485		   be done as a separate insn.  */
1486		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1487		out[0] = adjust_automodify_address
1488		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1489		out[1] = adjust_address (in, DImode, 0);
1490	      }
1491	    break;
1492
1493	  case POST_INC:
1494	    if (reversed || dead) abort ();
1495	    /* Just do the increment in two steps.  */
1496	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1497	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1498	    break;
1499
1500	  case POST_DEC:
1501	    if (reversed || dead) abort ();
1502	    /* Add 8, subtract 24.  */
1503	    base = XEXP (base, 0);
1504	    out[0] = adjust_automodify_address
1505	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1506	    out[1] = adjust_automodify_address
1507	      (in, DImode,
1508	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1509	       8);
1510	    break;
1511
1512	  case POST_MODIFY:
1513	    if (reversed || dead) abort ();
1514	    /* Extract and adjust the modification.  This case is
1515	       trickier than the others, because we might have an
1516	       index register, or we might have a combined offset that
1517	       doesn't fit a signed 9-bit displacement field.  We can
1518	       assume the incoming expression is already legitimate.  */
1519	    offset = XEXP (base, 1);
1520	    base = XEXP (base, 0);
1521
1522	    out[0] = adjust_automodify_address
1523	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1524
1525	    if (GET_CODE (XEXP (offset, 1)) == REG)
1526	      {
1527		/* Can't adjust the postmodify to match.  Emit the
1528		   original, then a separate addition insn.  */
1529		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1530		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1531	      }
1532	    else if (GET_CODE (XEXP (offset, 1)) != CONST_INT)
1533	      abort ();
1534	    else if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1535	      {
1536		/* Again the postmodify cannot be made to match, but
1537		   in this case it's more efficient to get rid of the
1538		   postmodify entirely and fix up with an add insn. */
1539		out[1] = adjust_automodify_address (in, DImode, base, 8);
1540		fixup = gen_adddi3 (base, base,
1541				    GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1542	      }
1543	    else
1544	      {
1545		/* Combined offset still fits in the displacement field.
1546		   (We cannot overflow it at the high end.)  */
1547		out[1] = adjust_automodify_address
1548		  (in, DImode,
1549		   gen_rtx_POST_MODIFY (Pmode, base,
1550		     gen_rtx_PLUS (Pmode, base,
1551				   GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1552		   8);
1553	      }
1554	    break;
1555
1556	  default:
1557	    abort ();
1558	  }
1559	break;
1560      }
1561
1562    default:
1563      abort ();
1564    }
1565
1566  return fixup;
1567}
1568
1569/* Split a TImode or TFmode move instruction after reload.
1570   This is used by *movtf_internal and *movti_internal.  */
1571void
1572ia64_split_tmode_move (rtx operands[])
1573{
1574  rtx in[2], out[2], insn;
1575  rtx fixup[2];
1576  bool dead = false;
1577  bool reversed = false;
1578
1579  /* It is possible for reload to decide to overwrite a pointer with
1580     the value it points to.  In that case we have to do the loads in
1581     the appropriate order so that the pointer is not destroyed too
1582     early.  Also we must not generate a postmodify for that second
1583     load, or rws_access_regno will abort.  */
1584  if (GET_CODE (operands[1]) == MEM
1585      && reg_overlap_mentioned_p (operands[0], operands[1]))
1586    {
1587      rtx base = XEXP (operands[1], 0);
1588      while (GET_CODE (base) != REG)
1589	base = XEXP (base, 0);
1590
1591      if (REGNO (base) == REGNO (operands[0]))
1592	reversed = true;
1593      dead = true;
1594    }
1595  /* Another reason to do the moves in reversed order is if the first
1596     element of the target register pair is also the second element of
1597     the source register pair.  */
1598  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1599      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1600    reversed = true;
1601
1602  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1603  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1604
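/* Add a REG_INC note for the base register of EXP when EXP is an
   auto-modified memory reference, so later passes see the side effect
   on the pointer register.  */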
1605#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1606  if (GET_CODE (EXP) == MEM						\
1607      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1608	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1609	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1610    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1611					  XEXP (XEXP (EXP, 0), 0),	\
1612					  REG_NOTES (INSN))
1613
1614  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1615  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1616  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1617
1618  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1619  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1620  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1621
1622  if (fixup[0])
1623    emit_insn (fixup[0]);
1624  if (fixup[1])
1625    emit_insn (fixup[1]);
1626
1627#undef MAYBE_ADD_REG_INC_NOTE
1628}
1629
1630/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1631   through memory plus an extra GR scratch register.  Except that you can
1632   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1633   SECONDARY_RELOAD_CLASS, but not both.
1634
1635   We got into problems in the first place by allowing a construct like
1636   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1637   This solution attempts to prevent this situation from occurring.  When
1638   we see something like the above, we spill the inner register to memory.  */
1639
1640rtx
1641spill_xfmode_operand (rtx in, int force)
1642{
1643  if (GET_CODE (in) == SUBREG
1644      && GET_MODE (SUBREG_REG (in)) == TImode
1645      && GET_CODE (SUBREG_REG (in)) == REG)
1646    {
1647      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, /*rescan=*/true);
1648      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
1649    }
1650  else if (force && GET_CODE (in) == REG)
1651    {
1652      rtx mem = gen_mem_addressof (in, NULL_TREE, /*rescan=*/true);
1653      return gen_rtx_MEM (XFmode, copy_to_reg (XEXP (mem, 0)));
1654    }
1655  else if (GET_CODE (in) == MEM
1656	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
1657    return change_address (in, XFmode, copy_to_reg (XEXP (in, 0)));
1658  else
1659    return in;
1660}
1661
1662/* Emit comparison instruction if necessary, returning the expression
1663   that holds the compare result in the proper mode.  */
1664
1665static GTY(()) rtx cmptf_libfunc;
1666
1667rtx
1668ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1669{
1670  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1671  rtx cmp;
1672
1673  /* If we have a BImode input, then we already have a compare result, and
1674     do not need to emit another comparison.  */
1675  if (GET_MODE (op0) == BImode)
1676    {
1677      if ((code == NE || code == EQ) && op1 == const0_rtx)
1678	cmp = op0;
1679      else
1680	abort ();
1681    }
1682  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1683     magic number as its third argument indicating what to do.
1684     The return value is an integer to be compared against zero.  */
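  /* For example, a GE comparison becomes a call
     _U_Qfcmp (op0, op1, QCMP_GT|QCMP_EQ|QCMP_INV), and the result is
     then tested for NE against zero.  */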
1685  else if (TARGET_HPUX && GET_MODE (op0) == TFmode)
1686    {
1687      enum qfcmp_magic {
1688	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1689	QCMP_UNORD = 2,
1690	QCMP_EQ = 4,
1691	QCMP_LT = 8,
1692	QCMP_GT = 16
1693      } magic;
1694      enum rtx_code ncode;
1695      rtx ret, insns;
1696      if (GET_MODE (op1) != TFmode)
1697	abort ();
1698      switch (code)
1699	{
1700	  /* 1 = equal, 0 = not equal.  Equality operators do
1701	     not raise FP_INVALID when given an SNaN operand.  */
1702	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1703	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1704	  /* isunordered() from C99.  */
1705	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1706	  /* Relational operators raise FP_INVALID when given
1707	     an SNaN operand.  */
1708	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1709	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1710	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1711	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1712	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1713	     Expanders for buneq etc. would have to be added to ia64.md
1714	     for this to be useful.  */
1715	default: abort ();
1716	}
1717
1718      start_sequence ();
1719
1720      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1721				     op0, TFmode, op1, TFmode,
1722				     GEN_INT (magic), DImode);
1723      cmp = gen_reg_rtx (BImode);
1724      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1725			      gen_rtx_fmt_ee (ncode, BImode,
1726					      ret, const0_rtx)));
1727
1728      insns = get_insns ();
1729      end_sequence ();
1730
1731      emit_libcall_block (insns, cmp, cmp,
1732			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1733      code = NE;
1734    }
1735  else
1736    {
1737      cmp = gen_reg_rtx (BImode);
1738      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1739			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1740      code = NE;
1741    }
1742
1743  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1744}
1745
1746/* Emit the appropriate sequence for a call.  */
1747
1748void
1749ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1750		  int sibcall_p)
1751{
1752  rtx insn, b0;
1753
1754  addr = XEXP (addr, 0);
1755  addr = convert_memory_address (DImode, addr);
1756  b0 = gen_rtx_REG (DImode, R_BR (0));
1757
1758  /* ??? Should do this for functions known to bind local too.  */
1759  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1760    {
1761      if (sibcall_p)
1762	insn = gen_sibcall_nogp (addr);
1763      else if (! retval)
1764	insn = gen_call_nogp (addr, b0);
1765      else
1766	insn = gen_call_value_nogp (retval, addr, b0);
1767      insn = emit_call_insn (insn);
1768    }
1769  else
1770    {
1771      if (sibcall_p)
1772	insn = gen_sibcall_gp (addr);
1773      else if (! retval)
1774	insn = gen_call_gp (addr, b0);
1775      else
1776	insn = gen_call_value_gp (retval, addr, b0);
1777      insn = emit_call_insn (insn);
1778
1779      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1780    }
1781
1782  if (sibcall_p)
1783    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1784}
1785
1786void
1787ia64_reload_gp (void)
1788{
1789  rtx tmp;
1790
1791  if (current_frame_info.reg_save_gp)
1792    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1793  else
1794    {
1795      HOST_WIDE_INT offset;
1796
1797      offset = (current_frame_info.spill_cfa_off
1798	        + current_frame_info.spill_size);
1799      if (frame_pointer_needed)
1800        {
1801          tmp = hard_frame_pointer_rtx;
1802          offset = -offset;
1803        }
1804      else
1805        {
1806          tmp = stack_pointer_rtx;
1807          offset = current_frame_info.total_size - offset;
1808        }
1809
1810      if (CONST_OK_FOR_I (offset))
1811        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1812			       tmp, GEN_INT (offset)));
1813      else
1814        {
1815          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1816          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1817			         pic_offset_table_rtx, tmp));
1818        }
1819
1820      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1821    }
1822
1823  emit_move_insn (pic_offset_table_rtx, tmp);
1824}
1825
1826void
1827ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1828		 rtx scratch_b, int noreturn_p, int sibcall_p)
1829{
1830  rtx insn;
1831  bool is_desc = false;
1832
1833  /* If we find we're calling through a register, then we're actually
1834     calling through a descriptor, so load up the values.  */
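  /* Background: an IA-64 function descriptor is a pair of 8-byte words,
     the entry-point address followed by the callee's gp value, which is
     why the code below loads the code address through a POST_INC and then
     loads the new gp from the following word.  */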
1835  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1836    {
1837      rtx tmp;
1838      bool addr_dead_p;
1839
1840      /* ??? We are currently constrained to *not* use peep2, because
1841	 we can legitimately change the global lifetime of the GP
1842	 (in the form of killing where previously live).  This is
1843	 because a call through a descriptor doesn't use the previous
1844	 value of the GP, while a direct call does, and we do not
1845	 commit to either form until the split here.
1846
1847	 That said, this means that we lack precise life info for
1848	 whether ADDR is dead after this call.  This is not terribly
1849	 important, since we can fix things up essentially for free
1850	 with the POST_DEC below, but it's nice to not use it when we
1851	 can immediately tell it's not necessary.  */
1852      addr_dead_p = ((noreturn_p || sibcall_p
1853		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
1854					    REGNO (addr)))
1855		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
1856
1857      /* Load the code address into scratch_b.  */
1858      tmp = gen_rtx_POST_INC (Pmode, addr);
1859      tmp = gen_rtx_MEM (Pmode, tmp);
1860      emit_move_insn (scratch_r, tmp);
1861      emit_move_insn (scratch_b, scratch_r);
1862
1863      /* Load the GP address.  If ADDR is not dead here, then we must
1864	 revert the change made above via the POST_INCREMENT.  */
1865      if (!addr_dead_p)
1866	tmp = gen_rtx_POST_DEC (Pmode, addr);
1867      else
1868	tmp = addr;
1869      tmp = gen_rtx_MEM (Pmode, tmp);
1870      emit_move_insn (pic_offset_table_rtx, tmp);
1871
1872      is_desc = true;
1873      addr = scratch_b;
1874    }
1875
1876  if (sibcall_p)
1877    insn = gen_sibcall_nogp (addr);
1878  else if (retval)
1879    insn = gen_call_value_nogp (retval, addr, retaddr);
1880  else
1881    insn = gen_call_nogp (addr, retaddr);
1882  emit_call_insn (insn);
1883
1884  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
1885    ia64_reload_gp ();
1886}
1887
1888/* Begin the assembly file.  */
1889
1890static void
1891ia64_file_start (void)
1892{
1893  default_file_start ();
1894  emit_safe_across_calls ();
1895}
1896
1897void
1898emit_safe_across_calls (void)
1899{
1900  unsigned int rs, re;
1901  int out_state;
1902
1903  rs = 1;
1904  out_state = 0;
1905  while (1)
1906    {
1907      while (rs < 64 && call_used_regs[PR_REG (rs)])
1908	rs++;
1909      if (rs >= 64)
1910	break;
1911      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
1912	continue;
1913      if (out_state == 0)
1914	{
1915	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
1916	  out_state = 1;
1917	}
1918      else
1919	fputc (',', asm_out_file);
1920      if (re == rs + 1)
1921	fprintf (asm_out_file, "p%u", rs);
1922      else
1923	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
1924      rs = re + 1;
1925    }
1926  if (out_state)
1927    fputc ('\n', asm_out_file);
1928}
1929
1930/* Helper function for ia64_compute_frame_size: find an appropriate general
1931   register to spill some special register to.  Bits in GR0 to GR31 already
1932   allocated by this routine are tracked in current_frame_info.gr_used_mask.
1933   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
1934
1935static int
1936find_gr_spill (int try_locals)
1937{
1938  int regno;
1939
1940  /* If this is a leaf function, first try an otherwise unused
1941     call-clobbered register.  */
1942  if (current_function_is_leaf)
1943    {
1944      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
1945	if (! regs_ever_live[regno]
1946	    && call_used_regs[regno]
1947	    && ! fixed_regs[regno]
1948	    && ! global_regs[regno]
1949	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1950	  {
1951	    current_frame_info.gr_used_mask |= 1 << regno;
1952	    return regno;
1953	  }
1954    }
1955
1956  if (try_locals)
1957    {
1958      regno = current_frame_info.n_local_regs;
1959      /* If there is a frame pointer, then we can't use loc79, because
1960	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
1961	 reg_name switching code in ia64_expand_prologue.  */
1962      if (regno < (80 - frame_pointer_needed))
1963	{
1964	  current_frame_info.n_local_regs = regno + 1;
1965	  return LOC_REG (0) + regno;
1966	}
1967    }
1968
1969  /* Failed to find a general register to spill to.  Must use stack.  */
1970  return 0;
1971}
1972
1973/* In order to make for nice schedules, we try to allocate every temporary
1974   to a different register.  We must of course stay away from call-saved,
1975   fixed, and global registers.  We must also stay away from registers
1976   allocated in current_frame_info.gr_used_mask, since those include regs
1977   used all through the prologue.
1978
1979   Any register allocated here must be used immediately.  The idea is to
1980   aid scheduling, not to solve data flow problems.  */
1981
1982static int last_scratch_gr_reg;
1983
1984static int
1985next_scratch_gr_reg (void)
1986{
1987  int i, regno;
1988
1989  for (i = 0; i < 32; ++i)
1990    {
1991      regno = (last_scratch_gr_reg + i + 1) & 31;
1992      if (call_used_regs[regno]
1993	  && ! fixed_regs[regno]
1994	  && ! global_regs[regno]
1995	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
1996	{
1997	  last_scratch_gr_reg = regno;
1998	  return regno;
1999	}
2000    }
2001
2002  /* There must be _something_ available.  */
2003  abort ();
2004}
2005
2006/* Helper function for ia64_compute_frame_size, called through
2007   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2008
2009static void
2010mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2011{
2012  unsigned int regno = REGNO (reg);
2013  if (regno < 32)
2014    {
2015      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2016      for (i = 0; i < n; ++i)
2017	current_frame_info.gr_used_mask |= 1 << (regno + i);
2018    }
2019}
2020
2021/* Compute the frame layout for the current function and record the results
2022   in current_frame_info, including which registers must be saved and where.
2023   SIZE is the number of bytes of space needed for local variables.  */
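/* Roughly, as a summary of the code below rather than an exact formula:

	total_size = IA64_STACK_ALIGN (spill_size + extra_spill_size + SIZE
				       + pretend_args_size
				       + outgoing_args_size)

   less the caller-provided 16-byte scratch area when the function is a
   leaf.  */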
2024
2025static void
2026ia64_compute_frame_size (HOST_WIDE_INT size)
2027{
2028  HOST_WIDE_INT total_size;
2029  HOST_WIDE_INT spill_size = 0;
2030  HOST_WIDE_INT extra_spill_size = 0;
2031  HOST_WIDE_INT pretend_args_size;
2032  HARD_REG_SET mask;
2033  int n_spilled = 0;
2034  int spilled_gr_p = 0;
2035  int spilled_fr_p = 0;
2036  unsigned int regno;
2037  int i;
2038
2039  if (current_frame_info.initialized)
2040    return;
2041
2042  memset (&current_frame_info, 0, sizeof current_frame_info);
2043  CLEAR_HARD_REG_SET (mask);
2044
2045  /* Don't allocate scratches to the return register.  */
2046  diddle_return_value (mark_reg_gr_used_mask, NULL);
2047
2048  /* Don't allocate scratches to the EH scratch registers.  */
2049  if (cfun->machine->ia64_eh_epilogue_sp)
2050    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2051  if (cfun->machine->ia64_eh_epilogue_bsp)
2052    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2053
2054  /* Find the size of the register stack frame.  We have only 80 local
2055     registers, because we reserve 8 for the inputs and 8 for the
2056     outputs.  */
2057
2058  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2059     since we'll be adjusting that down later.  */
2060  regno = LOC_REG (78) + ! frame_pointer_needed;
2061  for (; regno >= LOC_REG (0); regno--)
2062    if (regs_ever_live[regno])
2063      break;
2064  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2065
2066  /* For functions marked with the syscall_linkage attribute, we must mark
2067     all eight input registers as in use, so that locals aren't visible to
2068     the caller.  */
2069
2070  if (cfun->machine->n_varargs > 0
2071      || lookup_attribute ("syscall_linkage",
2072			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2073    current_frame_info.n_input_regs = 8;
2074  else
2075    {
2076      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2077	if (regs_ever_live[regno])
2078	  break;
2079      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2080    }
2081
2082  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2083    if (regs_ever_live[regno])
2084      break;
2085  i = regno - OUT_REG (0) + 1;
2086
2087  /* When -p profiling, we need one output register for the mcount argument.
2088     Likewise for -a profiling for the bb_init_func argument.  For -ax
2089     profiling, we need two output registers for the two bb_init_trace_func
2090     arguments.  */
2091  if (current_function_profile)
2092    i = MAX (i, 1);
2093  current_frame_info.n_output_regs = i;
2094
2095  /* ??? No rotating register support yet.  */
2096  current_frame_info.n_rotate_regs = 0;
2097
2098  /* Discover which registers need spilling, and how much room that
2099     will take.  Begin with floating point and general registers,
2100     which will always wind up on the stack.  */
2101
2102  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2103    if (regs_ever_live[regno] && ! call_used_regs[regno])
2104      {
2105	SET_HARD_REG_BIT (mask, regno);
2106	spill_size += 16;
2107	n_spilled += 1;
2108	spilled_fr_p = 1;
2109      }
2110
2111  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2112    if (regs_ever_live[regno] && ! call_used_regs[regno])
2113      {
2114	SET_HARD_REG_BIT (mask, regno);
2115	spill_size += 8;
2116	n_spilled += 1;
2117	spilled_gr_p = 1;
2118      }
2119
2120  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2121    if (regs_ever_live[regno] && ! call_used_regs[regno])
2122      {
2123	SET_HARD_REG_BIT (mask, regno);
2124	spill_size += 8;
2125	n_spilled += 1;
2126      }
2127
2128  /* Now come all special registers that might get saved in other
2129     general registers.  */
2130
2131  if (frame_pointer_needed)
2132    {
2133      current_frame_info.reg_fp = find_gr_spill (1);
2134      /* If we did not get a register, then we take LOC79.  This is guaranteed
2135	 to be free, even if regs_ever_live is already set, because this is
2136	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2137	 as we don't count loc79 above.  */
2138      if (current_frame_info.reg_fp == 0)
2139	{
2140	  current_frame_info.reg_fp = LOC_REG (79);
2141	  current_frame_info.n_local_regs++;
2142	}
2143    }
2144
2145  if (! current_function_is_leaf)
2146    {
2147      /* Emit a save of BR0 if we call other functions.  Do this even
2148	 if this function doesn't return, as EH depends on this to be
2149	 able to unwind the stack.  */
2150      SET_HARD_REG_BIT (mask, BR_REG (0));
2151
2152      current_frame_info.reg_save_b0 = find_gr_spill (1);
2153      if (current_frame_info.reg_save_b0 == 0)
2154	{
2155	  spill_size += 8;
2156	  n_spilled += 1;
2157	}
2158
2159      /* Similarly for ar.pfs.  */
2160      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2161      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2162      if (current_frame_info.reg_save_ar_pfs == 0)
2163	{
2164	  extra_spill_size += 8;
2165	  n_spilled += 1;
2166	}
2167
2168      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2169	 registers are clobbered, so we fall back to the stack.  */
2170      current_frame_info.reg_save_gp
2171	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2172      if (current_frame_info.reg_save_gp == 0)
2173	{
2174	  SET_HARD_REG_BIT (mask, GR_REG (1));
2175	  spill_size += 8;
2176	  n_spilled += 1;
2177	}
2178    }
2179  else
2180    {
2181      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2182	{
2183	  SET_HARD_REG_BIT (mask, BR_REG (0));
2184	  spill_size += 8;
2185	  n_spilled += 1;
2186	}
2187
2188      if (regs_ever_live[AR_PFS_REGNUM])
2189	{
2190	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2191	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2192	  if (current_frame_info.reg_save_ar_pfs == 0)
2193	    {
2194	      extra_spill_size += 8;
2195	      n_spilled += 1;
2196	    }
2197	}
2198    }
2199
2200  /* Unwind descriptor hackery: things are most efficient if we allocate
2201     consecutive GR save registers for RP, PFS, FP in that order. However,
2202     it is absolutely critical that FP get the only hard register that's
2203     guaranteed to be free, so we allocated it first.  If all three did
2204     happen to be allocated hard regs, and are consecutive, rearrange them
2205     into the preferred order now.  */
2206  if (current_frame_info.reg_fp != 0
2207      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2208      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2209    {
2210      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2211      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2212      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2213    }
2214
2215  /* See if we need to store the predicate register block.  */
2216  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2217    if (regs_ever_live[regno] && ! call_used_regs[regno])
2218      break;
2219  if (regno <= PR_REG (63))
2220    {
2221      SET_HARD_REG_BIT (mask, PR_REG (0));
2222      current_frame_info.reg_save_pr = find_gr_spill (1);
2223      if (current_frame_info.reg_save_pr == 0)
2224	{
2225	  extra_spill_size += 8;
2226	  n_spilled += 1;
2227	}
2228
2229      /* ??? Mark them all as used so that register renaming and such
2230	 are free to use them.  */
2231      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2232	regs_ever_live[regno] = 1;
2233    }
2234
2235  /* If we're forced to use st8.spill, we're forced to save and restore
2236     ar.unat as well.  The check for existing liveness allows inline asm
2237     to touch ar.unat.  */
2238  if (spilled_gr_p || cfun->machine->n_varargs
2239      || regs_ever_live[AR_UNAT_REGNUM])
2240    {
2241      regs_ever_live[AR_UNAT_REGNUM] = 1;
2242      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2243      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2244      if (current_frame_info.reg_save_ar_unat == 0)
2245	{
2246	  extra_spill_size += 8;
2247	  n_spilled += 1;
2248	}
2249    }
2250
2251  if (regs_ever_live[AR_LC_REGNUM])
2252    {
2253      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2254      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2255      if (current_frame_info.reg_save_ar_lc == 0)
2256	{
2257	  extra_spill_size += 8;
2258	  n_spilled += 1;
2259	}
2260    }
2261
2262  /* If we have an odd number of words of pretend arguments written to
2263     the stack, then the FR save area will be unaligned.  We round the
2264     size of this area up to keep things 16 byte aligned.  */
2265  if (spilled_fr_p)
2266    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2267  else
2268    pretend_args_size = current_function_pretend_args_size;
2269
2270  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2271		+ current_function_outgoing_args_size);
2272  total_size = IA64_STACK_ALIGN (total_size);
2273
2274  /* We always use the 16-byte scratch area provided by the caller, but
2275     if we are a leaf function, there's no one to which we need to provide
2276     a scratch area.  */
2277  if (current_function_is_leaf)
2278    total_size = MAX (0, total_size - 16);
2279
2280  current_frame_info.total_size = total_size;
2281  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2282  current_frame_info.spill_size = spill_size;
2283  current_frame_info.extra_spill_size = extra_spill_size;
2284  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2285  current_frame_info.n_spilled = n_spilled;
2286  current_frame_info.initialized = reload_completed;
2287}
2288
2289/* Compute the initial difference between the specified pair of registers.  */
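/* For example, in a non-leaf function the FRAME_POINTER_REGNUM to
   STACK_POINTER_REGNUM elimination yields
   16 + current_function_outgoing_args_size, since the soft frame pointer
   sits just above the outgoing argument area and the 16-byte scratch area;
   see the switch below.  */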
2290
2291HOST_WIDE_INT
2292ia64_initial_elimination_offset (int from, int to)
2293{
2294  HOST_WIDE_INT offset;
2295
2296  ia64_compute_frame_size (get_frame_size ());
2297  switch (from)
2298    {
2299    case FRAME_POINTER_REGNUM:
2300      if (to == HARD_FRAME_POINTER_REGNUM)
2301	{
2302	  if (current_function_is_leaf)
2303	    offset = -current_frame_info.total_size;
2304	  else
2305	    offset = -(current_frame_info.total_size
2306		       - current_function_outgoing_args_size - 16);
2307	}
2308      else if (to == STACK_POINTER_REGNUM)
2309	{
2310	  if (current_function_is_leaf)
2311	    offset = 0;
2312	  else
2313	    offset = 16 + current_function_outgoing_args_size;
2314	}
2315      else
2316	abort ();
2317      break;
2318
2319    case ARG_POINTER_REGNUM:
2320      /* Arguments start above the 16 byte save area, unless this is a
2321	 stdarg function, in which case we store through the 16 byte save area.  */
2322      if (to == HARD_FRAME_POINTER_REGNUM)
2323	offset = 16 - current_function_pretend_args_size;
2324      else if (to == STACK_POINTER_REGNUM)
2325	offset = (current_frame_info.total_size
2326		  + 16 - current_function_pretend_args_size);
2327      else
2328	abort ();
2329      break;
2330
2331    default:
2332      abort ();
2333    }
2334
2335  return offset;
2336}
2337
2338/* If there are more than a trivial number of register spills, we use
2339   two interleaved iterators so that we can get two memory references
2340   per insn group.
2341
2342   In order to simplify things in the prologue and epilogue expanders,
2343   we use helper functions to fix up the memory references after the
2344   fact with the appropriate offsets to a POST_MODIFY memory mode.
2345   The following data structure tracks the state of the two iterators
2346   while insns are being emitted.  */
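/* A rough illustration (register numbers assumed, not literal output):
   with two iterators, say r2 and r3, the emitted spill stream alternates
   between them,

	st8 [r2] = rA, 16
	st8 [r3] = rB, 16
	st8 [r2] = rC, 16
	st8 [r3] = rD, 16

   so two memory references can issue per insn group; the ",16"
   post-increments are the POST_MODIFY fixups patched in after the fact.  */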
2347
2348struct spill_fill_data
2349{
2350  rtx init_after;		/* point at which to emit initializations */
2351  rtx init_reg[2];		/* initial base register */
2352  rtx iter_reg[2];		/* the iterator registers */
2353  rtx *prev_addr[2];		/* address of last memory use */
2354  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2355  HOST_WIDE_INT prev_off[2];	/* last offset */
2356  int n_iter;			/* number of iterators in use */
2357  int next_iter;		/* next iterator to use */
2358  unsigned int save_gr_used_mask;
2359};
2360
2361static struct spill_fill_data spill_fill_data;
2362
2363static void
2364setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2365{
2366  int i;
2367
2368  spill_fill_data.init_after = get_last_insn ();
2369  spill_fill_data.init_reg[0] = init_reg;
2370  spill_fill_data.init_reg[1] = init_reg;
2371  spill_fill_data.prev_addr[0] = NULL;
2372  spill_fill_data.prev_addr[1] = NULL;
2373  spill_fill_data.prev_insn[0] = NULL;
2374  spill_fill_data.prev_insn[1] = NULL;
2375  spill_fill_data.prev_off[0] = cfa_off;
2376  spill_fill_data.prev_off[1] = cfa_off;
2377  spill_fill_data.next_iter = 0;
2378  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2379
2380  spill_fill_data.n_iter = 1 + (n_spills > 2);
2381  for (i = 0; i < spill_fill_data.n_iter; ++i)
2382    {
2383      int regno = next_scratch_gr_reg ();
2384      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2385      current_frame_info.gr_used_mask |= 1 << regno;
2386    }
2387}
2388
2389static void
2390finish_spill_pointers (void)
2391{
2392  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2393}
2394
2395static rtx
2396spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2397{
2398  int iter = spill_fill_data.next_iter;
2399  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2400  rtx disp_rtx = GEN_INT (disp);
2401  rtx mem;
2402
2403  if (spill_fill_data.prev_addr[iter])
2404    {
2405      if (CONST_OK_FOR_N (disp))
2406	{
2407	  *spill_fill_data.prev_addr[iter]
2408	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2409				   gen_rtx_PLUS (DImode,
2410						 spill_fill_data.iter_reg[iter],
2411						 disp_rtx));
2412	  REG_NOTES (spill_fill_data.prev_insn[iter])
2413	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2414				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2415	}
2416      else
2417	{
2418	  /* ??? Could use register post_modify for loads.  */
2419	  if (! CONST_OK_FOR_I (disp))
2420	    {
2421	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2422	      emit_move_insn (tmp, disp_rtx);
2423	      disp_rtx = tmp;
2424	    }
2425	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2426				 spill_fill_data.iter_reg[iter], disp_rtx));
2427	}
2428    }
2429  /* Micro-optimization: if we've created a frame pointer, it's at
2430     CFA 0, which may allow the real iterator to be initialized lower,
2431     slightly increasing parallelism.  Also, if there are few saves
2432     it may eliminate the iterator entirely.  */
2433  else if (disp == 0
2434	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2435	   && frame_pointer_needed)
2436    {
2437      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2438      set_mem_alias_set (mem, get_varargs_alias_set ());
2439      return mem;
2440    }
2441  else
2442    {
2443      rtx seq, insn;
2444
2445      if (disp == 0)
2446	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2447			 spill_fill_data.init_reg[iter]);
2448      else
2449	{
2450	  start_sequence ();
2451
2452	  if (! CONST_OK_FOR_I (disp))
2453	    {
2454	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2455	      emit_move_insn (tmp, disp_rtx);
2456	      disp_rtx = tmp;
2457	    }
2458
2459	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2460				 spill_fill_data.init_reg[iter],
2461				 disp_rtx));
2462
2463	  seq = get_insns ();
2464	  end_sequence ();
2465	}
2466
2467      /* Be careful in case these are the first insns in the function.  */
2468      if (spill_fill_data.init_after)
2469	insn = emit_insn_after (seq, spill_fill_data.init_after);
2470      else
2471	{
2472	  rtx first = get_insns ();
2473	  if (first)
2474	    insn = emit_insn_before (seq, first);
2475	  else
2476	    insn = emit_insn (seq);
2477	}
2478      spill_fill_data.init_after = insn;
2479
2480      /* If DISP is 0, we may or may not have a further adjustment
2481	 afterward.  If we do, then the load/store insn may be modified
2482	 to be a post-modify.  If we don't, then this copy may be
2483	 eliminated by copyprop_hardreg_forward, which makes this
2484	 insn garbage, which runs afoul of the sanity check in
2485	 propagate_one_insn.  So mark this insn as legal to delete.  */
2486      if (disp == 0)
2487	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2488					     REG_NOTES (insn));
2489    }
2490
2491  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2492
2493  /* ??? Not all of the spills are for varargs, but some of them are.
2494     The rest of the spills belong in an alias set of their own.  But
2495     it doesn't actually hurt to include them here.  */
2496  set_mem_alias_set (mem, get_varargs_alias_set ());
2497
2498  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2499  spill_fill_data.prev_off[iter] = cfa_off;
2500
2501  if (++iter >= spill_fill_data.n_iter)
2502    iter = 0;
2503  spill_fill_data.next_iter = iter;
2504
2505  return mem;
2506}
2507
2508static void
2509do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2510	  rtx frame_reg)
2511{
2512  int iter = spill_fill_data.next_iter;
2513  rtx mem, insn;
2514
2515  mem = spill_restore_mem (reg, cfa_off);
2516  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2517  spill_fill_data.prev_insn[iter] = insn;
2518
2519  if (frame_reg)
2520    {
2521      rtx base;
2522      HOST_WIDE_INT off;
2523
2524      RTX_FRAME_RELATED_P (insn) = 1;
2525
2526      /* Don't even pretend that the unwind code can intuit its way
2527	 through a pair of interleaved post_modify iterators.  Just
2528	 provide the correct answer.  */
2529
2530      if (frame_pointer_needed)
2531	{
2532	  base = hard_frame_pointer_rtx;
2533	  off = - cfa_off;
2534	}
2535      else
2536	{
2537	  base = stack_pointer_rtx;
2538	  off = current_frame_info.total_size - cfa_off;
2539	}
2540
2541      REG_NOTES (insn)
2542	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2543		gen_rtx_SET (VOIDmode,
2544			     gen_rtx_MEM (GET_MODE (reg),
2545					  plus_constant (base, off)),
2546			     frame_reg),
2547		REG_NOTES (insn));
2548    }
2549}
2550
2551static void
2552do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2553{
2554  int iter = spill_fill_data.next_iter;
2555  rtx insn;
2556
2557  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2558				GEN_INT (cfa_off)));
2559  spill_fill_data.prev_insn[iter] = insn;
2560}
2561
2562/* Wrapper functions that discard the CONST_INT spill offset.  These
2563   exist so that we can give gr_spill/gr_fill the offset they need and
2564   use a consistent function interface.  */
2565
2566static rtx
2567gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2568{
2569  return gen_movdi (dest, src);
2570}
2571
2572static rtx
2573gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2574{
2575  return gen_fr_spill (dest, src);
2576}
2577
2578static rtx
2579gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2580{
2581  return gen_fr_restore (dest, src);
2582}
2583
2584/* Called after register allocation to add any instructions needed for the
2585   prologue.  Using a prologue insn is favored compared to putting all of the
2586   instructions in output_function_prologue(), since it allows the scheduler
2587   to intermix instructions with the saves of the call-saved registers.  In
2588   some cases, it might be necessary to emit a barrier instruction as the last
2589   insn to prevent such scheduling.
2590
2591   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2592   so that the debug info generation code can handle them properly.
2593
2594   The register save area is laid out like so:
2595   cfa+16
2596	[ varargs spill area ]
2597	[ fr register spill area ]
2598	[ br register spill area ]
2599	[ ar register spill area ]
2600	[ pr register spill area ]
2601	[ gr register spill area ] */
2602
2603/* ??? We get inefficient code when the frame size is larger than can fit in
2604   an adds instruction.  */
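/* (The adds immediate is a 14-bit signed field, which is what the
   CONST_OK_FOR_I checks below test for; anything larger requires an extra
   move of the constant into a scratch register first.)  */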
2605
2606void
2607ia64_expand_prologue (void)
2608{
2609  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2610  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2611  rtx reg, alt_reg;
2612
2613  ia64_compute_frame_size (get_frame_size ());
2614  last_scratch_gr_reg = 15;
2615
2616  /* If there is no epilogue, then we don't need some prologue insns.
2617     We need to avoid emitting the dead prologue insns, because flow
2618     will complain about them.  */
2619  if (optimize)
2620    {
2621      edge e;
2622
2623      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
2624	if ((e->flags & EDGE_FAKE) == 0
2625	    && (e->flags & EDGE_FALLTHRU) != 0)
2626	  break;
2627      epilogue_p = (e != NULL);
2628    }
2629  else
2630    epilogue_p = 1;
2631
2632  /* Set the local, input, and output register names.  We need to do this
2633     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2634     half.  If we use in/loc/out register names, then we get assembler errors
2635     in crtn.S because there is no alloc insn or regstk directive in there.  */
2636  if (! TARGET_REG_NAMES)
2637    {
2638      int inputs = current_frame_info.n_input_regs;
2639      int locals = current_frame_info.n_local_regs;
2640      int outputs = current_frame_info.n_output_regs;
2641
2642      for (i = 0; i < inputs; i++)
2643	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2644      for (i = 0; i < locals; i++)
2645	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2646      for (i = 0; i < outputs; i++)
2647	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2648    }
2649
2650  /* Set the frame pointer register name.  The regnum is logically loc79,
2651     but of course we'll not have allocated that many locals.  Rather than
2652     worrying about renumbering the existing rtxs, we adjust the name.  */
2653  /* ??? This code means that we can never use one local register when
2654     there is a frame pointer.  loc79 gets wasted in this case, as it is
2655     renamed to a register that will never be used.  See also the try_locals
2656     code in find_gr_spill.  */
2657  if (current_frame_info.reg_fp)
2658    {
2659      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2660      reg_names[HARD_FRAME_POINTER_REGNUM]
2661	= reg_names[current_frame_info.reg_fp];
2662      reg_names[current_frame_info.reg_fp] = tmp;
2663    }
2664
2665  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2666  if (current_frame_info.n_local_regs == 0
2667      && current_frame_info.n_output_regs == 0
2668      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2669      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2670    {
2671      /* If there is no alloc, but there are input registers used, then we
2672	 need a .regstk directive.  */
2673      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2674      ar_pfs_save_reg = NULL_RTX;
2675    }
2676  else
2677    {
2678      current_frame_info.need_regstk = 0;
2679
2680      if (current_frame_info.reg_save_ar_pfs)
2681	regno = current_frame_info.reg_save_ar_pfs;
2682      else
2683	regno = next_scratch_gr_reg ();
2684      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2685
2686      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2687				   GEN_INT (current_frame_info.n_input_regs),
2688				   GEN_INT (current_frame_info.n_local_regs),
2689				   GEN_INT (current_frame_info.n_output_regs),
2690				   GEN_INT (current_frame_info.n_rotate_regs)));
2691      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2692    }
2693
2694  /* Set up frame pointer, stack pointer, and spill iterators.  */
2695
2696  n_varargs = cfun->machine->n_varargs;
2697  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2698			stack_pointer_rtx, 0);
2699
2700  if (frame_pointer_needed)
2701    {
2702      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2703      RTX_FRAME_RELATED_P (insn) = 1;
2704    }
2705
2706  if (current_frame_info.total_size != 0)
2707    {
2708      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2709      rtx offset;
2710
2711      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2712	offset = frame_size_rtx;
2713      else
2714	{
2715	  regno = next_scratch_gr_reg ();
2716	  offset = gen_rtx_REG (DImode, regno);
2717	  emit_move_insn (offset, frame_size_rtx);
2718	}
2719
2720      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
2721				    stack_pointer_rtx, offset));
2722
2723      if (! frame_pointer_needed)
2724	{
2725	  RTX_FRAME_RELATED_P (insn) = 1;
2726	  if (GET_CODE (offset) != CONST_INT)
2727	    {
2728	      REG_NOTES (insn)
2729		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2730			gen_rtx_SET (VOIDmode,
2731				     stack_pointer_rtx,
2732				     gen_rtx_PLUS (DImode,
2733						   stack_pointer_rtx,
2734						   frame_size_rtx)),
2735			REG_NOTES (insn));
2736	    }
2737	}
2738
2739      /* ??? At this point we must generate a magic insn that appears to
2740	 modify the stack pointer, the frame pointer, and all spill
2741	 iterators.  This would allow the most scheduling freedom.  For
2742	 now, just hard stop.  */
2743      emit_insn (gen_blockage ());
2744    }
2745
2746  /* Must copy out ar.unat before doing any integer spills.  */
2747  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
2748    {
2749      if (current_frame_info.reg_save_ar_unat)
2750	ar_unat_save_reg
2751	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
2752      else
2753	{
2754	  alt_regno = next_scratch_gr_reg ();
2755	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
2756	  current_frame_info.gr_used_mask |= 1 << alt_regno;
2757	}
2758
2759      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2760      insn = emit_move_insn (ar_unat_save_reg, reg);
2761      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
2762
2763      /* Even if we're not going to generate an epilogue, we still
2764	 need to save the register so that EH works.  */
2765      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
2766	emit_insn (gen_prologue_use (ar_unat_save_reg));
2767    }
2768  else
2769    ar_unat_save_reg = NULL_RTX;
2770
2771  /* Spill all varargs registers.  Do this before spilling any GR registers,
2772     since we want the UNAT bits for the GR registers to override the UNAT
2773     bits from varargs, which we don't care about.  */
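  /* Background: st8.spill copies the NaT bit of the stored register into
     the ar.unat bit selected by the store address, so a later spill whose
     address maps to the same ar.unat bit simply overwrites the earlier
     value; hence the ordering described above.  */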
2774
2775  cfa_off = -16;
2776  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
2777    {
2778      reg = gen_rtx_REG (DImode, regno);
2779      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
2780    }
2781
2782  /* Locate the bottom of the register save area.  */
2783  cfa_off = (current_frame_info.spill_cfa_off
2784	     + current_frame_info.spill_size
2785	     + current_frame_info.extra_spill_size);
2786
2787  /* Save the predicate register block either in a register or in memory.  */
2788  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2789    {
2790      reg = gen_rtx_REG (DImode, PR_REG (0));
2791      if (current_frame_info.reg_save_pr != 0)
2792	{
2793	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2794	  insn = emit_move_insn (alt_reg, reg);
2795
2796	  /* ??? Denote pr spill/fill by a DImode move that modifies all
2797	     64 hard registers.  */
2798	  RTX_FRAME_RELATED_P (insn) = 1;
2799	  REG_NOTES (insn)
2800	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2801			gen_rtx_SET (VOIDmode, alt_reg, reg),
2802			REG_NOTES (insn));
2803
2804	  /* Even if we're not going to generate an epilogue, we still
2805	     need to save the register so that EH works.  */
2806	  if (! epilogue_p)
2807	    emit_insn (gen_prologue_use (alt_reg));
2808	}
2809      else
2810	{
2811	  alt_regno = next_scratch_gr_reg ();
2812	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2813	  insn = emit_move_insn (alt_reg, reg);
2814	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2815	  cfa_off -= 8;
2816	}
2817    }
2818
2819  /* Handle AR regs in numerical order.  All of them get special handling.  */
2820  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
2821      && current_frame_info.reg_save_ar_unat == 0)
2822    {
2823      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
2824      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
2825      cfa_off -= 8;
2826    }
2827
2828  /* The alloc insn already copied ar.pfs into a general register.  The
2829     only thing we have to do now is copy that register to a stack slot
2830     if we'd not allocated a local register for the job.  */
2831  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
2832      && current_frame_info.reg_save_ar_pfs == 0)
2833    {
2834      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
2835      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
2836      cfa_off -= 8;
2837    }
2838
2839  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
2840    {
2841      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
2842      if (current_frame_info.reg_save_ar_lc != 0)
2843	{
2844	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
2845	  insn = emit_move_insn (alt_reg, reg);
2846	  RTX_FRAME_RELATED_P (insn) = 1;
2847
2848	  /* Even if we're not going to generate an epilogue, we still
2849	     need to save the register so that EH works.  */
2850	  if (! epilogue_p)
2851	    emit_insn (gen_prologue_use (alt_reg));
2852	}
2853      else
2854	{
2855	  alt_regno = next_scratch_gr_reg ();
2856	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2857	  emit_move_insn (alt_reg, reg);
2858	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2859	  cfa_off -= 8;
2860	}
2861    }
2862
2863  if (current_frame_info.reg_save_gp)
2864    {
2865      insn = emit_move_insn (gen_rtx_REG (DImode,
2866					  current_frame_info.reg_save_gp),
2867			     pic_offset_table_rtx);
2868      /* We don't know for sure yet if this is actually needed, since
2869	 we've not split the PIC call patterns.  If all of the calls
2870	 are indirect, and not followed by any uses of the gp, then
2871	 this save is dead.  Allow it to go away.  */
2872      REG_NOTES (insn)
2873	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
2874    }
2875
2876  /* We should now be at the base of the gr/br/fr spill area.  */
2877  if (cfa_off != (current_frame_info.spill_cfa_off
2878		  + current_frame_info.spill_size))
2879    abort ();
2880
2881  /* Spill all general registers.  */
2882  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
2883    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2884      {
2885	reg = gen_rtx_REG (DImode, regno);
2886	do_spill (gen_gr_spill, reg, cfa_off, reg);
2887	cfa_off -= 8;
2888      }
2889
2890  /* Handle BR0 specially -- it may be getting stored permanently in
2891     some GR register.  */
2892  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
2893    {
2894      reg = gen_rtx_REG (DImode, BR_REG (0));
2895      if (current_frame_info.reg_save_b0 != 0)
2896	{
2897	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
2898	  insn = emit_move_insn (alt_reg, reg);
2899	  RTX_FRAME_RELATED_P (insn) = 1;
2900
2901	  /* Even if we're not going to generate an epilogue, we still
2902	     need to save the register so that EH works.  */
2903	  if (! epilogue_p)
2904	    emit_insn (gen_prologue_use (alt_reg));
2905	}
2906      else
2907	{
2908	  alt_regno = next_scratch_gr_reg ();
2909	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2910	  emit_move_insn (alt_reg, reg);
2911	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2912	  cfa_off -= 8;
2913	}
2914    }
2915
2916  /* Spill the rest of the BR registers.  */
2917  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
2918    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2919      {
2920	alt_regno = next_scratch_gr_reg ();
2921	alt_reg = gen_rtx_REG (DImode, alt_regno);
2922	reg = gen_rtx_REG (DImode, regno);
2923	emit_move_insn (alt_reg, reg);
2924	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
2925	cfa_off -= 8;
2926      }
2927
2928  /* Align the frame and spill all FR registers.  */
2929  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
2930    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
2931      {
2932        if (cfa_off & 15)
2933	  abort ();
2934	reg = gen_rtx_REG (XFmode, regno);
2935	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
2936	cfa_off -= 16;
2937      }
2938
2939  if (cfa_off != current_frame_info.spill_cfa_off)
2940    abort ();
2941
2942  finish_spill_pointers ();
2943}
2944
2945/* Called after register allocation to add any instructions needed for the
2946   epilogue.  Using an epilogue insn is favored compared to putting all of the
2947   instructions in output_function_epilogue(), since it allows the scheduler
2948   to intermix instructions with the restores of the call-saved registers.  In
2949   some cases, it might be necessary to emit a barrier instruction as the last
2950   insn to prevent such scheduling.  */
2951
2952void
2953ia64_expand_epilogue (int sibcall_p)
2954{
2955  rtx insn, reg, alt_reg, ar_unat_save_reg;
2956  int regno, alt_regno, cfa_off;
2957
2958  ia64_compute_frame_size (get_frame_size ());
2959
2960  /* If there is a frame pointer, then we use it instead of the stack
2961     pointer, so that the stack pointer does not need to be valid when
2962     the epilogue starts.  See EXIT_IGNORE_STACK.  */
2963  if (frame_pointer_needed)
2964    setup_spill_pointers (current_frame_info.n_spilled,
2965			  hard_frame_pointer_rtx, 0);
2966  else
2967    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
2968			  current_frame_info.total_size);
2969
2970  if (current_frame_info.total_size != 0)
2971    {
2972      /* ??? At this point we must generate a magic insn that appears to
2973         modify the spill iterators and the frame pointer.  This would
2974	 allow the most scheduling freedom.  For now, just hard stop.  */
2975      emit_insn (gen_blockage ());
2976    }
2977
2978  /* Locate the bottom of the register save area.  */
2979  cfa_off = (current_frame_info.spill_cfa_off
2980	     + current_frame_info.spill_size
2981	     + current_frame_info.extra_spill_size);
2982
2983  /* Restore the predicate registers.  */
2984  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
2985    {
2986      if (current_frame_info.reg_save_pr != 0)
2987	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
2988      else
2989	{
2990	  alt_regno = next_scratch_gr_reg ();
2991	  alt_reg = gen_rtx_REG (DImode, alt_regno);
2992	  do_restore (gen_movdi_x, alt_reg, cfa_off);
2993	  cfa_off -= 8;
2994	}
2995      reg = gen_rtx_REG (DImode, PR_REG (0));
2996      emit_move_insn (reg, alt_reg);
2997    }
2998
2999  /* Restore the application registers.  */
3000
3001  /* Load the saved unat from the stack, but do not restore it until
3002     after the GRs have been restored.  */
3003  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3004    {
3005      if (current_frame_info.reg_save_ar_unat != 0)
3006        ar_unat_save_reg
3007	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3008      else
3009	{
3010	  alt_regno = next_scratch_gr_reg ();
3011	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3012	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3013	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3014	  cfa_off -= 8;
3015	}
3016    }
3017  else
3018    ar_unat_save_reg = NULL_RTX;
3019
3020  if (current_frame_info.reg_save_ar_pfs != 0)
3021    {
3022      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3023      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3024      emit_move_insn (reg, alt_reg);
3025    }
3026  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3027    {
3028      alt_regno = next_scratch_gr_reg ();
3029      alt_reg = gen_rtx_REG (DImode, alt_regno);
3030      do_restore (gen_movdi_x, alt_reg, cfa_off);
3031      cfa_off -= 8;
3032      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3033      emit_move_insn (reg, alt_reg);
3034    }
3035
3036  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3037    {
3038      if (current_frame_info.reg_save_ar_lc != 0)
3039	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3040      else
3041	{
3042	  alt_regno = next_scratch_gr_reg ();
3043	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3044	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3045	  cfa_off -= 8;
3046	}
3047      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3048      emit_move_insn (reg, alt_reg);
3049    }
3050
3051  /* We should now be at the base of the gr/br/fr spill area.  */
3052  if (cfa_off != (current_frame_info.spill_cfa_off
3053		  + current_frame_info.spill_size))
3054    abort ();
3055
3056  /* The GP may be stored on the stack in the prologue, but it's
3057     never restored in the epilogue.  Skip the stack slot.  */
3058  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3059    cfa_off -= 8;
3060
3061  /* Restore all general registers.  */
3062  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3063    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3064      {
3065	reg = gen_rtx_REG (DImode, regno);
3066	do_restore (gen_gr_restore, reg, cfa_off);
3067	cfa_off -= 8;
3068      }
3069
3070  /* Restore the branch registers.  Handle B0 specially, as it may
3071     have gotten stored in some GR register.  */
3072  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3073    {
3074      if (current_frame_info.reg_save_b0 != 0)
3075	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3076      else
3077	{
3078	  alt_regno = next_scratch_gr_reg ();
3079	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3080	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3081	  cfa_off -= 8;
3082	}
3083      reg = gen_rtx_REG (DImode, BR_REG (0));
3084      emit_move_insn (reg, alt_reg);
3085    }
3086
3087  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3088    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3089      {
3090	alt_regno = next_scratch_gr_reg ();
3091	alt_reg = gen_rtx_REG (DImode, alt_regno);
3092	do_restore (gen_movdi_x, alt_reg, cfa_off);
3093	cfa_off -= 8;
3094	reg = gen_rtx_REG (DImode, regno);
3095	emit_move_insn (reg, alt_reg);
3096      }
3097
3098  /* Restore floating point registers.  */
3099  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3100    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3101      {
3102        if (cfa_off & 15)
3103	  abort ();
3104	reg = gen_rtx_REG (XFmode, regno);
3105	do_restore (gen_fr_restore_x, reg, cfa_off);
3106	cfa_off -= 16;
3107      }
3108
3109  /* Restore ar.unat for real.  */
3110  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3111    {
3112      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3113      emit_move_insn (reg, ar_unat_save_reg);
3114    }
3115
3116  if (cfa_off != current_frame_info.spill_cfa_off)
3117    abort ();
3118
3119  finish_spill_pointers ();
3120
3121  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3122    {
3123      /* ??? At this point we must generate a magic insn that appears to
3124         modify the spill iterators, the stack pointer, and the frame
3125	 pointer.  This would allow the most scheduling freedom.  For now,
3126	 just hard stop.  */
3127      emit_insn (gen_blockage ());
3128    }
3129
3130  if (cfun->machine->ia64_eh_epilogue_sp)
3131    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3132  else if (frame_pointer_needed)
3133    {
3134      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3135      RTX_FRAME_RELATED_P (insn) = 1;
3136    }
3137  else if (current_frame_info.total_size)
3138    {
3139      rtx offset, frame_size_rtx;
3140
3141      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3142      if (CONST_OK_FOR_I (current_frame_info.total_size))
3143	offset = frame_size_rtx;
3144      else
3145	{
3146	  regno = next_scratch_gr_reg ();
3147	  offset = gen_rtx_REG (DImode, regno);
3148	  emit_move_insn (offset, frame_size_rtx);
3149	}
3150
3151      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3152				    offset));
3153
3154      RTX_FRAME_RELATED_P (insn) = 1;
3155      if (GET_CODE (offset) != CONST_INT)
3156	{
3157	  REG_NOTES (insn)
3158	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3159			gen_rtx_SET (VOIDmode,
3160				     stack_pointer_rtx,
3161				     gen_rtx_PLUS (DImode,
3162						   stack_pointer_rtx,
3163						   frame_size_rtx)),
3164			REG_NOTES (insn));
3165	}
3166    }
3167
3168  if (cfun->machine->ia64_eh_epilogue_bsp)
3169    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3170
3171  if (! sibcall_p)
3172    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3173  else
3174    {
3175      int fp = GR_REG (2);
3176      /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
3177	 the first available call-clobbered register.  If a frame pointer was
3178	 allocated, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3179	 so we have to make sure we're using the string "r2" when emitting
3180	 the register name for the assembler.  */
3181      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3182	fp = HARD_FRAME_POINTER_REGNUM;
3183
3184      /* We must emit an alloc to force the input registers to become output
3185	 registers.  Otherwise, if the callee tries to pass its parameters
3186	 through to another call without an intervening alloc, then these
3187	 values get lost.  */
3188      /* ??? We don't need to preserve all input registers.  We only need to
3189	 preserve those input registers used as arguments to the sibling call.
3190	 It is unclear how to compute that number here.  */
3191      if (current_frame_info.n_input_regs != 0)
3192	{
3193	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3194	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3195				const0_rtx, const0_rtx,
3196				n_inputs, const0_rtx));
3197	  RTX_FRAME_RELATED_P (insn) = 1;
3198	}
3199    }
3200}
3201
3202/* Return 1 if br.ret can do all the work required to return from a
3203   function.  */
3204
3205int
3206ia64_direct_return (void)
3207{
3208  if (reload_completed && ! frame_pointer_needed)
3209    {
3210      ia64_compute_frame_size (get_frame_size ());
3211
3212      return (current_frame_info.total_size == 0
3213	      && current_frame_info.n_spilled == 0
3214	      && current_frame_info.reg_save_b0 == 0
3215	      && current_frame_info.reg_save_pr == 0
3216	      && current_frame_info.reg_save_ar_pfs == 0
3217	      && current_frame_info.reg_save_ar_unat == 0
3218	      && current_frame_info.reg_save_ar_lc == 0);
3219    }
3220  return 0;
3221}
3222
3223/* Return the magic cookie that we use to hold the return address
3224   during early compilation.  */
3225
3226rtx
3227ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3228{
3229  if (count != 0)
3230    return NULL;
3231  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3232}
3233
3234/* Split this value after reload, now that we know where the return
3235   address is saved.  */
3236
3237void
3238ia64_split_return_addr_rtx (rtx dest)
3239{
3240  rtx src;
3241
3242  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3243    {
3244      if (current_frame_info.reg_save_b0 != 0)
3245	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3246      else
3247	{
3248	  HOST_WIDE_INT off;
3249	  unsigned int regno;
3250
3251	  /* Compute offset from CFA for BR0.  */
3252	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3253	  off = (current_frame_info.spill_cfa_off
3254		 + current_frame_info.spill_size);
3255	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3256	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3257	      off -= 8;
3258
3259	  /* Convert CFA offset to a register based offset.  */
3260	  if (frame_pointer_needed)
3261	    src = hard_frame_pointer_rtx;
3262	  else
3263	    {
3264	      src = stack_pointer_rtx;
3265	      off += current_frame_info.total_size;
3266	    }
3267
3268	  /* Load address into scratch register.  */
3269	  if (CONST_OK_FOR_I (off))
3270	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3271	  else
3272	    {
3273	      emit_move_insn (dest, GEN_INT (off));
3274	      emit_insn (gen_adddi3 (dest, src, dest));
3275	    }
3276
3277	  src = gen_rtx_MEM (Pmode, dest);
3278	}
3279    }
3280  else
3281    src = gen_rtx_REG (DImode, BR_REG (0));
3282
3283  emit_move_insn (dest, src);
3284}
3285
3286int
3287ia64_hard_regno_rename_ok (int from, int to)
3288{
3289  /* Don't clobber any of the registers we reserved for the prologue.  */
3290  if (to == current_frame_info.reg_fp
3291      || to == current_frame_info.reg_save_b0
3292      || to == current_frame_info.reg_save_pr
3293      || to == current_frame_info.reg_save_ar_pfs
3294      || to == current_frame_info.reg_save_ar_unat
3295      || to == current_frame_info.reg_save_ar_lc)
3296    return 0;
3297
3298  if (from == current_frame_info.reg_fp
3299      || from == current_frame_info.reg_save_b0
3300      || from == current_frame_info.reg_save_pr
3301      || from == current_frame_info.reg_save_ar_pfs
3302      || from == current_frame_info.reg_save_ar_unat
3303      || from == current_frame_info.reg_save_ar_lc)
3304    return 0;
3305
3306  /* Don't use output registers outside the register frame.  */
3307  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3308    return 0;
3309
3310  /* Retain even/oddness on predicate register pairs.  */
3311  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3312    return (from & 1) == (to & 1);
3313
3314  return 1;
3315}
3316
3317/* Target hook for assembling integer objects.  Handle word-sized
3318   aligned objects and detect the cases when @fptr is needed.  */
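/* For example, a word-sized, aligned initializer holding the address of a
   function "foo" is emitted as "data8 @fptr(foo)" (data4 for 32-bit
   pointers, the .ua variants when unaligned), so that the linker can
   materialize an official function descriptor rather than a raw code
   address.  */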
3319
3320static bool
3321ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3322{
3323  if (size == POINTER_SIZE / BITS_PER_UNIT
3324      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3325      && GET_CODE (x) == SYMBOL_REF
3326      && SYMBOL_REF_FUNCTION_P (x))
3327    {
3328      static const char * const directive[2][2] = {
3329	  /* 64-bit pointer */  /* 32-bit pointer */
3330	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3331	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3332      };
3333      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3334      output_addr_const (asm_out_file, x);
3335      fputs (")\n", asm_out_file);
3336      return true;
3337    }
3338  return default_assemble_integer (x, size, aligned_p);
3339}
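
/* Illustrative example (not part of the original source): for a file-scope
   function-pointer initializer such as

       extern void f (void);
       void (*fp) (void) = f;

   the hook above would emit roughly "\tdata8\t@fptr(f)\n" under 64-bit
   pointers (data4 when POINTER_SIZE == 32, with ".ua" appended in the
   unaligned case), so that the linker supplies the official function
   descriptor rather than the raw code address.  */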
3340
3341/* Emit the function prologue.  */
3342
3343static void
3344ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3345{
3346  int mask, grsave, grsave_prev;
3347
3348  if (current_frame_info.need_regstk)
3349    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3350	     current_frame_info.n_input_regs,
3351	     current_frame_info.n_local_regs,
3352	     current_frame_info.n_output_regs,
3353	     current_frame_info.n_rotate_regs);
3354
3355  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3356    return;
3357
3358  /* Emit the .prologue directive.  */
3359
3360  mask = 0;
3361  grsave = grsave_prev = 0;
3362  if (current_frame_info.reg_save_b0 != 0)
3363    {
3364      mask |= 8;
3365      grsave = grsave_prev = current_frame_info.reg_save_b0;
3366    }
3367  if (current_frame_info.reg_save_ar_pfs != 0
3368      && (grsave_prev == 0
3369	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3370    {
3371      mask |= 4;
3372      if (grsave_prev == 0)
3373	grsave = current_frame_info.reg_save_ar_pfs;
3374      grsave_prev = current_frame_info.reg_save_ar_pfs;
3375    }
3376  if (current_frame_info.reg_fp != 0
3377      && (grsave_prev == 0
3378	  || current_frame_info.reg_fp == grsave_prev + 1))
3379    {
3380      mask |= 2;
3381      if (grsave_prev == 0)
3382	grsave = HARD_FRAME_POINTER_REGNUM;
3383      grsave_prev = current_frame_info.reg_fp;
3384    }
3385  if (current_frame_info.reg_save_pr != 0
3386      && (grsave_prev == 0
3387	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3388    {
3389      mask |= 1;
3390      if (grsave_prev == 0)
3391	grsave = current_frame_info.reg_save_pr;
3392    }
3393
3394  if (mask && TARGET_GNU_AS)
3395    fprintf (file, "\t.prologue %d, %d\n", mask,
3396	     ia64_dbx_register_number (grsave));
3397  else
3398    fputs ("\t.prologue\n", file);
3399
3400  /* Emit a .spill directive, if necessary, to relocate the base of
3401     the register spill area.  */
3402  if (current_frame_info.spill_cfa_off != -16)
3403    fprintf (file, "\t.spill %ld\n",
3404	     (long) (current_frame_info.spill_cfa_off
3405		     + current_frame_info.spill_size));
3406}
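
/* Sketch of the directives this can produce (hypothetical frame, not taken
   from the original source): for a function with 2 inputs, 3 locals and
   1 output, b0 saved in the GR the assembler calls r35 and ar.pfs in r36,
   the output would look roughly like

       .regstk 2, 3, 1, 0
       .prologue 12, 35

   where 12 = 8 (b0/rp saved) | 4 (ar.pfs saved) and 35 is the first save
   register; with the Intel assembler, or when no mask bits apply, a bare
   ".prologue" is emitted instead.  */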
3407
3408/* Emit the .body directive at the scheduled end of the prologue.  */
3409
3410static void
3411ia64_output_function_end_prologue (FILE *file)
3412{
3413  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3414    return;
3415
3416  fputs ("\t.body\n", file);
3417}
3418
3419/* Emit the function epilogue.  */
3420
3421static void
3422ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3423			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3424{
3425  int i;
3426
3427  if (current_frame_info.reg_fp)
3428    {
3429      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3430      reg_names[HARD_FRAME_POINTER_REGNUM]
3431	= reg_names[current_frame_info.reg_fp];
3432      reg_names[current_frame_info.reg_fp] = tmp;
3433    }
3434  if (! TARGET_REG_NAMES)
3435    {
3436      for (i = 0; i < current_frame_info.n_input_regs; i++)
3437	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3438      for (i = 0; i < current_frame_info.n_local_regs; i++)
3439	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3440      for (i = 0; i < current_frame_info.n_output_regs; i++)
3441	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3442    }
3443
3444  current_frame_info.initialized = 0;
3445}
3446
3447int
3448ia64_dbx_register_number (int regno)
3449{
3450  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3451     from its home at loc79 to something inside the register frame.  We
3452     must perform the same renumbering here for the debug info.  */
3453  if (current_frame_info.reg_fp)
3454    {
3455      if (regno == HARD_FRAME_POINTER_REGNUM)
3456	regno = current_frame_info.reg_fp;
3457      else if (regno == current_frame_info.reg_fp)
3458	regno = HARD_FRAME_POINTER_REGNUM;
3459    }
3460
3461  if (IN_REGNO_P (regno))
3462    return 32 + regno - IN_REG (0);
3463  else if (LOC_REGNO_P (regno))
3464    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3465  else if (OUT_REGNO_P (regno))
3466    return (32 + current_frame_info.n_input_regs
3467	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3468  else
3469    return regno;
3470}
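
/* For illustration (assumed frame, not from the original source): with
   2 input and 3 local registers, in0-in1 map to debug registers 32-33,
   loc0-loc2 to 34-36 and out0 to 37, matching the packed r32+ numbering
   of the register stack frame rather than gcc's scattered hard regnos.  */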
3471
3472void
3473ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3474{
3475  rtx addr_reg, eight = GEN_INT (8);
3476
3477  /* The Intel assembler requires that the global __ia64_trampoline symbol
3478     be declared explicitly.  */
3479  if (!TARGET_GNU_AS)
3480    {
3481      static bool declared_ia64_trampoline = false;
3482
3483      if (!declared_ia64_trampoline)
3484	{
3485	  declared_ia64_trampoline = true;
3486	  (*targetm.asm_out.globalize_label) (asm_out_file,
3487					      "__ia64_trampoline");
3488	}
3489    }
3490
3491  /* Load up our iterator.  */
3492  addr_reg = gen_reg_rtx (Pmode);
3493  emit_move_insn (addr_reg, addr);
3494
3495  /* The first two words are the fake descriptor:
3496     __ia64_trampoline, ADDR+16.  */
3497  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3498		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3499  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3500
3501  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3502		  copy_to_reg (plus_constant (addr, 16)));
3503  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3504
3505  /* The third word is the target descriptor.  */
3506  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3507  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3508
3509  /* The fourth word is the static chain.  */
3510  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3511}
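
/* Resulting trampoline layout, as a recap sketch (not from the original
   source):

       ADDR +  0:  __ia64_trampoline      \ fake function descriptor
       ADDR +  8:  ADDR + 16              /  (entry point, "gp" value)
       ADDR + 16:  FNADDR                    target function descriptor
       ADDR + 24:  STATIC_CHAIN

   A call through the fake descriptor enters __ia64_trampoline with gp
   pointing at ADDR+16, from which the stub can load the real target and
   the static chain.  */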
3512
3513/* Do any needed setup for a variadic function.  CUM has not been updated
3514   for the last named argument which has type TYPE and mode MODE.
3515
3516   We generate the actual spill instructions during prologue generation.  */
3517
3518void
3519ia64_setup_incoming_varargs (CUMULATIVE_ARGS cum, int int_mode, tree type,
3520			     int * pretend_size,
3521			     int second_time ATTRIBUTE_UNUSED)
3522{
3523  /* Skip the current argument.  */
3524  ia64_function_arg_advance (&cum, int_mode, type, 1);
3525
3526  if (cum.words < MAX_ARGUMENT_SLOTS)
3527    {
3528      int n = MAX_ARGUMENT_SLOTS - cum.words;
3529      *pretend_size = n * UNITS_PER_WORD;
3530      cfun->machine->n_varargs = n;
3531    }
3532}
3533
3534/* Check whether TYPE is a homogeneous floating point aggregate.  If
3535   it is, return the mode of the floating point type that appears
3536   in all leaves.  If it is not, return VOIDmode.
3537
3538   An aggregate is a homogeneous floating point aggregate if all
3539   fields/elements in it have the same floating point type (e.g.,
3540   SFmode).  128-bit quad-precision floats are excluded.  */
3541
3542static enum machine_mode
3543hfa_element_mode (tree type, int nested)
3544{
3545  enum machine_mode element_mode = VOIDmode;
3546  enum machine_mode mode;
3547  enum tree_code code = TREE_CODE (type);
3548  int know_element_mode = 0;
3549  tree t;
3550
3551  switch (code)
3552    {
3553    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3554    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3555    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3556    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
3557    case FUNCTION_TYPE:
3558      return VOIDmode;
3559
3560      /* Fortran complex types are supposed to be HFAs, so we need to handle
3561	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3562	 types though.  */
3563    case COMPLEX_TYPE:
3564      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3565	  && TYPE_MODE (type) != TCmode)
3566	return GET_MODE_INNER (TYPE_MODE (type));
3567      else
3568	return VOIDmode;
3569
3570    case REAL_TYPE:
3571      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3572	 mode if this is contained within an aggregate.  */
3573      if (nested && TYPE_MODE (type) != TFmode)
3574	return TYPE_MODE (type);
3575      else
3576	return VOIDmode;
3577
3578    case ARRAY_TYPE:
3579      return hfa_element_mode (TREE_TYPE (type), 1);
3580
3581    case RECORD_TYPE:
3582    case UNION_TYPE:
3583    case QUAL_UNION_TYPE:
3584      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3585	{
3586	  if (TREE_CODE (t) != FIELD_DECL)
3587	    continue;
3588
3589	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3590	  if (know_element_mode)
3591	    {
3592	      if (mode != element_mode)
3593		return VOIDmode;
3594	    }
3595	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3596	    return VOIDmode;
3597	  else
3598	    {
3599	      know_element_mode = 1;
3600	      element_mode = mode;
3601	    }
3602	}
3603      return element_mode;
3604
3605    default:
3606      /* If we reach here, we probably have some front-end specific type
3607	 that the backend doesn't know about.  This can happen via the
3608	 aggregate_value_p call in init_function_start.  All we can do is
3609	 ignore unknown tree types.  */
3610      return VOIDmode;
3611    }
3612
3613  return VOIDmode;
3614}
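
/* A few illustrative cases (not from the original source):

       struct { float x, y, z; }           -> SFmode   (HFA of 3 floats)
       struct { double r; double i[2]; }   -> DFmode
       struct { float f; double d; }       -> VOIDmode (mixed leaf types)
       _Complex double                     -> DFmode
       aggregate containing a TFmode leaf  -> VOIDmode (quad excluded)  */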
3615
3616/* Return the number of words required to hold a quantity of TYPE and MODE
3617   when passed as an argument.  */
3618static int
3619ia64_function_arg_words (tree type, enum machine_mode mode)
3620{
3621  int words;
3622
3623  if (mode == BLKmode)
3624    words = int_size_in_bytes (type);
3625  else
3626    words = GET_MODE_SIZE (mode);
3627
3628  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3629}
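
/* E.g. (illustrative): a 12-byte BLKmode aggregate needs (12 + 7) / 8 = 2
   argument words, while an SImode scalar needs 1.  */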
3630
3631/* Return the number of registers that should be skipped so the current
3632   argument (described by TYPE and WORDS) will be properly aligned.
3633
3634   Integer and float arguments larger than 8 bytes start at the next
3635   even boundary.  Aggregates larger than 8 bytes start at the next
3636   even boundary if the aggregate has 16 byte alignment.  Note that
3637   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3638   but are still to be aligned in registers.
3639
3640   ??? The ABI does not specify how to handle aggregates with
3641   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3642   all as if they had 16 byte alignment.  Such aggregates can occur
3643   only if gcc extensions are used.  */
3644static int
3645ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3646{
3647  if ((cum->words & 1) == 0)
3648    return 0;
3649
3650  if (type
3651      && TREE_CODE (type) != INTEGER_TYPE
3652      && TREE_CODE (type) != REAL_TYPE)
3653    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3654  else
3655    return words > 1;
3656}
3657
3658/* Return rtx for register where argument is passed, or zero if it is passed
3659   on the stack.  */
3660/* ??? 128-bit quad-precision floats are always passed in general
3661   registers.  */
3662
3663rtx
3664ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3665		   int named, int incoming)
3666{
3667  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3668  int words = ia64_function_arg_words (type, mode);
3669  int offset = ia64_function_arg_offset (cum, type, words);
3670  enum machine_mode hfa_mode = VOIDmode;
3671
3672  /* If all argument slots are used, then it must go on the stack.  */
3673  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3674    return 0;
3675
3676  /* Check for and handle homogeneous FP aggregates.  */
3677  if (type)
3678    hfa_mode = hfa_element_mode (type, 0);
3679
3680  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3681     and unprototyped hfas are passed specially.  */
3682  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3683    {
3684      rtx loc[16];
3685      int i = 0;
3686      int fp_regs = cum->fp_regs;
3687      int int_regs = cum->words + offset;
3688      int hfa_size = GET_MODE_SIZE (hfa_mode);
3689      int byte_size;
3690      int args_byte_size;
3691
3692      /* If prototyped, pass it in FR regs then GR regs.
3693	 If not prototyped, pass it in both FR and GR regs.
3694
3695	 If this is an SFmode aggregate, then it is possible to run out of
3696	 FR regs while GR regs are still left.  In that case, we pass the
3697	 remaining part in the GR regs.  */
3698
3699      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3700	 of the argument, the last FP register, or the last argument slot.  */
3701
3702      byte_size = ((mode == BLKmode)
3703		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3704      args_byte_size = int_regs * UNITS_PER_WORD;
3705      offset = 0;
3706      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3707	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3708	{
3709	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3710				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3711							      + fp_regs)),
3712				      GEN_INT (offset));
3713	  offset += hfa_size;
3714	  args_byte_size += hfa_size;
3715	  fp_regs++;
3716	}
3717
3718      /* If no prototype, then the whole thing must go in GR regs.  */
3719      if (! cum->prototype)
3720	offset = 0;
3721      /* If this is an SFmode aggregate, then we might have some left over
3722	 that needs to go in GR regs.  */
3723      else if (byte_size != offset)
3724	int_regs += offset / UNITS_PER_WORD;
3725
3726      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
3727
3728      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
3729	{
3730	  enum machine_mode gr_mode = DImode;
3731	  unsigned int gr_size;
3732
3733	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
3734	     then this goes in a GR reg left adjusted/little endian, right
3735	     adjusted/big endian.  */
3736	  /* ??? Currently this is handled wrong, because 4-byte hunks are
3737	     always right adjusted/little endian.  */
3738	  if (offset & 0x4)
3739	    gr_mode = SImode;
3740	  /* If we have an even 4 byte hunk because the aggregate is a
3741	     multiple of 4 bytes in size, then this goes in a GR reg right
3742	     adjusted/little endian.  */
3743	  else if (byte_size - offset == 4)
3744	    gr_mode = SImode;
3745
3746	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3747				      gen_rtx_REG (gr_mode, (basereg
3748							     + int_regs)),
3749				      GEN_INT (offset));
3750
3751	  gr_size = GET_MODE_SIZE (gr_mode);
3752	  offset += gr_size;
3753	  if (gr_size == UNITS_PER_WORD
3754	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
3755	    int_regs++;
3756	  else if (gr_size > UNITS_PER_WORD)
3757	    int_regs += gr_size / UNITS_PER_WORD;
3758	}
3759
3760      /* If we ended up using just one location, just return that one loc, but
3761	 change the mode back to the argument mode.  */
3762      if (i == 1)
3763	return gen_rtx_REG (mode, REGNO (XEXP (loc[0], 0)));
3764      else
3765	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
3766    }
3767
3768  /* Integral and aggregates go in general registers.  If we have run out of
3769     FR registers, then FP values must also go in general registers.  This can
3770     happen when we have a SFmode HFA.  */
3771  else if (mode == TFmode || mode == TCmode
3772	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
3773    {
3774      int byte_size = ((mode == BLKmode)
3775                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3776      if (BYTES_BIG_ENDIAN
3777	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3778	&& byte_size < UNITS_PER_WORD
3779	&& byte_size > 0)
3780	{
3781	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3782					  gen_rtx_REG (DImode,
3783						       (basereg + cum->words
3784							+ offset)),
3785					  const0_rtx);
3786	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3787	}
3788      else
3789	return gen_rtx_REG (mode, basereg + cum->words + offset);
3790
3791    }
3792
3793  /* If there is a prototype, then FP values go in a FR register when
3794     named, and in a GR register when unnamed.  */
3795  else if (cum->prototype)
3796    {
3797      if (named)
3798	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
3799      /* In big-endian mode, an anonymous SFmode value must be represented
3800         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
3801	 the value into the high half of the general register.  */
3802      else if (BYTES_BIG_ENDIAN && mode == SFmode)
3803	return gen_rtx_PARALLEL (mode,
3804		 gen_rtvec (1,
3805                   gen_rtx_EXPR_LIST (VOIDmode,
3806		     gen_rtx_REG (DImode, basereg + cum->words + offset),
3807				      const0_rtx)));
3808      else
3809	return gen_rtx_REG (mode, basereg + cum->words + offset);
3810    }
3811  /* If there is no prototype, then FP values go in both FR and GR
3812     registers.  */
3813  else
3814    {
3815      /* See comment above.  */
3816      enum machine_mode inner_mode =
3817	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
3818
3819      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
3820				      gen_rtx_REG (mode, (FR_ARG_FIRST
3821							  + cum->fp_regs)),
3822				      const0_rtx);
3823      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3824				      gen_rtx_REG (inner_mode,
3825						   (basereg + cum->words
3826						    + offset)),
3827				      const0_rtx);
3828
3829      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
3830    }
3831}
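
/* Illustrative RTL for the HFA path above, assuming the first named
   argument of a prototyped call (not from the original source): passing
   "struct { float a, b, c; }" yields roughly

       (parallel:BLK [(expr_list (reg:SF f8)  (const_int 0))
                      (expr_list (reg:SF f9)  (const_int 4))
                      (expr_list (reg:SF f10) (const_int 8))])

   i.e. one FR argument register per element, with each element's byte
   offset recorded in its EXPR_LIST.  */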
3832
3833/* Return number of words, at the beginning of the argument, that must be
3834   put in registers.  0 if the argument is entirely in registers or entirely
3835   in memory.  */
3836
3837int
3838ia64_function_arg_partial_nregs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3839				 tree type, int named ATTRIBUTE_UNUSED)
3840{
3841  int words = ia64_function_arg_words (type, mode);
3842  int offset = ia64_function_arg_offset (cum, type, words);
3843
3844  /* If all argument slots are used, then it must go on the stack.  */
3845  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3846    return 0;
3847
3848  /* It doesn't matter whether the argument goes in FR or GR regs.  If
3849     it fits within the 8 argument slots, then it goes entirely in
3850     registers.  If it extends past the last argument slot, then the rest
3851     goes on the stack.  */
3852
3853  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
3854    return 0;
3855
3856  return MAX_ARGUMENT_SLOTS - cum->words - offset;
3857}
3858
3859/* Update CUM to point after this argument.  This is patterned after
3860   ia64_function_arg.  */
3861
3862void
3863ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3864			   tree type, int named)
3865{
3866  int words = ia64_function_arg_words (type, mode);
3867  int offset = ia64_function_arg_offset (cum, type, words);
3868  enum machine_mode hfa_mode = VOIDmode;
3869
3870  /* If all arg slots are already full, then there is nothing to do.  */
3871  if (cum->words >= MAX_ARGUMENT_SLOTS)
3872    return;
3873
3874  cum->words += words + offset;
3875
3876  /* Check for and handle homogeneous FP aggregates.  */
3877  if (type)
3878    hfa_mode = hfa_element_mode (type, 0);
3879
3880  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3881     and unprototyped hfas are passed specially.  */
3882  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3883    {
3884      int fp_regs = cum->fp_regs;
3885      /* This is the original value of cum->words + offset.  */
3886      int int_regs = cum->words - words;
3887      int hfa_size = GET_MODE_SIZE (hfa_mode);
3888      int byte_size;
3889      int args_byte_size;
3890
3891      /* If prototyped, pass it in FR regs then GR regs.
3892	 If not prototyped, pass it in both FR and GR regs.
3893
3894	 If this is an SFmode aggregate, then it is possible to run out of
3895	 FR regs while GR regs are still left.  In that case, we pass the
3896	 remaining part in the GR regs.  */
3897
3898      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3899	 of the argument, the last FP register, or the last argument slot.  */
3900
3901      byte_size = ((mode == BLKmode)
3902		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3903      args_byte_size = int_regs * UNITS_PER_WORD;
3904      offset = 0;
3905      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3906	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
3907	{
3908	  offset += hfa_size;
3909	  args_byte_size += hfa_size;
3910	  fp_regs++;
3911	}
3912
3913      cum->fp_regs = fp_regs;
3914    }
3915
3916  /* Integral and aggregates go in general registers.  If we have run out of
3917     FR registers, then FP values must also go in general registers.  This can
3918     happen when we have a SFmode HFA.  */
3919  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
3920    cum->int_regs = cum->words;
3921
3922  /* If there is a prototype, then FP values go in a FR register when
3923     named, and in a GR register when unnamed.  */
3924  else if (cum->prototype)
3925    {
3926      if (! named)
3927	cum->int_regs = cum->words;
3928      else
3929	/* ??? Complex types should not reach here.  */
3930	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3931    }
3932  /* If there is no prototype, then FP values go in both FR and GR
3933     registers.  */
3934  else
3935    {
3936      /* ??? Complex types should not reach here.  */
3937      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
3938      cum->int_regs = cum->words;
3939    }
3940}
3941
3942/* Variable sized types are passed by reference.  */
3943/* ??? At present this is a GCC extension to the IA-64 ABI.  */
3944
3945int
3946ia64_function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3947				     enum machine_mode mode ATTRIBUTE_UNUSED,
3948				     tree type, int named ATTRIBUTE_UNUSED)
3949{
3950  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3951}
3952
3953/* True if it is OK to do sibling call optimization for the specified
3954   call expression EXP.  DECL will be the called function, or NULL if
3955   this is an indirect call.  */
3956static bool
3957ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3958{
3959  /* We can't perform a sibcall if the current function has the syscall_linkage
3960     attribute.  */
3961  if (lookup_attribute ("syscall_linkage",
3962			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
3963    return false;
3964
3965  /* We must always return with our current GP.  This means we can
3966     only sibcall to functions defined in the current module.  */
3967  return decl && (*targetm.binds_local_p) (decl);
3968}
3969
3970
3971/* Implement va_arg.  */
3972
3973rtx
3974ia64_va_arg (tree valist, tree type)
3975{
3976  tree t;
3977
3978  /* Variable sized types are passed by reference.  */
3979  if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
3980    {
3981      rtx addr = force_reg (ptr_mode,
3982	    std_expand_builtin_va_arg (valist, build_pointer_type (type)));
3983#ifdef POINTERS_EXTEND_UNSIGNED
3984      addr = convert_memory_address (Pmode, addr);
3985#endif
3986      return gen_rtx_MEM (ptr_mode, addr);
3987    }
3988
3989  /* Aggregate arguments with alignment larger than 8 bytes start at
3990     the next even boundary.  Integer and floating point arguments
3991     do so if they are larger than 8 bytes, whether or not they are
3992     also aligned larger than 8 bytes.  */
3993  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
3994      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
3995    {
3996      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
3997		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
3998      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
3999		 build_int_2 (-2 * UNITS_PER_WORD, -1));
4000      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4001      TREE_SIDE_EFFECTS (t) = 1;
4002      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4003    }
4004
4005  return std_expand_builtin_va_arg (valist, type);
4006}
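
/* The alignment fix-up above amounts to, schematically,

       valist = (valist + 15) & -16;

   i.e. round the argument pointer up to the next 16-byte (even-slot)
   boundary before the standard va_arg expansion takes over.  */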
4007
4008/* Return 1 if the function return value is returned in memory.  Return 0 if
4009   it is in a register.  */
4010
4011int
4012ia64_return_in_memory (tree valtype)
4013{
4014  enum machine_mode mode;
4015  enum machine_mode hfa_mode;
4016  HOST_WIDE_INT byte_size;
4017
4018  mode = TYPE_MODE (valtype);
4019  byte_size = GET_MODE_SIZE (mode);
4020  if (mode == BLKmode)
4021    {
4022      byte_size = int_size_in_bytes (valtype);
4023      if (byte_size < 0)
4024	return 1;
4025    }
4026
4027  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
4028
4029  hfa_mode = hfa_element_mode (valtype, 0);
4030  if (hfa_mode != VOIDmode)
4031    {
4032      int hfa_size = GET_MODE_SIZE (hfa_mode);
4033
4034      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4035	return 1;
4036      else
4037	return 0;
4038    }
4039  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4040    return 1;
4041  else
4042    return 0;
4043}
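
/* Examples (illustrative): "struct { double d[4]; }" is an HFA of 4 <= 8
   elements and is returned in FP registers; "struct { double d[10]; }"
   needs more than 8 FP result registers and is returned in memory, as is
   any non-HFA value wider than UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes.  */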
4044
4045/* Return rtx for register that holds the function return value.  */
4046
4047rtx
4048ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4049{
4050  enum machine_mode mode;
4051  enum machine_mode hfa_mode;
4052
4053  mode = TYPE_MODE (valtype);
4054  hfa_mode = hfa_element_mode (valtype, 0);
4055
4056  if (hfa_mode != VOIDmode)
4057    {
4058      rtx loc[8];
4059      int i;
4060      int hfa_size;
4061      int byte_size;
4062      int offset;
4063
4064      hfa_size = GET_MODE_SIZE (hfa_mode);
4065      byte_size = ((mode == BLKmode)
4066		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4067      offset = 0;
4068      for (i = 0; offset < byte_size; i++)
4069	{
4070	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4071				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4072				      GEN_INT (offset));
4073	  offset += hfa_size;
4074	}
4075
4076      if (i == 1)
4077	return XEXP (loc[0], 0);
4078      else
4079	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4080    }
4081  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4082    return gen_rtx_REG (mode, FR_ARG_FIRST);
4083  else
4084    {
4085      if (BYTES_BIG_ENDIAN
4086	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4087	{
4088	  rtx loc[8];
4089	  int offset;
4090	  int bytesize;
4091	  int i;
4092
4093	  offset = 0;
4094	  bytesize = int_size_in_bytes (valtype);
4095	  for (i = 0; offset < bytesize; i++)
4096	    {
4097	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4098					  gen_rtx_REG (DImode,
4099						       GR_RET_FIRST + i),
4100					  GEN_INT (offset));
4101	      offset += UNITS_PER_WORD;
4102	    }
4103	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4104	}
4105      else
4106	return gen_rtx_REG (mode, GR_RET_FIRST);
4107    }
4108}
4109
4110/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
4111   We need to emit DTP-relative relocations.  */
4112
4113void
4114ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4115{
4116  if (size != 8)
4117    abort ();
4118  fputs ("\tdata8.ua\t@dtprel(", file);
4119  output_addr_const (file, x);
4120  fputs (")", file);
4121}
4122
4123/* Print a memory address as an operand to reference that memory location.  */
4124
4125/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4126   also call this from ia64_print_operand for memory addresses.  */
4127
4128void
4129ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4130			    rtx address ATTRIBUTE_UNUSED)
4131{
4132}
4133
4134/* Print an operand to an assembler instruction.
4135   C	Swap and print a comparison operator.
4136   D	Print an FP comparison operator.
4137   E    Print 32 - constant, for SImode shifts as extract.
4138   e    Print 64 - constant, for DImode rotates.
4139   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4140        a floating point register emitted normally.
4141   I	Invert a predicate register by adding 1.
4142   J    Select the proper predicate register for a condition.
4143   j    Select the inverse predicate register for a condition.
4144   O	Append .acq for volatile load.
4145   P	Postincrement of a MEM.
4146   Q	Append .rel for volatile store.
4147   S	Shift amount for shladd instruction.
4148   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4149	for Intel assembler.
4150   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4151	for Intel assembler.
4152   r	Print register name, or constant 0 as r0.  HP compatibility for
4153	Linux kernel.  */
4154void
4155ia64_print_operand (FILE * file, rtx x, int code)
4156{
4157  const char *str;
4158
4159  switch (code)
4160    {
4161    case 0:
4162      /* Handled below.  */
4163      break;
4164
4165    case 'C':
4166      {
4167	enum rtx_code c = swap_condition (GET_CODE (x));
4168	fputs (GET_RTX_NAME (c), file);
4169	return;
4170      }
4171
4172    case 'D':
4173      switch (GET_CODE (x))
4174	{
4175	case NE:
4176	  str = "neq";
4177	  break;
4178	case UNORDERED:
4179	  str = "unord";
4180	  break;
4181	case ORDERED:
4182	  str = "ord";
4183	  break;
4184	default:
4185	  str = GET_RTX_NAME (GET_CODE (x));
4186	  break;
4187	}
4188      fputs (str, file);
4189      return;
4190
4191    case 'E':
4192      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4193      return;
4194
4195    case 'e':
4196      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4197      return;
4198
4199    case 'F':
4200      if (x == CONST0_RTX (GET_MODE (x)))
4201	str = reg_names [FR_REG (0)];
4202      else if (x == CONST1_RTX (GET_MODE (x)))
4203	str = reg_names [FR_REG (1)];
4204      else if (GET_CODE (x) == REG)
4205	str = reg_names [REGNO (x)];
4206      else
4207	abort ();
4208      fputs (str, file);
4209      return;
4210
4211    case 'I':
4212      fputs (reg_names [REGNO (x) + 1], file);
4213      return;
4214
4215    case 'J':
4216    case 'j':
4217      {
4218	unsigned int regno = REGNO (XEXP (x, 0));
4219	if (GET_CODE (x) == EQ)
4220	  regno += 1;
4221	if (code == 'j')
4222	  regno ^= 1;
4223        fputs (reg_names [regno], file);
4224      }
4225      return;
4226
4227    case 'O':
4228      if (MEM_VOLATILE_P (x))
4229	fputs(".acq", file);
4230      return;
4231
4232    case 'P':
4233      {
4234	HOST_WIDE_INT value;
4235
4236	switch (GET_CODE (XEXP (x, 0)))
4237	  {
4238	  default:
4239	    return;
4240
4241	  case POST_MODIFY:
4242	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4243	    if (GET_CODE (x) == CONST_INT)
4244	      value = INTVAL (x);
4245	    else if (GET_CODE (x) == REG)
4246	      {
4247		fprintf (file, ", %s", reg_names[REGNO (x)]);
4248		return;
4249	      }
4250	    else
4251	      abort ();
4252	    break;
4253
4254	  case POST_INC:
4255	    value = GET_MODE_SIZE (GET_MODE (x));
4256	    break;
4257
4258	  case POST_DEC:
4259	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4260	    break;
4261	  }
4262
4263	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4264	return;
4265      }
4266
4267    case 'Q':
4268      if (MEM_VOLATILE_P (x))
4269	fputs(".rel", file);
4270      return;
4271
4272    case 'S':
4273      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4274      return;
4275
4276    case 'T':
4277      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4278	{
4279	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4280	  return;
4281	}
4282      break;
4283
4284    case 'U':
4285      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4286	{
4287	  const char *prefix = "0x";
4288	  if (INTVAL (x) & 0x80000000)
4289	    {
4290	      fprintf (file, "0xffffffff");
4291	      prefix = "";
4292	    }
4293	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4294	  return;
4295	}
4296      break;
4297
4298    case 'r':
4299      /* If this operand is the constant zero, write it as register zero.
4300	 Any register, zero, or CONST_INT value is OK here.  */
4301      if (GET_CODE (x) == REG)
4302	fputs (reg_names[REGNO (x)], file);
4303      else if (x == CONST0_RTX (GET_MODE (x)))
4304	fputs ("r0", file);
4305      else if (GET_CODE (x) == CONST_INT)
4306	output_addr_const (file, x);
4307      else
4308	output_operand_lossage ("invalid %%r value");
4309      return;
4310
4311    case '+':
4312      {
4313	const char *which;
4314
4315	/* For conditional branches, returns or calls, substitute
4316	   sptk, dptk, dpnt, or spnt for %s.  */
4317	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4318	if (x)
4319	  {
4320	    int pred_val = INTVAL (XEXP (x, 0));
4321
4322	    /* Guess top and bottom 10% statically predicted.  */
4323	    if (pred_val < REG_BR_PROB_BASE / 50)
4324	      which = ".spnt";
4325	    else if (pred_val < REG_BR_PROB_BASE / 2)
4326	      which = ".dpnt";
4327	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4328	      which = ".dptk";
4329	    else
4330	      which = ".sptk";
4331	  }
4332	else if (GET_CODE (current_output_insn) == CALL_INSN)
4333	  which = ".sptk";
4334	else
4335	  which = ".dptk";
4336
4337	fputs (which, file);
4338	return;
4339      }
4340
4341    case ',':
4342      x = current_insn_predicate;
4343      if (x)
4344	{
4345	  unsigned int regno = REGNO (XEXP (x, 0));
4346	  if (GET_CODE (x) == EQ)
4347	    regno += 1;
4348          fprintf (file, "(%s) ", reg_names [regno]);
4349	}
4350      return;
4351
4352    default:
4353      output_operand_lossage ("ia64_print_operand: unknown code");
4354      return;
4355    }
4356
4357  switch (GET_CODE (x))
4358    {
4359      /* This happens for the spill/restore instructions.  */
4360    case POST_INC:
4361    case POST_DEC:
4362    case POST_MODIFY:
4363      x = XEXP (x, 0);
4364      /* ... fall through ...  */
4365
4366    case REG:
4367      fputs (reg_names [REGNO (x)], file);
4368      break;
4369
4370    case MEM:
4371      {
4372	rtx addr = XEXP (x, 0);
4373	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
4374	  addr = XEXP (addr, 0);
4375	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4376	break;
4377      }
4378
4379    default:
4380      output_addr_const (file, x);
4381      break;
4382    }
4383
4384  return;
4385}
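
/* Usage sketch with hypothetical operands (not from the original source):
   for a post-incremented memory operand

       (mem:DI (post_inc:DI (reg:DI r14)))

   a template fragment along the lines of "%1%P1" would print "[r14], 8"
   (the ", 8" being the mode size), while %O/%Q add ".acq" or ".rel" only
   when the MEM is volatile.  */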
4386
4387/* Compute a (partial) cost for rtx X.  Return true if the complete
4388   cost has been computed, and false if subexpressions should be
4389   scanned.  In either case, *TOTAL contains the cost result.  */
4390/* ??? This is incomplete.  */
4391
4392static bool
4393ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4394{
4395  switch (code)
4396    {
4397    case CONST_INT:
4398      switch (outer_code)
4399        {
4400        case SET:
4401	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4402	  return true;
4403        case PLUS:
4404	  if (CONST_OK_FOR_I (INTVAL (x)))
4405	    *total = 0;
4406	  else if (CONST_OK_FOR_J (INTVAL (x)))
4407	    *total = 1;
4408	  else
4409	    *total = COSTS_N_INSNS (1);
4410	  return true;
4411        default:
4412	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4413	    *total = 0;
4414	  else
4415	    *total = COSTS_N_INSNS (1);
4416	  return true;
4417	}
4418
4419    case CONST_DOUBLE:
4420      *total = COSTS_N_INSNS (1);
4421      return true;
4422
4423    case CONST:
4424    case SYMBOL_REF:
4425    case LABEL_REF:
4426      *total = COSTS_N_INSNS (3);
4427      return true;
4428
4429    case MULT:
4430      /* For multiplies wider than HImode, we have to go to the FPU,
4431         which normally involves copies.  Plus there's the latency
4432         of the multiply itself, and the latency of the instructions to
4433         transfer integer regs to FP regs.  */
4434      /* ??? Check for FP mode.  */
4435      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4436        *total = COSTS_N_INSNS (10);
4437      else
4438	*total = COSTS_N_INSNS (2);
4439      return true;
4440
4441    case PLUS:
4442    case MINUS:
4443    case ASHIFT:
4444    case ASHIFTRT:
4445    case LSHIFTRT:
4446      *total = COSTS_N_INSNS (1);
4447      return true;
4448
4449    case DIV:
4450    case UDIV:
4451    case MOD:
4452    case UMOD:
4453      /* We make divide expensive, so that divide-by-constant will be
4454         optimized to a multiply.  */
4455      *total = COSTS_N_INSNS (60);
4456      return true;
4457
4458    default:
4459      return false;
4460    }
4461}
4462
4463/* Calculate the cost of moving data from a register in class FROM to
4464   one in class TO, using MODE.  */
4465
4466int
4467ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4468			 enum reg_class to)
4469{
4470  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4471  if (to == ADDL_REGS)
4472    to = GR_REGS;
4473  if (from == ADDL_REGS)
4474    from = GR_REGS;
4475
4476  /* All costs are symmetric, so reduce cases by putting the
4477     lower number class as the destination.  */
4478  if (from < to)
4479    {
4480      enum reg_class tmp = to;
4481      to = from, from = tmp;
4482    }
4483
4484  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4485     so that we get secondary memory reloads.  Between FR_REGS,
4486     we have to make this at least as expensive as MEMORY_MOVE_COST
4487     to avoid spectacularly poor register class preferencing.  */
4488  if (mode == XFmode)
4489    {
4490      if (to != GR_REGS || from != GR_REGS)
4491        return MEMORY_MOVE_COST (mode, to, 0);
4492      else
4493	return 3;
4494    }
4495
4496  switch (to)
4497    {
4498    case PR_REGS:
4499      /* Moving between PR registers takes two insns.  */
4500      if (from == PR_REGS)
4501	return 3;
4502      /* Moving between PR and anything but GR is impossible.  */
4503      if (from != GR_REGS)
4504	return MEMORY_MOVE_COST (mode, to, 0);
4505      break;
4506
4507    case BR_REGS:
4508      /* Moving between BR and anything but GR is impossible.  */
4509      if (from != GR_REGS && from != GR_AND_BR_REGS)
4510	return MEMORY_MOVE_COST (mode, to, 0);
4511      break;
4512
4513    case AR_I_REGS:
4514    case AR_M_REGS:
4515      /* Moving between AR and anything but GR is impossible.  */
4516      if (from != GR_REGS)
4517	return MEMORY_MOVE_COST (mode, to, 0);
4518      break;
4519
4520    case GR_REGS:
4521    case FR_REGS:
4522    case GR_AND_FR_REGS:
4523    case GR_AND_BR_REGS:
4524    case ALL_REGS:
4525      break;
4526
4527    default:
4528      abort ();
4529    }
4530
4531  return 2;
4532}
4533
4534/* This function returns the register class required for a secondary
4535   register when copying between one of the registers in CLASS, and X,
4536   using MODE.  A return value of NO_REGS means that no secondary register
4537   is required.  */
4538
4539enum reg_class
4540ia64_secondary_reload_class (enum reg_class class,
4541			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4542{
4543  int regno = -1;
4544
4545  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4546    regno = true_regnum (x);
4547
4548  switch (class)
4549    {
4550    case BR_REGS:
4551    case AR_M_REGS:
4552    case AR_I_REGS:
4553      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4554	 interaction.  We end up with two pseudos with overlapping lifetimes
4555	 both of which are equiv to the same constant, and both of which need
4556	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4557	 changes depending on the path length, which means the qty_first_reg
4558	 check in make_regs_eqv can give different answers at different times.
4559	 At some point I'll probably need a reload_indi pattern to handle
4560	 this.
4561
4562	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4563	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4564	 non-general registers for good measure.  */
4565      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4566	return GR_REGS;
4567
4568      /* This is needed if a pseudo used as a call_operand gets spilled to a
4569	 stack slot.  */
4570      if (GET_CODE (x) == MEM)
4571	return GR_REGS;
4572      break;
4573
4574    case FR_REGS:
4575      /* Need to go through general registers to get to other class regs.  */
4576      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4577	return GR_REGS;
4578
4579      /* This can happen when a paradoxical subreg is an operand to the
4580	 muldi3 pattern.  */
4581      /* ??? This shouldn't be necessary after instruction scheduling is
4582	 enabled, because paradoxical subregs are not accepted by
4583	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4584	 stop the paradoxical subreg stupidity in the *_operand functions
4585	 in recog.c.  */
4586      if (GET_CODE (x) == MEM
4587	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4588	      || GET_MODE (x) == QImode))
4589	return GR_REGS;
4590
4591      /* This can happen because of the ior/and/etc patterns that accept FP
4592	 registers as operands.  If the third operand is a constant, then it
4593	 needs to be reloaded into a FP register.  */
4594      if (GET_CODE (x) == CONST_INT)
4595	return GR_REGS;
4596
4597      /* This can happen because of register elimination in a muldi3 insn.
4598	 E.g. `26107 * (unsigned long)&u'.  */
4599      if (GET_CODE (x) == PLUS)
4600	return GR_REGS;
4601      break;
4602
4603    case PR_REGS:
4604      /* ??? This happens if we cse/gcse a BImode value across a call,
4605	 and the function has a nonlocal goto.  This is because global
4606	 does not allocate call crossing pseudos to hard registers when
4607	 current_function_has_nonlocal_goto is true.  This is relatively
4608	 common for C++ programs that use exceptions.  To reproduce,
4609	 return NO_REGS and compile libstdc++.  */
4610      if (GET_CODE (x) == MEM)
4611	return GR_REGS;
4612
4613      /* This can happen when we take a BImode subreg of a DImode value,
4614	 and that DImode value winds up in some non-GR register.  */
4615      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4616	return GR_REGS;
4617      break;
4618
4619    default:
4620      break;
4621    }
4622
4623  return NO_REGS;
4624}
4625
4626
4627/* Emit text to declare externally defined variables and functions, because
4628   the Intel assembler does not support undefined externals.  */
4629
4630void
4631ia64_asm_output_external (FILE *file, tree decl, const char *name)
4632{
4633  int save_referenced;
4634
4635  /* GNU as does not need anything here, but the HP linker does need
4636     something for external functions.  */
4637
4638  if (TARGET_GNU_AS
4639      && (!TARGET_HPUX_LD
4640	  || TREE_CODE (decl) != FUNCTION_DECL
4641	  || strstr (name, "__builtin_") == name))
4642    return;
4643
4644  /* ??? The Intel assembler creates a reference that needs to be satisfied by
4645     the linker when we do this, so we need to be careful not to do this for
4646     builtin functions which have no library equivalent.  Unfortunately, we
4647     can't tell here whether or not a function will actually be called by
4648     expand_expr, so we pull in library functions even if we may not need
4649     them later.  */
4650  if (! strcmp (name, "__builtin_next_arg")
4651      || ! strcmp (name, "alloca")
4652      || ! strcmp (name, "__builtin_constant_p")
4653      || ! strcmp (name, "__builtin_args_info"))
4654    return;
4655
4656  if (TARGET_HPUX_LD)
4657    ia64_hpux_add_extern_decl (decl);
4658  else
4659    {
4660      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
4661         restore it.  */
4662      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
4663      if (TREE_CODE (decl) == FUNCTION_DECL)
4664        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
4665      (*targetm.asm_out.globalize_label) (file, name);
4666      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
4667    }
4668}
4669
4670/* Parse the -mfixed-range= option string.  */
4671
4672static void
4673fix_range (const char *const_str)
4674{
4675  int i, first, last;
4676  char *str, *dash, *comma;
4677
4678  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4679     REG2 are either register names or register numbers.  The effect
4680     of this option is to mark the registers in the range from REG1 to
4681     REG2 as ``fixed'' so they won't be used by the compiler.  This is
4682     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
4683
4684  i = strlen (const_str);
4685  str = (char *) alloca (i + 1);
4686  memcpy (str, const_str, i + 1);
4687
4688  while (1)
4689    {
4690      dash = strchr (str, '-');
4691      if (!dash)
4692	{
4693	  warning ("value of -mfixed-range must have form REG1-REG2");
4694	  return;
4695	}
4696      *dash = '\0';
4697
4698      comma = strchr (dash + 1, ',');
4699      if (comma)
4700	*comma = '\0';
4701
4702      first = decode_reg_name (str);
4703      if (first < 0)
4704	{
4705	  warning ("unknown register name: %s", str);
4706	  return;
4707	}
4708
4709      last = decode_reg_name (dash + 1);
4710      if (last < 0)
4711	{
4712	  warning ("unknown register name: %s", dash + 1);
4713	  return;
4714	}
4715
4716      *dash = '-';
4717
4718      if (first > last)
4719	{
4720	  warning ("%s-%s is an empty range", str, dash + 1);
4721	  return;
4722	}
4723
4724      for (i = first; i <= last; ++i)
4725	fixed_regs[i] = call_used_regs[i] = 1;
4726
4727      if (!comma)
4728	break;
4729
4730      *comma = ',';
4731      str = comma + 1;
4732    }
4733}
4734
4735static struct machine_function *
4736ia64_init_machine_status (void)
4737{
4738  return ggc_alloc_cleared (sizeof (struct machine_function));
4739}
4740
4741/* Handle TARGET_OPTIONS switches.  */
4742
4743void
4744ia64_override_options (void)
4745{
4746  static struct pta
4747    {
4748      const char *const name;		/* processor name or nickname.  */
4749      const enum processor_type processor;
4750    }
4751  const processor_alias_table[] =
4752    {
4753      {"itanium", PROCESSOR_ITANIUM},
4754      {"itanium1", PROCESSOR_ITANIUM},
4755      {"merced", PROCESSOR_ITANIUM},
4756      {"itanium2", PROCESSOR_ITANIUM2},
4757      {"mckinley", PROCESSOR_ITANIUM2},
4758    };
4759
4760  int const pta_size = ARRAY_SIZE (processor_alias_table);
4761  int i;
4762
4763  if (TARGET_AUTO_PIC)
4764    target_flags |= MASK_CONST_GP;
4765
4766  if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR)
4767    {
4768      warning ("cannot optimize floating point division for both latency and throughput");
4769      target_flags &= ~MASK_INLINE_FLOAT_DIV_THR;
4770    }
4771
4772  if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR)
4773    {
4774      warning ("cannot optimize integer division for both latency and throughput");
4775      target_flags &= ~MASK_INLINE_INT_DIV_THR;
4776    }
4777
4778  if (TARGET_INLINE_SQRT_LAT && TARGET_INLINE_SQRT_THR)
4779    {
4780      warning ("cannot optimize square root for both latency and throughput");
4781      target_flags &= ~MASK_INLINE_SQRT_THR;
4782    }
4783
4784  if (TARGET_INLINE_SQRT_LAT)
4785    {
4786      warning ("not yet implemented: latency-optimized inline square root");
4787      target_flags &= ~MASK_INLINE_SQRT_LAT;
4788    }
4789
4790  if (ia64_fixed_range_string)
4791    fix_range (ia64_fixed_range_string);
4792
4793  if (ia64_tls_size_string)
4794    {
4795      char *end;
4796      unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10);
4797      if (*end || (tmp != 14 && tmp != 22 && tmp != 64))
4798	error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string);
4799      else
4800	ia64_tls_size = tmp;
4801    }
4802
4803  if (!ia64_tune_string)
4804    ia64_tune_string = "itanium2";
4805
4806  for (i = 0; i < pta_size; i++)
4807    if (! strcmp (ia64_tune_string, processor_alias_table[i].name))
4808      {
4809	ia64_tune = processor_alias_table[i].processor;
4810	break;
4811      }
4812
4813  if (i == pta_size)
4814    error ("bad value (%s) for -tune= switch", ia64_tune_string);
4815
4816  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
4817  flag_schedule_insns_after_reload = 0;
4818
4819  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
4820
4821  init_machine_status = ia64_init_machine_status;
4822}
4823
4824static enum attr_itanium_class ia64_safe_itanium_class (rtx);
4825static enum attr_type ia64_safe_type (rtx);
4826
4827static enum attr_itanium_class
4828ia64_safe_itanium_class (rtx insn)
4829{
4830  if (recog_memoized (insn) >= 0)
4831    return get_attr_itanium_class (insn);
4832  else
4833    return ITANIUM_CLASS_UNKNOWN;
4834}
4835
4836static enum attr_type
4837ia64_safe_type (rtx insn)
4838{
4839  if (recog_memoized (insn) >= 0)
4840    return get_attr_type (insn);
4841  else
4842    return TYPE_UNKNOWN;
4843}
4844
4845/* The following collection of routines emits instruction group stop bits as
4846   necessary to avoid dependencies.  */
4847
4848/* Need to track some additional registers as far as serialization is
4849   concerned so we can properly handle br.call and br.ret.  We could
4850   make these registers visible to gcc, but since these registers are
4851   never explicitly used in gcc generated code, it seems wasteful to
4852   do so (plus it would make the call and return patterns needlessly
4853   complex).  */
4854#define REG_GP		(GR_REG (1))
4855#define REG_RP		(BR_REG (0))
4856#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
4857/* This is used for volatile asms which may require a stop bit immediately
4858   before and after them.  */
4859#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
4860#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
4861#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
4862
4863/* For each register, we keep track of how it has been written in the
4864   current instruction group.
4865
4866   If a register is written unconditionally (no qualifying predicate),
4867   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
4868
4869   If a register is written if its qualifying predicate P is true, we
4870   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
4871   may be written again by the complement of P (P^1) and when this happens,
4872   WRITE_COUNT gets set to 2.
4873
4874   The result of this is that whenever an insn attempts to write a register
4875   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
4876
4877   If a predicate register is written by a floating-point insn, we set
4878   WRITTEN_BY_FP to true.
4879
4880   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
4881   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
4882
4883struct reg_write_state
4884{
4885  unsigned int write_count : 2;
4886  unsigned int first_pred : 16;
4887  unsigned int written_by_fp : 1;
4888  unsigned int written_by_and : 1;
4889  unsigned int written_by_or : 1;
4890};
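
/* Worked example of the bookkeeping above (illustrative only): after

       (p6) mov r8 = 1
       (p7) mov r8 = 2

   the first write leaves r8 with WRITE_COUNT 1 and FIRST_PRED p6; the
   second uses what the code treats as the complementary predicate
   (regno ^ 1), so no stop bit is required and WRITE_COUNT becomes 2.
   Any further write to r8 in the same group would then need an insn
   group barrier.  */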
4891
4892/* Cumulative info for the current instruction group.  */
4893struct reg_write_state rws_sum[NUM_REGS];
4894/* Info for the current instruction.  This gets copied to rws_sum after a
4895   stop bit is emitted.  */
4896struct reg_write_state rws_insn[NUM_REGS];
4897
4898/* Indicates whether this is the first instruction after a stop bit,
4899   in which case we don't need another stop bit.  Without this, we hit
4900   the abort in ia64_variable_issue when scheduling an alloc.  */
4901static int first_instruction;
4902
4903/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
4904   RTL for one instruction.  */
4905struct reg_flags
4906{
4907  unsigned int is_write : 1;	/* Is register being written?  */
4908  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
4909  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
4910  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
4911  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
4912  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
4913};
4914
4915static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
4916static int rws_access_regno (int, struct reg_flags, int);
4917static int rws_access_reg (rtx, struct reg_flags, int);
4918static void update_set_flags (rtx, struct reg_flags *, int *, rtx *);
4919static int set_src_needs_barrier (rtx, struct reg_flags, int, rtx);
4920static int rtx_needs_barrier (rtx, struct reg_flags, int);
4921static void init_insn_group_barriers (void);
4922static int group_barrier_needed_p (rtx);
4923static int safe_group_barrier_needed_p (rtx);
4924
4925/* Update *RWS for REGNO, which is being written by the current instruction,
4926   with predicate PRED, and associated register flags in FLAGS.  */
4927
4928static void
4929rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
4930{
4931  if (pred)
4932    rws[regno].write_count++;
4933  else
4934    rws[regno].write_count = 2;
4935  rws[regno].written_by_fp |= flags.is_fp;
4936  /* ??? Not tracking and/or across differing predicates.  */
4937  rws[regno].written_by_and = flags.is_and;
4938  rws[regno].written_by_or = flags.is_or;
4939  rws[regno].first_pred = pred;
4940}
4941
4942/* Handle an access to register REGNO of type FLAGS using predicate register
4943   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
4944   a dependency with an earlier instruction in the same group.  */
4945
4946static int
4947rws_access_regno (int regno, struct reg_flags flags, int pred)
4948{
4949  int need_barrier = 0;
4950
4951  if (regno >= NUM_REGS)
4952    abort ();
4953
4954  if (! PR_REGNO_P (regno))
4955    flags.is_and = flags.is_or = 0;
4956
4957  if (flags.is_write)
4958    {
4959      int write_count;
4960
4961      /* One insn writes same reg multiple times?  */
4962      if (rws_insn[regno].write_count > 0)
4963	abort ();
4964
4965      /* Update info for current instruction.  */
4966      rws_update (rws_insn, regno, flags, pred);
4967      write_count = rws_sum[regno].write_count;
4968
4969      switch (write_count)
4970	{
4971	case 0:
4972	  /* The register has not been written yet.  */
4973	  rws_update (rws_sum, regno, flags, pred);
4974	  break;
4975
4976	case 1:
4977	  /* The register has been written via a predicate.  If this is
4978	     not a complementary predicate, then we need a barrier.  */
4979	  /* ??? This assumes that P and P+1 are always complementary
4980	     predicates for P even.  */
4981	  if (flags.is_and && rws_sum[regno].written_by_and)
4982	    ;
4983	  else if (flags.is_or && rws_sum[regno].written_by_or)
4984	    ;
4985	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
4986	    need_barrier = 1;
4987	  rws_update (rws_sum, regno, flags, pred);
4988	  break;
4989
4990	case 2:
4991	  /* The register has been unconditionally written already.  We
4992	     need a barrier.  */
4993	  if (flags.is_and && rws_sum[regno].written_by_and)
4994	    ;
4995	  else if (flags.is_or && rws_sum[regno].written_by_or)
4996	    ;
4997	  else
4998	    need_barrier = 1;
4999	  rws_sum[regno].written_by_and = flags.is_and;
5000	  rws_sum[regno].written_by_or = flags.is_or;
5001	  break;
5002
5003	default:
5004	  abort ();
5005	}
5006    }
5007  else
5008    {
5009      if (flags.is_branch)
5010	{
5011	  /* Branches have several RAW exceptions that allow us to avoid
5012	     barriers.  */
5013
5014	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5015	    /* RAW dependencies on branch regs are permissible as long
5016	       as the writer is a non-branch instruction.  Since we
5017	       never generate code that uses a branch register written
5018	       by a branch instruction, handling this case is
5019	       easy.  */
5020	    return 0;
5021
5022	  if (REGNO_REG_CLASS (regno) == PR_REGS
5023	      && ! rws_sum[regno].written_by_fp)
5024	    /* The predicates of a branch are available within the
5025	       same insn group as long as the predicate was written by
5026	       something other than a floating-point instruction.  */
5027	    return 0;
5028	}
5029
5030      if (flags.is_and && rws_sum[regno].written_by_and)
5031	return 0;
5032      if (flags.is_or && rws_sum[regno].written_by_or)
5033	return 0;
5034
5035      switch (rws_sum[regno].write_count)
5036	{
5037	case 0:
5038	  /* The register has not been written yet.  */
5039	  break;
5040
5041	case 1:
5042	  /* The register has been written via a predicate.  If this is
5043	     not a complementary predicate, then we need a barrier.  */
5044	  /* ??? This assumes that P and P+1 are always complementary
5045	     predicates for P even.  */
5046	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5047	    need_barrier = 1;
5048	  break;
5049
5050	case 2:
5051	  /* The register has been unconditionally written already.  We
5052	     need a barrier.  */
5053	  need_barrier = 1;
5054	  break;
5055
5056	default:
5057	  abort ();
5058	}
5059    }
5060
5061  return need_barrier;
5062}
5063
5064static int
5065rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5066{
5067  int regno = REGNO (reg);
5068  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5069
5070  if (n == 1)
5071    return rws_access_regno (regno, flags, pred);
5072  else
5073    {
5074      int need_barrier = 0;
5075      while (--n >= 0)
5076	need_barrier |= rws_access_regno (regno + n, flags, pred);
5077      return need_barrier;
5078    }
5079}
5080
5081/* Examine X, which is a SET rtx, and update the flags, the predicate, and
5082   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
5083
5084static void
5085update_set_flags (rtx x, struct reg_flags *pflags, int *ppred, rtx *pcond)
5086{
5087  rtx src = SET_SRC (x);
5088
5089  *pcond = 0;
5090
5091  switch (GET_CODE (src))
5092    {
5093    case CALL:
5094      return;
5095
5096    case IF_THEN_ELSE:
5097      if (SET_DEST (x) == pc_rtx)
5098	/* X is a conditional branch.  */
5099	return;
5100      else
5101	{
5102	  int is_complemented = 0;
5103
5104	  /* X is a conditional move.  */
5105	  rtx cond = XEXP (src, 0);
5106	  if (GET_CODE (cond) == EQ)
5107	    is_complemented = 1;
5108	  cond = XEXP (cond, 0);
5109	  if (GET_CODE (cond) != REG
5110	      || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5111	    abort ();
5112	  *pcond = cond;
5113	  if (XEXP (src, 1) == SET_DEST (x)
5114	      || XEXP (src, 2) == SET_DEST (x))
5115	    {
5116	      /* X is a conditional move that conditionally writes the
5117		 destination.  */
5118
5119	      /* We need another complement in this case.  */
5120	      if (XEXP (src, 1) == SET_DEST (x))
5121		is_complemented = ! is_complemented;
5122
5123	      *ppred = REGNO (cond);
5124	      if (is_complemented)
5125		++*ppred;
5126	    }
5127
5128	  /* ??? If this is a conditional write to the dest, then this
5129	     instruction does not actually read one source.  This probably
5130	     doesn't matter, because that source is also the dest.  */
5131	  /* ??? Multiple writes to predicate registers are allowed
5132	     if they are all AND type compares, or if they are all OR
5133	     type compares.  We do not generate such instructions
5134	     currently.  */
5135	}
5136      /* ... fall through ...  */
5137
5138    default:
5139      if (GET_RTX_CLASS (GET_CODE (src)) == '<'
5140	  && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT)
5141	/* Set pflags->is_fp to 1 so that we know we're dealing
5142	   with a floating point comparison when processing the
5143	   destination of the SET.  */
5144	pflags->is_fp = 1;
5145
5146      /* Discover if this is a parallel comparison.  We only handle
5147	 and.orcm and or.andcm at present, since we must retain a
5148	 strict inverse on the predicate pair.  */
5149      else if (GET_CODE (src) == AND)
5150	pflags->is_and = 1;
5151      else if (GET_CODE (src) == IOR)
5152	pflags->is_or = 1;
5153
5154      break;
5155    }
5156}
5157
5158/* Subroutine of rtx_needs_barrier; this function determines whether the
5159   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5160   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
5161   for this insn.  */
5162
5163static int
5164set_src_needs_barrier (rtx x, struct reg_flags flags, int pred, rtx cond)
5165{
5166  int need_barrier = 0;
5167  rtx dst;
5168  rtx src = SET_SRC (x);
5169
5170  if (GET_CODE (src) == CALL)
5171    /* We don't need to worry about the result registers that
5172       get written by subroutine call.  */
5173    return rtx_needs_barrier (src, flags, pred);
5174  else if (SET_DEST (x) == pc_rtx)
5175    {
5176      /* X is a conditional branch.  */
5177      /* ??? This seems redundant, as the caller sets this bit for
5178	 all JUMP_INSNs.  */
5179      flags.is_branch = 1;
5180      return rtx_needs_barrier (src, flags, pred);
5181    }
5182
5183  need_barrier = rtx_needs_barrier (src, flags, pred);
5184
5185  /* This instruction unconditionally uses a predicate register.  */
5186  if (cond)
5187    need_barrier |= rws_access_reg (cond, flags, 0);
5188
5189  dst = SET_DEST (x);
5190  if (GET_CODE (dst) == ZERO_EXTRACT)
5191    {
5192      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5193      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5194      dst = XEXP (dst, 0);
5195    }
5196  return need_barrier;
5197}
5198
5199/* Handle an access to rtx X of type FLAGS using predicate register
5200   PRED.  Return 1 if this access creates a dependency with an earlier
5201   instruction in the same group.  */
5202
5203static int
5204rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5205{
5206  int i, j;
5207  int is_complemented = 0;
5208  int need_barrier = 0;
5209  const char *format_ptr;
5210  struct reg_flags new_flags;
5211  rtx cond = 0;
5212
5213  if (! x)
5214    return 0;
5215
5216  new_flags = flags;
5217
5218  switch (GET_CODE (x))
5219    {
5220    case SET:
5221      update_set_flags (x, &new_flags, &pred, &cond);
5222      need_barrier = set_src_needs_barrier (x, new_flags, pred, cond);
5223      if (GET_CODE (SET_SRC (x)) != CALL)
5224	{
5225	  new_flags.is_write = 1;
5226	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5227	}
5228      break;
5229
5230    case CALL:
5231      new_flags.is_write = 0;
5232      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5233
5234      /* Avoid multiple register writes, in case this is a pattern with
5235	 multiple CALL rtx.  This avoids an abort in rws_access_reg.  */
5236      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5237	{
5238	  new_flags.is_write = 1;
5239	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5240	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5241	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5242	}
5243      break;
5244
5245    case COND_EXEC:
5246      /* X is a predicated instruction.  */
5247
5248      cond = COND_EXEC_TEST (x);
5249      if (pred)
5250	abort ();
5251      need_barrier = rtx_needs_barrier (cond, flags, 0);
5252
5253      if (GET_CODE (cond) == EQ)
5254	is_complemented = 1;
5255      cond = XEXP (cond, 0);
5256      if (GET_CODE (cond) != REG
5257	  || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS)
5258	abort ();
5259      pred = REGNO (cond);
5260      if (is_complemented)
5261	++pred;
5262
5263      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5264      return need_barrier;
5265
5266    case CLOBBER:
5267    case USE:
5268      /* Clobber & use are for earlier compiler-phases only.  */
5269      break;
5270
5271    case ASM_OPERANDS:
5272    case ASM_INPUT:
5273      /* We always emit stop bits for traditional asms.  We emit stop bits
5274	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5275      if (GET_CODE (x) != ASM_OPERANDS
5276	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5277	{
5278	  /* Avoid writing the register multiple times if we have multiple
5279	     asm outputs.  This avoids an abort in rws_access_reg.  */
5280	  if (! rws_insn[REG_VOLATILE].write_count)
5281	    {
5282	      new_flags.is_write = 1;
5283	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5284	    }
5285	  return 1;
5286	}
5287
5288      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5289	 We cannot just fall through here, since then we would be confused
5290	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
5291	 a traditional asm, unlike its normal usage.  */
5292
5293      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5294	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5295	  need_barrier = 1;
5296      break;
5297
5298    case PARALLEL:
5299      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5300	{
5301	  rtx pat = XVECEXP (x, 0, i);
5302	  if (GET_CODE (pat) == SET)
5303	    {
5304	      update_set_flags (pat, &new_flags, &pred, &cond);
5305	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
5306	    }
5307	  else if (GET_CODE (pat) == USE
5308		   || GET_CODE (pat) == CALL
5309		   || GET_CODE (pat) == ASM_OPERANDS)
5310	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5311	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
5312	    abort ();
5313	}
5314      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5315	{
5316	  rtx pat = XVECEXP (x, 0, i);
5317	  if (GET_CODE (pat) == SET)
5318	    {
5319	      if (GET_CODE (SET_SRC (pat)) != CALL)
5320		{
5321		  new_flags.is_write = 1;
5322		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5323						     pred);
5324		}
5325	    }
5326	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5327	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5328	}
5329      break;
5330
5331    case SUBREG:
5332      x = SUBREG_REG (x);
5333      /* FALLTHRU */
5334    case REG:
5335      if (REGNO (x) == AR_UNAT_REGNUM)
5336	{
5337	  for (i = 0; i < 64; ++i)
5338	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5339	}
5340      else
5341	need_barrier = rws_access_reg (x, flags, pred);
5342      break;
5343
5344    case MEM:
5345      /* Find the regs used in memory address computation.  */
5346      new_flags.is_write = 0;
5347      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5348      break;
5349
5350    case CONST_INT:   case CONST_DOUBLE:
5351    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5352      break;
5353
5354      /* Operators with side-effects.  */
5355    case POST_INC:    case POST_DEC:
5356      if (GET_CODE (XEXP (x, 0)) != REG)
5357	abort ();
5358
5359      new_flags.is_write = 0;
5360      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5361      new_flags.is_write = 1;
5362      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5363      break;
5364
5365    case POST_MODIFY:
5366      if (GET_CODE (XEXP (x, 0)) != REG)
5367	abort ();
5368
5369      new_flags.is_write = 0;
5370      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5371      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5372      new_flags.is_write = 1;
5373      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5374      break;
5375
5376      /* Handle common unary and binary ops for efficiency.  */
5377    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5378    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5379    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5380    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5381    case NE:       case EQ:      case GE:      case GT:        case LE:
5382    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5383      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5384      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5385      break;
5386
5387    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5388    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5389    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5390    case SQRT:     case FFS:		case POPCOUNT:
5391      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5392      break;
5393
5394    case UNSPEC:
5395      switch (XINT (x, 1))
5396	{
5397	case UNSPEC_LTOFF_DTPMOD:
5398	case UNSPEC_LTOFF_DTPREL:
5399	case UNSPEC_DTPREL:
5400	case UNSPEC_LTOFF_TPREL:
5401	case UNSPEC_TPREL:
5402	case UNSPEC_PRED_REL_MUTEX:
5403	case UNSPEC_PIC_CALL:
5404        case UNSPEC_MF:
5405        case UNSPEC_FETCHADD_ACQ:
5406	case UNSPEC_BSP_VALUE:
5407	case UNSPEC_FLUSHRS:
5408	case UNSPEC_BUNDLE_SELECTOR:
5409          break;
5410
5411	case UNSPEC_GR_SPILL:
5412	case UNSPEC_GR_RESTORE:
5413	  {
5414	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5415	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5416
5417	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5418	    new_flags.is_write = (XINT (x, 1) == 1);
5419	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5420					      new_flags, pred);
5421	    break;
5422	  }
5423
5424	case UNSPEC_FR_SPILL:
5425	case UNSPEC_FR_RESTORE:
5426	case UNSPEC_GETF_EXP:
5427	case UNSPEC_SETF_EXP:
5428        case UNSPEC_ADDP4:
5429	case UNSPEC_FR_SQRT_RECIP_APPROX:
5430	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5431	  break;
5432
5433	case UNSPEC_FR_RECIP_APPROX:
5434	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5435	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5436	  break;
5437
5438        case UNSPEC_CMPXCHG_ACQ:
5439	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5440	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5441	  break;
5442
5443	default:
5444	  abort ();
5445	}
5446      break;
5447
5448    case UNSPEC_VOLATILE:
5449      switch (XINT (x, 1))
5450	{
5451	case UNSPECV_ALLOC:
5452	  /* Alloc must always be the first instruction of a group.
5453	     We force this by always returning true.  */
5454	  /* ??? We might get better scheduling if we explicitly check for
5455	     input/local/output register dependencies, and modify the
5456	     scheduler so that alloc is always reordered to the start of
5457	     the current group.  We could then eliminate all of the
5458	     first_instruction code.  */
5459	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5460
5461	  new_flags.is_write = 1;
5462	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5463	  return 1;
5464
5465	case UNSPECV_SET_BSP:
5466	  need_barrier = 1;
5467          break;
5468
5469	case UNSPECV_BLOCKAGE:
5470	case UNSPECV_INSN_GROUP_BARRIER:
5471	case UNSPECV_BREAK:
5472	case UNSPECV_PSAC_ALL:
5473	case UNSPECV_PSAC_NORMAL:
5474	  return 0;
5475
5476	default:
5477	  abort ();
5478	}
5479      break;
5480
5481    case RETURN:
5482      new_flags.is_write = 0;
5483      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5484      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5485
5486      new_flags.is_write = 1;
5487      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5488      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5489      break;
5490
5491    default:
5492      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5493      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5494	switch (format_ptr[i])
5495	  {
5496	  case '0':	/* unused field */
5497	  case 'i':	/* integer */
5498	  case 'n':	/* note */
5499	  case 'w':	/* wide integer */
5500	  case 's':	/* pointer to string */
5501	  case 'S':	/* optional pointer to string */
5502	    break;
5503
5504	  case 'e':
5505	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5506	      need_barrier = 1;
5507	    break;
5508
5509	  case 'E':
5510	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5511	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5512		need_barrier = 1;
5513	    break;
5514
5515	  default:
5516	    abort ();
5517	  }
5518      break;
5519    }
5520  return need_barrier;
5521}
5522
5523/* Clear out the state for group_barrier_needed_p at the start of a
5524   sequence of insns.  */
5525
5526static void
5527init_insn_group_barriers (void)
5528{
5529  memset (rws_sum, 0, sizeof (rws_sum));
5530  first_instruction = 1;
5531}
5532
5533/* Given the current state, recorded by previous calls to this function,
5534   determine whether a group barrier (a stop bit) is necessary before INSN.
5535   Return nonzero if so.  */
5536
5537static int
5538group_barrier_needed_p (rtx insn)
5539{
5540  rtx pat;
5541  int need_barrier = 0;
5542  struct reg_flags flags;
5543
5544  memset (&flags, 0, sizeof (flags));
5545  switch (GET_CODE (insn))
5546    {
5547    case NOTE:
5548      break;
5549
5550    case BARRIER:
5551      /* A barrier doesn't imply an instruction group boundary.  */
5552      break;
5553
5554    case CODE_LABEL:
5555      memset (rws_insn, 0, sizeof (rws_insn));
5556      return 1;
5557
5558    case CALL_INSN:
5559      flags.is_branch = 1;
5560      flags.is_sibcall = SIBLING_CALL_P (insn);
5561      memset (rws_insn, 0, sizeof (rws_insn));
5562
5563      /* Don't bundle a call following another call.  */
5564      if ((pat = prev_active_insn (insn))
5565	  && GET_CODE (pat) == CALL_INSN)
5566	{
5567	  need_barrier = 1;
5568	  break;
5569	}
5570
5571      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5572      break;
5573
5574    case JUMP_INSN:
5575      flags.is_branch = 1;
5576
5577      /* Don't bundle a jump following a call.  */
5578      if ((pat = prev_active_insn (insn))
5579	  && GET_CODE (pat) == CALL_INSN)
5580	{
5581	  need_barrier = 1;
5582	  break;
5583	}
5584      /* FALLTHRU */
5585
5586    case INSN:
5587      if (GET_CODE (PATTERN (insn)) == USE
5588	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5589	/* Don't care about USE and CLOBBER "insns"---those are used to
5590	   indicate to the optimizer that it shouldn't get rid of
5591	   certain operations.  */
5592	break;
5593
5594      pat = PATTERN (insn);
5595
5596      /* Ug.  Hack hacks hacked elsewhere.  */
5597      switch (recog_memoized (insn))
5598	{
5599	  /* We play dependency tricks with the epilogue in order
5600	     to get proper schedules.  Undo this for dv analysis.  */
5601	case CODE_FOR_epilogue_deallocate_stack:
5602	case CODE_FOR_prologue_allocate_stack:
5603	  pat = XVECEXP (pat, 0, 0);
5604	  break;
5605
5606	  /* The pattern we use for br.cloop confuses the code above.
5607	     The second element of the vector is representative.  */
5608	case CODE_FOR_doloop_end_internal:
5609	  pat = XVECEXP (pat, 0, 1);
5610	  break;
5611
5612	  /* Doesn't generate code.  */
5613	case CODE_FOR_pred_rel_mutex:
5614	case CODE_FOR_prologue_use:
5615	  return 0;
5616
5617	default:
5618	  break;
5619	}
5620
5621      memset (rws_insn, 0, sizeof (rws_insn));
5622      need_barrier = rtx_needs_barrier (pat, flags, 0);
5623
5624      /* Check to see if the previous instruction was a volatile
5625	 asm.  */
5626      if (! need_barrier)
5627	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5628      break;
5629
5630    default:
5631      abort ();
5632    }
5633
5634  if (first_instruction && INSN_P (insn)
5635      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5636      && GET_CODE (PATTERN (insn)) != USE
5637      && GET_CODE (PATTERN (insn)) != CLOBBER)
5638    {
5639      need_barrier = 0;
5640      first_instruction = 0;
5641    }
5642
5643  return need_barrier;
5644}
5645
5646/* Like group_barrier_needed_p, but do not clobber the current state.  */
5647
5648static int
5649safe_group_barrier_needed_p (rtx insn)
5650{
5651  struct reg_write_state rws_saved[NUM_REGS];
5652  int saved_first_instruction;
5653  int t;
5654
5655  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5656  saved_first_instruction = first_instruction;
5657
5658  t = group_barrier_needed_p (insn);
5659
5660  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5661  first_instruction = saved_first_instruction;
5662
5663  return t;
5664}
5665
5666/* Scan the current function and insert stop bits as necessary to
5667   eliminate dependencies.  This function assumes that a final
5668   instruction scheduling pass has been run which has already
5669   inserted most of the necessary stop bits.  This function only
5670   inserts new ones at basic block boundaries, since these are
5671   invisible to the scheduler.  */
5672
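/* A rough example of the situation handled here (register numbers are for
   illustration only): if one block ends with "mov r14 = r15" and a
   successor block reached through a label starts with "ld8 r16 = [r14]",
   the scheduler never sees the two insns together, so the stop bit
   between them must be inserted by this pass, just before the label.  */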
5673static void
5674emit_insn_group_barriers (FILE *dump)
5675{
5676  rtx insn;
5677  rtx last_label = 0;
5678  int insns_since_last_label = 0;
5679
5680  init_insn_group_barriers ();
5681
5682  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5683    {
5684      if (GET_CODE (insn) == CODE_LABEL)
5685	{
5686	  if (insns_since_last_label)
5687	    last_label = insn;
5688	  insns_since_last_label = 0;
5689	}
5690      else if (GET_CODE (insn) == NOTE
5691	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
5692	{
5693	  if (insns_since_last_label)
5694	    last_label = insn;
5695	  insns_since_last_label = 0;
5696	}
5697      else if (GET_CODE (insn) == INSN
5698	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
5699	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
5700	{
5701	  init_insn_group_barriers ();
5702	  last_label = 0;
5703	}
5704      else if (INSN_P (insn))
5705	{
5706	  insns_since_last_label = 1;
5707
5708	  if (group_barrier_needed_p (insn))
5709	    {
5710	      if (last_label)
5711		{
5712		  if (dump)
5713		    fprintf (dump, "Emitting stop before label %d\n",
5714			     INSN_UID (last_label));
5715		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
5716		  insn = last_label;
5717
5718		  init_insn_group_barriers ();
5719		  last_label = 0;
5720		}
5721	    }
5722	}
5723    }
5724}
5725
5726/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
5727   This function has to emit all necessary group barriers.  */
5728
5729static void
5730emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
5731{
5732  rtx insn;
5733
5734  init_insn_group_barriers ();
5735
5736  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5737    {
5738      if (GET_CODE (insn) == BARRIER)
5739	{
5740	  rtx last = prev_active_insn (insn);
5741
5742	  if (! last)
5743	    continue;
5744	  if (GET_CODE (last) == JUMP_INSN
5745	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
5746	    last = prev_active_insn (last);
5747	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
5748	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
5749
5750	  init_insn_group_barriers ();
5751	}
5752      else if (INSN_P (insn))
5753	{
5754	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
5755	    init_insn_group_barriers ();
5756	  else if (group_barrier_needed_p (insn))
5757	    {
5758	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5759	      init_insn_group_barriers ();
5760	      group_barrier_needed_p (insn);
5761	    }
5762	}
5763    }
5764}
5765
5766
5767static int errata_find_address_regs (rtx *, void *);
5768static void errata_emit_nops (rtx);
5769static void fixup_errata (void);
5770
5771/* This structure is used to track some details about the previous insn
5772   groups so we can determine whether it may be necessary to insert NOPs
5773   to work around hardware errata.  */
5774static struct group
5775{
5776  HARD_REG_SET p_reg_set;
5777  HARD_REG_SET gr_reg_conditionally_set;
5778} last_group[2];
5779
5780/* Index into the last_group array.  */
5781static int group_idx;
5782
5783/* Called through for_each_rtx; determines if a hard register that was
5784   conditionally set in the previous group is used as an address register.
5785   It ensures that for_each_rtx returns 1 in that case.  */
5786static int
5787errata_find_address_regs (rtx *xp, void *data ATTRIBUTE_UNUSED)
5788{
5789  rtx x = *xp;
5790  if (GET_CODE (x) != MEM)
5791    return 0;
5792  x = XEXP (x, 0);
5793  if (GET_CODE (x) == POST_MODIFY)
5794    x = XEXP (x, 0);
5795  if (GET_CODE (x) == REG)
5796    {
5797      struct group *prev_group = last_group + (group_idx ^ 1);
5798      if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set,
5799			     REGNO (x)))
5800	return 1;
5801      return -1;
5802    }
5803  return 0;
5804}
5805
5806/* Called for each insn; this function keeps track of the state in
5807   last_group and emits additional NOPs if necessary to work around
5808   an Itanium A/B step erratum.  */
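/* Sketch of the pattern being avoided (assumed registers, for illustration
   only): a predicated general-register write such as "(p6) mov r14 = r15"
   in one instruction group, followed in the next group by a memory access
   that uses r14 as its address, e.g. "ld8 r16 = [r14]".  When such a case
   is detected below, a stop bit, a nop and another stop bit are emitted
   in front of the offending memory insn.  */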
5809static void
5810errata_emit_nops (rtx insn)
5811{
5812  struct group *this_group = last_group + group_idx;
5813  struct group *prev_group = last_group + (group_idx ^ 1);
5814  rtx pat = PATTERN (insn);
5815  rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0;
5816  rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat;
5817  enum attr_type type;
5818  rtx set = real_pat;
5819
5820  if (GET_CODE (real_pat) == USE
5821      || GET_CODE (real_pat) == CLOBBER
5822      || GET_CODE (real_pat) == ASM_INPUT
5823      || GET_CODE (real_pat) == ADDR_VEC
5824      || GET_CODE (real_pat) == ADDR_DIFF_VEC
5825      || asm_noperands (PATTERN (insn)) >= 0)
5826    return;
5827
5828  /* single_set doesn't work for COND_EXEC insns, so we have to duplicate
5829     parts of it.  */
5830
5831  if (GET_CODE (set) == PARALLEL)
5832    {
5833      int i;
5834      set = XVECEXP (real_pat, 0, 0);
5835      for (i = 1; i < XVECLEN (real_pat, 0); i++)
5836	if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE
5837	    && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER)
5838	  {
5839	    set = 0;
5840	    break;
5841	  }
5842    }
5843
5844  if (set && GET_CODE (set) != SET)
5845    set = 0;
5846
5847  type  = get_attr_type (insn);
5848
5849  if (type == TYPE_F
5850      && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set))))
5851    SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set)));
5852
5853  if ((type == TYPE_M || type == TYPE_A) && cond && set
5854      && REG_P (SET_DEST (set))
5855      && GET_CODE (SET_SRC (set)) != PLUS
5856      && GET_CODE (SET_SRC (set)) != MINUS
5857      && (GET_CODE (SET_SRC (set)) != ASHIFT
5858	  || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode))
5859      && (GET_CODE (SET_SRC (set)) != MEM
5860	  || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY)
5861      && GENERAL_REGNO_P (REGNO (SET_DEST (set))))
5862    {
5863      if (GET_RTX_CLASS (GET_CODE (cond)) != '<'
5864	  || ! REG_P (XEXP (cond, 0)))
5865	abort ();
5866
5867      if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0))))
5868	SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set)));
5869    }
5870  if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
5871    {
5872      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5873      emit_insn_before (gen_nop (), insn);
5874      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
5875      group_idx = 0;
5876      memset (last_group, 0, sizeof last_group);
5877    }
5878}
5879
5880/* Emit extra nops if they are required to work around hardware errata.  */
5881
5882static void
5883fixup_errata (void)
5884{
5885  rtx insn;
5886
5887  if (! TARGET_B_STEP)
5888    return;
5889
5890  group_idx = 0;
5891  memset (last_group, 0, sizeof last_group);
5892
5893  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5894    {
5895      if (!INSN_P (insn))
5896	continue;
5897
5898      if (ia64_safe_type (insn) == TYPE_S)
5899	{
5900	  group_idx ^= 1;
5901	  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
5902	}
5903      else
5904	errata_emit_nops (insn);
5905    }
5906}
5907
5908
5909/* Instruction scheduling support.  */
5910
5911#define NR_BUNDLES 10
5912
5913/* A list of names of all available bundles.  */
5914
5915static const char *bundle_name [NR_BUNDLES] =
5916{
5917  ".mii",
5918  ".mmi",
5919  ".mfi",
5920  ".mmf",
5921#if NR_BUNDLES == 10
5922  ".bbb",
5923  ".mbb",
5924#endif
5925  ".mib",
5926  ".mmb",
5927  ".mfb",
5928  ".mlx"
5929};
5930
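/* For illustration only (not real compiler output): each name above is
   emitted as a template pseudo-op in the generated assembly, e.g. an .mfi
   bundle might look like

	.mfi
	ld8 r14 = [r32]
	fma.s f6 = f7, f8, f9
	nop.i 0

   where the unused I slot is filled with a nop, which is exactly the kind
   of padding the bundling code below has to insert.  */
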
5931/* Nonzero if we should insert stop bits into the schedule.  */
5932
5933int ia64_final_schedule = 0;
5934
5935/* Codes of the corresponding queried units:  */
5936
5937static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
5938static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
5939
5940static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
5941static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
5942
5943static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
5944
5945/* The following variable value is an insn group barrier.  */
5946
5947static rtx dfa_stop_insn;
5948
5949/* The following variable value is the last issued insn.  */
5950
5951static rtx last_scheduled_insn;
5952
5953/* The following variable value is size of the DFA state.  */
5954
5955static size_t dfa_state_size;
5956
5957/* The following variable value is pointer to a DFA state used as
5958   temporary variable.  */
5959
5960static state_t temp_dfa_state = NULL;
5961
5962/* The following variable value is DFA state after issuing the last
5963   insn.  */
5964
5965static state_t prev_cycle_state = NULL;
5966
5967/* The following array element values are TRUE if the corresponding
5968   insn requires stop bits to be added before it.  */
5969
5970static char *stops_p;
5971
5972/* The following variable is used to set up the above-mentioned array.  */
5973
5974static int stop_before_p = 0;
5975
5976/* The following variable value is the length of the arrays `clocks'
5977   and `add_cycles'.  */
5978
5979static int clocks_length;
5980
5981/* The following array element values are cycles on which the
5982   corresponding insn will be issued.  The array is used only for
5983   Itanium1.  */
5984
5985static int *clocks;
5986
5987/* The following array element values are the numbers of cycles that
5988   should be added to improve insn scheduling of MM_insns for Itanium1.  */
5989
5990static int *add_cycles;
5991
5992static rtx ia64_single_set (rtx);
5993static void ia64_emit_insn_before (rtx, rtx);
5994
5995/* Map a bundle number to its pseudo-op.  */
5996
5997const char *
5998get_bundle_name (int b)
5999{
6000  return bundle_name[b];
6001}
6002
6003
6004/* Return the maximum number of instructions a cpu can issue.  */
6005
6006static int
6007ia64_issue_rate (void)
6008{
6009  return 6;
6010}
6011
6012/* Helper function - like single_set, but look inside COND_EXEC.  */
6013
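/* Schematically (register numbers and modes elided), a predicated insn
   pattern looks like

     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg:DI r14) (reg:DI r15)))

   and the helper below digs the inner SET out of such a wrapper.  */
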
6014static rtx
6015ia64_single_set (rtx insn)
6016{
6017  rtx x = PATTERN (insn), ret;
6018  if (GET_CODE (x) == COND_EXEC)
6019    x = COND_EXEC_CODE (x);
6020  if (GET_CODE (x) == SET)
6021    return x;
6022
6023  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6024     Although they are not a classical single set, the second set is there
6025     just to protect the insn from moving past FP-relative stack accesses.  */
6026  switch (recog_memoized (insn))
6027    {
6028    case CODE_FOR_prologue_allocate_stack:
6029    case CODE_FOR_epilogue_deallocate_stack:
6030      ret = XVECEXP (x, 0, 0);
6031      break;
6032
6033    default:
6034      ret = single_set_2 (insn, x);
6035      break;
6036    }
6037
6038  return ret;
6039}
6040
6041/* Adjust the cost of a scheduling dependency.  Return the new cost of
6042   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
6043
6044static int
6045ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6046{
6047  enum attr_itanium_class dep_class;
6048  enum attr_itanium_class insn_class;
6049
6050  if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6051    return cost;
6052
6053  insn_class = ia64_safe_itanium_class (insn);
6054  dep_class = ia64_safe_itanium_class (dep_insn);
6055  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6056      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6057    return 0;
6058
6059  return cost;
6060}
6061
6062/* Like emit_insn_before, but skip cycle_display notes.
6063   ??? When cycle display notes are implemented, update this.  */
6064
6065static void
6066ia64_emit_insn_before (rtx insn, rtx before)
6067{
6068  emit_insn_before (insn, before);
6069}
6070
6071/* The following function marks insns that produce addresses for load
6072   and store insns.  Such insns will be placed into M slots because that
6073   decreases latency for Itanium1 (see function
6074   `ia64_produce_address_p' and the DFA descriptions).  */
6075
6076static void
6077ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6078{
6079  rtx insn, link, next, next_tail;
6080
6081  next_tail = NEXT_INSN (tail);
6082  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6083    if (INSN_P (insn))
6084      insn->call = 0;
6085  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6086    if (INSN_P (insn)
6087	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6088      {
6089	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6090	  {
6091	    next = XEXP (link, 0);
6092	    if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_ST
6093		 || ia64_safe_itanium_class (next) == ITANIUM_CLASS_STF)
6094		&& ia64_st_address_bypass_p (insn, next))
6095	      break;
6096	    else if ((ia64_safe_itanium_class (next) == ITANIUM_CLASS_LD
6097		      || ia64_safe_itanium_class (next)
6098		      == ITANIUM_CLASS_FLD)
6099		     && ia64_ld_address_bypass_p (insn, next))
6100	      break;
6101	  }
6102	insn->call = link != 0;
6103      }
6104}
6105
6106/* We're beginning a new block.  Initialize data structures as necessary.  */
6107
6108static void
6109ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6110		 int sched_verbose ATTRIBUTE_UNUSED,
6111		 int max_ready ATTRIBUTE_UNUSED)
6112{
6113#ifdef ENABLE_CHECKING
6114  rtx insn;
6115
6116  if (reload_completed)
6117    for (insn = NEXT_INSN (current_sched_info->prev_head);
6118	 insn != current_sched_info->next_tail;
6119	 insn = NEXT_INSN (insn))
6120      if (SCHED_GROUP_P (insn))
6121	abort ();
6122#endif
6123  last_scheduled_insn = NULL_RTX;
6124  init_insn_group_barriers ();
6125}
6126
6127/* We are about to begin issuing insns for this clock cycle.
6128   Override the default sort algorithm to better slot instructions.  */
6129
6130static int
6131ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6132			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6133			int reorder_type)
6134{
6135  int n_asms;
6136  int n_ready = *pn_ready;
6137  rtx *e_ready = ready + n_ready;
6138  rtx *insnp;
6139
6140  if (sched_verbose)
6141    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6142
6143  if (reorder_type == 0)
6144    {
6145      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6146      n_asms = 0;
6147      for (insnp = ready; insnp < e_ready; insnp++)
6148	if (insnp < e_ready)
6149	  {
6150	    rtx insn = *insnp;
6151	    enum attr_type t = ia64_safe_type (insn);
6152	    if (t == TYPE_UNKNOWN)
6153	      {
6154		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6155		    || asm_noperands (PATTERN (insn)) >= 0)
6156		  {
6157		    rtx lowest = ready[n_asms];
6158		    ready[n_asms] = insn;
6159		    *insnp = lowest;
6160		    n_asms++;
6161		  }
6162		else
6163		  {
6164		    rtx highest = ready[n_ready - 1];
6165		    ready[n_ready - 1] = insn;
6166		    *insnp = highest;
6167		    return 1;
6168		  }
6169	      }
6170	  }
6171
6172      if (n_asms < n_ready)
6173	{
6174	  /* Some normal insns to process.  Skip the asms.  */
6175	  ready += n_asms;
6176	  n_ready -= n_asms;
6177	}
6178      else if (n_ready > 0)
6179	return 1;
6180    }
6181
6182  if (ia64_final_schedule)
6183    {
6184      int deleted = 0;
6185      int nr_need_stop = 0;
6186
6187      for (insnp = ready; insnp < e_ready; insnp++)
6188	if (safe_group_barrier_needed_p (*insnp))
6189	  nr_need_stop++;
6190
6191      if (reorder_type == 1 && n_ready == nr_need_stop)
6192	return 0;
6193      if (reorder_type == 0)
6194	return 1;
6195      insnp = e_ready;
6196      /* Move down everything that needs a stop bit, preserving
6197	 relative order.  */
6198      while (insnp-- > ready + deleted)
6199	while (insnp >= ready + deleted)
6200	  {
6201	    rtx insn = *insnp;
6202	    if (! safe_group_barrier_needed_p (insn))
6203	      break;
6204	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6205	    *ready = insn;
6206	    deleted++;
6207	  }
6208      n_ready -= deleted;
6209      ready += deleted;
6210    }
6211
6212  return 1;
6213}
6214
6215/* We are about to begin issuing insns for this clock cycle.  Override
6216   the default sort algorithm to better slot instructions.  */
6217
6218static int
6219ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6220		    int clock_var)
6221{
6222  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6223				 pn_ready, clock_var, 0);
6224}
6225
6226/* Like ia64_sched_reorder, but called after issuing each insn.
6227   Override the default sort algorithm to better slot instructions.  */
6228
6229static int
6230ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6231		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6232		     int *pn_ready, int clock_var)
6233{
6234  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6235    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6236  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6237				 clock_var, 1);
6238}
6239
6240/* We are about to issue INSN.  Return the number of insns left on the
6241   ready queue that can be issued this cycle.  */
6242
6243static int
6244ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6245		     int sched_verbose ATTRIBUTE_UNUSED,
6246		     rtx insn ATTRIBUTE_UNUSED,
6247		     int can_issue_more ATTRIBUTE_UNUSED)
6248{
6249  last_scheduled_insn = insn;
6250  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6251  if (reload_completed)
6252    {
6253      if (group_barrier_needed_p (insn))
6254	abort ();
6255      if (GET_CODE (insn) == CALL_INSN)
6256	init_insn_group_barriers ();
6257      stops_p [INSN_UID (insn)] = stop_before_p;
6258      stop_before_p = 0;
6259    }
6260  return 1;
6261}
6262
6263/* We are choosing an insn from the ready queue.  Return nonzero if INSN
6264   can be chosen.  */
6265
6266static int
6267ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6268{
6269  if (insn == NULL_RTX || !INSN_P (insn))
6270    abort ();
6271  return (!reload_completed
6272	  || !safe_group_barrier_needed_p (insn));
6273}
6274
6275/* The following variable value is a pseudo-insn used by the DFA insn
6276   scheduler to change the DFA state when the simulated clock is
6277   increased.  */
6278
6279static rtx dfa_pre_cycle_insn;
6280
6281/* We are about to begin issuing INSN.  Return nonzero if we cannot
6282   issue it on the given cycle CLOCK; clear *SORT_P if we should not
6283   sort the ready queue on the next clock start.  */
6284
6285static int
6286ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6287		    int clock, int *sort_p)
6288{
6289  int setup_clocks_p = FALSE;
6290
6291  if (insn == NULL_RTX || !INSN_P (insn))
6292    abort ();
6293  if ((reload_completed && safe_group_barrier_needed_p (insn))
6294      || (last_scheduled_insn
6295	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6296	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6297	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6298    {
6299      init_insn_group_barriers ();
6300      if (verbose && dump)
6301	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6302		 last_clock == clock ? " + cycle advance" : "");
6303      stop_before_p = 1;
6304      if (last_clock == clock)
6305	{
6306	  state_transition (curr_state, dfa_stop_insn);
6307	  if (TARGET_EARLY_STOP_BITS)
6308	    *sort_p = (last_scheduled_insn == NULL_RTX
6309		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6310	  else
6311	    *sort_p = 0;
6312	  return 1;
6313	}
6314      else if (reload_completed)
6315	setup_clocks_p = TRUE;
6316      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6317	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6318	state_reset (curr_state);
6319      else
6320	{
6321	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6322	  state_transition (curr_state, dfa_stop_insn);
6323	  state_transition (curr_state, dfa_pre_cycle_insn);
6324	  state_transition (curr_state, NULL);
6325	}
6326    }
6327  else if (reload_completed)
6328    setup_clocks_p = TRUE;
6329  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6330      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6331      && asm_noperands (PATTERN (insn)) < 0)
6332    {
6333      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6334
6335      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6336	{
6337	  rtx link;
6338	  int d = -1;
6339
6340	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6341	    if (REG_NOTE_KIND (link) == 0)
6342	      {
6343		enum attr_itanium_class dep_class;
6344		rtx dep_insn = XEXP (link, 0);
6345
6346		dep_class = ia64_safe_itanium_class (dep_insn);
6347		if ((dep_class == ITANIUM_CLASS_MMMUL
6348		     || dep_class == ITANIUM_CLASS_MMSHF)
6349		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6350		    && (d < 0
6351			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6352		  d = last_clock - clocks [INSN_UID (dep_insn)];
6353	      }
6354	  if (d >= 0)
6355	    add_cycles [INSN_UID (insn)] = 3 - d;
6356	}
6357    }
6358  return 0;
6359}
6360
6361
6362
6363/* The following page contains the abstract data `bundle states', which
6364   are used for bundling insns (inserting nops and generating templates).  */
6365
6366/* The following describes state of insn bundling.  */
6367
6368struct bundle_state
6369{
6370  /* Unique bundle state number, used to identify the state in the
6371     debugging output.  */
6372  int unique_num;
6373  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
6374  /* number of nops before and after the insn  */
6375  short before_nops_num, after_nops_num;
6376  int insn_num; /* insn number (0 - for the initial state, 1 - for the
6377                   1st insn) */
6378  int cost;     /* cost of the state in cycles */
6379  int accumulated_insns_num; /* number of all previous insns including
6380				nops.  An L insn is counted as 2 insns */
6381  int branch_deviation; /* deviation of previous branches from 3rd slots  */
6382  struct bundle_state *next;  /* next state with the same insn_num  */
6383  struct bundle_state *originator; /* originator (previous insn state)  */
6384  /* All bundle states are in the following chain.  */
6385  struct bundle_state *allocated_states_chain;
6386  /* The DFA State after issuing the insn and the nops.  */
6387  state_t dfa_state;
6388};
6389
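/* An illustrative comparison (numbers invented): when two states reach the
   same key (identical DFA state and insn_num), insert_bundle_state below
   keeps the one with the smaller cost; on equal cost, the one with fewer
   accumulated insns (i.e. fewer inserted nops); and if those are equal
   too, the one with the smaller branch deviation.  So a state with cost 2
   and 10 accumulated insns beats one with cost 2 and 12.  */
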
6390/* The following maps an insn number to the corresponding bundle state.  */
6391
6392static struct bundle_state **index_to_bundle_states;
6393
6394/* The unique number of the next bundle state.  */
6395
6396static int bundle_states_num;
6397
6398/* All allocated bundle states are in the following chain.  */
6399
6400static struct bundle_state *allocated_bundle_states_chain;
6401
6402/* All allocated but not used bundle states are in the following
6403   chain.  */
6404
6405static struct bundle_state *free_bundle_state_chain;
6406
6407
6408/* The following function returns a free bundle state.  */
6409
6410static struct bundle_state *
6411get_free_bundle_state (void)
6412{
6413  struct bundle_state *result;
6414
6415  if (free_bundle_state_chain != NULL)
6416    {
6417      result = free_bundle_state_chain;
6418      free_bundle_state_chain = result->next;
6419    }
6420  else
6421    {
6422      result = xmalloc (sizeof (struct bundle_state));
6423      result->dfa_state = xmalloc (dfa_state_size);
6424      result->allocated_states_chain = allocated_bundle_states_chain;
6425      allocated_bundle_states_chain = result;
6426    }
6427  result->unique_num = bundle_states_num++;
6428  return result;
6429
6430}
6431
6432/* The following function frees the given bundle state.  */
6433
6434static void
6435free_bundle_state (struct bundle_state *state)
6436{
6437  state->next = free_bundle_state_chain;
6438  free_bundle_state_chain = state;
6439}
6440
6441/* Start work with abstract data `bundle states'.  */
6442
6443static void
6444initiate_bundle_states (void)
6445{
6446  bundle_states_num = 0;
6447  free_bundle_state_chain = NULL;
6448  allocated_bundle_states_chain = NULL;
6449}
6450
6451/* Finish work with abstract data `bundle states'.  */
6452
6453static void
6454finish_bundle_states (void)
6455{
6456  struct bundle_state *curr_state, *next_state;
6457
6458  for (curr_state = allocated_bundle_states_chain;
6459       curr_state != NULL;
6460       curr_state = next_state)
6461    {
6462      next_state = curr_state->allocated_states_chain;
6463      free (curr_state->dfa_state);
6464      free (curr_state);
6465    }
6466}
6467
6468/* Hash table of the bundle states.  The key is dfa_state and insn_num
6469   of the bundle states.  */
6470
6471static htab_t bundle_state_table;
6472
6473/* The function returns hash of BUNDLE_STATE.  */
6474
6475static unsigned
6476bundle_state_hash (const void *bundle_state)
6477{
6478  const struct bundle_state *state = (struct bundle_state *) bundle_state;
6479  unsigned result, i;
6480
6481  for (result = i = 0; i < dfa_state_size; i++)
6482    result += (((unsigned char *) state->dfa_state) [i]
6483	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6484  return result + state->insn_num;
6485}
6486
6487/* The function returns nonzero if the bundle state keys are equal.  */
6488
6489static int
6490bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6491{
6492  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6493  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6494
6495  return (state1->insn_num == state2->insn_num
6496	  && memcmp (state1->dfa_state, state2->dfa_state,
6497		     dfa_state_size) == 0);
6498}
6499
6500/* The function inserts the BUNDLE_STATE into the hash table.  The
6501   function returns nonzero if the bundle has been inserted into the
6502   table.  The table contains the best bundle state with given key.  */
6503
6504static int
6505insert_bundle_state (struct bundle_state *bundle_state)
6506{
6507  void **entry_ptr;
6508
6509  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6510  if (*entry_ptr == NULL)
6511    {
6512      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6513      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6514      *entry_ptr = (void *) bundle_state;
6515      return TRUE;
6516    }
6517  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6518	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6519	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6520		   > bundle_state->accumulated_insns_num
6521		   || (((struct bundle_state *)
6522			*entry_ptr)->accumulated_insns_num
6523		       == bundle_state->accumulated_insns_num
6524		       && ((struct bundle_state *)
6525			   *entry_ptr)->branch_deviation
6526		       > bundle_state->branch_deviation))))
6527
6528    {
6529      struct bundle_state temp;
6530
6531      temp = *(struct bundle_state *) *entry_ptr;
6532      *(struct bundle_state *) *entry_ptr = *bundle_state;
6533      ((struct bundle_state *) *entry_ptr)->next = temp.next;
6534      *bundle_state = temp;
6535    }
6536  return FALSE;
6537}
6538
6539/* Start work with the hash table.  */
6540
6541static void
6542initiate_bundle_state_table (void)
6543{
6544  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6545				    (htab_del) 0);
6546}
6547
6548/* Finish work with the hash table.  */
6549
6550static void
6551finish_bundle_state_table (void)
6552{
6553  htab_delete (bundle_state_table);
6554}
6555
6556
6557
6558/* The following variable is an insn `nop' used to check bundle states
6559   with different numbers of inserted nops.  */
6560
6561static rtx ia64_nop;
6562
6563/* The following function tries to issue NOPS_NUM nops for the current
6564   state without advancing the processor cycle.  If it fails, the
6565   function returns FALSE and frees the current state.  */
6566
6567static int
6568try_issue_nops (struct bundle_state *curr_state, int nops_num)
6569{
6570  int i;
6571
6572  for (i = 0; i < nops_num; i++)
6573    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6574      {
6575	free_bundle_state (curr_state);
6576	return FALSE;
6577      }
6578  return TRUE;
6579}
6580
6581/* The following function tries to issue INSN for the current
6582   state without advancing the processor cycle.  If it fails, the
6583   function returns FALSE and frees the current state.  */
6584
6585static int
6586try_issue_insn (struct bundle_state *curr_state, rtx insn)
6587{
6588  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6589    {
6590      free_bundle_state (curr_state);
6591      return FALSE;
6592    }
6593  return TRUE;
6594}
6595
6596/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
6597   starting with ORIGINATOR without advancing the processor cycle.  If
6598   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
6599   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
6600   If it is successful, the function creates a new bundle state and
6601   inserts it into the hash table and into `index_to_bundle_states'.  */
6602
6603static void
6604issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6605		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6606{
6607  struct bundle_state *curr_state;
6608
6609  curr_state = get_free_bundle_state ();
6610  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6611  curr_state->insn = insn;
6612  curr_state->insn_num = originator->insn_num + 1;
6613  curr_state->cost = originator->cost;
6614  curr_state->originator = originator;
6615  curr_state->before_nops_num = before_nops_num;
6616  curr_state->after_nops_num = 0;
6617  curr_state->accumulated_insns_num
6618    = originator->accumulated_insns_num + before_nops_num;
6619  curr_state->branch_deviation = originator->branch_deviation;
6620  if (insn == NULL_RTX)
6621    abort ();
6622  else if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6623    {
6624      if (GET_MODE (insn) == TImode)
6625	abort ();
6626      if (!try_issue_nops (curr_state, before_nops_num))
6627	return;
6628      if (!try_issue_insn (curr_state, insn))
6629	return;
6630      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6631      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6632	  && curr_state->accumulated_insns_num % 3 != 0)
6633	{
6634	  free_bundle_state (curr_state);
6635	  return;
6636	}
6637    }
6638  else if (GET_MODE (insn) != TImode)
6639    {
6640      if (!try_issue_nops (curr_state, before_nops_num))
6641	return;
6642      if (!try_issue_insn (curr_state, insn))
6643	return;
6644      curr_state->accumulated_insns_num++;
6645      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6646	  || asm_noperands (PATTERN (insn)) >= 0)
6647	abort ();
6648      if (ia64_safe_type (insn) == TYPE_L)
6649	curr_state->accumulated_insns_num++;
6650    }
6651  else
6652    {
6653      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6654      state_transition (curr_state->dfa_state, NULL);
6655      curr_state->cost++;
6656      if (!try_issue_nops (curr_state, before_nops_num))
6657	return;
6658      if (!try_issue_insn (curr_state, insn))
6659	return;
6660      curr_state->accumulated_insns_num++;
6661      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6662	  || asm_noperands (PATTERN (insn)) >= 0)
6663	{
6664	  /* Finish bundle containing asm insn.  */
6665	  curr_state->after_nops_num
6666	    = 3 - curr_state->accumulated_insns_num % 3;
6667	  curr_state->accumulated_insns_num
6668	    += 3 - curr_state->accumulated_insns_num % 3;
6669	}
6670      else if (ia64_safe_type (insn) == TYPE_L)
6671	curr_state->accumulated_insns_num++;
6672    }
6673  if (ia64_safe_type (insn) == TYPE_B)
6674    curr_state->branch_deviation
6675      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6676  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6677    {
6678      if (!only_bundle_end_p && insert_bundle_state (curr_state))
6679	{
6680	  state_t dfa_state;
6681	  struct bundle_state *curr_state1;
6682	  struct bundle_state *allocated_states_chain;
6683
6684	  curr_state1 = get_free_bundle_state ();
6685	  dfa_state = curr_state1->dfa_state;
6686	  allocated_states_chain = curr_state1->allocated_states_chain;
6687	  *curr_state1 = *curr_state;
6688	  curr_state1->dfa_state = dfa_state;
6689	  curr_state1->allocated_states_chain = allocated_states_chain;
6690	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6691		  dfa_state_size);
6692	  curr_state = curr_state1;
6693	}
6694      if (!try_issue_nops (curr_state,
6695			   3 - curr_state->accumulated_insns_num % 3))
6696	return;
6697      curr_state->after_nops_num
6698	= 3 - curr_state->accumulated_insns_num % 3;
6699      curr_state->accumulated_insns_num
6700	+= 3 - curr_state->accumulated_insns_num % 3;
6701    }
6702  if (!insert_bundle_state (curr_state))
6703    free_bundle_state (curr_state);
6704  return;
6705}
6706
6707/* The following function returns the position in the two-bundle issue
6708   window for the given STATE.  */
6709
6710static int
6711get_max_pos (state_t state)
6712{
6713  if (cpu_unit_reservation_p (state, pos_6))
6714    return 6;
6715  else if (cpu_unit_reservation_p (state, pos_5))
6716    return 5;
6717  else if (cpu_unit_reservation_p (state, pos_4))
6718    return 4;
6719  else if (cpu_unit_reservation_p (state, pos_3))
6720    return 3;
6721  else if (cpu_unit_reservation_p (state, pos_2))
6722    return 2;
6723  else if (cpu_unit_reservation_p (state, pos_1))
6724    return 1;
6725  else
6726    return 0;
6727}
6728
6729/* The function returns the code of a possible template for the given
6730   position and state.  The function should be called only with the two
6731   position values 3 or 6.  */
6732
6733static int
6734get_template (state_t state, int pos)
6735{
6736  switch (pos)
6737    {
6738    case 3:
6739      if (cpu_unit_reservation_p (state, _0mii_))
6740	return 0;
6741      else if (cpu_unit_reservation_p (state, _0mmi_))
6742	return 1;
6743      else if (cpu_unit_reservation_p (state, _0mfi_))
6744	return 2;
6745      else if (cpu_unit_reservation_p (state, _0mmf_))
6746	return 3;
6747      else if (cpu_unit_reservation_p (state, _0bbb_))
6748	return 4;
6749      else if (cpu_unit_reservation_p (state, _0mbb_))
6750	return 5;
6751      else if (cpu_unit_reservation_p (state, _0mib_))
6752	return 6;
6753      else if (cpu_unit_reservation_p (state, _0mmb_))
6754	return 7;
6755      else if (cpu_unit_reservation_p (state, _0mfb_))
6756	return 8;
6757      else if (cpu_unit_reservation_p (state, _0mlx_))
6758	return 9;
6759      else
6760	abort ();
6761    case 6:
6762      if (cpu_unit_reservation_p (state, _1mii_))
6763	return 0;
6764      else if (cpu_unit_reservation_p (state, _1mmi_))
6765	return 1;
6766      else if (cpu_unit_reservation_p (state, _1mfi_))
6767	return 2;
6768      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6769	return 3;
6770      else if (cpu_unit_reservation_p (state, _1bbb_))
6771	return 4;
6772      else if (cpu_unit_reservation_p (state, _1mbb_))
6773	return 5;
6774      else if (cpu_unit_reservation_p (state, _1mib_))
6775	return 6;
6776      else if (cpu_unit_reservation_p (state, _1mmb_))
6777	return 7;
6778      else if (cpu_unit_reservation_p (state, _1mfb_))
6779	return 8;
6780      else if (cpu_unit_reservation_p (state, _1mlx_))
6781	return 9;
6782      else
6783	abort ();
6784    default:
6785      abort ();
6786    }
6787}
6788
6789/* The following function returns the next insn important for insn
6790   bundling, starting at INSN and before TAIL.  */
6791
6792static rtx
6793get_next_important_insn (rtx insn, rtx tail)
6794{
6795  for (; insn && insn != tail; insn = NEXT_INSN (insn))
6796    if (INSN_P (insn)
6797	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6798	&& GET_CODE (PATTERN (insn)) != USE
6799	&& GET_CODE (PATTERN (insn)) != CLOBBER)
6800      return insn;
6801  return NULL_RTX;
6802}
6803
6804/* The following function does insn bundling.  Bundling means
6805   inserting templates and nop insns to fit insn groups into permitted
6806   templates.  Instruction scheduling uses an NDFA (non-deterministic
6807   finite automaton) encoding information about the templates and the
6808   inserted nops.  The nondeterminism of the automaton permits
6809   following all possible insn sequences very quickly.
6810
6811   Unfortunately it is not possible to get information about inserting
6812   nop insns and used templates from the automaton states.  The
6813   automaton only says that we can issue an insn, possibly inserting
6814   some nops before it and using some template.  Therefore insn
6815   bundling in this function is implemented by using a DFA
6816   (deterministic finite automaton).  We follow all possible insn
6817   sequences by inserting 0-2 nops (that is what the NDFA describes for
6818   insn scheduling) before/after each insn being bundled.  We know the
6819   start of a simulated processor cycle from insn scheduling (an insn
6820   starting a new cycle has TImode).
6821
6822   A simple implementation of insn bundling would create an enormous
6823   number of possible insn sequences satisfying the information about
6824   new cycle ticks taken from the insn scheduling.  To make the algorithm
6825   practical we use dynamic programming.  Each decision (about
6826   inserting nops and implicitly about previous decisions) is described
6827   by structure bundle_state (see above).  If we generate the same
6828   bundle state (the key is the automaton state after issuing the insns
6829   and nops for it), we reuse the already generated one.  As a consequence
6830   we reject some decisions which cannot improve the solution and
6831   reduce the memory used by the algorithm.
6832
6833   When we reach the end of the EBB (extended basic block), we choose
6834   the best sequence and then, moving back through the EBB, insert
6835   templates for the best alternative.  The templates are obtained by
6836   querying the automaton state for each insn in the chosen bundle states.
6837
6838   So the algorithm makes two (forward and backward) passes through the
6839   EBB.  There is an additional forward pass through the EBB for the
6840   Itanium1 processor.  This pass inserts more nops to make the
6841   dependency between a producer insn and MMMUL/MMSHF at least 4 cycles
6842   long.  */
6843static void
6844bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
6845{
6846  struct bundle_state *curr_state, *next_state, *best_state;
6847  rtx insn, next_insn;
6848  int insn_num;
6849  int i, bundle_end_p, only_bundle_end_p, asm_p;
6850  int pos = 0, max_pos, template0, template1;
6851  rtx b;
6852  rtx nop;
6853  enum attr_type type;
6854
6855  insn_num = 0;
6856  /* Count insns in the EBB.  */
6857  for (insn = NEXT_INSN (prev_head_insn);
6858       insn && insn != tail;
6859       insn = NEXT_INSN (insn))
6860    if (INSN_P (insn))
6861      insn_num++;
6862  if (insn_num == 0)
6863    return;
6864  bundling_p = 1;
6865  dfa_clean_insn_cache ();
6866  initiate_bundle_state_table ();
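  /* index_to_bundle_states [i] will hold the chain of bundle states
     reachable after issuing the first I important insns of the EBB
     (index 0 is the initial, empty state).  */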
6867  index_to_bundle_states = xmalloc ((insn_num + 2)
6868				    * sizeof (struct bundle_state *));
6869  /* First (forward) pass -- generation of bundle states. */
6870  curr_state = get_free_bundle_state ();
6871  curr_state->insn = NULL;
6872  curr_state->before_nops_num = 0;
6873  curr_state->after_nops_num = 0;
6874  curr_state->insn_num = 0;
6875  curr_state->cost = 0;
6876  curr_state->accumulated_insns_num = 0;
6877  curr_state->branch_deviation = 0;
6878  curr_state->next = NULL;
6879  curr_state->originator = NULL;
6880  state_reset (curr_state->dfa_state);
6881  index_to_bundle_states [0] = curr_state;
6882  insn_num = 0;
6883  /* Shift cycle mark if it is put on insn which could be ignored.  */
6884  for (insn = NEXT_INSN (prev_head_insn);
6885       insn != tail;
6886       insn = NEXT_INSN (insn))
6887    if (INSN_P (insn)
6888	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6889	    || GET_CODE (PATTERN (insn)) == USE
6890	    || GET_CODE (PATTERN (insn)) == CLOBBER)
6891	&& GET_MODE (insn) == TImode)
6892      {
6893	PUT_MODE (insn, VOIDmode);
6894	for (next_insn = NEXT_INSN (insn);
6895	     next_insn != tail;
6896	     next_insn = NEXT_INSN (next_insn))
6897	  if (INSN_P (next_insn)
6898	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
6899	      && GET_CODE (PATTERN (next_insn)) != USE
6900	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
6901	    {
6902	      PUT_MODE (next_insn, TImode);
6903	      break;
6904	    }
6905      }
  /* Forward pass: generation of bundle states.  */
6907  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
6908       insn != NULL_RTX;
6909       insn = next_insn)
6910    {
6911      if (!INSN_P (insn)
6912	  || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
6913	  || GET_CODE (PATTERN (insn)) == USE
6914	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6915	abort ();
6916      type = ia64_safe_type (insn);
6917      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
6918      insn_num++;
6919      index_to_bundle_states [insn_num] = NULL;
6920      for (curr_state = index_to_bundle_states [insn_num - 1];
6921	   curr_state != NULL;
6922	   curr_state = next_state)
6923	{
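          /* POS is the slot (0, 1 or 2) which the next insn would
             occupy in the currently open bundle.  */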
6924	  pos = curr_state->accumulated_insns_num % 3;
6925	  next_state = curr_state->next;
6926	  /* We must fill up the current bundle in order to start a
6927	     subsequent asm insn in a new bundle.  Asm insn is always
6928	     placed in a separate bundle.  */
6929	  only_bundle_end_p
6930	    = (next_insn != NULL_RTX
6931	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
6932	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
6933	  /* We may fill up the current bundle if it is the cycle end
6934	     without a group barrier.  */
6935	  bundle_end_p
6936	    = (only_bundle_end_p || next_insn == NULL_RTX
6937	       || (GET_MODE (next_insn) == TImode
6938		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
6939	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
6940	      || type == TYPE_S
6941	      /* We need to insert 2 nops for cases like M_MII.  To
6942		 guarantee issuing all insns on the same cycle for
6943		 Itanium 1, we need to issue 2 nops after the first M
6944		 insn (MnnMII where n is a nop insn).  */
6945	      || ((type == TYPE_M || type == TYPE_A)
6946		  && ia64_tune == PROCESSOR_ITANIUM
6947		  && !bundle_end_p && pos == 1))
6948	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
6949				 only_bundle_end_p);
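          /* Also try issuing the insn preceded by one nop and by no
             nops; each viable alternative is recorded as a bundle
             state for the next insn.  */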
6950	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
6951			       only_bundle_end_p);
6952	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
6953			       only_bundle_end_p);
6954	}
6955      if (index_to_bundle_states [insn_num] == NULL)
6956	abort ();
6957      for (curr_state = index_to_bundle_states [insn_num];
6958	   curr_state != NULL;
6959	   curr_state = curr_state->next)
6960	if (verbose >= 2 && dump)
6961	  {
6962	    /* This structure is taken from generated code of the
6963	       pipeline hazard recognizer (see file insn-attrtab.c).
6964	       Please don't forget to change the structure if a new
6965	       automaton is added to .md file.  */
6966	    struct DFA_chip
6967	    {
6968	      unsigned short one_automaton_state;
6969	      unsigned short oneb_automaton_state;
6970	      unsigned short two_automaton_state;
6971	      unsigned short twob_automaton_state;
6972	    };
6973
6974	    fprintf
6975	      (dump,
6976	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
6977	       curr_state->unique_num,
6978	       (curr_state->originator == NULL
6979		? -1 : curr_state->originator->unique_num),
6980	       curr_state->cost,
6981	       curr_state->before_nops_num, curr_state->after_nops_num,
6982	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
6983	       (ia64_tune == PROCESSOR_ITANIUM
6984		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
6985		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
6986	       INSN_UID (insn));
6987	  }
6988    }
6989  if (index_to_bundle_states [insn_num] == NULL)
6990    /* We should find a solution because the 2nd insn scheduling has
6991       found one.  */
6992    abort ();
6993  /* Find a state corresponding to the best insn sequence.  */
6994  best_state = NULL;
6995  for (curr_state = index_to_bundle_states [insn_num];
6996       curr_state != NULL;
6997       curr_state = curr_state->next)
    /* We only look at states whose last bundle is completely filled.
       First we prefer insn sequences with minimal cost, then those
       with the fewest inserted nops, and finally those with branch
       insns placed in the 3rd slots.  */
7002    if (curr_state->accumulated_insns_num % 3 == 0
7003	&& (best_state == NULL || best_state->cost > curr_state->cost
7004	    || (best_state->cost == curr_state->cost
7005		&& (curr_state->accumulated_insns_num
7006		    < best_state->accumulated_insns_num
7007		    || (curr_state->accumulated_insns_num
7008			== best_state->accumulated_insns_num
7009			&& curr_state->branch_deviation
7010			< best_state->branch_deviation)))))
7011      best_state = curr_state;
7012  /* Second (backward) pass: adding nops and templates.  */
7013  insn_num = best_state->before_nops_num;
7014  template0 = template1 = -1;
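  /* TEMPLATE0 is the template of the bundle whose start we will reach
     next while moving backward; TEMPLATE1 is the template of the
     earlier bundle of a two-bundle window.  -1 means not yet known.  */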
7015  for (curr_state = best_state;
7016       curr_state->originator != NULL;
7017       curr_state = curr_state->originator)
7018    {
7019      insn = curr_state->insn;
7020      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7021	       || asm_noperands (PATTERN (insn)) >= 0);
7022      insn_num++;
7023      if (verbose >= 2 && dump)
7024	{
7025	  struct DFA_chip
7026	  {
7027	    unsigned short one_automaton_state;
7028	    unsigned short oneb_automaton_state;
7029	    unsigned short two_automaton_state;
7030	    unsigned short twob_automaton_state;
7031	  };
7032
7033	  fprintf
7034	    (dump,
7035	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7036	     curr_state->unique_num,
7037	     (curr_state->originator == NULL
7038	      ? -1 : curr_state->originator->unique_num),
7039	     curr_state->cost,
7040	     curr_state->before_nops_num, curr_state->after_nops_num,
7041	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
7042	     (ia64_tune == PROCESSOR_ITANIUM
7043	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7044	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7045	     INSN_UID (insn));
7046	}
      /* Find the position in the current bundle window.  The window can
         contain at most two bundles.  A two-bundle window means that
         the processor will make two bundle rotations.  */
7050      max_pos = get_max_pos (curr_state->dfa_state);
7051      if (max_pos == 6
7052	  /* The following (negative template number) means that the
7053	     processor did one bundle rotation.  */
7054	  || (max_pos == 3 && template0 < 0))
7055	{
7056	  /* We are at the end of the window -- find template(s) for
7057	     its bundle(s).  */
7058	  pos = max_pos;
7059	  if (max_pos == 3)
7060	    template0 = get_template (curr_state->dfa_state, 3);
7061	  else
7062	    {
7063	      template1 = get_template (curr_state->dfa_state, 3);
7064	      template0 = get_template (curr_state->dfa_state, 6);
7065	    }
7066	}
7067      if (max_pos > 3 && template1 < 0)
7068	/* It may happen when we have the stop inside a bundle.  */
7069	{
7070	  if (pos > 3)
7071	    abort ();
7072	  template1 = get_template (curr_state->dfa_state, 3);
7073	  pos += 3;
7074	}
7075      if (!asm_p)
7076	/* Emit nops after the current insn.  */
7077	for (i = 0; i < curr_state->after_nops_num; i++)
7078	  {
7079	    nop = gen_nop ();
7080	    emit_insn_after (nop, insn);
7081	    pos--;
7082	    if (pos < 0)
7083	      abort ();
7084	    if (pos % 3 == 0)
7085	      {
7086		/* We are at the start of a bundle: emit the template
7087		   (it should be defined).  */
7088		if (template0 < 0)
7089		  abort ();
7090		b = gen_bundle_selector (GEN_INT (template0));
7091		ia64_emit_insn_before (b, nop);
                /* If we have a two-bundle window, we make one bundle
                   rotation.  Otherwise template0 will be undefined
                   (a negative value).  */
7095		template0 = template1;
7096		template1 = -1;
7097	      }
7098	  }
      /* Move the position backward in the window.  A group barrier
         occupies no slot.  An asm insn takes a whole bundle.  */
7101      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7102	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7103	  && asm_noperands (PATTERN (insn)) < 0)
7104	pos--;
7105      /* Long insn takes 2 slots.  */
7106      if (ia64_safe_type (insn) == TYPE_L)
7107	pos--;
7108      if (pos < 0)
7109	abort ();
7110      if (pos % 3 == 0
7111	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7112	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7113	  && asm_noperands (PATTERN (insn)) < 0)
7114	{
7115	  /* The current insn is at the bundle start: emit the
7116	     template.  */
7117	  if (template0 < 0)
7118	    abort ();
7119	  b = gen_bundle_selector (GEN_INT (template0));
7120	  ia64_emit_insn_before (b, insn);
7121	  b = PREV_INSN (insn);
7122	  insn = b;
          /* See comment above in analogous place for emitting nops
             after the insn.  */
7125	  template0 = template1;
7126	  template1 = -1;
7127	}
      /* Emit nops before the current insn.  */
7129      for (i = 0; i < curr_state->before_nops_num; i++)
7130	{
7131	  nop = gen_nop ();
7132	  ia64_emit_insn_before (nop, insn);
7133	  nop = PREV_INSN (insn);
7134	  insn = nop;
7135	  pos--;
7136	  if (pos < 0)
7137	    abort ();
7138	  if (pos % 3 == 0)
7139	    {
              /* See comment above in analogous place for emitting nops
                 after the insn.  */
7142	      if (template0 < 0)
7143		abort ();
7144	      b = gen_bundle_selector (GEN_INT (template0));
7145	      ia64_emit_insn_before (b, insn);
7146	      b = PREV_INSN (insn);
7147	      insn = b;
7148	      template0 = template1;
7149	      template1 = -1;
7150	    }
7151	}
7152    }
7153  if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium1 has a strange design: if the distance between an insn
       and a dependent MM-insn is less than 4 cycles, we get an
       additional stall of 6 cycles.  So we make the distance equal to
       4 cycles if it is less.  */
7159    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7160	 insn != NULL_RTX;
7161	 insn = next_insn)
7162      {
7163	if (!INSN_P (insn)
7164	    || ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7165	    || GET_CODE (PATTERN (insn)) == USE
7166	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7167	  abort ();
7168	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7169	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
7170	  /* We found a MM-insn which needs additional cycles.  */
7171	  {
7172	    rtx last;
7173	    int i, j, n;
7174	    int pred_stop_p;
7175
            /* Now we search for the template of the bundle in which
               the MM-insn is placed and for the position of the insn
               in the bundle (0, 1, 2).  We also check whether there
               is a stop before the insn.  */
7180	    last = prev_active_insn (insn);
7181	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7182	    if (pred_stop_p)
7183	      last = prev_active_insn (last);
7184	    n = 0;
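            /* N will be the slot (0, 1 or 2) of the MM-insn within
               its bundle.  */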
7185	    for (;; last = prev_active_insn (last))
7186	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
7187		{
7188		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7189		  if (template0 == 9)
                    /* The insn is in an MLX bundle.  Change the
                       template to MFI because we will add nops before
                       the insn.  It simplifies the subsequent code a lot.  */
7193		    PATTERN (last)
7194		      = gen_bundle_selector (GEN_INT (2)); /* -> MFI */
7195		  break;
7196		}
7197	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7198		       && (ia64_safe_itanium_class (last)
7199			   != ITANIUM_CLASS_IGNORE))
7200		n++;
            /* Sanity checks: the stop is not at the bundle start,
               there are no more than 3 insns in the bundle, and the
               MM-insn is not at the start of a bundle with template
               MLX.  */
7205	    if ((pred_stop_p && n == 0) || n > 2
7206		|| (template0 == 9 && n != 0))
7207	      abort ();
7208	    /* Put nops after the insn in the bundle.  */
7209	    for (j = 3 - n; j > 0; j --)
7210	      ia64_emit_insn_before (gen_nop (), insn);
            /* This takes into account that we will add N more nops
               before the insn later -- please see the code below.  */
7213	    add_cycles [INSN_UID (insn)]--;
7214	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7215	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7216				     insn);
7217	    if (pred_stop_p)
7218	      add_cycles [INSN_UID (insn)]--;
7219	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7220	      {
7221		/* Insert "MII;" template.  */
7222		ia64_emit_insn_before (gen_bundle_selector (GEN_INT (0)),
7223				       insn);
7224		ia64_emit_insn_before (gen_nop (), insn);
7225		ia64_emit_insn_before (gen_nop (), insn);
7226		if (i > 1)
7227		  {
7228		    /* To decrease code size, we use "MI;I;"
7229		       template.  */
7230		    ia64_emit_insn_before
7231		      (gen_insn_group_barrier (GEN_INT (3)), insn);
7232		    i--;
7233		  }
7234		ia64_emit_insn_before (gen_nop (), insn);
7235		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7236				       insn);
7237	      }
7238	    /* Put the MM-insn in the same slot of a bundle with the
7239	       same template as the original one.  */
7240	    ia64_emit_insn_before (gen_bundle_selector (GEN_INT (template0)),
7241				   insn);
            /* To put the insn in the same slot, add the necessary
               number of nops.  */
7244	    for (j = n; j > 0; j --)
7245	      ia64_emit_insn_before (gen_nop (), insn);
7246	    /* Put the stop if the original bundle had it.  */
7247	    if (pred_stop_p)
7248	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7249				     insn);
7250	  }
7251      }
7252  free (index_to_bundle_states);
7253  finish_bundle_state_table ();
7254  bundling_p = 0;
7255  dfa_clean_insn_cache ();
7256}
7257
7258/* The following function is called at the end of scheduling BB or
7259   EBB.  After reload, it inserts stop bits and does insn bundling.  */
7260
7261static void
7262ia64_sched_finish (FILE *dump, int sched_verbose)
7263{
7264  if (sched_verbose)
7265    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;

  final_emit_insn_group_barriers (dump);
  bundling (dump, sched_verbose, current_sched_info->prev_head,
            current_sched_info->next_tail);
  if (sched_verbose && dump)
    fprintf (dump, "//    finishing %d-%d\n",
             INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
             INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7280}
7281
7282/* The following function inserts stop bits in scheduled BB or EBB.  */
7283
7284static void
7285final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7286{
7287  rtx insn;
7288  int need_barrier_p = 0;
7289  rtx prev_insn = NULL_RTX;
7290
7291  init_insn_group_barriers ();
7292
7293  for (insn = NEXT_INSN (current_sched_info->prev_head);
7294       insn != current_sched_info->next_tail;
7295       insn = NEXT_INSN (insn))
7296    {
7297      if (GET_CODE (insn) == BARRIER)
7298	{
7299	  rtx last = prev_active_insn (insn);
7300
7301	  if (! last)
7302	    continue;
7303	  if (GET_CODE (last) == JUMP_INSN
7304	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7305	    last = prev_active_insn (last);
7306	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7307	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7308
7309	  init_insn_group_barriers ();
7310	  need_barrier_p = 0;
7311	  prev_insn = NULL_RTX;
7312	}
7313      else if (INSN_P (insn))
7314	{
7315	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7316	    {
7317	      init_insn_group_barriers ();
7318	      need_barrier_p = 0;
7319	      prev_insn = NULL_RTX;
7320	    }
7321	  else if (need_barrier_p || group_barrier_needed_p (insn))
7322	    {
7323	      if (TARGET_EARLY_STOP_BITS)
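                  /* Move the stop bit up: search backward for the
                     last insn that both starts a cycle and already
                     required a stop, emit the group barrier just
                     before it, and then rescan the insns in between
                     to rebuild the barrier state.  */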
7324		{
7325		  rtx last;
7326
7327		  for (last = insn;
7328		       last != current_sched_info->prev_head;
7329		       last = PREV_INSN (last))
7330		    if (INSN_P (last) && GET_MODE (last) == TImode
7331			&& stops_p [INSN_UID (last)])
7332		      break;
7333		  if (last == current_sched_info->prev_head)
7334		    last = insn;
7335		  last = prev_active_insn (last);
7336		  if (last
7337		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7338		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7339				     last);
7340		  init_insn_group_barriers ();
7341		  for (last = NEXT_INSN (last);
7342		       last != insn;
7343		       last = NEXT_INSN (last))
7344		    if (INSN_P (last))
7345		      group_barrier_needed_p (last);
7346		}
7347	      else
7348		{
7349		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7350				    insn);
7351		  init_insn_group_barriers ();
7352		}
7353	      group_barrier_needed_p (insn);
7354	      prev_insn = NULL_RTX;
7355	    }
7356	  else if (recog_memoized (insn) >= 0)
7357	    prev_insn = insn;
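          /* A call or an asm insn forces a stop bit before the
             following insn.  */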
7358	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
7359			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
7360			    || asm_noperands (PATTERN (insn)) >= 0);
7361	}
7362    }
7363}
7364
7365
7366
/* If the following function returns TRUE, we will use the DFA
   insn scheduler.  */
7369
7370static int
7371ia64_use_dfa_pipeline_interface (void)
7372{
7373  return 1;
7374}
7375
/* The following function returns the number of insns to be considered
   by the multipass lookahead of the DFA insn scheduler.  */
7378
7379static int
7380ia64_first_cycle_multipass_dfa_lookahead (void)
7381{
7382  return (reload_completed ? 6 : 4);
7383}
7384
/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn'.  */
7386
7387static void
7388ia64_init_dfa_pre_cycle_insn (void)
7389{
7390  if (temp_dfa_state == NULL)
7391    {
7392      dfa_state_size = state_size ();
7393      temp_dfa_state = xmalloc (dfa_state_size);
7394      prev_cycle_state = xmalloc (dfa_state_size);
7395    }
7396  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7397  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7398  recog_memoized (dfa_pre_cycle_insn);
7399  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7400  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7401  recog_memoized (dfa_stop_insn);
7402}
7403
7404/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7405   used by the DFA insn scheduler.  */
7406
7407static rtx
7408ia64_dfa_pre_cycle_insn (void)
7409{
7410  return dfa_pre_cycle_insn;
7411}
7412
7413/* The following function returns TRUE if PRODUCER (of type ilog or
7414   ld) produces address for CONSUMER (of type st or stf). */
7415
7416int
7417ia64_st_address_bypass_p (rtx producer, rtx consumer)
7418{
7419  rtx dest, reg, mem;
7420
7421  if (producer == NULL_RTX || consumer == NULL_RTX)
7422    abort ();
7423  dest = ia64_single_set (producer);
7424  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7425      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7426    abort ();
7427  if (GET_CODE (reg) == SUBREG)
7428    reg = SUBREG_REG (reg);
7429  dest = ia64_single_set (consumer);
7430  if (dest == NULL_RTX || (mem = SET_DEST (dest)) == NULL_RTX
7431      || GET_CODE (mem) != MEM)
7432    abort ();
7433  return reg_mentioned_p (reg, mem);
7434}
7435
7436/* The following function returns TRUE if PRODUCER (of type ilog or
7437   ld) produces address for CONSUMER (of type ld or fld). */
7438
7439int
7440ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7441{
7442  rtx dest, src, reg, mem;
7443
7444  if (producer == NULL_RTX || consumer == NULL_RTX)
7445    abort ();
7446  dest = ia64_single_set (producer);
7447  if (dest == NULL_RTX || (reg = SET_DEST (dest)) == NULL_RTX
7448      || (GET_CODE (reg) != REG && GET_CODE (reg) != SUBREG))
7449    abort ();
7450  if (GET_CODE (reg) == SUBREG)
7451    reg = SUBREG_REG (reg);
7452  src = ia64_single_set (consumer);
7453  if (src == NULL_RTX || (mem = SET_SRC (src)) == NULL_RTX)
7454    abort ();
7455  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7456    mem = XVECEXP (mem, 0, 0);
7457  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7458    mem = XEXP (mem, 0);
7459
7460  /* Note that LO_SUM is used for GOT loads.  */
7461  if (GET_CODE (mem) != LO_SUM && GET_CODE (mem) != MEM)
7462    abort ();
7463
7464  return reg_mentioned_p (reg, mem);
7465}
7466
/* The following function returns TRUE if INSN produces an address
   for a load/store insn.  We will place such insns into an M slot
   because that decreases their latency.  */
7470
7471int
7472ia64_produce_address_p (rtx insn)
7473{
7474  return insn->call;
7475}
7476
7477
7478/* Emit pseudo-ops for the assembler to describe predicate relations.
7479   At present this assumes that we only consider predicate pairs to
7480   be mutex, and that the assembler can deduce proper values from
7481   straight-line code.  */
7482
7483static void
7484emit_predicate_relation_info (void)
7485{
7486  basic_block bb;
7487
7488  FOR_EACH_BB_REVERSE (bb)
7489    {
7490      int r;
7491      rtx head = BB_HEAD (bb);
7492
7493      /* We only need such notes at code labels.  */
7494      if (GET_CODE (head) != CODE_LABEL)
7495	continue;
7496      if (GET_CODE (NEXT_INSN (head)) == NOTE
7497	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7498	head = NEXT_INSN (head);
7499
7500      for (r = PR_REG (0); r < PR_REG (64); r += 2)
7501	if (REGNO_REG_SET_P (bb->global_live_at_start, r))
7502	  {
7503	    rtx p = gen_rtx_REG (BImode, r);
7504	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7505	    if (head == BB_END (bb))
7506	      BB_END (bb) = n;
7507	    head = n;
7508	  }
7509    }
7510
7511  /* Look for conditional calls that do not return, and protect predicate
7512     relations around them.  Otherwise the assembler will assume the call
7513     returns, and complain about uses of call-clobbered predicates after
7514     the call.  */
7515  FOR_EACH_BB_REVERSE (bb)
7516    {
7517      rtx insn = BB_HEAD (bb);
7518
7519      while (1)
7520	{
7521	  if (GET_CODE (insn) == CALL_INSN
7522	      && GET_CODE (PATTERN (insn)) == COND_EXEC
7523	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7524	    {
7525	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7526	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7527	      if (BB_HEAD (bb) == insn)
7528		BB_HEAD (bb) = b;
7529	      if (BB_END (bb) == insn)
7530		BB_END (bb) = a;
7531	    }
7532
7533	  if (insn == BB_END (bb))
7534	    break;
7535	  insn = NEXT_INSN (insn);
7536	}
7537    }
7538}
7539
7540/* Perform machine dependent operations on the rtl chain INSNS.  */
7541
7542static void
7543ia64_reorg (void)
7544{
7545  /* We are freeing block_for_insn in the toplev to keep compatibility
7546     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7547  compute_bb_for_insn ();
7548
7549  /* If optimizing, we'll have split before scheduling.  */
7550  if (optimize == 0)
7551    split_all_insns (0);
7552
7553  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7554     non-optimizing bootstrap.  */
7555  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7556
7557  if (ia64_flag_schedule_insns2)
7558    {
7559      timevar_push (TV_SCHED2);
7560      ia64_final_schedule = 1;
7561
7562      initiate_bundle_states ();
7563      ia64_nop = make_insn_raw (gen_nop ());
7564      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7565      recog_memoized (ia64_nop);
7566      clocks_length = get_max_uid () + 1;
7567      stops_p = xcalloc (1, clocks_length);
7568      if (ia64_tune == PROCESSOR_ITANIUM)
7569	{
7570	  clocks = xcalloc (clocks_length, sizeof (int));
7571	  add_cycles = xcalloc (clocks_length, sizeof (int));
7572	}
7573      if (ia64_tune == PROCESSOR_ITANIUM2)
7574	{
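          /* Query the codes of the cpu units used for bundling; these
             are the units of the Itanium 2 pipeline description (the
             "2"/"2b" prefixed names).  */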
7575	  pos_1 = get_cpu_unit_code ("2_1");
7576	  pos_2 = get_cpu_unit_code ("2_2");
7577	  pos_3 = get_cpu_unit_code ("2_3");
7578	  pos_4 = get_cpu_unit_code ("2_4");
7579	  pos_5 = get_cpu_unit_code ("2_5");
7580	  pos_6 = get_cpu_unit_code ("2_6");
7581	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
7582	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7583	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7584	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7585	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7586	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7587	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
7588	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7589	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7590	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7591	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
7592	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7593	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7594	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7595	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7596	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7597	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
7598	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7599	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7600	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7601	}
7602      else
7603	{
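          /* Likewise, but for the Itanium 1 pipeline description
             ("1"/"1b" prefixed names).  */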
7604	  pos_1 = get_cpu_unit_code ("1_1");
7605	  pos_2 = get_cpu_unit_code ("1_2");
7606	  pos_3 = get_cpu_unit_code ("1_3");
7607	  pos_4 = get_cpu_unit_code ("1_4");
7608	  pos_5 = get_cpu_unit_code ("1_5");
7609	  pos_6 = get_cpu_unit_code ("1_6");
7610	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
7611	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7612	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7613	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7614	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7615	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7616	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
7617	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7618	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7619	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7620	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
7621	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7622	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7623	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7624	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7625	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7626	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
7627	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7628	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7629	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7630	}
7631      schedule_ebbs (rtl_dump_file);
7632      finish_bundle_states ();
7633      if (ia64_tune == PROCESSOR_ITANIUM)
7634	{
7635	  free (add_cycles);
7636	  free (clocks);
7637	}
7638      free (stops_p);
7639      emit_insn_group_barriers (rtl_dump_file);
7640
7641      ia64_final_schedule = 0;
7642      timevar_pop (TV_SCHED2);
7643    }
7644  else
7645    emit_all_insn_group_barriers (rtl_dump_file);
7646
7647  /* A call must not be the last instruction in a function, so that the
7648     return address is still within the function, so that unwinding works
7649     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7650  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7651    {
7652      rtx insn;
7653      int saw_stop = 0;
7654
7655      insn = get_last_insn ();
7656      if (! INSN_P (insn))
7657        insn = prev_active_insn (insn);
7658      /* Skip over insns that expand to nothing.  */
7659      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7660        {
7661	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7662	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7663	    saw_stop = 1;
7664	  insn = prev_active_insn (insn);
7665	}
7666      if (GET_CODE (insn) == CALL_INSN)
7667	{
7668	  if (! saw_stop)
7669	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7670	  emit_insn (gen_break_f ());
7671	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7672	}
7673    }
7674
7675  fixup_errata ();
7676  emit_predicate_relation_info ();
7677}
7678
7679/* Return true if REGNO is used by the epilogue.  */
7680
7681int
7682ia64_epilogue_uses (int regno)
7683{
7684  switch (regno)
7685    {
7686    case R_GR (1):
7687      /* With a call to a function in another module, we will write a new
7688	 value to "gp".  After returning from such a call, we need to make
7689	 sure the function restores the original gp-value, even if the
7690	 function itself does not use the gp anymore.  */
7691      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7692
7693    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7694    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7695      /* For functions defined with the syscall_linkage attribute, all
7696	 input registers are marked as live at all function exits.  This
7697	 prevents the register allocator from using the input registers,
7698	 which in turn makes it possible to restart a system call after
7699	 an interrupt without having to save/restore the input registers.
7700	 This also prevents kernel data from leaking to application code.  */
7701      return lookup_attribute ("syscall_linkage",
7702	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7703
7704    case R_BR (0):
7705      /* Conditional return patterns can't represent the use of `b0' as
7706         the return address, so we force the value live this way.  */
7707      return 1;
7708
7709    case AR_PFS_REGNUM:
7710      /* Likewise for ar.pfs, which is used by br.ret.  */
7711      return 1;
7712
7713    default:
7714      return 0;
7715    }
7716}
7717
7718/* Return true if REGNO is used by the frame unwinder.  */
7719
7720int
7721ia64_eh_uses (int regno)
7722{
7723  if (! reload_completed)
7724    return 0;
7725
7726  if (current_frame_info.reg_save_b0
7727      && regno == current_frame_info.reg_save_b0)
7728    return 1;
7729  if (current_frame_info.reg_save_pr
7730      && regno == current_frame_info.reg_save_pr)
7731    return 1;
7732  if (current_frame_info.reg_save_ar_pfs
7733      && regno == current_frame_info.reg_save_ar_pfs)
7734    return 1;
7735  if (current_frame_info.reg_save_ar_unat
7736      && regno == current_frame_info.reg_save_ar_unat)
7737    return 1;
7738  if (current_frame_info.reg_save_ar_lc
7739      && regno == current_frame_info.reg_save_ar_lc)
7740    return 1;
7741
7742  return 0;
7743}
7744
7745/* Return true if this goes in small data/bss.  */
7746
/* ??? We could also support our own long data here.  Generating
   movl/add/ld8 instead of addl,ld8/ld8 makes the code bigger, but it
   should make the code faster because there is one less load.  This
   also includes incomplete types which can't go in sdata/sbss.  */
7751
7752static bool
7753ia64_in_small_data_p (tree exp)
7754{
7755  if (TARGET_NO_SDATA)
7756    return false;
7757
7758  /* We want to merge strings, so we never consider them small data.  */
7759  if (TREE_CODE (exp) == STRING_CST)
7760    return false;
7761
7762  /* Functions are never small data.  */
7763  if (TREE_CODE (exp) == FUNCTION_DECL)
7764    return false;
7765
7766  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
7767    {
7768      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
7769      if (strcmp (section, ".sdata") == 0
7770	  || strcmp (section, ".sbss") == 0)
7771	return true;
7772    }
7773  else
7774    {
7775      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
7776
7777      /* If this is an incomplete type with size 0, then we can't put it
7778	 in sdata because it might be too big when completed.  */
7779      if (size > 0 && size <= ia64_section_threshold)
7780	return true;
7781    }
7782
7783  return false;
7784}
7785
7786/* Output assembly directives for prologue regions.  */
7787
7788/* The current basic block number.  */
7789
7790static bool last_block;
7791
7792/* True if we need a copy_state command at the start of the next block.  */
7793
7794static bool need_copy_state;
7795
7796/* The function emits unwind directives for the start of an epilogue.  */
7797
7798static void
7799process_epilogue (void)
7800{
7801  /* If this isn't the last block of the function, then we need to label the
7802     current state, and copy it back in at the start of the next block.  */
7803
7804  if (!last_block)
7805    {
7806      fprintf (asm_out_file, "\t.label_state 1\n");
7807      need_copy_state = true;
7808    }
7809
7810  fprintf (asm_out_file, "\t.restore sp\n");
7811}
7812
7813/* This function processes a SET pattern looking for specific patterns
7814   which result in emitting an assembly directive required for unwinding.  */
7815
7816static int
7817process_set (FILE *asm_out_file, rtx pat)
7818{
7819  rtx src = SET_SRC (pat);
7820  rtx dest = SET_DEST (pat);
7821  int src_regno, dest_regno;
7822
7823  /* Look for the ALLOC insn.  */
7824  if (GET_CODE (src) == UNSPEC_VOLATILE
7825      && XINT (src, 1) == UNSPECV_ALLOC
7826      && GET_CODE (dest) == REG)
7827    {
7828      dest_regno = REGNO (dest);
7829
7830      /* If this is the final destination for ar.pfs, then this must
7831	 be the alloc in the prologue.  */
7832      if (dest_regno == current_frame_info.reg_save_ar_pfs)
7833	fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
7834		 ia64_dbx_register_number (dest_regno));
7835      else
7836	{
7837	  /* This must be an alloc before a sibcall.  We must drop the
7838	     old frame info.  The easiest way to drop the old frame
7839	     info is to ensure we had a ".restore sp" directive
7840	     followed by a new prologue.  If the procedure doesn't
7841	     have a memory-stack frame, we'll issue a dummy ".restore
7842	     sp" now.  */
7843	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
            /* If we haven't done process_epilogue () yet, do it now.  */
7845	    process_epilogue ();
7846	  fprintf (asm_out_file, "\t.prologue\n");
7847	}
7848      return 1;
7849    }
7850
7851  /* Look for SP = ....  */
7852  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
7853    {
7854      if (GET_CODE (src) == PLUS)
7855        {
7856	  rtx op0 = XEXP (src, 0);
7857	  rtx op1 = XEXP (src, 1);
7858	  if (op0 == dest && GET_CODE (op1) == CONST_INT)
7859	    {
7860	      if (INTVAL (op1) < 0)
7861		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
7862			 -INTVAL (op1));
7863	      else
7864		process_epilogue ();
7865	    }
7866	  else
7867	    abort ();
7868	}
7869      else if (GET_CODE (src) == REG
7870	       && REGNO (src) == HARD_FRAME_POINTER_REGNUM)
7871	process_epilogue ();
7872      else
7873	abort ();
7874
7875      return 1;
7876    }
7877
7878  /* Register move we need to look at.  */
7879  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
7880    {
7881      src_regno = REGNO (src);
7882      dest_regno = REGNO (dest);
7883
7884      switch (src_regno)
7885	{
7886	case BR_REG (0):
7887	  /* Saving return address pointer.  */
7888	  if (dest_regno != current_frame_info.reg_save_b0)
7889	    abort ();
7890	  fprintf (asm_out_file, "\t.save rp, r%d\n",
7891		   ia64_dbx_register_number (dest_regno));
7892	  return 1;
7893
7894	case PR_REG (0):
7895	  if (dest_regno != current_frame_info.reg_save_pr)
7896	    abort ();
7897	  fprintf (asm_out_file, "\t.save pr, r%d\n",
7898		   ia64_dbx_register_number (dest_regno));
7899	  return 1;
7900
7901	case AR_UNAT_REGNUM:
7902	  if (dest_regno != current_frame_info.reg_save_ar_unat)
7903	    abort ();
7904	  fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
7905		   ia64_dbx_register_number (dest_regno));
7906	  return 1;
7907
7908	case AR_LC_REGNUM:
7909	  if (dest_regno != current_frame_info.reg_save_ar_lc)
7910	    abort ();
7911	  fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
7912		   ia64_dbx_register_number (dest_regno));
7913	  return 1;
7914
7915	case STACK_POINTER_REGNUM:
7916	  if (dest_regno != HARD_FRAME_POINTER_REGNUM
7917	      || ! frame_pointer_needed)
7918	    abort ();
7919	  fprintf (asm_out_file, "\t.vframe r%d\n",
7920		   ia64_dbx_register_number (dest_regno));
7921	  return 1;
7922
7923	default:
7924	  /* Everything else should indicate being stored to memory.  */
7925	  abort ();
7926	}
7927    }
7928
7929  /* Memory store we need to look at.  */
7930  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
7931    {
7932      long off;
7933      rtx base;
7934      const char *saveop;
7935
7936      if (GET_CODE (XEXP (dest, 0)) == REG)
7937	{
7938	  base = XEXP (dest, 0);
7939	  off = 0;
7940	}
7941      else if (GET_CODE (XEXP (dest, 0)) == PLUS
7942	       && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT)
7943	{
7944	  base = XEXP (XEXP (dest, 0), 0);
7945	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
7946	}
7947      else
7948	abort ();
7949
7950      if (base == hard_frame_pointer_rtx)
7951	{
7952	  saveop = ".savepsp";
7953	  off = - off;
7954	}
7955      else if (base == stack_pointer_rtx)
7956	saveop = ".savesp";
7957      else
7958	abort ();
7959
7960      src_regno = REGNO (src);
7961      switch (src_regno)
7962	{
7963	case BR_REG (0):
7964	  if (current_frame_info.reg_save_b0 != 0)
7965	    abort ();
7966	  fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
7967	  return 1;
7968
7969	case PR_REG (0):
7970	  if (current_frame_info.reg_save_pr != 0)
7971	    abort ();
7972	  fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
7973	  return 1;
7974
7975	case AR_LC_REGNUM:
7976	  if (current_frame_info.reg_save_ar_lc != 0)
7977	    abort ();
7978	  fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
7979	  return 1;
7980
7981	case AR_PFS_REGNUM:
7982	  if (current_frame_info.reg_save_ar_pfs != 0)
7983	    abort ();
7984	  fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
7985	  return 1;
7986
7987	case AR_UNAT_REGNUM:
7988	  if (current_frame_info.reg_save_ar_unat != 0)
7989	    abort ();
7990	  fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
7991	  return 1;
7992
7993	case GR_REG (4):
7994	case GR_REG (5):
7995	case GR_REG (6):
7996	case GR_REG (7):
7997	  fprintf (asm_out_file, "\t.save.g 0x%x\n",
7998		   1 << (src_regno - GR_REG (4)));
7999	  return 1;
8000
8001	case BR_REG (1):
8002	case BR_REG (2):
8003	case BR_REG (3):
8004	case BR_REG (4):
8005	case BR_REG (5):
8006	  fprintf (asm_out_file, "\t.save.b 0x%x\n",
8007		   1 << (src_regno - BR_REG (1)));
8008	  return 1;
8009
8010	case FR_REG (2):
8011	case FR_REG (3):
8012	case FR_REG (4):
8013	case FR_REG (5):
8014	  fprintf (asm_out_file, "\t.save.f 0x%x\n",
8015		   1 << (src_regno - FR_REG (2)));
8016	  return 1;
8017
8018	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8019	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8020	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8021	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8022	  fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
8023		   1 << (src_regno - FR_REG (12)));
8024	  return 1;
8025
8026	default:
8027	  return 0;
8028	}
8029    }
8030
8031  return 0;
8032}
8033
8034
8035/* This function looks at a single insn and emits any directives
8036   required to unwind this insn.  */
8037void
8038process_for_unwind_directive (FILE *asm_out_file, rtx insn)
8039{
8040  if (flag_unwind_tables
8041      || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8042    {
8043      rtx pat;
8044
8045      if (GET_CODE (insn) == NOTE
8046	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
8047	{
8048	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
8049
8050	  /* Restore unwind state from immediately before the epilogue.  */
8051	  if (need_copy_state)
8052	    {
8053	      fprintf (asm_out_file, "\t.body\n");
8054	      fprintf (asm_out_file, "\t.copy_state 1\n");
8055	      need_copy_state = false;
8056	    }
8057	}
8058
8059      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8060	return;
8061
8062      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8063      if (pat)
8064	pat = XEXP (pat, 0);
8065      else
8066	pat = PATTERN (insn);
8067
8068      switch (GET_CODE (pat))
8069        {
8070	case SET:
8071	  process_set (asm_out_file, pat);
8072	  break;
8073
8074	case PARALLEL:
8075	  {
8076	    int par_index;
8077	    int limit = XVECLEN (pat, 0);
8078	    for (par_index = 0; par_index < limit; par_index++)
8079	      {
8080		rtx x = XVECEXP (pat, 0, par_index);
8081		if (GET_CODE (x) == SET)
8082		  process_set (asm_out_file, x);
8083	      }
8084	    break;
8085	  }
8086
8087	default:
8088	  abort ();
8089	}
8090    }
8091}
8092
8093
8094void
8095ia64_init_builtins (void)
8096{
8097  tree psi_type_node = build_pointer_type (integer_type_node);
8098  tree pdi_type_node = build_pointer_type (long_integer_type_node);
8099
8100  /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */
8101  tree si_ftype_psi_si_si
8102    = build_function_type_list (integer_type_node,
8103				psi_type_node, integer_type_node,
8104				integer_type_node, NULL_TREE);
8105
8106  /* __sync_val_compare_and_swap_di */
8107  tree di_ftype_pdi_di_di
8108    = build_function_type_list (long_integer_type_node,
8109				pdi_type_node, long_integer_type_node,
8110				long_integer_type_node, NULL_TREE);
8111  /* __sync_bool_compare_and_swap_di */
8112  tree si_ftype_pdi_di_di
8113    = build_function_type_list (integer_type_node,
8114				pdi_type_node, long_integer_type_node,
8115				long_integer_type_node, NULL_TREE);
8116  /* __sync_synchronize */
8117  tree void_ftype_void
8118    = build_function_type (void_type_node, void_list_node);
8119
8120  /* __sync_lock_test_and_set_si */
8121  tree si_ftype_psi_si
8122    = build_function_type_list (integer_type_node,
8123				psi_type_node, integer_type_node, NULL_TREE);
8124
8125  /* __sync_lock_test_and_set_di */
8126  tree di_ftype_pdi_di
8127    = build_function_type_list (long_integer_type_node,
8128				pdi_type_node, long_integer_type_node,
8129				NULL_TREE);
8130
8131  /* __sync_lock_release_si */
8132  tree void_ftype_psi
8133    = build_function_type_list (void_type_node, psi_type_node, NULL_TREE);
8134
8135  /* __sync_lock_release_di */
8136  tree void_ftype_pdi
8137    = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE);
8138
8139  tree fpreg_type;
8140  tree float80_type;
8141
8142  /* The __fpreg type.  */
8143  fpreg_type = make_node (REAL_TYPE);
8144  /* ??? The back end should know to load/save __fpreg variables using
8145     the ldf.fill and stf.spill instructions.  */
8146  TYPE_PRECISION (fpreg_type) = 96;
8147  layout_type (fpreg_type);
8148  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8149
8150  /* The __float80 type.  */
8151  float80_type = make_node (REAL_TYPE);
8152  TYPE_PRECISION (float80_type) = 96;
8153  layout_type (float80_type);
8154  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8155
8156  /* The __float128 type.  */
8157  if (!TARGET_HPUX)
8158    {
8159      tree float128_type = make_node (REAL_TYPE);
8160      TYPE_PRECISION (float128_type) = 128;
8161      layout_type (float128_type);
8162      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8163    }
8164  else
8165    /* Under HPUX, this is a synonym for "long double".  */
8166    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8167					       "__float128");
8168
8169#define def_builtin(name, type, code) \
8170  builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE)
8171
8172  def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si,
8173	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI);
8174  def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di,
8175	       IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI);
8176  def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si,
8177	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI);
8178  def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di,
8179	       IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI);
8180
8181  def_builtin ("__sync_synchronize", void_ftype_void,
8182	       IA64_BUILTIN_SYNCHRONIZE);
8183
8184  def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si,
8185	       IA64_BUILTIN_LOCK_TEST_AND_SET_SI);
8186  def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di,
8187	       IA64_BUILTIN_LOCK_TEST_AND_SET_DI);
8188  def_builtin ("__sync_lock_release_si", void_ftype_psi,
8189	       IA64_BUILTIN_LOCK_RELEASE_SI);
8190  def_builtin ("__sync_lock_release_di", void_ftype_pdi,
8191	       IA64_BUILTIN_LOCK_RELEASE_DI);
8192
8193  def_builtin ("__builtin_ia64_bsp",
8194	       build_function_type (ptr_type_node, void_list_node),
8195	       IA64_BUILTIN_BSP);
8196
8197  def_builtin ("__builtin_ia64_flushrs",
8198	       build_function_type (void_type_node, void_list_node),
8199	       IA64_BUILTIN_FLUSHRS);
8200
8201  def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si,
8202	       IA64_BUILTIN_FETCH_AND_ADD_SI);
8203  def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si,
8204	       IA64_BUILTIN_FETCH_AND_SUB_SI);
8205  def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si,
8206	       IA64_BUILTIN_FETCH_AND_OR_SI);
8207  def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si,
8208	       IA64_BUILTIN_FETCH_AND_AND_SI);
8209  def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si,
8210	       IA64_BUILTIN_FETCH_AND_XOR_SI);
8211  def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si,
8212	       IA64_BUILTIN_FETCH_AND_NAND_SI);
8213
8214  def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si,
8215	       IA64_BUILTIN_ADD_AND_FETCH_SI);
8216  def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si,
8217	       IA64_BUILTIN_SUB_AND_FETCH_SI);
8218  def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si,
8219	       IA64_BUILTIN_OR_AND_FETCH_SI);
8220  def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si,
8221	       IA64_BUILTIN_AND_AND_FETCH_SI);
8222  def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si,
8223	       IA64_BUILTIN_XOR_AND_FETCH_SI);
8224  def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si,
8225	       IA64_BUILTIN_NAND_AND_FETCH_SI);
8226
8227  def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di,
8228	       IA64_BUILTIN_FETCH_AND_ADD_DI);
8229  def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di,
8230	       IA64_BUILTIN_FETCH_AND_SUB_DI);
8231  def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di,
8232	       IA64_BUILTIN_FETCH_AND_OR_DI);
8233  def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di,
8234	       IA64_BUILTIN_FETCH_AND_AND_DI);
8235  def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di,
8236	       IA64_BUILTIN_FETCH_AND_XOR_DI);
8237  def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di,
8238	       IA64_BUILTIN_FETCH_AND_NAND_DI);
8239
8240  def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di,
8241	       IA64_BUILTIN_ADD_AND_FETCH_DI);
8242  def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di,
8243	       IA64_BUILTIN_SUB_AND_FETCH_DI);
8244  def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di,
8245	       IA64_BUILTIN_OR_AND_FETCH_DI);
8246  def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di,
8247	       IA64_BUILTIN_AND_AND_FETCH_DI);
8248  def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di,
8249	       IA64_BUILTIN_XOR_AND_FETCH_DI);
8250  def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di,
8251	       IA64_BUILTIN_NAND_AND_FETCH_DI);
8252
8253#undef def_builtin
8254}
8255
8256/* Expand fetch_and_op intrinsics.  The basic code sequence is:
8257
8258     mf
8259     tmp = [ptr];
8260     do {
8261       ret = tmp;
8262       ar.ccv = tmp;
8263       tmp <op>= value;
8264       cmpxchgsz.acq tmp = [ptr], tmp
8265     } while (tmp != ret)
8266*/
8267
8268static rtx
8269ia64_expand_fetch_and_op (optab binoptab, enum machine_mode mode,
8270			  tree arglist, rtx target)
8271{
8272  rtx ret, label, tmp, ccv, insn, mem, value;
8273  tree arg0, arg1;
8274
8275  arg0 = TREE_VALUE (arglist);
8276  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8277  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8278#ifdef POINTERS_EXTEND_UNSIGNED
8279  if (GET_MODE(mem) != Pmode)
8280    mem = convert_memory_address (Pmode, mem);
8281#endif
8282  value = expand_expr (arg1, NULL_RTX, mode, 0);
8283
8284  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8285  MEM_VOLATILE_P (mem) = 1;
8286
8287  if (target && register_operand (target, mode))
8288    ret = target;
8289  else
8290    ret = gen_reg_rtx (mode);
8291
8292  emit_insn (gen_mf ());
8293
8294  /* Special case for fetchadd instructions.  */
8295  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
8296    {
8297      if (mode == SImode)
8298        insn = gen_fetchadd_acq_si (ret, mem, value);
8299      else
8300        insn = gen_fetchadd_acq_di (ret, mem, value);
8301      emit_insn (insn);
8302      return ret;
8303    }
8304
8305  tmp = gen_reg_rtx (mode);
8306  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
8307  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8308  emit_move_insn (tmp, mem);
8309
8310  label = gen_label_rtx ();
8311  emit_label (label);
8312  emit_move_insn (ret, tmp);
8313  convert_move (ccv, tmp, /*unsignedp=*/1);
8314
8315  /* Perform the specific operation.  Special case NAND by noticing
8316     one_cmpl_optab instead.  */
8317  if (binoptab == one_cmpl_optab)
8318    {
8319      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8320      binoptab = and_optab;
8321    }
8322  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);
8323
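  /* cmpxchg stores the new value only if [ptr] still matches ar.ccv;
     TMP receives the old memory contents, so we loop until no other
     store intervened.  */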
8324  if (mode == SImode)
8325    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
8326  else
8327    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
8328  emit_insn (insn);
8329
8330  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);
8331
8332  return ret;
8333}
8334
8335/* Expand op_and_fetch intrinsics.  The basic code sequence is:
8336
8337     mf
8338     tmp = [ptr];
8339     do {
8340       old = tmp;
8341       ar.ccv = tmp;
8342       ret = tmp <op> value;
8343       cmpxchgsz.acq tmp = [ptr], ret
8344     } while (tmp != old)
8345*/
8346
8347static rtx
8348ia64_expand_op_and_fetch (optab binoptab, enum machine_mode mode,
8349			  tree arglist, rtx target)
8350{
8351  rtx old, label, tmp, ret, ccv, insn, mem, value;
8352  tree arg0, arg1;
8353
8354  arg0 = TREE_VALUE (arglist);
8355  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8356  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
8357#ifdef POINTERS_EXTEND_UNSIGNED
8358  if (GET_MODE(mem) != Pmode)
8359    mem = convert_memory_address (Pmode, mem);
8360#endif
8361
8362  value = expand_expr (arg1, NULL_RTX, mode, 0);
8363
8364  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
8365  MEM_VOLATILE_P (mem) = 1;
8366
8367  if (target && ! register_operand (target, mode))
8368    target = NULL_RTX;
8369
8370  emit_insn (gen_mf ());
8371  tmp = gen_reg_rtx (mode);
8372  old = gen_reg_rtx (mode);
8373  /* ar.ccv must always be loaded with a zero-extended DImode value.  */
8374  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8375
8376  emit_move_insn (tmp, mem);
8377
8378  label = gen_label_rtx ();
8379  emit_label (label);
8380  emit_move_insn (old, tmp);
8381  convert_move (ccv, tmp, /*unsignedp=*/1);
8382
8383  /* Perform the specific operation.  Special case NAND by noticing
8384     one_cmpl_optab instead.  */
8385  if (binoptab == one_cmpl_optab)
8386    {
8387      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
8388      binoptab = and_optab;
8389    }
8390  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);
8391
8392  if (mode == SImode)
8393    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
8394  else
8395    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
8396  emit_insn (insn);
8397
8398  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);
8399
8400  return ret;
8401}
8402
8403/* Expand val_ and bool_compare_and_swap.  For val_ we want:
8404
8405     ar.ccv = oldval
8406     mf
8407     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
8408     return ret
8409
8410   For bool_ it's the same except return ret == oldval.
8411*/
8412
8413static rtx
8414ia64_expand_compare_and_swap (enum machine_mode rmode, enum machine_mode mode,
8415			      int boolp, tree arglist, rtx target)
8416{
8417  tree arg0, arg1, arg2;
8418  rtx mem, old, new, ccv, tmp, insn;
8419
8420  arg0 = TREE_VALUE (arglist);
8421  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8422  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
8423  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8424  old = expand_expr (arg1, NULL_RTX, mode, 0);
8425  new = expand_expr (arg2, NULL_RTX, mode, 0);
8426
8427  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8428  MEM_VOLATILE_P (mem) = 1;
8429
8430  if (GET_MODE (old) != mode)
8431    old = convert_to_mode (mode, old, /*unsignedp=*/1);
8432  if (GET_MODE (new) != mode)
8433    new = convert_to_mode (mode, new, /*unsignedp=*/1);
8434
8435  if (! register_operand (old, mode))
8436    old = copy_to_mode_reg (mode, old);
8437  if (! register_operand (new, mode))
8438    new = copy_to_mode_reg (mode, new);
8439
8440  if (! boolp && target && register_operand (target, mode))
8441    tmp = target;
8442  else
8443    tmp = gen_reg_rtx (mode);
8444
8445  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
8446  convert_move (ccv, old, /*unsignedp=*/1);
8447  emit_insn (gen_mf ());
8448  if (mode == SImode)
8449    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
8450  else
8451    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
8452  emit_insn (insn);
8453
8454  if (boolp)
8455    {
8456      if (! target)
8457	target = gen_reg_rtx (rmode);
8458      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
8459    }
8460  else
8461    return tmp;
8462}
8463
8464/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */
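
/* Roughly: ret = *ptr; *ptr = new; return ret; performed atomically
   (an illustrative description only).  On ia64 the xchg instruction
   itself has acquire semantics.  */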
8465
8466static rtx
8467ia64_expand_lock_test_and_set (enum machine_mode mode, tree arglist,
8468			       rtx target)
8469{
8470  tree arg0, arg1;
8471  rtx mem, new, ret, insn;
8472
8473  arg0 = TREE_VALUE (arglist);
8474  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
8475  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8476  new = expand_expr (arg1, NULL_RTX, mode, 0);
8477
8478  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8479  MEM_VOLATILE_P (mem) = 1;
8480  if (! register_operand (new, mode))
8481    new = copy_to_mode_reg (mode, new);
8482
8483  if (target && register_operand (target, mode))
8484    ret = target;
8485  else
8486    ret = gen_reg_rtx (mode);
8487
8488  if (mode == SImode)
8489    insn = gen_xchgsi (ret, mem, new);
8490  else
8491    insn = gen_xchgdi (ret, mem, new);
8492  emit_insn (insn);
8493
8494  return ret;
8495}
8496
8497/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */
8498
8499static rtx
8500ia64_expand_lock_release (enum machine_mode mode, tree arglist,
8501			  rtx target ATTRIBUTE_UNUSED)
8502{
8503  tree arg0;
8504  rtx mem;
8505
8506  arg0 = TREE_VALUE (arglist);
8507  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
8508
8509  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
8510  MEM_VOLATILE_P (mem) = 1;
8511
8512  emit_move_insn (mem, const0_rtx);
8513
8514  return const0_rtx;
8515}
8516
8517rtx
8518ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8519		     enum machine_mode mode ATTRIBUTE_UNUSED,
8520		     int ignore ATTRIBUTE_UNUSED)
8521{
8522  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8523  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8524  tree arglist = TREE_OPERAND (exp, 1);
8525  enum machine_mode rmode = VOIDmode;
8526
8527  switch (fcode)
8528    {
8529    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8530    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8531      mode = SImode;
8532      rmode = SImode;
8533      break;
8534
8535    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8536    case IA64_BUILTIN_LOCK_RELEASE_SI:
8537    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8538    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8539    case IA64_BUILTIN_FETCH_AND_OR_SI:
8540    case IA64_BUILTIN_FETCH_AND_AND_SI:
8541    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8542    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8543    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8544    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8545    case IA64_BUILTIN_OR_AND_FETCH_SI:
8546    case IA64_BUILTIN_AND_AND_FETCH_SI:
8547    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8548    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8549      mode = SImode;
8550      break;
8551
8552    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8553      mode = DImode;
8554      rmode = SImode;
8555      break;
8556
8557    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8558      mode = DImode;
8559      rmode = DImode;
8560      break;
8561
8562    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8563    case IA64_BUILTIN_LOCK_RELEASE_DI:
8564    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8565    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8566    case IA64_BUILTIN_FETCH_AND_OR_DI:
8567    case IA64_BUILTIN_FETCH_AND_AND_DI:
8568    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8569    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8570    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8571    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8572    case IA64_BUILTIN_OR_AND_FETCH_DI:
8573    case IA64_BUILTIN_AND_AND_FETCH_DI:
8574    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8575    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8576      mode = DImode;
8577      break;
8578
8579    default:
8580      break;
8581    }
8582
8583  switch (fcode)
8584    {
8585    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
8586    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
8587      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
8588					   target);
8589
8590    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
8591    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
8592      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
8593					   target);
8594
8595    case IA64_BUILTIN_SYNCHRONIZE:
8596      emit_insn (gen_mf ());
8597      return const0_rtx;
8598
8599    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
8600    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
8601      return ia64_expand_lock_test_and_set (mode, arglist, target);
8602
8603    case IA64_BUILTIN_LOCK_RELEASE_SI:
8604    case IA64_BUILTIN_LOCK_RELEASE_DI:
8605      return ia64_expand_lock_release (mode, arglist, target);
8606
8607    case IA64_BUILTIN_BSP:
8608      if (! target || ! register_operand (target, DImode))
8609	target = gen_reg_rtx (DImode);
8610      emit_insn (gen_bsp_value (target));
8611#ifdef POINTERS_EXTEND_UNSIGNED
8612      target = convert_memory_address (ptr_mode, target);
8613#endif
8614      return target;
8615
8616    case IA64_BUILTIN_FLUSHRS:
8617      emit_insn (gen_flushrs ());
8618      return const0_rtx;
8619
8620    case IA64_BUILTIN_FETCH_AND_ADD_SI:
8621    case IA64_BUILTIN_FETCH_AND_ADD_DI:
8622      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);
8623
8624    case IA64_BUILTIN_FETCH_AND_SUB_SI:
8625    case IA64_BUILTIN_FETCH_AND_SUB_DI:
8626      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);
8627
8628    case IA64_BUILTIN_FETCH_AND_OR_SI:
8629    case IA64_BUILTIN_FETCH_AND_OR_DI:
8630      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);
8631
8632    case IA64_BUILTIN_FETCH_AND_AND_SI:
8633    case IA64_BUILTIN_FETCH_AND_AND_DI:
8634      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);
8635
8636    case IA64_BUILTIN_FETCH_AND_XOR_SI:
8637    case IA64_BUILTIN_FETCH_AND_XOR_DI:
8638      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);
8639
8640    case IA64_BUILTIN_FETCH_AND_NAND_SI:
8641    case IA64_BUILTIN_FETCH_AND_NAND_DI:
8642      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);
8643
8644    case IA64_BUILTIN_ADD_AND_FETCH_SI:
8645    case IA64_BUILTIN_ADD_AND_FETCH_DI:
8646      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);
8647
8648    case IA64_BUILTIN_SUB_AND_FETCH_SI:
8649    case IA64_BUILTIN_SUB_AND_FETCH_DI:
8650      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);
8651
8652    case IA64_BUILTIN_OR_AND_FETCH_SI:
8653    case IA64_BUILTIN_OR_AND_FETCH_DI:
8654      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);
8655
8656    case IA64_BUILTIN_AND_AND_FETCH_SI:
8657    case IA64_BUILTIN_AND_AND_FETCH_DI:
8658      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);
8659
8660    case IA64_BUILTIN_XOR_AND_FETCH_SI:
8661    case IA64_BUILTIN_XOR_AND_FETCH_DI:
8662      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);
8663
8664    case IA64_BUILTIN_NAND_AND_FETCH_SI:
8665    case IA64_BUILTIN_NAND_AND_FETCH_DI:
8666      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);
8667
8668    default:
8669      break;
8670    }
8671
8672  return NULL_RTX;
8673}
8674
8675/* On HP-UX IA64, aggregate parameters are passed in the most
8676   significant bits of the stack slot.  */
8677
8678enum direction
8679ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8680{
8681   /* Exception to normal case for structures/unions/etc.  */
8682
8683   if (type && AGGREGATE_TYPE_P (type)
8684       && int_size_in_bytes (type) < UNITS_PER_WORD)
8685     return upward;
8686
8687   /* Fall back to the default.  */
8688   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8689}
8690
8691/* Linked list of all external function decls seen during compilation.
8692   We output a name only if TREE_SYMBOL_REFERENCED is set, in order to
8693   avoid putting out names that are never really used.  */
8694
8695struct extern_func_list GTY(())
8696{
8697  struct extern_func_list *next;
8698  tree decl;
8699};
8700
8701static GTY(()) struct extern_func_list *extern_func_head;
8702
8703static void
8704ia64_hpux_add_extern_decl (tree decl)
8705{
8706  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8707
8708  p->decl = decl;
8709  p->next = extern_func_head;
8710  extern_func_head = p;
8711}
8712
8713/* Emit directives for the external functions that were actually used.  */
8714
8715static void
8716ia64_hpux_file_end (void)
8717{
8718  struct extern_func_list *p;
8719
8720  for (p = extern_func_head; p; p = p->next)
8721    {
8722      tree decl = p->decl;
8723      tree id = DECL_ASSEMBLER_NAME (decl);
8724
8725      if (!id)
8726	abort ();
8727
8728      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8729        {
8730	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8731
8732	  TREE_ASM_WRITTEN (decl) = 1;
8733	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
8734	  fputs (TYPE_ASM_OP, asm_out_file);
8735	  assemble_name (asm_out_file, name);
8736	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8737        }
8738    }
8739
8740  extern_func_head = 0;
8741}
8742
8743/* Rename all the TFmode libfuncs using the HPUX conventions.  */
8744
8745static void
8746ia64_hpux_init_libfuncs (void)
8747{
8748  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8749  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8750  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8751  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8752  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8753  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8754  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8755  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8756
8757  /* ia64_expand_compare uses this.  */
8758  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8759
8760  /* These should never be used.  */
8761  set_optab_libfunc (eq_optab, TFmode, 0);
8762  set_optab_libfunc (ne_optab, TFmode, 0);
8763  set_optab_libfunc (gt_optab, TFmode, 0);
8764  set_optab_libfunc (ge_optab, TFmode, 0);
8765  set_optab_libfunc (lt_optab, TFmode, 0);
8766  set_optab_libfunc (le_optab, TFmode, 0);
8767
8768  set_conv_libfunc (sext_optab,   TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8769  set_conv_libfunc (sext_optab,   TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8770  set_conv_libfunc (sext_optab,   TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8771  set_conv_libfunc (trunc_optab,  SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8772  set_conv_libfunc (trunc_optab,  DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8773  set_conv_libfunc (trunc_optab,  XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8774
8775  set_conv_libfunc (sfix_optab,   SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8776  set_conv_libfunc (sfix_optab,   DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8777  set_conv_libfunc (ufix_optab,   SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8778  set_conv_libfunc (ufix_optab,   DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8779
8780  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8781  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8782}
8783
8784/* Rename the division and modulus functions in VMS.  */
8785
8786static void
8787ia64_vms_init_libfuncs (void)
8788{
8789  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8790  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8791  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8792  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8793  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8794  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8795  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8796  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8797}
8798
8799/* Switch to the section to which we should output X.  The only thing
8800   special we do here is to honor small data.  */
8801
8802static void
8803ia64_select_rtx_section (enum machine_mode mode, rtx x,
8804			 unsigned HOST_WIDE_INT align)
8805{
8806  if (GET_MODE_SIZE (mode) > 0
8807      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8808    sdata_section ();
8809  else
8810    default_elf_select_rtx_section (mode, x, align);
8811}
8812
8813/* It is illegal to have relocations in shared segments on AIX and HPUX.
8814   Pretend flag_pic is always set.  */
8815
8816static void
8817ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8818{
8819  default_elf_select_section_1 (exp, reloc, align, true);
8820}
8821
8822static void
8823ia64_rwreloc_unique_section (tree decl, int reloc)
8824{
8825  default_unique_section_1 (decl, reloc, true);
8826}
8827
8828static void
8829ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8830				 unsigned HOST_WIDE_INT align)
8831{
8832  int save_pic = flag_pic;
8833  flag_pic = 1;
8834  ia64_select_rtx_section (mode, x, align);
8835  flag_pic = save_pic;
8836}
8837
8838static unsigned int
8839ia64_rwreloc_section_type_flags (tree decl, const char *name, int reloc)
8840{
8841  return default_section_type_flags_1 (decl, name, reloc, true);
8842}
8843
8844/* Return true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
8845   structure type and the address of the return slot should be passed
8846   in out0, rather than in r8.  */
8847
8848static bool
8849ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8850{
8851  tree ret_type = TREE_TYPE (fntype);
8852
8853  /* The Itanium C++ ABI requires that out0, rather than r8, be used
8854     as the structure return address parameter, if the return value
8855     type has a non-trivial copy constructor or destructor.  It is not
8856     clear if this same convention should be used for other
8857     programming languages.  Until G++ 3.4, we incorrectly used r8 for
8858     these return values.  */
8859  return (abi_version_at_least (2)
8860	  && ret_type
8861	  && TYPE_MODE (ret_type) == BLKmode
8862	  && TREE_ADDRESSABLE (ret_type)
8863	  && strcmp (lang_hooks.name, "GNU C++") == 0);
8864}
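
/* For illustration (a hypothetical example, not taken from the code
   above): a C++ class such as

     struct S { S (const S &); ~S (); };

   has a non-trivial copy constructor and destructor, so G++ marks the
   type TREE_ADDRESSABLE and returns it in memory; under the Itanium
   C++ ABI the address of that return slot is then passed in out0.  */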
8865
8866/* Output the assembler code for a thunk function.  THUNK_DECL is the
8867   declaration for the thunk function itself, FUNCTION is the decl for
8868   the target function.  DELTA is an immediate constant offset to be
8869   added to THIS.  If VCALL_OFFSET is nonzero, the word at
8870   *(*this + vcall_offset) should be added to THIS.  */
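
/* In rough C terms the thunk body is (an illustrative sketch only;
   the casts are schematic):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     tail-call FUNCTION (this, ...);
*/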
8871
8872static void
8873ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8874		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8875		      tree function)
8876{
8877  rtx this, insn, funexp;
8878  unsigned int this_parmno;
8879  unsigned int this_regno;
8880
8881  reload_completed = 1;
8882  epilogue_completed = 1;
8883  no_new_pseudos = 1;
8884
8885  /* Set things up as ia64_expand_prologue might.  */
8886  last_scratch_gr_reg = 15;
8887
8888  memset (&current_frame_info, 0, sizeof (current_frame_info));
8889  current_frame_info.spill_cfa_off = -16;
8890  current_frame_info.n_input_regs = 1;
8891  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8892
8893  /* Mark the end of the (empty) prologue.  */
8894  emit_note (NOTE_INSN_PROLOGUE_END);
8895
8896  /* Figure out whether "this" will be the first parameter (the
8897     typical case) or the second parameter (as happens when the
8898     virtual function returns certain class objects).  */
8899  this_parmno
8900    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8901       ? 1 : 0);
8902  this_regno = IN_REG (this_parmno);
8903  if (!TARGET_REG_NAMES)
8904    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8905
8906  this = gen_rtx_REG (Pmode, this_regno);
8907  if (TARGET_ILP32)
8908    {
8909      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8910      REG_POINTER (tmp) = 1;
8911      if (delta && CONST_OK_FOR_I (delta))
8912	{
8913	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8914	  delta = 0;
8915	}
8916      else
8917	emit_insn (gen_ptr_extend (this, tmp));
8918    }
8919
8920  /* Apply the constant offset, if required.  */
8921  if (delta)
8922    {
8923      rtx delta_rtx = GEN_INT (delta);
8924
8925      if (!CONST_OK_FOR_I (delta))
8926	{
8927	  rtx tmp = gen_rtx_REG (Pmode, 2);
8928	  emit_move_insn (tmp, delta_rtx);
8929	  delta_rtx = tmp;
8930	}
8931      emit_insn (gen_adddi3 (this, this, delta_rtx));
8932    }
8933
8934  /* Apply the offset from the vtable, if required.  */
8935  if (vcall_offset)
8936    {
8937      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8938      rtx tmp = gen_rtx_REG (Pmode, 2);
8939
8940      if (TARGET_ILP32)
8941	{
8942	  rtx t = gen_rtx_REG (ptr_mode, 2);
8943	  REG_POINTER (t) = 1;
8944	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8945	  if (CONST_OK_FOR_I (vcall_offset))
8946	    {
8947	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8948						  vcall_offset_rtx));
8949	      vcall_offset = 0;
8950	    }
8951	  else
8952	    emit_insn (gen_ptr_extend (tmp, t));
8953	}
8954      else
8955	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8956
8957      if (vcall_offset)
8958	{
8959	  if (!CONST_OK_FOR_J (vcall_offset))
8960	    {
8961	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8962	      emit_move_insn (tmp2, vcall_offset_rtx);
8963	      vcall_offset_rtx = tmp2;
8964	    }
8965	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8966	}
8967
8968      if (TARGET_ILP32)
8969	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8970			gen_rtx_MEM (ptr_mode, tmp));
8971      else
8972	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8973
8974      emit_insn (gen_adddi3 (this, this, tmp));
8975    }
8976
8977  /* Generate a tail call to the target function.  */
8978  if (! TREE_USED (function))
8979    {
8980      assemble_external (function);
8981      TREE_USED (function) = 1;
8982    }
8983  funexp = XEXP (DECL_RTL (function), 0);
8984  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8985  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8986  insn = get_last_insn ();
8987  SIBLING_CALL_P (insn) = 1;
8988
8989  /* Code generation for calls relies on splitting.  */
8990  reload_completed = 1;
8991  epilogue_completed = 1;
8992  try_split (PATTERN (insn), insn, 0);
8993
8994  emit_barrier ();
8995
8996  /* Run just enough of rest_of_compilation to get the insns emitted.
8997     There's not really enough bulk here to make other passes such as
8998     instruction scheduling worthwhile.  Note that use_thunk calls
8999     assemble_start_function and assemble_end_function.  */
9000
9001  insn_locators_initialize ();
9002  emit_all_insn_group_barriers (NULL);
9003  insn = get_insns ();
9004  shorten_branches (insn);
9005  final_start_function (insn, file, 1);
9006  final (insn, file, 1, 0);
9007  final_end_function ();
9008
9009  reload_completed = 0;
9010  epilogue_completed = 0;
9011  no_new_pseudos = 0;
9012}
9013
9014/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9015
9016static rtx
9017ia64_struct_value_rtx (tree fntype,
9018		       int incoming ATTRIBUTE_UNUSED)
9019{
9020  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
9021    return NULL_RTX;
9022  return gen_rtx_REG (Pmode, GR_REG (8));
9023}
9024
9025#include "gt-ia64.h"
9026