1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
3   Free Software Foundation, Inc.
4   Contributed by James E. Wilson <wilson@cygnus.com> and
5		  David Mosberger <davidm@hpl.hp.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to
21the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22Boston, MA 02110-1301, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "toplev.h"
46#include "sched-int.h"
47#include "timevar.h"
48#include "target.h"
49#include "target-def.h"
50#include "tm_p.h"
51#include "hashtab.h"
52#include "langhooks.h"
53#include "cfglayout.h"
54#include "tree-gimple.h"
55#include "intl.h"
56#include "debug.h"
57
58/* This is used for communication between ASM_OUTPUT_LABEL and
59   ASM_OUTPUT_LABELREF.  */
60int ia64_asm_output_label = 0;
61
62/* Define the information needed to generate branch and scc insns.  This is
63   stored from the compare operation.  */
64struct rtx_def * ia64_compare_op0;
65struct rtx_def * ia64_compare_op1;
66
67/* Register names for ia64_expand_prologue.  */
68static const char * const ia64_reg_numbers[96] =
69{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
70  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
71  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
72  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
73  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
74  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
75  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
76  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
77  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
78  "r104","r105","r106","r107","r108","r109","r110","r111",
79  "r112","r113","r114","r115","r116","r117","r118","r119",
80  "r120","r121","r122","r123","r124","r125","r126","r127"};
81
82/* ??? These strings could be shared with REGISTER_NAMES.  */
83static const char * const ia64_input_reg_names[8] =
84{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
85
86/* ??? These strings could be shared with REGISTER_NAMES.  */
87static const char * const ia64_local_reg_names[80] =
88{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
89  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
90  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
91  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
92  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
93  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
94  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
95  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
96  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
97  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
98
99/* ??? These strings could be shared with REGISTER_NAMES.  */
100static const char * const ia64_output_reg_names[8] =
101{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
102
/* Which CPU we are scheduling for.  */
104enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
105
106/* Determines whether we run our final scheduling pass or not.  We always
107   avoid the normal second scheduling pass.  */
108static int ia64_flag_schedule_insns2;
109
110/* Determines whether we run variable tracking in machine dependent
111   reorganization.  */
112static int ia64_flag_var_tracking;
113
114/* Variables which are this size or smaller are put in the sdata/sbss
115   sections.  */
116
117unsigned int ia64_section_threshold;
118
119/* The following variable is used by the DFA insn scheduler.  The value is
120   TRUE if we do insn bundling instead of insn scheduling.  */
121int bundling_p = 0;
122
123/* Structure to be filled in by ia64_compute_frame_size with register
124   save masks and offsets for the current function.  */
125
126struct ia64_frame_info
127{
128  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
129				   the caller's scratch area.  */
130  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
131  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
132  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
133  HARD_REG_SET mask;		/* mask of saved registers.  */
134  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
135				   registers or long-term scratches.  */
136  int n_spilled;		/* number of spilled registers.  */
137  int reg_fp;			/* register for fp.  */
138  int reg_save_b0;		/* save register for b0.  */
139  int reg_save_pr;		/* save register for prs.  */
140  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
141  int reg_save_ar_unat;		/* save register for ar.unat.  */
142  int reg_save_ar_lc;		/* save register for ar.lc.  */
143  int reg_save_gp;		/* save register for gp.  */
144  int n_input_regs;		/* number of input registers used.  */
145  int n_local_regs;		/* number of local registers used.  */
146  int n_output_regs;		/* number of output registers used.  */
147  int n_rotate_regs;		/* number of rotating registers used.  */
148
149  char need_regstk;		/* true if a .regstk directive needed.  */
150  char initialized;		/* true if the data is finalized.  */
151};
152
153/* Current frame information calculated by ia64_compute_frame_size.  */
154static struct ia64_frame_info current_frame_info;
155
156static int ia64_first_cycle_multipass_dfa_lookahead (void);
157static void ia64_dependencies_evaluation_hook (rtx, rtx);
158static void ia64_init_dfa_pre_cycle_insn (void);
159static rtx ia64_dfa_pre_cycle_insn (void);
160static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
161static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
162static rtx gen_tls_get_addr (void);
163static rtx gen_thread_pointer (void);
164static int find_gr_spill (int);
165static int next_scratch_gr_reg (void);
166static void mark_reg_gr_used_mask (rtx, void *);
167static void ia64_compute_frame_size (HOST_WIDE_INT);
168static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
169static void finish_spill_pointers (void);
170static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
171static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
172static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
173static rtx gen_movdi_x (rtx, rtx, rtx);
174static rtx gen_fr_spill_x (rtx, rtx, rtx);
175static rtx gen_fr_restore_x (rtx, rtx, rtx);
176
177static enum machine_mode hfa_element_mode (tree, bool);
178static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
179					 tree, int *, int);
180static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
181				   tree, bool);
182static bool ia64_function_ok_for_sibcall (tree, tree);
183static bool ia64_return_in_memory (tree, tree);
184static bool ia64_rtx_costs (rtx, int, int, int *);
185static void fix_range (const char *);
186static bool ia64_handle_option (size_t, const char *, int);
187static struct machine_function * ia64_init_machine_status (void);
188static void emit_insn_group_barriers (FILE *);
189static void emit_all_insn_group_barriers (FILE *);
190static void final_emit_insn_group_barriers (FILE *);
191static void emit_predicate_relation_info (void);
192static void ia64_reorg (void);
193static bool ia64_in_small_data_p (tree);
194static void process_epilogue (FILE *, rtx, bool, bool);
195static int process_set (FILE *, rtx, rtx, bool, bool);
196
197static bool ia64_assemble_integer (rtx, unsigned int, int);
198static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
199static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
200static void ia64_output_function_end_prologue (FILE *);
201
202static int ia64_issue_rate (void);
203static int ia64_adjust_cost (rtx, rtx, rtx, int);
204static void ia64_sched_init (FILE *, int, int);
205static void ia64_sched_finish (FILE *, int);
206static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
207static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
208static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
209static int ia64_variable_issue (FILE *, int, rtx, int);
210
211static struct bundle_state *get_free_bundle_state (void);
212static void free_bundle_state (struct bundle_state *);
213static void initiate_bundle_states (void);
214static void finish_bundle_states (void);
215static unsigned bundle_state_hash (const void *);
216static int bundle_state_eq_p (const void *, const void *);
217static int insert_bundle_state (struct bundle_state *);
218static void initiate_bundle_state_table (void);
219static void finish_bundle_state_table (void);
220static int try_issue_nops (struct bundle_state *, int);
221static int try_issue_insn (struct bundle_state *, rtx);
222static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
223static int get_max_pos (state_t);
224static int get_template (state_t, int);
225
226static rtx get_next_important_insn (rtx, rtx);
227static void bundling (FILE *, int, rtx, rtx);
228
229static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
230				  HOST_WIDE_INT, tree);
231static void ia64_file_start (void);
232
233static void ia64_select_rtx_section (enum machine_mode, rtx,
234				     unsigned HOST_WIDE_INT);
235static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
236     ATTRIBUTE_UNUSED;
237static void ia64_rwreloc_select_section (tree, int, unsigned HOST_WIDE_INT)
238     ATTRIBUTE_UNUSED;
239static void ia64_rwreloc_unique_section (tree, int)
240     ATTRIBUTE_UNUSED;
241static void ia64_rwreloc_select_rtx_section (enum machine_mode, rtx,
242					     unsigned HOST_WIDE_INT)
243     ATTRIBUTE_UNUSED;
244static unsigned int ia64_section_type_flags (tree, const char *, int);
245static void ia64_hpux_add_extern_decl (tree decl)
246     ATTRIBUTE_UNUSED;
247static void ia64_hpux_file_end (void)
248     ATTRIBUTE_UNUSED;
249static void ia64_init_libfuncs (void)
250     ATTRIBUTE_UNUSED;
251static void ia64_hpux_init_libfuncs (void)
252     ATTRIBUTE_UNUSED;
253static void ia64_sysv4_init_libfuncs (void)
254     ATTRIBUTE_UNUSED;
255static void ia64_vms_init_libfuncs (void)
256     ATTRIBUTE_UNUSED;
257
258static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
259static void ia64_encode_section_info (tree, rtx, int);
260static rtx ia64_struct_value_rtx (tree, int);
261static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
262static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
263static bool ia64_vector_mode_supported_p (enum machine_mode mode);
264static bool ia64_cannot_force_const_mem (rtx);
265static const char *ia64_mangle_fundamental_type (tree);
266static const char *ia64_invalid_conversion (tree, tree);
267static const char *ia64_invalid_unary_op (int, tree);
268static const char *ia64_invalid_binary_op (int, tree, tree);
269
270/* Table of valid machine attributes.  */
271static const struct attribute_spec ia64_attribute_table[] =
272{
273  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
274  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
275  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
276  { NULL,	       0, 0, false, false, false, NULL }
277};
278
279/* Initialize the GCC target structure.  */
280#undef TARGET_ATTRIBUTE_TABLE
281#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
282
283#undef TARGET_INIT_BUILTINS
284#define TARGET_INIT_BUILTINS ia64_init_builtins
285
286#undef TARGET_EXPAND_BUILTIN
287#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
288
289#undef TARGET_ASM_BYTE_OP
290#define TARGET_ASM_BYTE_OP "\tdata1\t"
291#undef TARGET_ASM_ALIGNED_HI_OP
292#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
293#undef TARGET_ASM_ALIGNED_SI_OP
294#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
295#undef TARGET_ASM_ALIGNED_DI_OP
296#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
297#undef TARGET_ASM_UNALIGNED_HI_OP
298#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
299#undef TARGET_ASM_UNALIGNED_SI_OP
300#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
301#undef TARGET_ASM_UNALIGNED_DI_OP
302#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
303#undef TARGET_ASM_INTEGER
304#define TARGET_ASM_INTEGER ia64_assemble_integer
305
306#undef TARGET_ASM_FUNCTION_PROLOGUE
307#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
308#undef TARGET_ASM_FUNCTION_END_PROLOGUE
309#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
310#undef TARGET_ASM_FUNCTION_EPILOGUE
311#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
312
313#undef TARGET_IN_SMALL_DATA_P
314#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
315
316#undef TARGET_SCHED_ADJUST_COST
317#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
318#undef TARGET_SCHED_ISSUE_RATE
319#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
320#undef TARGET_SCHED_VARIABLE_ISSUE
321#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
322#undef TARGET_SCHED_INIT
323#define TARGET_SCHED_INIT ia64_sched_init
324#undef TARGET_SCHED_FINISH
325#define TARGET_SCHED_FINISH ia64_sched_finish
326#undef TARGET_SCHED_REORDER
327#define TARGET_SCHED_REORDER ia64_sched_reorder
328#undef TARGET_SCHED_REORDER2
329#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
330
331#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
332#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
333
334#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
335#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
336
337#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
338#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
339#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
340#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
341
342#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
343#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
344  ia64_first_cycle_multipass_dfa_lookahead_guard
345
346#undef TARGET_SCHED_DFA_NEW_CYCLE
347#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
348
349#undef TARGET_FUNCTION_OK_FOR_SIBCALL
350#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
351#undef TARGET_ARG_PARTIAL_BYTES
352#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
353
354#undef TARGET_ASM_OUTPUT_MI_THUNK
355#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
356#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
357#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
358
359#undef TARGET_ASM_FILE_START
360#define TARGET_ASM_FILE_START ia64_file_start
361
362#undef TARGET_RTX_COSTS
363#define TARGET_RTX_COSTS ia64_rtx_costs
364#undef TARGET_ADDRESS_COST
365#define TARGET_ADDRESS_COST hook_int_rtx_0
366
367#undef TARGET_MACHINE_DEPENDENT_REORG
368#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
369
370#undef TARGET_ENCODE_SECTION_INFO
371#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
372
373#undef  TARGET_SECTION_TYPE_FLAGS
374#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
375
376#ifdef HAVE_AS_TLS
377#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
378#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
379#endif
380
381/* ??? ABI doesn't allow us to define this.  */
382#if 0
383#undef TARGET_PROMOTE_FUNCTION_ARGS
384#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
385#endif
386
387/* ??? ABI doesn't allow us to define this.  */
388#if 0
389#undef TARGET_PROMOTE_FUNCTION_RETURN
390#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
391#endif
392
393/* ??? Investigate.  */
394#if 0
395#undef TARGET_PROMOTE_PROTOTYPES
396#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
397#endif
398
399#undef TARGET_STRUCT_VALUE_RTX
400#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
401#undef TARGET_RETURN_IN_MEMORY
402#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
403#undef TARGET_SETUP_INCOMING_VARARGS
404#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
405#undef TARGET_STRICT_ARGUMENT_NAMING
406#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
407#undef TARGET_MUST_PASS_IN_STACK
408#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
409
410#undef TARGET_GIMPLIFY_VA_ARG_EXPR
411#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
412
413#undef TARGET_UNWIND_EMIT
414#define TARGET_UNWIND_EMIT process_for_unwind_directive
415
416#undef TARGET_SCALAR_MODE_SUPPORTED_P
417#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
418#undef TARGET_VECTOR_MODE_SUPPORTED_P
419#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
420
421/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
422   in an order different from the specified program order.  */
423#undef TARGET_RELAXED_ORDERING
424#define TARGET_RELAXED_ORDERING true
425
426#undef TARGET_DEFAULT_TARGET_FLAGS
427#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
428#undef TARGET_HANDLE_OPTION
429#define TARGET_HANDLE_OPTION ia64_handle_option
430
431#undef TARGET_CANNOT_FORCE_CONST_MEM
432#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
433
434#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
435#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
436
437#undef TARGET_INVALID_CONVERSION
438#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
439#undef TARGET_INVALID_UNARY_OP
440#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
441#undef TARGET_INVALID_BINARY_OP
442#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
443
444struct gcc_target targetm = TARGET_INITIALIZER;
445
446typedef enum
447  {
448    ADDR_AREA_NORMAL,	/* normal address area */
449    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
450  }
451ia64_addr_area;
452
453static GTY(()) tree small_ident1;
454static GTY(()) tree small_ident2;
455
456static void
457init_idents (void)
458{
459  if (small_ident1 == 0)
460    {
461      small_ident1 = get_identifier ("small");
462      small_ident2 = get_identifier ("__small__");
463    }
464}
465
466/* Retrieve the address area that has been chosen for the given decl.  */
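/* For example, a declaration along the lines of

       static int counter __attribute__ ((model (small)));

   (or the "__small__" spelling of the argument) carries a "model"
   attribute whose argument identifier is matched against
   small_ident1/small_ident2 and selects ADDR_AREA_SMALL.  */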
467
468static ia64_addr_area
469ia64_get_addr_area (tree decl)
470{
471  tree model_attr;
472
473  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
474  if (model_attr)
475    {
476      tree id;
477
478      init_idents ();
479      id = TREE_VALUE (TREE_VALUE (model_attr));
480      if (id == small_ident1 || id == small_ident2)
481	return ADDR_AREA_SMALL;
482    }
483  return ADDR_AREA_NORMAL;
484}
485
486static tree
487ia64_handle_model_attribute (tree *node, tree name, tree args,
488			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
489{
490  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
491  ia64_addr_area area;
492  tree arg, decl = *node;
493
494  init_idents ();
495  arg = TREE_VALUE (args);
496  if (arg == small_ident1 || arg == small_ident2)
497    {
498      addr_area = ADDR_AREA_SMALL;
499    }
500  else
501    {
502      warning (OPT_Wattributes, "invalid argument of %qs attribute",
503	       IDENTIFIER_POINTER (name));
504      *no_add_attrs = true;
505    }
506
507  switch (TREE_CODE (decl))
508    {
509    case VAR_DECL:
510      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
511	   == FUNCTION_DECL)
512	  && !TREE_STATIC (decl))
513	{
514	  error ("%Jan address area attribute cannot be specified for "
515		 "local variables", decl);
516	  *no_add_attrs = true;
517	}
518      area = ia64_get_addr_area (decl);
519      if (area != ADDR_AREA_NORMAL && addr_area != area)
520	{
521	  error ("address area of %q+D conflicts with previous "
522		 "declaration", decl);
523	  *no_add_attrs = true;
524	}
525      break;
526
527    case FUNCTION_DECL:
528      error ("%Jaddress area attribute cannot be specified for functions",
529	     decl);
530      *no_add_attrs = true;
531      break;
532
533    default:
534      warning (OPT_Wattributes, "%qs attribute ignored",
535	       IDENTIFIER_POINTER (name));
536      *no_add_attrs = true;
537      break;
538    }
539
540  return NULL_TREE;
541}
542
543static void
544ia64_encode_addr_area (tree decl, rtx symbol)
545{
546  int flags;
547
548  flags = SYMBOL_REF_FLAGS (symbol);
549  switch (ia64_get_addr_area (decl))
550    {
551    case ADDR_AREA_NORMAL: break;
552    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
553    default: gcc_unreachable ();
554    }
555  SYMBOL_REF_FLAGS (symbol) = flags;
556}
557
558static void
559ia64_encode_section_info (tree decl, rtx rtl, int first)
560{
561  default_encode_section_info (decl, rtl, first);
562
563  /* Careful not to prod global register variables.  */
564  if (TREE_CODE (decl) == VAR_DECL
565      && GET_CODE (DECL_RTL (decl)) == MEM
566      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
567      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
568    ia64_encode_addr_area (decl, XEXP (rtl, 0));
569}
570
571/* Implement CONST_OK_FOR_LETTER_P.  */
572
573bool
574ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
575{
576  switch (c)
577    {
578    case 'I':
579      return CONST_OK_FOR_I (value);
580    case 'J':
581      return CONST_OK_FOR_J (value);
582    case 'K':
583      return CONST_OK_FOR_K (value);
584    case 'L':
585      return CONST_OK_FOR_L (value);
586    case 'M':
587      return CONST_OK_FOR_M (value);
588    case 'N':
589      return CONST_OK_FOR_N (value);
590    case 'O':
591      return CONST_OK_FOR_O (value);
592    case 'P':
593      return CONST_OK_FOR_P (value);
594    default:
595      return false;
596    }
597}
598
599/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
600
601bool
602ia64_const_double_ok_for_letter_p (rtx value, char c)
603{
604  switch (c)
605    {
606    case 'G':
607      return CONST_DOUBLE_OK_FOR_G (value);
608    default:
609      return false;
610    }
611}
612
613/* Implement EXTRA_CONSTRAINT.  */
614
615bool
616ia64_extra_constraint (rtx value, char c)
617{
618  switch (c)
619    {
620    case 'Q':
621      /* Non-volatile memory for FP_REG loads/stores.  */
      return memory_operand (value, VOIDmode) && !MEM_VOLATILE_P (value);
623
624    case 'R':
625      /* 1..4 for shladd arguments.  */
626      return (GET_CODE (value) == CONST_INT
627	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);
628
629    case 'S':
630      /* Non-post-inc memory for asms and other unsavory creatures.  */
631      return (GET_CODE (value) == MEM
632	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
633	      && (reload_in_progress || memory_operand (value, VOIDmode)));
634
635    case 'T':
636      /* Symbol ref to small-address-area.  */
637      return small_addr_symbolic_operand (value, VOIDmode);
638
639    case 'U':
640      /* Vector zero.  */
641      return value == CONST0_RTX (GET_MODE (value));
642
643    case 'W':
644      /* An integer vector, such that conversion to an integer yields a
645	 value appropriate for an integer 'J' constraint.  */
646      if (GET_CODE (value) == CONST_VECTOR
647	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
648	{
649	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
650	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
651	}
652      return false;
653
654    case 'Y':
655      /* A V2SF vector containing elements that satisfy 'G'.  */
656      return
657	(GET_CODE (value) == CONST_VECTOR
658	 && GET_MODE (value) == V2SFmode
659	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
660	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
661
662    default:
663      return false;
664    }
665}
666
667/* Return 1 if the operands of a move are ok.  */
668
669int
670ia64_move_ok (rtx dst, rtx src)
671{
672  /* If we're under init_recog_no_volatile, we'll not be able to use
673     memory_operand.  So check the code directly and don't worry about
674     the validity of the underlying address, which should have been
675     checked elsewhere anyway.  */
676  if (GET_CODE (dst) != MEM)
677    return 1;
678  if (GET_CODE (src) == MEM)
679    return 0;
680  if (register_operand (src, VOIDmode))
681    return 1;
682
  /* Otherwise, this must be a constant: integer zero for integral modes,
     or a floating-point 0.0 or 1.0 (CONST_DOUBLE_OK_FOR_G) for FP modes.  */
684  if (INTEGRAL_MODE_P (GET_MODE (dst)))
685    return src == const0_rtx;
686  else
687    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
688}
689
690/* Return 1 if the operands are ok for a floating point load pair.  */
691
692int
693ia64_load_pair_ok (rtx dst, rtx src)
694{
695  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
696    return 0;
697  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
698    return 0;
699  switch (GET_CODE (XEXP (src, 0)))
700    {
701    case REG:
702    case POST_INC:
703      break;
704    case POST_DEC:
705      return 0;
706    case POST_MODIFY:
707      {
708	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
709
710	if (GET_CODE (adjust) != CONST_INT
711	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
712	  return 0;
713      }
714      break;
715    default:
716      abort ();
717    }
718  return 1;
719}
720
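/* Return 1 if exactly one of OP1 and OP2 is a base register, i.e. the
   two basereg_operand tests give different results.  */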
721int
722addp4_optimize_ok (rtx op1, rtx op2)
723{
724  return (basereg_operand (op1, GET_MODE(op1)) !=
725	  basereg_operand (op2, GET_MODE(op2)));
726}
727
728/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
729   Return the length of the field, or <= 0 on failure.  */
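/* For example, a mask of 0xff0 together with a shift count of 4
   describes an 8-bit field deposited at bit 4: 0xff0 >> 4 == 0xff and
   exact_log2 (0xff + 1) == 8.  A non-contiguous mask such as 0xf70
   fails, since 0xf7 + 1 is not a power of two.  */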
730
731int
732ia64_depz_field_mask (rtx rop, rtx rshift)
733{
734  unsigned HOST_WIDE_INT op = INTVAL (rop);
735  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
736
737  /* Get rid of the zero bits we're shifting in.  */
738  op >>= shift;
739
740  /* We must now have a solid block of 1's at bit 0.  */
741  return exact_log2 (op + 1);
742}
743
744/* Return the TLS model to use for ADDR.  */
745
746static enum tls_model
747tls_symbolic_operand_type (rtx addr)
748{
749  enum tls_model tls_kind = 0;
750
751  if (GET_CODE (addr) == CONST)
752    {
753      if (GET_CODE (XEXP (addr, 0)) == PLUS
754	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
755        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
756    }
757  else if (GET_CODE (addr) == SYMBOL_REF)
758    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
759
760  return tls_kind;
761}
762
763/* Return true if X is a constant that is valid for some immediate
764   field in an instruction.  */
765
766bool
767ia64_legitimate_constant_p (rtx x)
768{
769  switch (GET_CODE (x))
770    {
771    case CONST_INT:
772    case LABEL_REF:
773      return true;
774
775    case CONST_DOUBLE:
776      if (GET_MODE (x) == VOIDmode)
777	return true;
778      return CONST_DOUBLE_OK_FOR_G (x);
779
780    case CONST:
781    case SYMBOL_REF:
782      /* ??? Short term workaround for PR 28490.  We must make the code here
783	 match the code in ia64_expand_move and move_operand, even though they
784	 are both technically wrong.  */
785      if (tls_symbolic_operand_type (x) == 0)
786	{
787	  HOST_WIDE_INT addend = 0;
788	  rtx op = x;
789
790	  if (GET_CODE (op) == CONST
791	      && GET_CODE (XEXP (op, 0)) == PLUS
792	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
793	    {
794	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
795	      op = XEXP (XEXP (op, 0), 0);
796	    }
797
798          if (any_offset_symbol_operand (op, GET_MODE (op))
799              || function_operand (op, GET_MODE (op)))
800            return true;
801	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
802	    return (addend & 0x3fff) == 0;
803	  return false;
804	}
805      return false;
806
807    case CONST_VECTOR:
808      {
809	enum machine_mode mode = GET_MODE (x);
810
811	if (mode == V2SFmode)
812	  return ia64_extra_constraint (x, 'Y');
813
814	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
815		&& GET_MODE_SIZE (mode) <= 8);
816      }
817
818    default:
819      return false;
820    }
821}
822
823/* Don't allow TLS addresses to get spilled to memory.  */
824
825static bool
826ia64_cannot_force_const_mem (rtx x)
827{
828  return tls_symbolic_operand_type (x) != 0;
829}
830
831/* Expand a symbolic constant load.  */
832
833bool
834ia64_expand_load_address (rtx dest, rtx src)
835{
836  gcc_assert (GET_CODE (dest) == REG);
837
  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
839     having to pointer-extend the value afterward.  Other forms of address
840     computation below are also more natural to compute as 64-bit quantities.
841     If we've been given an SImode destination register, change it.  */
842  if (GET_MODE (dest) != Pmode)
843    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
844
845  if (TARGET_NO_PIC)
846    return false;
847  if (small_addr_symbolic_operand (src, VOIDmode))
848    return false;
849
850  if (TARGET_AUTO_PIC)
851    emit_insn (gen_load_gprel64 (dest, src));
852  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
853    emit_insn (gen_load_fptr (dest, src));
854  else if (sdata_symbolic_operand (src, VOIDmode))
855    emit_insn (gen_load_gprel (dest, src));
856  else
857    {
858      HOST_WIDE_INT addend = 0;
859      rtx tmp;
860
861      /* We did split constant offsets in ia64_expand_move, and we did try
862	 to keep them split in move_operand, but we also allowed reload to
863	 rematerialize arbitrary constants rather than spill the value to
864	 the stack and reload it.  So we have to be prepared here to split
865	 them apart again.  */
866      if (GET_CODE (src) == CONST)
867	{
868	  HOST_WIDE_INT hi, lo;
869
870	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
871	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
872	  hi = hi - lo;
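	  /* This sign-extends the low 14 bits of the offset, so LO ends
	     up in [-0x2000, 0x1fff] (a signed 14-bit immediate) and HI
	     is a multiple of 0x4000.  For example, an offset of 0x7000
	     splits into hi = 0x8000, lo = -0x1000, while 0x5000 splits
	     into hi = 0x4000, lo = 0x1000.  */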
873
874	  if (lo != 0)
875	    {
876	      addend = lo;
877	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
878	    }
879	}
880
881      tmp = gen_rtx_HIGH (Pmode, src);
882      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
883      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
884
885      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
886      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
887
888      if (addend)
889	{
890	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
891	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
892	}
893    }
894
895  return true;
896}
897
898static GTY(()) rtx gen_tls_tga;
899static rtx
900gen_tls_get_addr (void)
901{
902  if (!gen_tls_tga)
903    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
904  return gen_tls_tga;
905}
906
907static GTY(()) rtx thread_pointer_rtx;
908static rtx
909gen_thread_pointer (void)
910{
911  if (!thread_pointer_rtx)
912    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
913  return thread_pointer_rtx;
914}
915
916static rtx
917ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
918			 rtx orig_op1, HOST_WIDE_INT addend)
919{
920  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
921  rtx orig_op0 = op0;
922  HOST_WIDE_INT addend_lo, addend_hi;
923
924  switch (tls_kind)
925    {
926    case TLS_MODEL_GLOBAL_DYNAMIC:
927      start_sequence ();
928
929      tga_op1 = gen_reg_rtx (Pmode);
930      emit_insn (gen_load_dtpmod (tga_op1, op1));
931
932      tga_op2 = gen_reg_rtx (Pmode);
933      emit_insn (gen_load_dtprel (tga_op2, op1));
934
935      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
936					 LCT_CONST, Pmode, 2, tga_op1,
937					 Pmode, tga_op2, Pmode);
938
939      insns = get_insns ();
940      end_sequence ();
941
942      if (GET_MODE (op0) != Pmode)
943	op0 = tga_ret;
944      emit_libcall_block (insns, op0, tga_ret, op1);
945      break;
946
947    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
952      start_sequence ();
953
954      tga_op1 = gen_reg_rtx (Pmode);
955      emit_insn (gen_load_dtpmod (tga_op1, op1));
956
957      tga_op2 = const0_rtx;
958
959      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
960					 LCT_CONST, Pmode, 2, tga_op1,
961					 Pmode, tga_op2, Pmode);
962
963      insns = get_insns ();
964      end_sequence ();
965
966      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
967				UNSPEC_LD_BASE);
968      tmp = gen_reg_rtx (Pmode);
969      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
970
971      if (!register_operand (op0, Pmode))
972	op0 = gen_reg_rtx (Pmode);
973      if (TARGET_TLS64)
974	{
975	  emit_insn (gen_load_dtprel (op0, op1));
976	  emit_insn (gen_adddi3 (op0, tmp, op0));
977	}
978      else
979	emit_insn (gen_add_dtprel (op0, op1, tmp));
980      break;
981
982    case TLS_MODEL_INITIAL_EXEC:
983      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
984      addend_hi = addend - addend_lo;
985
986      op1 = plus_constant (op1, addend_hi);
987      addend = addend_lo;
988
989      tmp = gen_reg_rtx (Pmode);
990      emit_insn (gen_load_tprel (tmp, op1));
991
992      if (!register_operand (op0, Pmode))
993	op0 = gen_reg_rtx (Pmode);
994      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
995      break;
996
997    case TLS_MODEL_LOCAL_EXEC:
998      if (!register_operand (op0, Pmode))
999	op0 = gen_reg_rtx (Pmode);
1000
1001      op1 = orig_op1;
1002      addend = 0;
1003      if (TARGET_TLS64)
1004	{
1005	  emit_insn (gen_load_tprel (op0, op1));
1006	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1007	}
1008      else
1009	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1010      break;
1011
1012    default:
1013      gcc_unreachable ();
1014    }
1015
1016  if (addend)
1017    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1018			       orig_op0, 1, OPTAB_DIRECT);
1019  if (orig_op0 == op0)
1020    return NULL_RTX;
1021  if (GET_MODE (orig_op0) == Pmode)
1022    return op0;
1023  return gen_lowpart (GET_MODE (orig_op0), op0);
1024}
1025
1026rtx
1027ia64_expand_move (rtx op0, rtx op1)
1028{
1029  enum machine_mode mode = GET_MODE (op0);
1030
1031  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1032    op1 = force_reg (mode, op1);
1033
1034  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1035    {
1036      HOST_WIDE_INT addend = 0;
1037      enum tls_model tls_kind;
1038      rtx sym = op1;
1039
1040      if (GET_CODE (op1) == CONST
1041	  && GET_CODE (XEXP (op1, 0)) == PLUS
1042	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1043	{
1044	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1045	  sym = XEXP (XEXP (op1, 0), 0);
1046	}
1047
1048      tls_kind = tls_symbolic_operand_type (sym);
1049      if (tls_kind)
1050	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1051
1052      if (any_offset_symbol_operand (sym, mode))
1053	addend = 0;
1054      else if (aligned_offset_symbol_operand (sym, mode))
1055	{
1056	  HOST_WIDE_INT addend_lo, addend_hi;
1057
1058	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1059	  addend_hi = addend - addend_lo;
1060
1061	  if (addend_lo != 0)
1062	    {
1063	      op1 = plus_constant (sym, addend_hi);
1064	      addend = addend_lo;
1065	    }
1066	  else
1067	    addend = 0;
1068	}
1069      else
1070	op1 = sym;
1071
1072      if (reload_completed)
1073	{
1074	  /* We really should have taken care of this offset earlier.  */
1075	  gcc_assert (addend == 0);
1076	  if (ia64_expand_load_address (op0, op1))
1077	    return NULL_RTX;
1078	}
1079
1080      if (addend)
1081	{
1082	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1083
1084	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1085
1086	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1087				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1088	  if (op0 == op1)
1089	    return NULL_RTX;
1090	}
1091    }
1092
1093  return op1;
1094}
1095
1096/* Split a move from OP1 to OP0 conditional on COND.  */
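/* Each insn emitted for the move is rewritten so that its pattern
   becomes (cond_exec COND (original pattern)), i.e. the whole move is
   predicated on COND.  */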
1097
1098void
1099ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1100{
1101  rtx insn, first = get_last_insn ();
1102
1103  emit_move_insn (op0, op1);
1104
1105  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1106    if (INSN_P (insn))
1107      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1108					  PATTERN (insn));
1109}
1110
1111/* Split a post-reload TImode or TFmode reference into two DImode
1112   components.  This is made extra difficult by the fact that we do
1113   not get any scratch registers to work with, because reload cannot
1114   be prevented from giving us a scratch that overlaps the register
1115   pair involved.  So instead, when addressing memory, we tweak the
1116   pointer register up and back down with POST_INCs.  Or up and not
1117   back down when we can get away with it.
1118
1119   REVERSED is true when the loads must be done in reversed order
1120   (high word first) for correctness.  DEAD is true when the pointer
1121   dies with the second insn we generate and therefore the second
1122   address must not carry a postmodify.
1123
1124   May return an insn which is to be emitted after the moves.  */
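/* For example, a TImode load from (mem:TI (reg r)) becomes a DImode
   load through (mem:DI (post_inc r)) followed by a DImode load through
   (mem:DI (post_dec r)), leaving the pointer register unchanged; when
   the pointer is dead, the second reference simply omits the
   post-decrement (the "up and not back down" case above).  */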
1125
1126static rtx
1127ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1128{
1129  rtx fixup = 0;
1130
1131  switch (GET_CODE (in))
1132    {
1133    case REG:
1134      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1135      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1136      break;
1137
1138    case CONST_INT:
1139    case CONST_DOUBLE:
1140      /* Cannot occur reversed.  */
1141      gcc_assert (!reversed);
1142
1143      if (GET_MODE (in) != TFmode)
1144	split_double (in, &out[0], &out[1]);
1145      else
1146	/* split_double does not understand how to split a TFmode
1147	   quantity into a pair of DImode constants.  */
1148	{
1149	  REAL_VALUE_TYPE r;
1150	  unsigned HOST_WIDE_INT p[2];
1151	  long l[4];  /* TFmode is 128 bits */
1152
1153	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1154	  real_to_target (l, &r, TFmode);
1155
1156	  if (FLOAT_WORDS_BIG_ENDIAN)
1157	    {
1158	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1159	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1160	    }
1161	  else
1162	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1165	    }
1166	  out[0] = GEN_INT (p[0]);
1167	  out[1] = GEN_INT (p[1]);
1168	}
1169      break;
1170
1171    case MEM:
1172      {
1173	rtx base = XEXP (in, 0);
1174	rtx offset;
1175
1176	switch (GET_CODE (base))
1177	  {
1178	  case REG:
1179	    if (!reversed)
1180	      {
1181		out[0] = adjust_automodify_address
1182		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1183		out[1] = adjust_automodify_address
1184		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1185	      }
1186	    else
1187	      {
1188		/* Reversal requires a pre-increment, which can only
1189		   be done as a separate insn.  */
1190		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1191		out[0] = adjust_automodify_address
1192		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1193		out[1] = adjust_address (in, DImode, 0);
1194	      }
1195	    break;
1196
1197	  case POST_INC:
1198	    gcc_assert (!reversed && !dead);
1199
1200	    /* Just do the increment in two steps.  */
1201	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1202	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1203	    break;
1204
1205	  case POST_DEC:
1206	    gcc_assert (!reversed && !dead);
1207
1208	    /* Add 8, subtract 24.  */
1209	    base = XEXP (base, 0);
1210	    out[0] = adjust_automodify_address
1211	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1212	    out[1] = adjust_automodify_address
1213	      (in, DImode,
1214	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1215	       8);
1216	    break;
1217
1218	  case POST_MODIFY:
1219	    gcc_assert (!reversed && !dead);
1220
1221	    /* Extract and adjust the modification.  This case is
1222	       trickier than the others, because we might have an
1223	       index register, or we might have a combined offset that
1224	       doesn't fit a signed 9-bit displacement field.  We can
1225	       assume the incoming expression is already legitimate.  */
1226	    offset = XEXP (base, 1);
1227	    base = XEXP (base, 0);
1228
1229	    out[0] = adjust_automodify_address
1230	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1231
1232	    if (GET_CODE (XEXP (offset, 1)) == REG)
1233	      {
1234		/* Can't adjust the postmodify to match.  Emit the
1235		   original, then a separate addition insn.  */
1236		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1237		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1238	      }
1239	    else
1240	      {
1241		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1242		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1243		  {
1244		    /* Again the postmodify cannot be made to match,
1245		       but in this case it's more efficient to get rid
1246		       of the postmodify entirely and fix up with an
1247		       add insn.  */
1248		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1249		    fixup = gen_adddi3
1250		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1251		  }
1252		else
1253		  {
1254		    /* Combined offset still fits in the displacement field.
1255		       (We cannot overflow it at the high end.)  */
1256		    out[1] = adjust_automodify_address
1257		      (in, DImode, gen_rtx_POST_MODIFY
1258		       (Pmode, base, gen_rtx_PLUS
1259			(Pmode, base,
1260			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1261		       8);
1262		  }
1263	      }
1264	    break;
1265
1266	  default:
1267	    gcc_unreachable ();
1268	  }
1269	break;
1270      }
1271
1272    default:
1273      gcc_unreachable ();
1274    }
1275
1276  return fixup;
1277}
1278
1279/* Split a TImode or TFmode move instruction after reload.
1280   This is used by *movtf_internal and *movti_internal.  */
1281void
1282ia64_split_tmode_move (rtx operands[])
1283{
1284  rtx in[2], out[2], insn;
1285  rtx fixup[2];
1286  bool dead = false;
1287  bool reversed = false;
1288
1289  /* It is possible for reload to decide to overwrite a pointer with
1290     the value it points to.  In that case we have to do the loads in
1291     the appropriate order so that the pointer is not destroyed too
1292     early.  Also we must not generate a postmodify for that second
1293     load, or rws_access_regno will die.  */
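  /* For example, if operands[0] is the register pair r14/r15 and
     operands[1] is (mem:TI (reg r14)), the high word must be loaded
     into r15 before the low word overwrites r14, and the second access
     must not post-modify the clobbered pointer.  */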
1294  if (GET_CODE (operands[1]) == MEM
1295      && reg_overlap_mentioned_p (operands[0], operands[1]))
1296    {
1297      rtx base = XEXP (operands[1], 0);
1298      while (GET_CODE (base) != REG)
1299	base = XEXP (base, 0);
1300
1301      if (REGNO (base) == REGNO (operands[0]))
1302	reversed = true;
1303      dead = true;
1304    }
1305  /* Another reason to do the moves in reversed order is if the first
1306     element of the target register pair is also the second element of
1307     the source register pair.  */
1308  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1309      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1310    reversed = true;
1311
1312  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1313  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1314
1315#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1316  if (GET_CODE (EXP) == MEM						\
1317      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1318	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1319	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1320    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1321					  XEXP (XEXP (EXP, 0), 0),	\
1322					  REG_NOTES (INSN))
1323
1324  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1325  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1326  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1327
1328  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1329  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1330  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1331
1332  if (fixup[0])
1333    emit_insn (fixup[0]);
1334  if (fixup[1])
1335    emit_insn (fixup[1]);
1336
1337#undef MAYBE_ADD_REG_INC_NOTE
1338}
1339
1340/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1341   through memory plus an extra GR scratch register.  Except that you can
1342   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1343   SECONDARY_RELOAD_CLASS, but not both.
1344
1345   We got into problems in the first place by allowing a construct like
1346   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1347   This solution attempts to prevent this situation from occurring.  When
1348   we see something like the above, we spill the inner register to memory.  */
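/* So, for example, (subreg:XF (reg:TI r) 0) is rewritten here as an
   XFmode reference to a 16-byte stack slot holding a copy of the
   TImode register.  */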
1349
1350static rtx
1351spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1352{
1353  if (GET_CODE (in) == SUBREG
1354      && GET_MODE (SUBREG_REG (in)) == TImode
1355      && GET_CODE (SUBREG_REG (in)) == REG)
1356    {
1357      rtx memt = assign_stack_temp (TImode, 16, 0);
1358      emit_move_insn (memt, SUBREG_REG (in));
1359      return adjust_address (memt, mode, 0);
1360    }
1361  else if (force && GET_CODE (in) == REG)
1362    {
1363      rtx memx = assign_stack_temp (mode, 16, 0);
1364      emit_move_insn (memx, in);
1365      return memx;
1366    }
1367  else
1368    return in;
1369}
1370
1371/* Expand the movxf or movrf pattern (MODE says which) with the given
1372   OPERANDS, returning true if the pattern should then invoke
1373   DONE.  */
1374
1375bool
1376ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1377{
1378  rtx op0 = operands[0];
1379
1380  if (GET_CODE (op0) == SUBREG)
1381    op0 = SUBREG_REG (op0);
1382
1383  /* We must support XFmode loads into general registers for stdarg/vararg,
1384     unprototyped calls, and a rare case where a long double is passed as
1385     an argument after a float HFA fills the FP registers.  We split them into
1386     DImode loads for convenience.  We also need to support XFmode stores
1387     for the last case.  This case does not happen for stdarg/vararg routines,
1388     because we do a block store to memory of unnamed arguments.  */
1389
1390  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1391    {
1392      rtx out[2];
1393
1394      /* We're hoping to transform everything that deals with XFmode
1395	 quantities and GR registers early in the compiler.  */
1396      gcc_assert (!no_new_pseudos);
1397
1398      /* Struct to register can just use TImode instead.  */
1399      if ((GET_CODE (operands[1]) == SUBREG
1400	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1401	  || (GET_CODE (operands[1]) == REG
1402	      && GR_REGNO_P (REGNO (operands[1]))))
1403	{
1404	  rtx op1 = operands[1];
1405
1406	  if (GET_CODE (op1) == SUBREG)
1407	    op1 = SUBREG_REG (op1);
1408	  else
1409	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1410
1411	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1412	  return true;
1413	}
1414
1415      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1416	{
1417	  /* Don't word-swap when reading in the constant.  */
1418	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1419			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1420					   0, mode));
1421	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1422			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1423					   0, mode));
1424	  return true;
1425	}
1426
1427      /* If the quantity is in a register not known to be GR, spill it.  */
1428      if (register_operand (operands[1], mode))
1429	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1430
1431      gcc_assert (GET_CODE (operands[1]) == MEM);
1432
1433      /* Don't word-swap when reading in the value.  */
1434      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1435      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1436
1437      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1438      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1439      return true;
1440    }
1441
1442  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1443    {
1444      /* We're hoping to transform everything that deals with XFmode
1445	 quantities and GR registers early in the compiler.  */
1446      gcc_assert (!no_new_pseudos);
1447
1448      /* Op0 can't be a GR_REG here, as that case is handled above.
1449	 If op0 is a register, then we spill op1, so that we now have a
1450	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1451	 to force the spill.  */
1452      if (register_operand (operands[0], mode))
1453	{
1454	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1455	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1456	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1457	}
1458
1459      else
1460	{
1461	  rtx in[2];
1462
1463	  gcc_assert (GET_CODE (operands[0]) == MEM);
1464
1465	  /* Don't word-swap when writing out the value.  */
1466	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1467	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1468
1469	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1470	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1471	  return true;
1472	}
1473    }
1474
1475  if (!reload_in_progress && !reload_completed)
1476    {
1477      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1478
1479      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1480	{
1481	  rtx memt, memx, in = operands[1];
1482	  if (CONSTANT_P (in))
1483	    in = validize_mem (force_const_mem (mode, in));
1484	  if (GET_CODE (in) == MEM)
1485	    memt = adjust_address (in, TImode, 0);
1486	  else
1487	    {
1488	      memt = assign_stack_temp (TImode, 16, 0);
1489	      memx = adjust_address (memt, mode, 0);
1490	      emit_move_insn (memx, in);
1491	    }
1492	  emit_move_insn (op0, memt);
1493	  return true;
1494	}
1495
1496      if (!ia64_move_ok (operands[0], operands[1]))
1497	operands[1] = force_reg (mode, operands[1]);
1498    }
1499
1500  return false;
1501}
1502
1503/* Emit comparison instruction if necessary, returning the expression
1504   that holds the compare result in the proper mode.  */
1505
1506static GTY(()) rtx cmptf_libfunc;
1507
1508rtx
1509ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1510{
1511  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1512  rtx cmp;
1513
1514  /* If we have a BImode input, then we already have a compare result, and
1515     do not need to emit another comparison.  */
1516  if (GET_MODE (op0) == BImode)
1517    {
1518      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1519      cmp = op0;
1520    }
1521  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1522     magic number as its third argument, that indicates what to do.
1523     The return value is an integer to be compared against zero.  */
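  /* For example, a LE comparison is emitted as a call
     _U_Qfcmp (op0, op1, QCMP_LT|QCMP_EQ|QCMP_INV) followed by an NE
     test of the returned integer against zero, per the switch below.  */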
1524  else if (GET_MODE (op0) == TFmode)
1525    {
1526      enum qfcmp_magic {
1527	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1528	QCMP_UNORD = 2,
1529	QCMP_EQ = 4,
1530	QCMP_LT = 8,
1531	QCMP_GT = 16
1532      } magic;
1533      enum rtx_code ncode;
1534      rtx ret, insns;
1535
1536      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1537      switch (code)
1538	{
1539	  /* 1 = equal, 0 = not equal.  Equality operators do
1540	     not raise FP_INVALID when given an SNaN operand.  */
1541	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1542	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1543	  /* isunordered() from C99.  */
1544	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1545	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1546	  /* Relational operators raise FP_INVALID when given
1547	     an SNaN operand.  */
1548	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1549	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1550	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1551	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1552	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
	 Expanders for buneq etc. would have to be added to ia64.md
1554	     for this to be useful.  */
1555	default: gcc_unreachable ();
1556	}
1557
1558      start_sequence ();
1559
1560      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1561				     op0, TFmode, op1, TFmode,
1562				     GEN_INT (magic), DImode);
1563      cmp = gen_reg_rtx (BImode);
1564      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1565			      gen_rtx_fmt_ee (ncode, BImode,
1566					      ret, const0_rtx)));
1567
1568      insns = get_insns ();
1569      end_sequence ();
1570
1571      emit_libcall_block (insns, cmp, cmp,
1572			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1573      code = NE;
1574    }
1575  else
1576    {
1577      cmp = gen_reg_rtx (BImode);
1578      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1579			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1580      code = NE;
1581    }
1582
1583  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1584}
1585
1586/* Generate an integral vector comparison.  Return true if the condition has
1587   been reversed, and so the sense of the comparison should be inverted.  */
1588
1589static bool
1590ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1591			    rtx dest, rtx op0, rtx op1)
1592{
1593  bool negate = false;
1594  rtx x;
1595
1596  /* Canonicalize the comparison to EQ, GT, GTU.  */
1597  switch (code)
1598    {
1599    case EQ:
1600    case GT:
1601    case GTU:
1602      break;
1603
1604    case NE:
1605    case LE:
1606    case LEU:
1607      code = reverse_condition (code);
1608      negate = true;
1609      break;
1610
1611    case GE:
1612    case GEU:
1613      code = reverse_condition (code);
1614      negate = true;
1615      /* FALLTHRU */
1616
1617    case LT:
1618    case LTU:
1619      code = swap_condition (code);
1620      x = op0, op0 = op1, op1 = x;
1621      break;
1622
1623    default:
1624      gcc_unreachable ();
1625    }
1626
1627  /* Unsigned parallel compare is not supported by the hardware.  Play some
1628     tricks to turn this into a signed comparison against 0.  */
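  /* For the V8QImode and V4HImode cases below this relies on the
     identity x >u y <==> (x -us y) != 0, where -us is unsigned
     saturating subtraction: e.g. 5 -us 3 == 2 (nonzero) while
     3 -us 5 == 0.  The EQ compare against zero together with the
     flipped NEGATE flag implements exactly that test.  */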
1629  if (code == GTU)
1630    {
1631      switch (mode)
1632	{
1633	case V2SImode:
1634	  {
1635	    rtx t1, t2, mask;
1636
1637	    /* Perform a parallel modulo subtraction.  */
1638	    t1 = gen_reg_rtx (V2SImode);
1639	    emit_insn (gen_subv2si3 (t1, op0, op1));
1640
1641	    /* Extract the original sign bit of op0.  */
1642	    mask = GEN_INT (-0x80000000);
1643	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1644	    mask = force_reg (V2SImode, mask);
1645	    t2 = gen_reg_rtx (V2SImode);
1646	    emit_insn (gen_andv2si3 (t2, op0, mask));
1647
1648	    /* XOR it back into the result of the subtraction.  This results
1649	       in the sign bit set iff we saw unsigned underflow.  */
1650	    x = gen_reg_rtx (V2SImode);
1651	    emit_insn (gen_xorv2si3 (x, t1, t2));
1652
1653	    code = GT;
1654	    op0 = x;
1655	    op1 = CONST0_RTX (mode);
1656	  }
1657	  break;
1658
1659	case V8QImode:
1660	case V4HImode:
1661	  /* Perform a parallel unsigned saturating subtraction.  */
1662	  x = gen_reg_rtx (mode);
1663	  emit_insn (gen_rtx_SET (VOIDmode, x,
1664				  gen_rtx_US_MINUS (mode, op0, op1)));
1665
1666	  code = EQ;
1667	  op0 = x;
1668	  op1 = CONST0_RTX (mode);
1669	  negate = !negate;
1670	  break;
1671
1672	default:
1673	  gcc_unreachable ();
1674	}
1675    }
1676
1677  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1678  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1679
1680  return negate;
1681}
1682
1683/* Emit an integral vector conditional move.  */
1684
1685void
1686ia64_expand_vecint_cmov (rtx operands[])
1687{
1688  enum machine_mode mode = GET_MODE (operands[0]);
1689  enum rtx_code code = GET_CODE (operands[3]);
1690  bool negate;
1691  rtx cmp, x, ot, of;
1692
1693  cmp = gen_reg_rtx (mode);
1694  negate = ia64_expand_vecint_compare (code, mode, cmp,
1695				       operands[4], operands[5]);
1696
1697  ot = operands[1+negate];
1698  of = operands[2-negate];
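  /* CMP holds an element-wise mask, presumably all ones where the
     comparison was true and all zeros elsewhere, so the conditional move
     below is just (cmp & on_true) | (~cmp & on_false); the special cases
     merely drop whichever half is known to be zero.  */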
1699
1700  if (ot == CONST0_RTX (mode))
1701    {
1702      if (of == CONST0_RTX (mode))
1703	{
1704	  emit_move_insn (operands[0], ot);
1705	  return;
1706	}
1707
1708      x = gen_rtx_NOT (mode, cmp);
1709      x = gen_rtx_AND (mode, x, of);
1710      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1711    }
1712  else if (of == CONST0_RTX (mode))
1713    {
1714      x = gen_rtx_AND (mode, cmp, ot);
1715      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1716    }
1717  else
1718    {
1719      rtx t, f;
1720
1721      t = gen_reg_rtx (mode);
1722      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1723      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1724
1725      f = gen_reg_rtx (mode);
1726      x = gen_rtx_NOT (mode, cmp);
1727      x = gen_rtx_AND (mode, x, operands[2-negate]);
1728      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1729
1730      x = gen_rtx_IOR (mode, t, f);
1731      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1732    }
1733}
1734
1735/* Emit an integral vector min or max operation.  Return true if all done.  */
1736
1737bool
1738ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1739			   rtx operands[])
1740{
1741  rtx xops[6];
1742
1743  /* These four combinations are supported directly.  */
1744  if (mode == V8QImode && (code == UMIN || code == UMAX))
1745    return false;
1746  if (mode == V4HImode && (code == SMIN || code == SMAX))
1747    return false;
1748
1749  /* This combination can be implemented with only saturating subtraction.  */
1750  if (mode == V4HImode && code == UMAX)
1751    {
1752      rtx x, tmp = gen_reg_rtx (mode);
1753
1754      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1755      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1756
1757      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
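      /* I.e. umax (a, b) = (a -us b) + b, since the saturating difference
	 is a - b when a > b and zero otherwise.  */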
1758      return true;
1759    }
1760
1761  /* Everything else implemented via vector comparisons.  */
1762  xops[0] = operands[0];
1763  xops[4] = xops[1] = operands[1];
1764  xops[5] = xops[2] = operands[2];
1765
1766  switch (code)
1767    {
1768    case UMIN:
1769      code = LTU;
1770      break;
1771    case UMAX:
1772      code = GTU;
1773      break;
1774    case SMIN:
1775      code = LT;
1776      break;
1777    case SMAX:
1778      code = GT;
1779      break;
1780    default:
1781      gcc_unreachable ();
1782    }
1783  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1784
1785  ia64_expand_vecint_cmov (xops);
1786  return true;
1787}
1788
1789	/* Emit an integral vector widening sum operation.  */
1790
1791void
1792ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1793{
1794  rtx l, h, x, s;
1795  enum machine_mode wmode, mode;
1796  rtx (*unpack_l) (rtx, rtx, rtx);
1797  rtx (*unpack_h) (rtx, rtx, rtx);
1798  rtx (*plus) (rtx, rtx, rtx);
1799
1800  wmode = GET_MODE (operands[0]);
1801  mode = GET_MODE (operands[1]);
1802
1803  switch (mode)
1804    {
1805    case V8QImode:
1806      unpack_l = gen_unpack1_l;
1807      unpack_h = gen_unpack1_h;
1808      plus = gen_addv4hi3;
1809      break;
1810    case V4HImode:
1811      unpack_l = gen_unpack2_l;
1812      unpack_h = gen_unpack2_h;
1813      plus = gen_addv2si3;
1814      break;
1815    default:
1816      gcc_unreachable ();
1817    }
1818
1819  /* Fill in x with the sign extension of each element in op1.  */
1820  if (unsignedp)
1821    x = CONST0_RTX (mode);
1822  else
1823    {
1824      bool neg;
1825
1826      x = gen_reg_rtx (mode);
1827
1828      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1829					CONST0_RTX (mode));
1830      gcc_assert (!neg);
1831    }
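  /* The LT compare against zero should leave each element of X all ones
     exactly when the corresponding element of op1 is negative, which is the
     high (sign-extension) half that the unpack patterns below interleave
     with op1.  */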
1832
1833  l = gen_reg_rtx (wmode);
1834  h = gen_reg_rtx (wmode);
1835  s = gen_reg_rtx (wmode);
1836
1837  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1838  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1839  emit_insn (plus (s, l, operands[2]));
1840  emit_insn (plus (operands[0], h, s));
1841}
1842
1843/* Emit a signed or unsigned V8QI dot product operation.  */
1844
1845void
1846ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1847{
1848  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1849
1850  /* Fill in x1 and x2 with the sign extension of each element.  */
1851  if (unsignedp)
1852    x1 = x2 = CONST0_RTX (V8QImode);
1853  else
1854    {
1855      bool neg;
1856
1857      x1 = gen_reg_rtx (V8QImode);
1858      x2 = gen_reg_rtx (V8QImode);
1859
1860      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1861					CONST0_RTX (V8QImode));
1862      gcc_assert (!neg);
1863      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1864					CONST0_RTX (V8QImode));
1865      gcc_assert (!neg);
1866    }
1867
1868  l1 = gen_reg_rtx (V4HImode);
1869  l2 = gen_reg_rtx (V4HImode);
1870  h1 = gen_reg_rtx (V4HImode);
1871  h2 = gen_reg_rtx (V4HImode);
1872
1873  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1874  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1875  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1876  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1877
1878  p1 = gen_reg_rtx (V2SImode);
1879  p2 = gen_reg_rtx (V2SImode);
1880  p3 = gen_reg_rtx (V2SImode);
1881  p4 = gen_reg_rtx (V2SImode);
1882  emit_insn (gen_pmpy2_r (p1, l1, l2));
1883  emit_insn (gen_pmpy2_l (p2, l1, l2));
1884  emit_insn (gen_pmpy2_r (p3, h1, h2));
1885  emit_insn (gen_pmpy2_l (p4, h1, h2));
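  /* pmpy2_r and pmpy2_l are expected to multiply the even and odd 16-bit
     lanes respectively, each yielding two 32-bit products, so p1 through p4
     together cover all eight byte products; the additions below fold them
     into the V2SI accumulator.  */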
1886
1887  s1 = gen_reg_rtx (V2SImode);
1888  s2 = gen_reg_rtx (V2SImode);
1889  s3 = gen_reg_rtx (V2SImode);
1890  emit_insn (gen_addv2si3 (s1, p1, p2));
1891  emit_insn (gen_addv2si3 (s2, p3, p4));
1892  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1893  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1894}
1895
1896/* Emit the appropriate sequence for a call.  */
1897
1898void
1899ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1900		  int sibcall_p)
1901{
1902  rtx insn, b0;
1903
1904  addr = XEXP (addr, 0);
1905  addr = convert_memory_address (DImode, addr);
1906  b0 = gen_rtx_REG (DImode, R_BR (0));
1907
1908  /* ??? Should do this for functions known to bind local too.  */
1909  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1910    {
1911      if (sibcall_p)
1912	insn = gen_sibcall_nogp (addr);
1913      else if (! retval)
1914	insn = gen_call_nogp (addr, b0);
1915      else
1916	insn = gen_call_value_nogp (retval, addr, b0);
1917      insn = emit_call_insn (insn);
1918    }
1919  else
1920    {
1921      if (sibcall_p)
1922	insn = gen_sibcall_gp (addr);
1923      else if (! retval)
1924	insn = gen_call_gp (addr, b0);
1925      else
1926	insn = gen_call_value_gp (retval, addr, b0);
1927      insn = emit_call_insn (insn);
1928
1929      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1930    }
1931
1932  if (sibcall_p)
1933    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1934}
1935
1936void
1937ia64_reload_gp (void)
1938{
1939  rtx tmp;
1940
1941  if (current_frame_info.reg_save_gp)
1942    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1943  else
1944    {
1945      HOST_WIDE_INT offset;
1946
1947      offset = (current_frame_info.spill_cfa_off
1948	        + current_frame_info.spill_size);
1949      if (frame_pointer_needed)
1950        {
1951          tmp = hard_frame_pointer_rtx;
1952          offset = -offset;
1953        }
1954      else
1955        {
1956          tmp = stack_pointer_rtx;
1957          offset = current_frame_info.total_size - offset;
1958        }
1959
1960      if (CONST_OK_FOR_I (offset))
1961        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1962			       tmp, GEN_INT (offset)));
1963      else
1964        {
1965          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1966          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1967			         pic_offset_table_rtx, tmp));
1968        }
1969
1970      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1971    }
1972
1973  emit_move_insn (pic_offset_table_rtx, tmp);
1974}
1975
1976void
1977ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
1978		 rtx scratch_b, int noreturn_p, int sibcall_p)
1979{
1980  rtx insn;
1981  bool is_desc = false;
1982
1983  /* If we find we're calling through a register, then we're actually
1984     calling through a descriptor, so load up the values.  */
1985  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
1986    {
1987      rtx tmp;
1988      bool addr_dead_p;
1989
1990      /* ??? We are currently constrained to *not* use peep2, because
1991	 we can legitimately change the global lifetime of the GP
1992	 (in the form of killing where previously live).  This is
1993	 because a call through a descriptor doesn't use the previous
1994	 value of the GP, while a direct call does, and we do not
1995	 commit to either form until the split here.
1996
1997	 That said, this means that we lack precise life info for
1998	 whether ADDR is dead after this call.  This is not terribly
1999	 important, since we can fix things up essentially for free
2000	 with the POST_DEC below, but it's nice to not use it when we
2001	 can immediately tell it's not necessary.  */
2002      addr_dead_p = ((noreturn_p || sibcall_p
2003		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2004					    REGNO (addr)))
2005		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2006
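      /* ADDR points at a function descriptor, which in the standard ia64
	 ABI is a pair of 8-byte words: the code entry point followed by the
	 gp value.  The two post-incremented loads below pick up both.  */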
2007      /* Load the code address into scratch_b.  */
2008      tmp = gen_rtx_POST_INC (Pmode, addr);
2009      tmp = gen_rtx_MEM (Pmode, tmp);
2010      emit_move_insn (scratch_r, tmp);
2011      emit_move_insn (scratch_b, scratch_r);
2012
2013      /* Load the GP address.  If ADDR is not dead here, then we must
2014	 revert the change made above via the POST_INCREMENT.  */
2015      if (!addr_dead_p)
2016	tmp = gen_rtx_POST_DEC (Pmode, addr);
2017      else
2018	tmp = addr;
2019      tmp = gen_rtx_MEM (Pmode, tmp);
2020      emit_move_insn (pic_offset_table_rtx, tmp);
2021
2022      is_desc = true;
2023      addr = scratch_b;
2024    }
2025
2026  if (sibcall_p)
2027    insn = gen_sibcall_nogp (addr);
2028  else if (retval)
2029    insn = gen_call_value_nogp (retval, addr, retaddr);
2030  else
2031    insn = gen_call_nogp (addr, retaddr);
2032  emit_call_insn (insn);
2033
2034  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2035    ia64_reload_gp ();
2036}
2037
2038/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2039
2040   This differs from the generic code in that we know about the zero-extending
2041   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2042   also know that ld.acq+cmpxchg.rel equals a full barrier.
2043
2044   The loop we want to generate looks like
2045
2046	cmp_reg = mem;
2047      label:
2048        old_reg = cmp_reg;
2049	new_reg = cmp_reg op val;
2050	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2051	if (cmp_reg != old_reg)
2052	  goto label;
2053
2054   Note that we only do the plain load from memory once.  Subsequent
2055   iterations use the value loaded by the compare-and-swap pattern.  */
2056
2057void
2058ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2059		       rtx old_dst, rtx new_dst)
2060{
2061  enum machine_mode mode = GET_MODE (mem);
2062  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2063  enum insn_code icode;
2064
2065  /* Special case for using fetchadd.  */
2066  if ((mode == SImode || mode == DImode)
2067      && (code == PLUS || code == MINUS)
2068      && fetchadd_operand (val, mode))
2069    {
2070      if (code == MINUS)
2071	val = GEN_INT (-INTVAL (val));
2072
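      /* fetchadd accepts only a handful of immediate increments (exactly
	 the ones fetchadd_operand accepted above), so this fast path is
	 limited to those constants; everything else falls through to the
	 compare-and-swap loop below.  */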
2073      if (!old_dst)
2074        old_dst = gen_reg_rtx (mode);
2075
2076      emit_insn (gen_memory_barrier ());
2077
2078      if (mode == SImode)
2079	icode = CODE_FOR_fetchadd_acq_si;
2080      else
2081	icode = CODE_FOR_fetchadd_acq_di;
2082      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2083
2084      if (new_dst)
2085	{
2086	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2087					 true, OPTAB_WIDEN);
2088	  if (new_reg != new_dst)
2089	    emit_move_insn (new_dst, new_reg);
2090	}
2091      return;
2092    }
2093
2094  /* Because of the volatile mem read, we get an ld.acq, which is the
2095     front half of the full barrier.  The end half is the cmpxchg.rel.  */
2096  gcc_assert (MEM_VOLATILE_P (mem));
2097
2098  old_reg = gen_reg_rtx (DImode);
2099  cmp_reg = gen_reg_rtx (DImode);
2100  label = gen_label_rtx ();
2101
2102  if (mode != DImode)
2103    {
2104      val = simplify_gen_subreg (DImode, val, mode, 0);
2105      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2106    }
2107  else
2108    emit_move_insn (cmp_reg, mem);
2109
2110  emit_label (label);
2111
2112  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2113  emit_move_insn (old_reg, cmp_reg);
2114  emit_move_insn (ar_ccv, cmp_reg);
2115
2116  if (old_dst)
2117    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2118
2119  new_reg = cmp_reg;
2120  if (code == NOT)
2121    {
2122      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2123      code = AND;
2124    }
2125  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2126				 true, OPTAB_DIRECT);
2127
2128  if (mode != DImode)
2129    new_reg = gen_lowpart (mode, new_reg);
2130  if (new_dst)
2131    emit_move_insn (new_dst, new_reg);
2132
2133  switch (mode)
2134    {
2135    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2136    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2137    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2138    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2139    default:
2140      gcc_unreachable ();
2141    }
2142
2143  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2144
2145  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2146}
2147
2148/* Begin the assembly file.  */
2149
2150static void
2151ia64_file_start (void)
2152{
2153  /* Variable tracking should be run after all optimizations which change order
2154     of insns.  It also needs a valid CFG.  This can't be done in
2155     ia64_override_options, because flag_var_tracking is finalized after
2156     that.  */
2157  ia64_flag_var_tracking = flag_var_tracking;
2158  flag_var_tracking = 0;
2159
2160  default_file_start ();
2161  emit_safe_across_calls ();
2162}
2163
2164void
2165emit_safe_across_calls (void)
2166{
2167  unsigned int rs, re;
2168  int out_state;
2169
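  /* Scan for maximal runs of call-saved predicate registers and emit them
     as ranges; the output is typically a single directive along the lines
     of ".pred.safe_across_calls p1-p5,p16-p63".  */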
2170  rs = 1;
2171  out_state = 0;
2172  while (1)
2173    {
2174      while (rs < 64 && call_used_regs[PR_REG (rs)])
2175	rs++;
2176      if (rs >= 64)
2177	break;
2178      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2179	continue;
2180      if (out_state == 0)
2181	{
2182	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2183	  out_state = 1;
2184	}
2185      else
2186	fputc (',', asm_out_file);
2187      if (re == rs + 1)
2188	fprintf (asm_out_file, "p%u", rs);
2189      else
2190	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2191      rs = re + 1;
2192    }
2193  if (out_state)
2194    fputc ('\n', asm_out_file);
2195}
2196
2197	/* Helper function for ia64_compute_frame_size: find an appropriate general
2198	   register to spill some special register to.  Registers already handed
2199	   out by this routine are marked in current_frame_info.gr_used_mask.
2200	   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2201
2202static int
2203find_gr_spill (int try_locals)
2204{
2205  int regno;
2206
2207  /* If this is a leaf function, first try an otherwise unused
2208     call-clobbered register.  */
2209  if (current_function_is_leaf)
2210    {
2211      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2212	if (! regs_ever_live[regno]
2213	    && call_used_regs[regno]
2214	    && ! fixed_regs[regno]
2215	    && ! global_regs[regno]
2216	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2217	  {
2218	    current_frame_info.gr_used_mask |= 1 << regno;
2219	    return regno;
2220	  }
2221    }
2222
2223  if (try_locals)
2224    {
2225      regno = current_frame_info.n_local_regs;
2226      /* If there is a frame pointer, then we can't use loc79, because
2227	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2228	 reg_name switching code in ia64_expand_prologue.  */
2229      if (regno < (80 - frame_pointer_needed))
2230	{
2231	  current_frame_info.n_local_regs = regno + 1;
2232	  return LOC_REG (0) + regno;
2233	}
2234    }
2235
2236  /* Failed to find a general register to spill to.  Must use stack.  */
2237  return 0;
2238}
2239
2240/* In order to make for nice schedules, we try to allocate every temporary
2241   to a different register.  We must of course stay away from call-saved,
2242   fixed, and global registers.  We must also stay away from registers
2243   allocated in current_frame_info.gr_used_mask, since those include regs
2244   used all through the prologue.
2245
2246   Any register allocated here must be used immediately.  The idea is to
2247   aid scheduling, not to solve data flow problems.  */
2248
2249static int last_scratch_gr_reg;
2250
2251static int
2252next_scratch_gr_reg (void)
2253{
2254  int i, regno;
2255
2256  for (i = 0; i < 32; ++i)
2257    {
2258      regno = (last_scratch_gr_reg + i + 1) & 31;
2259      if (call_used_regs[regno]
2260	  && ! fixed_regs[regno]
2261	  && ! global_regs[regno]
2262	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2263	{
2264	  last_scratch_gr_reg = regno;
2265	  return regno;
2266	}
2267    }
2268
2269  /* There must be _something_ available.  */
2270  gcc_unreachable ();
2271}
2272
2273/* Helper function for ia64_compute_frame_size, called through
2274   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2275
2276static void
2277mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2278{
2279  unsigned int regno = REGNO (reg);
2280  if (regno < 32)
2281    {
2282      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2283      for (i = 0; i < n; ++i)
2284	current_frame_info.gr_used_mask |= 1 << (regno + i);
2285    }
2286}
2287
2288	/* Compute the frame layout for the current function and record it in
2289	   current_frame_info.  SIZE is the number of bytes of space needed for
2290	   local variables.  */
2291
2292static void
2293ia64_compute_frame_size (HOST_WIDE_INT size)
2294{
2295  HOST_WIDE_INT total_size;
2296  HOST_WIDE_INT spill_size = 0;
2297  HOST_WIDE_INT extra_spill_size = 0;
2298  HOST_WIDE_INT pretend_args_size;
2299  HARD_REG_SET mask;
2300  int n_spilled = 0;
2301  int spilled_gr_p = 0;
2302  int spilled_fr_p = 0;
2303  unsigned int regno;
2304  int i;
2305
2306  if (current_frame_info.initialized)
2307    return;
2308
2309  memset (&current_frame_info, 0, sizeof current_frame_info);
2310  CLEAR_HARD_REG_SET (mask);
2311
2312  /* Don't allocate scratches to the return register.  */
2313  diddle_return_value (mark_reg_gr_used_mask, NULL);
2314
2315  /* Don't allocate scratches to the EH scratch registers.  */
2316  if (cfun->machine->ia64_eh_epilogue_sp)
2317    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2318  if (cfun->machine->ia64_eh_epilogue_bsp)
2319    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2320
2321  /* Find the size of the register stack frame.  We have only 80 local
2322     registers, because we reserve 8 for the inputs and 8 for the
2323     outputs.  */
2324
2325  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2326     since we'll be adjusting that down later.  */
2327  regno = LOC_REG (78) + ! frame_pointer_needed;
2328  for (; regno >= LOC_REG (0); regno--)
2329    if (regs_ever_live[regno])
2330      break;
2331  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2332
2333  /* For functions marked with the syscall_linkage attribute, we must mark
2334     all eight input registers as in use, so that locals aren't visible to
2335     the caller.  */
2336
2337  if (cfun->machine->n_varargs > 0
2338      || lookup_attribute ("syscall_linkage",
2339			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2340    current_frame_info.n_input_regs = 8;
2341  else
2342    {
2343      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2344	if (regs_ever_live[regno])
2345	  break;
2346      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2347    }
2348
2349  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2350    if (regs_ever_live[regno])
2351      break;
2352  i = regno - OUT_REG (0) + 1;
2353
2354#ifndef PROFILE_HOOK
2355  /* When -p profiling, we need one output register for the mcount argument.
2356     Likewise for -a profiling for the bb_init_func argument.  For -ax
2357     profiling, we need two output registers for the two bb_init_trace_func
2358     arguments.  */
2359  if (current_function_profile)
2360    i = MAX (i, 1);
2361#endif
2362  current_frame_info.n_output_regs = i;
2363
2364  /* ??? No rotating register support yet.  */
2365  current_frame_info.n_rotate_regs = 0;
2366
2367  /* Discover which registers need spilling, and how much room that
2368     will take.  Begin with floating point and general registers,
2369     which will always wind up on the stack.  */
2370
2371  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2372    if (regs_ever_live[regno] && ! call_used_regs[regno])
2373      {
2374	SET_HARD_REG_BIT (mask, regno);
2375	spill_size += 16;
2376	n_spilled += 1;
2377	spilled_fr_p = 1;
2378      }
2379
2380  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2381    if (regs_ever_live[regno] && ! call_used_regs[regno])
2382      {
2383	SET_HARD_REG_BIT (mask, regno);
2384	spill_size += 8;
2385	n_spilled += 1;
2386	spilled_gr_p = 1;
2387      }
2388
2389  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2390    if (regs_ever_live[regno] && ! call_used_regs[regno])
2391      {
2392	SET_HARD_REG_BIT (mask, regno);
2393	spill_size += 8;
2394	n_spilled += 1;
2395      }
2396
2397  /* Now come all special registers that might get saved in other
2398     general registers.  */
2399
2400  if (frame_pointer_needed)
2401    {
2402      current_frame_info.reg_fp = find_gr_spill (1);
2403      /* If we did not get a register, then we take LOC79.  This is guaranteed
2404	 to be free, even if regs_ever_live is already set, because this is
2405	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2406	 as we don't count loc79 above.  */
2407      if (current_frame_info.reg_fp == 0)
2408	{
2409	  current_frame_info.reg_fp = LOC_REG (79);
2410	  current_frame_info.n_local_regs++;
2411	}
2412    }
2413
2414  if (! current_function_is_leaf)
2415    {
2416      /* Emit a save of BR0 if we call other functions.  Do this even
2417	 if this function doesn't return, as EH depends on this to be
2418	 able to unwind the stack.  */
2419      SET_HARD_REG_BIT (mask, BR_REG (0));
2420
2421      current_frame_info.reg_save_b0 = find_gr_spill (1);
2422      if (current_frame_info.reg_save_b0 == 0)
2423	{
2424	  spill_size += 8;
2425	  n_spilled += 1;
2426	}
2427
2428      /* Similarly for ar.pfs.  */
2429      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2430      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2431      if (current_frame_info.reg_save_ar_pfs == 0)
2432	{
2433	  extra_spill_size += 8;
2434	  n_spilled += 1;
2435	}
2436
2437      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2438	 registers are clobbered, so we fall back to the stack.  */
2439      current_frame_info.reg_save_gp
2440	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2441      if (current_frame_info.reg_save_gp == 0)
2442	{
2443	  SET_HARD_REG_BIT (mask, GR_REG (1));
2444	  spill_size += 8;
2445	  n_spilled += 1;
2446	}
2447    }
2448  else
2449    {
2450      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2451	{
2452	  SET_HARD_REG_BIT (mask, BR_REG (0));
2453	  spill_size += 8;
2454	  n_spilled += 1;
2455	}
2456
2457      if (regs_ever_live[AR_PFS_REGNUM])
2458	{
2459	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2460	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2461	  if (current_frame_info.reg_save_ar_pfs == 0)
2462	    {
2463	      extra_spill_size += 8;
2464	      n_spilled += 1;
2465	    }
2466	}
2467    }
2468
2469	  /* Unwind descriptor hackery: things are most efficient if we allocate
2470	     consecutive GR save registers for RP, PFS, FP in that order.  However,
2471	     it is absolutely critical that FP get the only hard register that's
2472	     guaranteed to be free, so we allocate it first.  If all three do
2473	     happen to be allocated to consecutive hard regs, rearrange them
2474	     into the preferred order now.  */
2475  if (current_frame_info.reg_fp != 0
2476      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2477      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2478    {
2479      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2480      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2481      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2482    }
2483
2484  /* See if we need to store the predicate register block.  */
2485  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2486    if (regs_ever_live[regno] && ! call_used_regs[regno])
2487      break;
2488  if (regno <= PR_REG (63))
2489    {
2490      SET_HARD_REG_BIT (mask, PR_REG (0));
2491      current_frame_info.reg_save_pr = find_gr_spill (1);
2492      if (current_frame_info.reg_save_pr == 0)
2493	{
2494	  extra_spill_size += 8;
2495	  n_spilled += 1;
2496	}
2497
2498      /* ??? Mark them all as used so that register renaming and such
2499	 are free to use them.  */
2500      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2501	regs_ever_live[regno] = 1;
2502    }
2503
2504  /* If we're forced to use st8.spill, we're forced to save and restore
2505     ar.unat as well.  The check for existing liveness allows inline asm
2506     to touch ar.unat.  */
2507  if (spilled_gr_p || cfun->machine->n_varargs
2508      || regs_ever_live[AR_UNAT_REGNUM])
2509    {
2510      regs_ever_live[AR_UNAT_REGNUM] = 1;
2511      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2512      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2513      if (current_frame_info.reg_save_ar_unat == 0)
2514	{
2515	  extra_spill_size += 8;
2516	  n_spilled += 1;
2517	}
2518    }
2519
2520  if (regs_ever_live[AR_LC_REGNUM])
2521    {
2522      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2523      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2524      if (current_frame_info.reg_save_ar_lc == 0)
2525	{
2526	  extra_spill_size += 8;
2527	  n_spilled += 1;
2528	}
2529    }
2530
2531  /* If we have an odd number of words of pretend arguments written to
2532     the stack, then the FR save area will be unaligned.  We round the
2533     size of this area up to keep things 16 byte aligned.  */
2534  if (spilled_fr_p)
2535    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2536  else
2537    pretend_args_size = current_function_pretend_args_size;
2538
2539  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2540		+ current_function_outgoing_args_size);
2541  total_size = IA64_STACK_ALIGN (total_size);
2542
2543  /* We always use the 16-byte scratch area provided by the caller, but
2544     if we are a leaf function, there's no one to which we need to provide
2545     a scratch area.  */
2546  if (current_function_is_leaf)
2547    total_size = MAX (0, total_size - 16);
2548
2549  current_frame_info.total_size = total_size;
2550  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2551  current_frame_info.spill_size = spill_size;
2552  current_frame_info.extra_spill_size = extra_spill_size;
2553  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2554  current_frame_info.n_spilled = n_spilled;
2555  current_frame_info.initialized = reload_completed;
2556}
2557
2558/* Compute the initial difference between the specified pair of registers.  */
2559
2560HOST_WIDE_INT
2561ia64_initial_elimination_offset (int from, int to)
2562{
2563  HOST_WIDE_INT offset;
2564
2565  ia64_compute_frame_size (get_frame_size ());
2566  switch (from)
2567    {
2568    case FRAME_POINTER_REGNUM:
2569      switch (to)
2570	{
2571	case HARD_FRAME_POINTER_REGNUM:
2572	  if (current_function_is_leaf)
2573	    offset = -current_frame_info.total_size;
2574	  else
2575	    offset = -(current_frame_info.total_size
2576		       - current_function_outgoing_args_size - 16);
2577	  break;
2578
2579	case STACK_POINTER_REGNUM:
2580	  if (current_function_is_leaf)
2581	    offset = 0;
2582	  else
2583	    offset = 16 + current_function_outgoing_args_size;
2584	  break;
2585
2586	default:
2587	  gcc_unreachable ();
2588	}
2589      break;
2590
2591    case ARG_POINTER_REGNUM:
2592	      /* Arguments start above the 16 byte save area, unless stdarg,
2593	 in which case we store through the 16 byte save area.  */
2594      switch (to)
2595	{
2596	case HARD_FRAME_POINTER_REGNUM:
2597	  offset = 16 - current_function_pretend_args_size;
2598	  break;
2599
2600	case STACK_POINTER_REGNUM:
2601	  offset = (current_frame_info.total_size
2602		    + 16 - current_function_pretend_args_size);
2603	  break;
2604
2605	default:
2606	  gcc_unreachable ();
2607	}
2608      break;
2609
2610    default:
2611      gcc_unreachable ();
2612    }
2613
2614  return offset;
2615}
2616
2617/* If there are more than a trivial number of register spills, we use
2618   two interleaved iterators so that we can get two memory references
2619   per insn group.
2620
2621   In order to simplify things in the prologue and epilogue expanders,
2622   we use helper functions to fix up the memory references after the
2623   fact with the appropriate offsets to a POST_MODIFY memory mode.
2624   The following data structure tracks the state of the two iterators
2625   while insns are being emitted.  */
2626
2627struct spill_fill_data
2628{
2629  rtx init_after;		/* point at which to emit initializations */
2630  rtx init_reg[2];		/* initial base register */
2631  rtx iter_reg[2];		/* the iterator registers */
2632  rtx *prev_addr[2];		/* address of last memory use */
2633  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2634  HOST_WIDE_INT prev_off[2];	/* last offset */
2635  int n_iter;			/* number of iterators in use */
2636  int next_iter;		/* next iterator to use */
2637  unsigned int save_gr_used_mask;
2638};
2639
2640static struct spill_fill_data spill_fill_data;
2641
2642static void
2643setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2644{
2645  int i;
2646
2647  spill_fill_data.init_after = get_last_insn ();
2648  spill_fill_data.init_reg[0] = init_reg;
2649  spill_fill_data.init_reg[1] = init_reg;
2650  spill_fill_data.prev_addr[0] = NULL;
2651  spill_fill_data.prev_addr[1] = NULL;
2652  spill_fill_data.prev_insn[0] = NULL;
2653  spill_fill_data.prev_insn[1] = NULL;
2654  spill_fill_data.prev_off[0] = cfa_off;
2655  spill_fill_data.prev_off[1] = cfa_off;
2656  spill_fill_data.next_iter = 0;
2657  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2658
2659  spill_fill_data.n_iter = 1 + (n_spills > 2);
2660  for (i = 0; i < spill_fill_data.n_iter; ++i)
2661    {
2662      int regno = next_scratch_gr_reg ();
2663      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2664      current_frame_info.gr_used_mask |= 1 << regno;
2665    }
2666}
2667
2668static void
2669finish_spill_pointers (void)
2670{
2671  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2672}
2673
2674static rtx
2675spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2676{
2677  int iter = spill_fill_data.next_iter;
2678  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2679  rtx disp_rtx = GEN_INT (disp);
2680  rtx mem;
2681
2682  if (spill_fill_data.prev_addr[iter])
2683    {
2684      if (CONST_OK_FOR_N (disp))
2685	{
2686	  *spill_fill_data.prev_addr[iter]
2687	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2688				   gen_rtx_PLUS (DImode,
2689						 spill_fill_data.iter_reg[iter],
2690						 disp_rtx));
2691	  REG_NOTES (spill_fill_data.prev_insn[iter])
2692	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2693				 REG_NOTES (spill_fill_data.prev_insn[iter]));
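	  /* The previous spill/fill now uses a post-increment addressing
	     mode, which folds this iterator adjustment into that memory
	     access; the REG_INC note records the side effect for later
	     passes.  */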
2694	}
2695      else
2696	{
2697	  /* ??? Could use register post_modify for loads.  */
2698	  if (! CONST_OK_FOR_I (disp))
2699	    {
2700	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2701	      emit_move_insn (tmp, disp_rtx);
2702	      disp_rtx = tmp;
2703	    }
2704	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2705				 spill_fill_data.iter_reg[iter], disp_rtx));
2706	}
2707    }
2708  /* Micro-optimization: if we've created a frame pointer, it's at
2709     CFA 0, which may allow the real iterator to be initialized lower,
2710     slightly increasing parallelism.  Also, if there are few saves
2711     it may eliminate the iterator entirely.  */
2712  else if (disp == 0
2713	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2714	   && frame_pointer_needed)
2715    {
2716      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2717      set_mem_alias_set (mem, get_varargs_alias_set ());
2718      return mem;
2719    }
2720  else
2721    {
2722      rtx seq, insn;
2723
2724      if (disp == 0)
2725	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2726			 spill_fill_data.init_reg[iter]);
2727      else
2728	{
2729	  start_sequence ();
2730
2731	  if (! CONST_OK_FOR_I (disp))
2732	    {
2733	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2734	      emit_move_insn (tmp, disp_rtx);
2735	      disp_rtx = tmp;
2736	    }
2737
2738	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2739				 spill_fill_data.init_reg[iter],
2740				 disp_rtx));
2741
2742	  seq = get_insns ();
2743	  end_sequence ();
2744	}
2745
2746	      /* Be careful in case this is the first insn in the sequence.  */
2747      if (spill_fill_data.init_after)
2748	insn = emit_insn_after (seq, spill_fill_data.init_after);
2749      else
2750	{
2751	  rtx first = get_insns ();
2752	  if (first)
2753	    insn = emit_insn_before (seq, first);
2754	  else
2755	    insn = emit_insn (seq);
2756	}
2757      spill_fill_data.init_after = insn;
2758
2759      /* If DISP is 0, we may or may not have a further adjustment
2760	 afterward.  If we do, then the load/store insn may be modified
2761	 to be a post-modify.  If we don't, then this copy may be
2762	 eliminated by copyprop_hardreg_forward, which makes this
2763	 insn garbage, which runs afoul of the sanity check in
2764	 propagate_one_insn.  So mark this insn as legal to delete.  */
2765      if (disp == 0)
2766	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2767					     REG_NOTES (insn));
2768    }
2769
2770  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2771
2772  /* ??? Not all of the spills are for varargs, but some of them are.
2773     The rest of the spills belong in an alias set of their own.  But
2774     it doesn't actually hurt to include them here.  */
2775  set_mem_alias_set (mem, get_varargs_alias_set ());
2776
2777  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2778  spill_fill_data.prev_off[iter] = cfa_off;
2779
2780  if (++iter >= spill_fill_data.n_iter)
2781    iter = 0;
2782  spill_fill_data.next_iter = iter;
2783
2784  return mem;
2785}
2786
2787static void
2788do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2789	  rtx frame_reg)
2790{
2791  int iter = spill_fill_data.next_iter;
2792  rtx mem, insn;
2793
2794  mem = spill_restore_mem (reg, cfa_off);
2795  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2796  spill_fill_data.prev_insn[iter] = insn;
2797
2798  if (frame_reg)
2799    {
2800      rtx base;
2801      HOST_WIDE_INT off;
2802
2803      RTX_FRAME_RELATED_P (insn) = 1;
2804
2805      /* Don't even pretend that the unwind code can intuit its way
2806	 through a pair of interleaved post_modify iterators.  Just
2807	 provide the correct answer.  */
2808
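      /* The REG_FRAME_RELATED_EXPR note attached below presents this insn
	 to the unwind machinery as a plain store of FRAME_REG at
	 BASE + OFF.  */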
2809      if (frame_pointer_needed)
2810	{
2811	  base = hard_frame_pointer_rtx;
2812	  off = - cfa_off;
2813	}
2814      else
2815	{
2816	  base = stack_pointer_rtx;
2817	  off = current_frame_info.total_size - cfa_off;
2818	}
2819
2820      REG_NOTES (insn)
2821	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2822		gen_rtx_SET (VOIDmode,
2823			     gen_rtx_MEM (GET_MODE (reg),
2824					  plus_constant (base, off)),
2825			     frame_reg),
2826		REG_NOTES (insn));
2827    }
2828}
2829
2830static void
2831do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2832{
2833  int iter = spill_fill_data.next_iter;
2834  rtx insn;
2835
2836  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2837				GEN_INT (cfa_off)));
2838  spill_fill_data.prev_insn[iter] = insn;
2839}
2840
2841	/* Wrapper functions that discard the CONST_INT spill offset.  These
2842   exist so that we can give gr_spill/gr_fill the offset they need and
2843   use a consistent function interface.  */
2844
2845static rtx
2846gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2847{
2848  return gen_movdi (dest, src);
2849}
2850
2851static rtx
2852gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2853{
2854  return gen_fr_spill (dest, src);
2855}
2856
2857static rtx
2858gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2859{
2860  return gen_fr_restore (dest, src);
2861}
2862
2863/* Called after register allocation to add any instructions needed for the
2864   prologue.  Using a prologue insn is favored compared to putting all of the
2865   instructions in output_function_prologue(), since it allows the scheduler
2866	   to intermix instructions with the saves of the call-saved registers.  In
2867   some cases, it might be necessary to emit a barrier instruction as the last
2868   insn to prevent such scheduling.
2869
2870   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2871   so that the debug info generation code can handle them properly.
2872
2873	   The register save area is laid out like so:
2874   cfa+16
2875	[ varargs spill area ]
2876	[ fr register spill area ]
2877	[ br register spill area ]
2878	[ ar register spill area ]
2879	[ pr register spill area ]
2880	[ gr register spill area ] */
2881
2882	/* ??? We get inefficient code when the frame size is larger than can fit
2883	   in an adds instruction.  */
2884
2885void
2886ia64_expand_prologue (void)
2887{
2888  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2889  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2890  rtx reg, alt_reg;
2891
2892  ia64_compute_frame_size (get_frame_size ());
2893  last_scratch_gr_reg = 15;
2894
2895  /* If there is no epilogue, then we don't need some prologue insns.
2896     We need to avoid emitting the dead prologue insns, because flow
2897     will complain about them.  */
2898  if (optimize)
2899    {
2900      edge e;
2901      edge_iterator ei;
2902
2903      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2904	if ((e->flags & EDGE_FAKE) == 0
2905	    && (e->flags & EDGE_FALLTHRU) != 0)
2906	  break;
2907      epilogue_p = (e != NULL);
2908    }
2909  else
2910    epilogue_p = 1;
2911
2912  /* Set the local, input, and output register names.  We need to do this
2913     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2914     half.  If we use in/loc/out register names, then we get assembler errors
2915     in crtn.S because there is no alloc insn or regstk directive in there.  */
2916  if (! TARGET_REG_NAMES)
2917    {
2918      int inputs = current_frame_info.n_input_regs;
2919      int locals = current_frame_info.n_local_regs;
2920      int outputs = current_frame_info.n_output_regs;
2921
2922      for (i = 0; i < inputs; i++)
2923	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2924      for (i = 0; i < locals; i++)
2925	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2926      for (i = 0; i < outputs; i++)
2927	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2928    }
2929
2930  /* Set the frame pointer register name.  The regnum is logically loc79,
2931     but of course we'll not have allocated that many locals.  Rather than
2932     worrying about renumbering the existing rtxs, we adjust the name.  */
2933  /* ??? This code means that we can never use one local register when
2934     there is a frame pointer.  loc79 gets wasted in this case, as it is
2935     renamed to a register that will never be used.  See also the try_locals
2936     code in find_gr_spill.  */
2937  if (current_frame_info.reg_fp)
2938    {
2939      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2940      reg_names[HARD_FRAME_POINTER_REGNUM]
2941	= reg_names[current_frame_info.reg_fp];
2942      reg_names[current_frame_info.reg_fp] = tmp;
2943    }
2944
2945  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2946  if (current_frame_info.n_local_regs == 0
2947      && current_frame_info.n_output_regs == 0
2948      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2949      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2950    {
2951      /* If there is no alloc, but there are input registers used, then we
2952	 need a .regstk directive.  */
2953      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2954      ar_pfs_save_reg = NULL_RTX;
2955    }
2956  else
2957    {
2958      current_frame_info.need_regstk = 0;
2959
2960      if (current_frame_info.reg_save_ar_pfs)
2961	regno = current_frame_info.reg_save_ar_pfs;
2962      else
2963	regno = next_scratch_gr_reg ();
2964      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2965
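      /* This should correspond to an "alloc <save_reg> = ar.pfs, <inputs>,
	 <locals>, <outputs>, <rotate>" instruction, which both allocates
	 the register stack frame and copies ar.pfs into the save
	 register.  */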
2966      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2967				   GEN_INT (current_frame_info.n_input_regs),
2968				   GEN_INT (current_frame_info.n_local_regs),
2969				   GEN_INT (current_frame_info.n_output_regs),
2970				   GEN_INT (current_frame_info.n_rotate_regs)));
2971      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2972    }
2973
2974  /* Set up frame pointer, stack pointer, and spill iterators.  */
2975
2976  n_varargs = cfun->machine->n_varargs;
2977  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
2978			stack_pointer_rtx, 0);
2979
2980  if (frame_pointer_needed)
2981    {
2982      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
2983      RTX_FRAME_RELATED_P (insn) = 1;
2984    }
2985
2986  if (current_frame_info.total_size != 0)
2987    {
2988      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
2989      rtx offset;
2990
2991      if (CONST_OK_FOR_I (- current_frame_info.total_size))
2992	offset = frame_size_rtx;
2993      else
2994	{
2995	  regno = next_scratch_gr_reg ();
2996	  offset = gen_rtx_REG (DImode, regno);
2997	  emit_move_insn (offset, frame_size_rtx);
2998	}
2999
3000      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3001				    stack_pointer_rtx, offset));
3002
3003      if (! frame_pointer_needed)
3004	{
3005	  RTX_FRAME_RELATED_P (insn) = 1;
3006	  if (GET_CODE (offset) != CONST_INT)
3007	    {
3008	      REG_NOTES (insn)
3009		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3010			gen_rtx_SET (VOIDmode,
3011				     stack_pointer_rtx,
3012				     gen_rtx_PLUS (DImode,
3013						   stack_pointer_rtx,
3014						   frame_size_rtx)),
3015			REG_NOTES (insn));
3016	    }
3017	}
3018
3019      /* ??? At this point we must generate a magic insn that appears to
3020	 modify the stack pointer, the frame pointer, and all spill
3021	 iterators.  This would allow the most scheduling freedom.  For
3022	 now, just hard stop.  */
3023      emit_insn (gen_blockage ());
3024    }
3025
3026  /* Must copy out ar.unat before doing any integer spills.  */
3027  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3028    {
3029      if (current_frame_info.reg_save_ar_unat)
3030	ar_unat_save_reg
3031	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3032      else
3033	{
3034	  alt_regno = next_scratch_gr_reg ();
3035	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3036	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3037	}
3038
3039      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3040      insn = emit_move_insn (ar_unat_save_reg, reg);
3041      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3042
3043      /* Even if we're not going to generate an epilogue, we still
3044	 need to save the register so that EH works.  */
3045      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3046	emit_insn (gen_prologue_use (ar_unat_save_reg));
3047    }
3048  else
3049    ar_unat_save_reg = NULL_RTX;
3050
3051  /* Spill all varargs registers.  Do this before spilling any GR registers,
3052     since we want the UNAT bits for the GR registers to override the UNAT
3053     bits from varargs, which we don't care about.  */
3054
3055  cfa_off = -16;
3056  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3057    {
3058      reg = gen_rtx_REG (DImode, regno);
3059      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3060    }
3061
3062  /* Locate the bottom of the register save area.  */
3063  cfa_off = (current_frame_info.spill_cfa_off
3064	     + current_frame_info.spill_size
3065	     + current_frame_info.extra_spill_size);
3066
3067  /* Save the predicate register block either in a register or in memory.  */
3068  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3069    {
3070      reg = gen_rtx_REG (DImode, PR_REG (0));
3071      if (current_frame_info.reg_save_pr != 0)
3072	{
3073	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3074	  insn = emit_move_insn (alt_reg, reg);
3075
3076	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3077	     64 hard registers.  */
3078	  RTX_FRAME_RELATED_P (insn) = 1;
3079	  REG_NOTES (insn)
3080	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3081			gen_rtx_SET (VOIDmode, alt_reg, reg),
3082			REG_NOTES (insn));
3083
3084	  /* Even if we're not going to generate an epilogue, we still
3085	     need to save the register so that EH works.  */
3086	  if (! epilogue_p)
3087	    emit_insn (gen_prologue_use (alt_reg));
3088	}
3089      else
3090	{
3091	  alt_regno = next_scratch_gr_reg ();
3092	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3093	  insn = emit_move_insn (alt_reg, reg);
3094	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3095	  cfa_off -= 8;
3096	}
3097    }
3098
3099  /* Handle AR regs in numerical order.  All of them get special handling.  */
3100  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3101      && current_frame_info.reg_save_ar_unat == 0)
3102    {
3103      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3104      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3105      cfa_off -= 8;
3106    }
3107
3108  /* The alloc insn already copied ar.pfs into a general register.  The
3109     only thing we have to do now is copy that register to a stack slot
3110     if we'd not allocated a local register for the job.  */
3111  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3112      && current_frame_info.reg_save_ar_pfs == 0)
3113    {
3114      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3115      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3116      cfa_off -= 8;
3117    }
3118
3119  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3120    {
3121      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3122      if (current_frame_info.reg_save_ar_lc != 0)
3123	{
3124	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3125	  insn = emit_move_insn (alt_reg, reg);
3126	  RTX_FRAME_RELATED_P (insn) = 1;
3127
3128	  /* Even if we're not going to generate an epilogue, we still
3129	     need to save the register so that EH works.  */
3130	  if (! epilogue_p)
3131	    emit_insn (gen_prologue_use (alt_reg));
3132	}
3133      else
3134	{
3135	  alt_regno = next_scratch_gr_reg ();
3136	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3137	  emit_move_insn (alt_reg, reg);
3138	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3139	  cfa_off -= 8;
3140	}
3141    }
3142
3143  if (current_frame_info.reg_save_gp)
3144    {
3145      insn = emit_move_insn (gen_rtx_REG (DImode,
3146					  current_frame_info.reg_save_gp),
3147			     pic_offset_table_rtx);
3148      /* We don't know for sure yet if this is actually needed, since
3149	 we've not split the PIC call patterns.  If all of the calls
3150	 are indirect, and not followed by any uses of the gp, then
3151	 this save is dead.  Allow it to go away.  */
3152      REG_NOTES (insn)
3153	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3154    }
3155
3156  /* We should now be at the base of the gr/br/fr spill area.  */
3157  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3158			  + current_frame_info.spill_size));
3159
3160  /* Spill all general registers.  */
3161  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3162    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3163      {
3164	reg = gen_rtx_REG (DImode, regno);
3165	do_spill (gen_gr_spill, reg, cfa_off, reg);
3166	cfa_off -= 8;
3167      }
3168
3169  /* Handle BR0 specially -- it may be getting stored permanently in
3170     some GR register.  */
3171  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3172    {
3173      reg = gen_rtx_REG (DImode, BR_REG (0));
3174      if (current_frame_info.reg_save_b0 != 0)
3175	{
3176	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3177	  insn = emit_move_insn (alt_reg, reg);
3178	  RTX_FRAME_RELATED_P (insn) = 1;
3179
3180	  /* Even if we're not going to generate an epilogue, we still
3181	     need to save the register so that EH works.  */
3182	  if (! epilogue_p)
3183	    emit_insn (gen_prologue_use (alt_reg));
3184	}
3185      else
3186	{
3187	  alt_regno = next_scratch_gr_reg ();
3188	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3189	  emit_move_insn (alt_reg, reg);
3190	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3191	  cfa_off -= 8;
3192	}
3193    }
3194
3195  /* Spill the rest of the BR registers.  */
3196  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3197    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3198      {
3199	alt_regno = next_scratch_gr_reg ();
3200	alt_reg = gen_rtx_REG (DImode, alt_regno);
3201	reg = gen_rtx_REG (DImode, regno);
3202	emit_move_insn (alt_reg, reg);
3203	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3204	cfa_off -= 8;
3205      }
3206
3207  /* Align the frame and spill all FR registers.  */
3208  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3209    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3210      {
3211        gcc_assert (!(cfa_off & 15));
3212	reg = gen_rtx_REG (XFmode, regno);
3213	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3214	cfa_off -= 16;
3215      }
3216
3217  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3218
3219  finish_spill_pointers ();
3220}
3221
3222/* Called after register allocation to add any instructions needed for the
3223   epilogue.  Using an epilogue insn is favored compared to putting all of the
3224	   instructions in output_function_epilogue(), since it allows the scheduler
3225	   to intermix instructions with the restores of the call-saved registers.  In
3226   some cases, it might be necessary to emit a barrier instruction as the last
3227   insn to prevent such scheduling.  */
3228
3229void
3230ia64_expand_epilogue (int sibcall_p)
3231{
3232  rtx insn, reg, alt_reg, ar_unat_save_reg;
3233  int regno, alt_regno, cfa_off;
3234
3235  ia64_compute_frame_size (get_frame_size ());
3236
3237  /* If there is a frame pointer, then we use it instead of the stack
3238     pointer, so that the stack pointer does not need to be valid when
3239     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3240  if (frame_pointer_needed)
3241    setup_spill_pointers (current_frame_info.n_spilled,
3242			  hard_frame_pointer_rtx, 0);
3243  else
3244    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3245			  current_frame_info.total_size);
3246
3247  if (current_frame_info.total_size != 0)
3248    {
3249      /* ??? At this point we must generate a magic insn that appears to
3250         modify the spill iterators and the frame pointer.  This would
3251	 allow the most scheduling freedom.  For now, just hard stop.  */
3252      emit_insn (gen_blockage ());
3253    }
3254
3255  /* Locate the bottom of the register save area.  */
3256  cfa_off = (current_frame_info.spill_cfa_off
3257	     + current_frame_info.spill_size
3258	     + current_frame_info.extra_spill_size);
3259
3260  /* Restore the predicate registers.  */
3261  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3262    {
3263      if (current_frame_info.reg_save_pr != 0)
3264	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3265      else
3266	{
3267	  alt_regno = next_scratch_gr_reg ();
3268	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3269	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3270	  cfa_off -= 8;
3271	}
3272      reg = gen_rtx_REG (DImode, PR_REG (0));
3273      emit_move_insn (reg, alt_reg);
3274    }
3275
3276  /* Restore the application registers.  */
3277
3278  /* Load the saved unat from the stack, but do not restore it until
3279     after the GRs have been restored.  */
3280  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3281    {
3282      if (current_frame_info.reg_save_ar_unat != 0)
3283        ar_unat_save_reg
3284	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3285      else
3286	{
3287	  alt_regno = next_scratch_gr_reg ();
3288	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3289	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3290	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3291	  cfa_off -= 8;
3292	}
3293    }
3294  else
3295    ar_unat_save_reg = NULL_RTX;
3296
3297  if (current_frame_info.reg_save_ar_pfs != 0)
3298    {
3299      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3300      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3301      emit_move_insn (reg, alt_reg);
3302    }
3303  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3304    {
3305      alt_regno = next_scratch_gr_reg ();
3306      alt_reg = gen_rtx_REG (DImode, alt_regno);
3307      do_restore (gen_movdi_x, alt_reg, cfa_off);
3308      cfa_off -= 8;
3309      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3310      emit_move_insn (reg, alt_reg);
3311    }
3312
3313  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3314    {
3315      if (current_frame_info.reg_save_ar_lc != 0)
3316	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3317      else
3318	{
3319	  alt_regno = next_scratch_gr_reg ();
3320	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3321	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3322	  cfa_off -= 8;
3323	}
3324      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3325      emit_move_insn (reg, alt_reg);
3326    }
3327
3328  /* We should now be at the base of the gr/br/fr spill area.  */
3329  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3330			  + current_frame_info.spill_size));
3331
3332  /* The GP may be stored on the stack in the prologue, but it's
3333     never restored in the epilogue.  Skip the stack slot.  */
3334  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3335    cfa_off -= 8;
3336
3337  /* Restore all general registers.  */
3338  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3339    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3340      {
3341	reg = gen_rtx_REG (DImode, regno);
3342	do_restore (gen_gr_restore, reg, cfa_off);
3343	cfa_off -= 8;
3344      }
3345
3346  /* Restore the branch registers.  Handle B0 specially, as it may
3347     have gotten stored in some GR register.  */
3348  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3349    {
3350      if (current_frame_info.reg_save_b0 != 0)
3351	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3352      else
3353	{
3354	  alt_regno = next_scratch_gr_reg ();
3355	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3356	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3357	  cfa_off -= 8;
3358	}
3359      reg = gen_rtx_REG (DImode, BR_REG (0));
3360      emit_move_insn (reg, alt_reg);
3361    }
3362
3363  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3364    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3365      {
3366	alt_regno = next_scratch_gr_reg ();
3367	alt_reg = gen_rtx_REG (DImode, alt_regno);
3368	do_restore (gen_movdi_x, alt_reg, cfa_off);
3369	cfa_off -= 8;
3370	reg = gen_rtx_REG (DImode, regno);
3371	emit_move_insn (reg, alt_reg);
3372      }
3373
3374  /* Restore floating point registers.  */
3375  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3376    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3377      {
3378        gcc_assert (!(cfa_off & 15));
3379	reg = gen_rtx_REG (XFmode, regno);
3380	do_restore (gen_fr_restore_x, reg, cfa_off);
3381	cfa_off -= 16;
3382      }
3383
3384  /* Restore ar.unat for real.  */
3385  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3386    {
3387      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3388      emit_move_insn (reg, ar_unat_save_reg);
3389    }
3390
3391  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3392
3393  finish_spill_pointers ();
3394
3395  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3396    {
3397      /* ??? At this point we must generate a magic insn that appears to
3398         modify the spill iterators, the stack pointer, and the frame
3399	 pointer.  This would allow the most scheduling freedom.  For now,
3400	 just hard stop.  */
3401      emit_insn (gen_blockage ());
3402    }
3403
3404  if (cfun->machine->ia64_eh_epilogue_sp)
3405    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3406  else if (frame_pointer_needed)
3407    {
3408      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3409      RTX_FRAME_RELATED_P (insn) = 1;
3410    }
3411  else if (current_frame_info.total_size)
3412    {
3413      rtx offset, frame_size_rtx;
3414
3415      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3416      if (CONST_OK_FOR_I (current_frame_info.total_size))
3417	offset = frame_size_rtx;
3418      else
3419	{
3420	  regno = next_scratch_gr_reg ();
3421	  offset = gen_rtx_REG (DImode, regno);
3422	  emit_move_insn (offset, frame_size_rtx);
3423	}
3424
3425      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3426				    offset));
3427
3428      RTX_FRAME_RELATED_P (insn) = 1;
3429      if (GET_CODE (offset) != CONST_INT)
3430	{
3431	  REG_NOTES (insn)
3432	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3433			gen_rtx_SET (VOIDmode,
3434				     stack_pointer_rtx,
3435				     gen_rtx_PLUS (DImode,
3436						   stack_pointer_rtx,
3437						   frame_size_rtx)),
3438			REG_NOTES (insn));
3439	}
3440    }
3441
3442  if (cfun->machine->ia64_eh_epilogue_bsp)
3443    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3444
3445  if (! sibcall_p)
3446    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3447  else
3448    {
3449      int fp = GR_REG (2);
      /* We need a throw-away register here; r0 and r1 are reserved, so r2 is
	 the first available call-clobbered register.  If there was a frame
	 pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
	 string "r2" when emitting the register name for the assembler.  */
3455      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3456	fp = HARD_FRAME_POINTER_REGNUM;
3457
3458      /* We must emit an alloc to force the input registers to become output
3459	 registers.  Otherwise, if the callee tries to pass its parameters
3460	 through to another call without an intervening alloc, then these
3461	 values get lost.  */
3462      /* ??? We don't need to preserve all input registers.  We only need to
3463	 preserve those input registers used as arguments to the sibling call.
3464	 It is unclear how to compute that number here.  */
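      /* A rough sketch of the insn emitted below as it appears in the
	 assembler output, assuming FP ends up as r2 and N live input
	 registers:

	     alloc r2 = ar.pfs, 0, 0, N, 0

	 i.e. no inputs or locals, so the current input registers become
	 the outputs visible to the sibling call.  */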
3465      if (current_frame_info.n_input_regs != 0)
3466	{
3467	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3468	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3469				const0_rtx, const0_rtx,
3470				n_inputs, const0_rtx));
3471	  RTX_FRAME_RELATED_P (insn) = 1;
3472	}
3473    }
3474}
3475
3476/* Return 1 if br.ret can do all the work required to return from a
3477   function.  */
3478
3479int
3480ia64_direct_return (void)
3481{
3482  if (reload_completed && ! frame_pointer_needed)
3483    {
3484      ia64_compute_frame_size (get_frame_size ());
3485
3486      return (current_frame_info.total_size == 0
3487	      && current_frame_info.n_spilled == 0
3488	      && current_frame_info.reg_save_b0 == 0
3489	      && current_frame_info.reg_save_pr == 0
3490	      && current_frame_info.reg_save_ar_pfs == 0
3491	      && current_frame_info.reg_save_ar_unat == 0
3492	      && current_frame_info.reg_save_ar_lc == 0);
3493    }
3494  return 0;
3495}
3496
3497/* Return the magic cookie that we use to hold the return address
3498   during early compilation.  */
3499
3500rtx
3501ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3502{
3503  if (count != 0)
3504    return NULL;
3505  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3506}
3507
3508/* Split this value after reload, now that we know where the return
3509   address is saved.  */
3510
3511void
3512ia64_split_return_addr_rtx (rtx dest)
3513{
3514  rtx src;
3515
3516  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3517    {
3518      if (current_frame_info.reg_save_b0 != 0)
3519	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3520      else
3521	{
3522	  HOST_WIDE_INT off;
3523	  unsigned int regno;
3524
3525	  /* Compute offset from CFA for BR0.  */
3526	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3527	  off = (current_frame_info.spill_cfa_off
3528		 + current_frame_info.spill_size);
3529	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3530	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3531	      off -= 8;
3532
3533	  /* Convert CFA offset to a register based offset.  */
3534	  if (frame_pointer_needed)
3535	    src = hard_frame_pointer_rtx;
3536	  else
3537	    {
3538	      src = stack_pointer_rtx;
3539	      off += current_frame_info.total_size;
3540	    }
3541
3542	  /* Load address into scratch register.  */
3543	  if (CONST_OK_FOR_I (off))
3544	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3545	  else
3546	    {
3547	      emit_move_insn (dest, GEN_INT (off));
3548	      emit_insn (gen_adddi3 (dest, src, dest));
3549	    }
3550
3551	  src = gen_rtx_MEM (Pmode, dest);
3552	}
3553    }
3554  else
3555    src = gen_rtx_REG (DImode, BR_REG (0));
3556
3557  emit_move_insn (dest, src);
3558}
3559
3560int
3561ia64_hard_regno_rename_ok (int from, int to)
3562{
3563  /* Don't clobber any of the registers we reserved for the prologue.  */
3564  if (to == current_frame_info.reg_fp
3565      || to == current_frame_info.reg_save_b0
3566      || to == current_frame_info.reg_save_pr
3567      || to == current_frame_info.reg_save_ar_pfs
3568      || to == current_frame_info.reg_save_ar_unat
3569      || to == current_frame_info.reg_save_ar_lc)
3570    return 0;
3571
3572  if (from == current_frame_info.reg_fp
3573      || from == current_frame_info.reg_save_b0
3574      || from == current_frame_info.reg_save_pr
3575      || from == current_frame_info.reg_save_ar_pfs
3576      || from == current_frame_info.reg_save_ar_unat
3577      || from == current_frame_info.reg_save_ar_lc)
3578    return 0;
3579
3580  /* Don't use output registers outside the register frame.  */
3581  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3582    return 0;
3583
3584  /* Retain even/oddness on predicate register pairs.  */
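  /* E.g. p6 may be renamed to p8 but not to p7, since compare instructions
     write a predicate and its complement as an even/odd pair.  */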
3585  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3586    return (from & 1) == (to & 1);
3587
3588  return 1;
3589}
3590
3591/* Target hook for assembling integer objects.  Handle word-sized
3592   aligned objects and detect the cases when @fptr is needed.  */
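/* For example, a pointer-sized reference to the function "foo" is emitted
   as "data8 @fptr(foo)" (or "data4.ua @fptr(foo)" for an unaligned 32-bit
   pointer), so that the linker supplies the address of an official function
   descriptor rather than the raw entry point.  */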
3593
3594static bool
3595ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3596{
3597  if (size == POINTER_SIZE / BITS_PER_UNIT
3598      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3599      && GET_CODE (x) == SYMBOL_REF
3600      && SYMBOL_REF_FUNCTION_P (x))
3601    {
3602      static const char * const directive[2][2] = {
3603	  /* 64-bit pointer */  /* 32-bit pointer */
3604	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3605	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3606      };
3607      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3608      output_addr_const (asm_out_file, x);
3609      fputs (")\n", asm_out_file);
3610      return true;
3611    }
3612  return default_assemble_integer (x, size, aligned_p);
3613}
3614
3615/* Emit the function prologue.  */
3616
3617static void
3618ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3619{
3620  int mask, grsave, grsave_prev;
3621
3622  if (current_frame_info.need_regstk)
3623    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3624	     current_frame_info.n_input_regs,
3625	     current_frame_info.n_local_regs,
3626	     current_frame_info.n_output_regs,
3627	     current_frame_info.n_rotate_regs);
3628
3629  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3630    return;
3631
3632  /* Emit the .prologue directive.  */
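  /* The mask built below records which of b0 (8), ar.pfs (4), the frame
     pointer (2) and the predicates (1) were saved, in consecutive GRs
     starting at GRSAVE.  For example, if b0 is saved in the first such
     register and ar.pfs in the next one, we emit ".prologue 12, R", where
     12 == 8|4 and R is the unwind/debug number of that first register
     (see ia64_dbx_register_number).  */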
3633
3634  mask = 0;
3635  grsave = grsave_prev = 0;
3636  if (current_frame_info.reg_save_b0 != 0)
3637    {
3638      mask |= 8;
3639      grsave = grsave_prev = current_frame_info.reg_save_b0;
3640    }
3641  if (current_frame_info.reg_save_ar_pfs != 0
3642      && (grsave_prev == 0
3643	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3644    {
3645      mask |= 4;
3646      if (grsave_prev == 0)
3647	grsave = current_frame_info.reg_save_ar_pfs;
3648      grsave_prev = current_frame_info.reg_save_ar_pfs;
3649    }
3650  if (current_frame_info.reg_fp != 0
3651      && (grsave_prev == 0
3652	  || current_frame_info.reg_fp == grsave_prev + 1))
3653    {
3654      mask |= 2;
3655      if (grsave_prev == 0)
3656	grsave = HARD_FRAME_POINTER_REGNUM;
3657      grsave_prev = current_frame_info.reg_fp;
3658    }
3659  if (current_frame_info.reg_save_pr != 0
3660      && (grsave_prev == 0
3661	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3662    {
3663      mask |= 1;
3664      if (grsave_prev == 0)
3665	grsave = current_frame_info.reg_save_pr;
3666    }
3667
3668  if (mask && TARGET_GNU_AS)
3669    fprintf (file, "\t.prologue %d, %d\n", mask,
3670	     ia64_dbx_register_number (grsave));
3671  else
3672    fputs ("\t.prologue\n", file);
3673
3674  /* Emit a .spill directive, if necessary, to relocate the base of
3675     the register spill area.  */
3676  if (current_frame_info.spill_cfa_off != -16)
3677    fprintf (file, "\t.spill %ld\n",
3678	     (long) (current_frame_info.spill_cfa_off
3679		     + current_frame_info.spill_size));
3680}
3681
3682/* Emit the .body directive at the scheduled end of the prologue.  */
3683
3684static void
3685ia64_output_function_end_prologue (FILE *file)
3686{
3687  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3688    return;
3689
3690  fputs ("\t.body\n", file);
3691}
3692
3693/* Emit the function epilogue.  */
3694
3695static void
3696ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3697			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3698{
3699  int i;
3700
3701  if (current_frame_info.reg_fp)
3702    {
3703      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3704      reg_names[HARD_FRAME_POINTER_REGNUM]
3705	= reg_names[current_frame_info.reg_fp];
3706      reg_names[current_frame_info.reg_fp] = tmp;
3707    }
3708  if (! TARGET_REG_NAMES)
3709    {
3710      for (i = 0; i < current_frame_info.n_input_regs; i++)
3711	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3712      for (i = 0; i < current_frame_info.n_local_regs; i++)
3713	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3714      for (i = 0; i < current_frame_info.n_output_regs; i++)
3715	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3716    }
3717
3718  current_frame_info.initialized = 0;
3719}
3720
3721int
3722ia64_dbx_register_number (int regno)
3723{
3724  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3725     from its home at loc79 to something inside the register frame.  We
3726     must perform the same renumbering here for the debug info.  */
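  /* For example, with 3 input and 2 local registers, in0-in2 map to debug
     registers 32-34, loc0 and loc1 to 35-36, and out0 to 37.  */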
3727  if (current_frame_info.reg_fp)
3728    {
3729      if (regno == HARD_FRAME_POINTER_REGNUM)
3730	regno = current_frame_info.reg_fp;
3731      else if (regno == current_frame_info.reg_fp)
3732	regno = HARD_FRAME_POINTER_REGNUM;
3733    }
3734
3735  if (IN_REGNO_P (regno))
3736    return 32 + regno - IN_REG (0);
3737  else if (LOC_REGNO_P (regno))
3738    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3739  else if (OUT_REGNO_P (regno))
3740    return (32 + current_frame_info.n_input_regs
3741	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3742  else
3743    return regno;
3744}
3745
3746void
3747ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3748{
3749  rtx addr_reg, eight = GEN_INT (8);
3750
  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
3753  if (!TARGET_GNU_AS)
3754    {
3755      static bool declared_ia64_trampoline = false;
3756
3757      if (!declared_ia64_trampoline)
3758	{
3759	  declared_ia64_trampoline = true;
3760	  (*targetm.asm_out.globalize_label) (asm_out_file,
3761					      "__ia64_trampoline");
3762	}
3763    }
3764
3765  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3766  addr = convert_memory_address (Pmode, addr);
3767  fnaddr = convert_memory_address (Pmode, fnaddr);
3768  static_chain = convert_memory_address (Pmode, static_chain);
3769
3770  /* Load up our iterator.  */
3771  addr_reg = gen_reg_rtx (Pmode);
3772  emit_move_insn (addr_reg, addr);
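  /* Sketch of the 32-byte trampoline filled in below (ADDR is the start of
     the trampoline block):

	 [ADDR+ 0]  __ia64_trampoline   \  fake function descriptor
	 [ADDR+ 8]  ADDR+16             /  (entry point, "gp")
	 [ADDR+16]  FNADDR, the address of the target's real descriptor
	 [ADDR+24]  STATIC_CHAIN

     Calling through the fake descriptor enters the __ia64_trampoline stub
     (provided by libgcc) with gp pointing at ADDR+16, from which the stub
     can recover the real target and the static chain.  */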
3773
3774  /* The first two words are the fake descriptor:
3775     __ia64_trampoline, ADDR+16.  */
3776  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3777		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3778  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3779
3780  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3781		  copy_to_reg (plus_constant (addr, 16)));
3782  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3783
3784  /* The third word is the target descriptor.  */
3785  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3786  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3787
3788  /* The fourth word is the static chain.  */
3789  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3790}
3791
/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument, which has type TYPE and mode MODE.
3794
3795   We generate the actual spill instructions during prologue generation.  */
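/* For example, for "int f (int a, ...)" the single named argument consumes
   one slot, so the remaining 7 argument registers are spilled and
   *PRETEND_SIZE becomes 7 * UNITS_PER_WORD = 56 bytes.  */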
3796
3797static void
3798ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3799			     tree type, int * pretend_size,
3800			     int second_time ATTRIBUTE_UNUSED)
3801{
3802  CUMULATIVE_ARGS next_cum = *cum;
3803
3804  /* Skip the current argument.  */
3805  ia64_function_arg_advance (&next_cum, mode, type, 1);
3806
3807  if (next_cum.words < MAX_ARGUMENT_SLOTS)
3808    {
3809      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3810      *pretend_size = n * UNITS_PER_WORD;
3811      cfun->machine->n_varargs = n;
3812    }
3813}
3814
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */
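/* For example, "struct { float x, y, z; }" is an HFA with element mode
   SFmode and "struct { double re, im; }" is one with element mode DFmode,
   whereas "struct { float x; double y; }" (mixed element types) and
   aggregates containing 128-bit quad-precision fields are not HFAs.  */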
3826
3827static enum machine_mode
3828hfa_element_mode (tree type, bool nested)
3829{
3830  enum machine_mode element_mode = VOIDmode;
3831  enum machine_mode mode;
3832  enum tree_code code = TREE_CODE (type);
3833  int know_element_mode = 0;
3834  tree t;
3835
3836  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3837    return VOIDmode;
3838
3839  switch (code)
3840    {
3841    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3842    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
3843    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3844    case LANG_TYPE:		case FUNCTION_TYPE:
3845      return VOIDmode;
3846
3847      /* Fortran complex types are supposed to be HFAs, so we need to handle
3848	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3849	 types though.  */
3850    case COMPLEX_TYPE:
3851      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3852	  && TYPE_MODE (type) != TCmode)
3853	return GET_MODE_INNER (TYPE_MODE (type));
3854      else
3855	return VOIDmode;
3856
3857    case REAL_TYPE:
3858      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3859	 mode if this is contained within an aggregate.  */
3860      if (nested && TYPE_MODE (type) != TFmode)
3861	return TYPE_MODE (type);
3862      else
3863	return VOIDmode;
3864
3865    case ARRAY_TYPE:
3866      return hfa_element_mode (TREE_TYPE (type), 1);
3867
3868    case RECORD_TYPE:
3869    case UNION_TYPE:
3870    case QUAL_UNION_TYPE:
3871      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3872	{
3873	  if (TREE_CODE (t) != FIELD_DECL)
3874	    continue;
3875
3876	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3877	  if (know_element_mode)
3878	    {
3879	      if (mode != element_mode)
3880		return VOIDmode;
3881	    }
3882	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3883	    return VOIDmode;
3884	  else
3885	    {
3886	      know_element_mode = 1;
3887	      element_mode = mode;
3888	    }
3889	}
3890      return element_mode;
3891
3892    default:
3893      /* If we reach here, we probably have some front-end specific type
3894	 that the backend doesn't know about.  This can happen via the
3895	 aggregate_value_p call in init_function_start.  All we can do is
3896	 ignore unknown tree types.  */
3897      return VOIDmode;
3898    }
3899
3900  return VOIDmode;
3901}
3902
3903/* Return the number of words required to hold a quantity of TYPE and MODE
3904   when passed as an argument.  */
3905static int
3906ia64_function_arg_words (tree type, enum machine_mode mode)
3907{
3908  int words;
3909
3910  if (mode == BLKmode)
3911    words = int_size_in_bytes (type);
3912  else
3913    words = GET_MODE_SIZE (mode);
3914
3915  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3916}
3917
3918/* Return the number of registers that should be skipped so the current
3919   argument (described by TYPE and WORDS) will be properly aligned.
3920
3921   Integer and float arguments larger than 8 bytes start at the next
3922   even boundary.  Aggregates larger than 8 bytes start at the next
3923   even boundary if the aggregate has 16 byte alignment.  Note that
3924   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3925   but are still to be aligned in registers.
3926
3927   ??? The ABI does not specify how to handle aggregates with
3928   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3929   all as if they had 16 byte alignment.  Such aggregates can occur
3930   only if gcc extensions are used.  */
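/* For example, when an odd number of slots is already in use, a 16-byte
   aligned aggregate or a 16-byte integer skips one slot so that it starts
   on an even slot boundary, whereas a plain "long" does not.  */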
3931static int
3932ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3933{
3934  if ((cum->words & 1) == 0)
3935    return 0;
3936
3937  if (type
3938      && TREE_CODE (type) != INTEGER_TYPE
3939      && TREE_CODE (type) != REAL_TYPE)
3940    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3941  else
3942    return words > 1;
3943}
3944
3945/* Return rtx for register where argument is passed, or zero if it is passed
3946   on the stack.  */
3947/* ??? 128-bit quad-precision floats are always passed in general
3948   registers.  */
3949
3950rtx
3951ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3952		   int named, int incoming)
3953{
3954  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3955  int words = ia64_function_arg_words (type, mode);
3956  int offset = ia64_function_arg_offset (cum, type, words);
3957  enum machine_mode hfa_mode = VOIDmode;
3958
3959  /* If all argument slots are used, then it must go on the stack.  */
3960  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3961    return 0;
3962
3963  /* Check for and handle homogeneous FP aggregates.  */
3964  if (type)
3965    hfa_mode = hfa_element_mode (type, 0);
3966
3967  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3968     and unprototyped hfas are passed specially.  */
3969  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3970    {
3971      rtx loc[16];
3972      int i = 0;
3973      int fp_regs = cum->fp_regs;
3974      int int_regs = cum->words + offset;
3975      int hfa_size = GET_MODE_SIZE (hfa_mode);
3976      int byte_size;
3977      int args_byte_size;
3978
3979      /* If prototyped, pass it in FR regs then GR regs.
3980	 If not prototyped, pass it in both FR and GR regs.
3981
3982	 If this is an SFmode aggregate, then it is possible to run out of
3983	 FR regs while GR regs are still left.  In that case, we pass the
3984	 remaining part in the GR regs.  */
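      /* A sketch of the result for a named, prototyped argument of type
	 "struct { float f[4]; }" when no FP argument registers are in use
	 yet:

	     (parallel [(expr_list (reg:SF f8) (const_int 0))
			(expr_list (reg:SF f9) (const_int 4))
			(expr_list (reg:SF f10) (const_int 8))
			(expr_list (reg:SF f11) (const_int 12))])

	 one SFmode piece per FR argument register, at increasing byte
	 offsets.  */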
3985
3986      /* Fill the FP regs.  We do this always.  We stop if we reach the end
3987	 of the argument, the last FP register, or the last argument slot.  */
3988
3989      byte_size = ((mode == BLKmode)
3990		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3991      args_byte_size = int_regs * UNITS_PER_WORD;
3992      offset = 0;
3993      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
3994	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
3995	{
3996	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
3997				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
3998							      + fp_regs)),
3999				      GEN_INT (offset));
4000	  offset += hfa_size;
4001	  args_byte_size += hfa_size;
4002	  fp_regs++;
4003	}
4004
4005      /* If no prototype, then the whole thing must go in GR regs.  */
4006      if (! cum->prototype)
4007	offset = 0;
4008      /* If this is an SFmode aggregate, then we might have some left over
4009	 that needs to go in GR regs.  */
4010      else if (byte_size != offset)
4011	int_regs += offset / UNITS_PER_WORD;
4012
4013      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4014
4015      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4016	{
4017	  enum machine_mode gr_mode = DImode;
4018	  unsigned int gr_size;
4019
4020	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4021	     then this goes in a GR reg left adjusted/little endian, right
4022	     adjusted/big endian.  */
4023	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4024	     always right adjusted/little endian.  */
4025	  if (offset & 0x4)
4026	    gr_mode = SImode;
4027	  /* If we have an even 4 byte hunk because the aggregate is a
4028	     multiple of 4 bytes in size, then this goes in a GR reg right
4029	     adjusted/little endian.  */
4030	  else if (byte_size - offset == 4)
4031	    gr_mode = SImode;
4032
4033	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4034				      gen_rtx_REG (gr_mode, (basereg
4035							     + int_regs)),
4036				      GEN_INT (offset));
4037
4038	  gr_size = GET_MODE_SIZE (gr_mode);
4039	  offset += gr_size;
4040	  if (gr_size == UNITS_PER_WORD
4041	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4042	    int_regs++;
4043	  else if (gr_size > UNITS_PER_WORD)
4044	    int_regs += gr_size / UNITS_PER_WORD;
4045	}
4046      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4047    }
4048
  /* Integral values and aggregates go in general registers.  If we have run
     out of FR registers, then FP values must also go in general registers.
     This can happen when we have an SFmode HFA.  */
4052  else if (mode == TFmode || mode == TCmode
4053	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4054    {
4055      int byte_size = ((mode == BLKmode)
4056                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
4061	{
4062	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4063					  gen_rtx_REG (DImode,
4064						       (basereg + cum->words
4065							+ offset)),
4066					  const0_rtx);
4067	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4068	}
4069      else
4070	return gen_rtx_REG (mode, basereg + cum->words + offset);
4071
4072    }
4073
4074  /* If there is a prototype, then FP values go in a FR register when
4075     named, and in a GR register when unnamed.  */
4076  else if (cum->prototype)
4077    {
4078      if (named)
4079	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4080      /* In big-endian mode, an anonymous SFmode value must be represented
4081         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4082	 the value into the high half of the general register.  */
4083      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4084	return gen_rtx_PARALLEL (mode,
4085		 gen_rtvec (1,
4086                   gen_rtx_EXPR_LIST (VOIDmode,
4087		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4088				      const0_rtx)));
4089      else
4090	return gen_rtx_REG (mode, basereg + cum->words + offset);
4091    }
4092  /* If there is no prototype, then FP values go in both FR and GR
4093     registers.  */
4094  else
4095    {
4096      /* See comment above.  */
4097      enum machine_mode inner_mode =
4098	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4099
4100      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4101				      gen_rtx_REG (mode, (FR_ARG_FIRST
4102							  + cum->fp_regs)),
4103				      const0_rtx);
4104      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4105				      gen_rtx_REG (inner_mode,
4106						   (basereg + cum->words
4107						    + offset)),
4108				      const0_rtx);
4109
4110      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4111    }
4112}
4113
/* Return the number of bytes, at the beginning of the argument, that must be
   put in registers.  0 means the argument is entirely in registers or
   entirely in memory.  */
4117
4118static int
4119ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4120			tree type, bool named ATTRIBUTE_UNUSED)
4121{
4122  int words = ia64_function_arg_words (type, mode);
4123  int offset = ia64_function_arg_offset (cum, type, words);
4124
4125  /* If all argument slots are used, then it must go on the stack.  */
4126  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4127    return 0;
4128
4129  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4130     it fits within the 8 argument slots, then it goes entirely in
4131     registers.  If it extends past the last argument slot, then the rest
4132     goes on the stack.  */
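  /* For example, with 6 slots already in use and a 4-word aggregate (no
     alignment padding), the first 2 words go in registers and the rest on
     the stack, so we return 16.  */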
4133
4134  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4135    return 0;
4136
4137  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4138}
4139
4140/* Update CUM to point after this argument.  This is patterned after
4141   ia64_function_arg.  */
4142
4143void
4144ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4145			   tree type, int named)
4146{
4147  int words = ia64_function_arg_words (type, mode);
4148  int offset = ia64_function_arg_offset (cum, type, words);
4149  enum machine_mode hfa_mode = VOIDmode;
4150
4151  /* If all arg slots are already full, then there is nothing to do.  */
4152  if (cum->words >= MAX_ARGUMENT_SLOTS)
4153    return;
4154
4155  cum->words += words + offset;
4156
4157  /* Check for and handle homogeneous FP aggregates.  */
4158  if (type)
4159    hfa_mode = hfa_element_mode (type, 0);
4160
4161  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4162     and unprototyped hfas are passed specially.  */
4163  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4164    {
4165      int fp_regs = cum->fp_regs;
4166      /* This is the original value of cum->words + offset.  */
4167      int int_regs = cum->words - words;
4168      int hfa_size = GET_MODE_SIZE (hfa_mode);
4169      int byte_size;
4170      int args_byte_size;
4171
4172      /* If prototyped, pass it in FR regs then GR regs.
4173	 If not prototyped, pass it in both FR and GR regs.
4174
4175	 If this is an SFmode aggregate, then it is possible to run out of
4176	 FR regs while GR regs are still left.  In that case, we pass the
4177	 remaining part in the GR regs.  */
4178
4179      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4180	 of the argument, the last FP register, or the last argument slot.  */
4181
4182      byte_size = ((mode == BLKmode)
4183		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4184      args_byte_size = int_regs * UNITS_PER_WORD;
4185      offset = 0;
4186      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4187	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4188	{
4189	  offset += hfa_size;
4190	  args_byte_size += hfa_size;
4191	  fp_regs++;
4192	}
4193
4194      cum->fp_regs = fp_regs;
4195    }
4196
  /* Integral values and aggregates go in general registers.  So do TFmode FP
     values.  If we have run out of FR registers, then other FP values must
     also go in general registers.  This can happen when we have an SFmode
     HFA.  */
4200  else if (mode == TFmode || mode == TCmode
4201           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4202    cum->int_regs = cum->words;
4203
4204  /* If there is a prototype, then FP values go in a FR register when
4205     named, and in a GR register when unnamed.  */
4206  else if (cum->prototype)
4207    {
4208      if (! named)
4209	cum->int_regs = cum->words;
4210      else
4211	/* ??? Complex types should not reach here.  */
4212	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4213    }
4214  /* If there is no prototype, then FP values go in both FR and GR
4215     registers.  */
4216  else
4217    {
4218      /* ??? Complex types should not reach here.  */
4219      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4220      cum->int_regs = cum->words;
4221    }
4222}
4223
/* Arguments with alignment larger than 8 bytes start at the next even
   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
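/* For example, an aggregate with 16-byte alignment is aligned to
   2 * PARM_BOUNDARY (128 bits), while a plain "long" or "double" keeps the
   normal 64-bit slot alignment.  */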
4227
4228int
4229ia64_function_arg_boundary (enum machine_mode mode, tree type)
4230{
4231
4232  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4233    return PARM_BOUNDARY * 2;
4234
4235  if (type)
4236    {
4237      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4238        return PARM_BOUNDARY * 2;
4239      else
4240        return PARM_BOUNDARY;
4241    }
4242
4243  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4244    return PARM_BOUNDARY * 2;
4245  else
4246    return PARM_BOUNDARY;
4247}
4248
4249/* True if it is OK to do sibling call optimization for the specified
4250   call expression EXP.  DECL will be the called function, or NULL if
4251   this is an indirect call.  */
4252static bool
4253ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4254{
4255  /* We can't perform a sibcall if the current function has the syscall_linkage
4256     attribute.  */
4257  if (lookup_attribute ("syscall_linkage",
4258			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4259    return false;
4260
4261  /* We must always return with our current GP.  This means we can
4262     only sibcall to functions defined in the current module.  */
4263  return decl && (*targetm.binds_local_p) (decl);
4264}
4265
4266
4267/* Implement va_arg.  */
4268
4269static tree
4270ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4271{
4272  /* Variable sized types are passed by reference.  */
4273  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4274    {
4275      tree ptrtype = build_pointer_type (type);
4276      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4277      return build_va_arg_indirect_ref (addr);
4278    }
4279
4280  /* Aggregate arguments with alignment larger than 8 bytes start at
4281     the next even boundary.  Integer and floating point arguments
4282     do so if they are larger than 8 bytes, whether or not they are
4283     also aligned larger than 8 bytes.  */
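  /* That is, VALIST is first rounded up to the next 16-byte boundary:
     valist = (valist + 2*UNITS_PER_WORD - 1) & -(2*UNITS_PER_WORD).  */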
4284  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4285      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4286    {
4287      tree t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
4288		      build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4289      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4290		 build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4291      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4292      gimplify_and_add (t, pre_p);
4293    }
4294
4295  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4296}
4297
/* Return true if the function return value is returned in memory.  Return
   false if it is returned in a register.  */
4300
4301static bool
4302ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4303{
4304  enum machine_mode mode;
4305  enum machine_mode hfa_mode;
4306  HOST_WIDE_INT byte_size;
4307
4308  mode = TYPE_MODE (valtype);
4309  byte_size = GET_MODE_SIZE (mode);
4310  if (mode == BLKmode)
4311    {
4312      byte_size = int_size_in_bytes (valtype);
4313      if (byte_size < 0)
4314	return true;
4315    }
4316
4317  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
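  /* For example, "struct { double d[8]; }" is an HFA of 8 DFmode elements
     and is returned in FP registers, whereas "struct { double d[9]; }" is
     returned in memory, as is any non-HFA aggregate larger than
     MAX_INT_RETURN_SLOTS words.  */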
4318
4319  hfa_mode = hfa_element_mode (valtype, 0);
4320  if (hfa_mode != VOIDmode)
4321    {
4322      int hfa_size = GET_MODE_SIZE (hfa_mode);
4323
4324      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4325	return true;
4326      else
4327	return false;
4328    }
4329  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4330    return true;
4331  else
4332    return false;
4333}
4334
4335/* Return rtx for register that holds the function return value.  */
4336
4337rtx
4338ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4339{
4340  enum machine_mode mode;
4341  enum machine_mode hfa_mode;
4342
4343  mode = TYPE_MODE (valtype);
4344  hfa_mode = hfa_element_mode (valtype, 0);
4345
4346  if (hfa_mode != VOIDmode)
4347    {
4348      rtx loc[8];
4349      int i;
4350      int hfa_size;
4351      int byte_size;
4352      int offset;
4353
4354      hfa_size = GET_MODE_SIZE (hfa_mode);
4355      byte_size = ((mode == BLKmode)
4356		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4357      offset = 0;
4358      for (i = 0; offset < byte_size; i++)
4359	{
4360	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4361				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4362				      GEN_INT (offset));
4363	  offset += hfa_size;
4364	}
4365      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4366    }
4367  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4368    return gen_rtx_REG (mode, FR_ARG_FIRST);
4369  else
4370    {
4371      bool need_parallel = false;
4372
4373      /* In big-endian mode, we need to manage the layout of aggregates
4374	 in the registers so that we get the bits properly aligned in
4375	 the highpart of the registers.  */
4376      if (BYTES_BIG_ENDIAN
4377	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4378	need_parallel = true;
4379
4380      /* Something like struct S { long double x; char a[0] } is not an
4381	 HFA structure, and therefore doesn't go in fp registers.  But
4382	 the middle-end will give it XFmode anyway, and XFmode values
4383	 don't normally fit in integer registers.  So we need to smuggle
4384	 the value inside a parallel.  */
4385      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4386	need_parallel = true;
4387
4388      if (need_parallel)
4389	{
4390	  rtx loc[8];
4391	  int offset;
4392	  int bytesize;
4393	  int i;
4394
4395	  offset = 0;
4396	  bytesize = int_size_in_bytes (valtype);
4397	  /* An empty PARALLEL is invalid here, but the return value
4398	     doesn't matter for empty structs.  */
4399	  if (bytesize == 0)
4400	    return gen_rtx_REG (mode, GR_RET_FIRST);
4401	  for (i = 0; offset < bytesize; i++)
4402	    {
4403	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4404					  gen_rtx_REG (DImode,
4405						       GR_RET_FIRST + i),
4406					  GEN_INT (offset));
4407	      offset += UNITS_PER_WORD;
4408	    }
4409	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4410	}
4411
4412      return gen_rtx_REG (mode, GR_RET_FIRST);
4413    }
4414}
4415
4416/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4417   We need to emit DTP-relative relocations.  */
4418
4419static void
4420ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4421{
4422  gcc_assert (size == 4 || size == 8);
4423  if (size == 4)
4424    fputs ("\tdata4.ua\t@dtprel(", file);
4425  else
4426    fputs ("\tdata8.ua\t@dtprel(", file);
4427  output_addr_const (file, x);
4428  fputs (")", file);
4429}
4430
4431/* Print a memory address as an operand to reference that memory location.  */
4432
4433/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4434   also call this from ia64_print_operand for memory addresses.  */
4435
4436void
4437ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4438			    rtx address ATTRIBUTE_UNUSED)
4439{
4440}
4441
4442/* Print an operand to an assembler instruction.
4443   C	Swap and print a comparison operator.
4444   D	Print an FP comparison operator.
4445   E    Print 32 - constant, for SImode shifts as extract.
4446   e    Print 64 - constant, for DImode rotates.
4447   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4448        a floating point register emitted normally.
4449   I	Invert a predicate register by adding 1.
4450   J    Select the proper predicate register for a condition.
4451   j    Select the inverse predicate register for a condition.
4452   O	Append .acq for volatile load.
4453   P	Postincrement of a MEM.
4454   Q	Append .rel for volatile store.
4455   S	Shift amount for shladd instruction.
4456   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4457	for Intel assembler.
4458   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4459	for Intel assembler.
4460   X	A pair of floating point registers.
4461   r	Print register name, or constant 0 as r0.  HP compatibility for
4462	Linux kernel.
4463   v    Print vector constant value as an 8-byte integer value.  */
4464
4465void
4466ia64_print_operand (FILE * file, rtx x, int code)
4467{
4468  const char *str;
4469
4470  switch (code)
4471    {
4472    case 0:
4473      /* Handled below.  */
4474      break;
4475
4476    case 'C':
4477      {
4478	enum rtx_code c = swap_condition (GET_CODE (x));
4479	fputs (GET_RTX_NAME (c), file);
4480	return;
4481      }
4482
4483    case 'D':
4484      switch (GET_CODE (x))
4485	{
4486	case NE:
4487	  str = "neq";
4488	  break;
4489	case UNORDERED:
4490	  str = "unord";
4491	  break;
4492	case ORDERED:
4493	  str = "ord";
4494	  break;
4495	default:
4496	  str = GET_RTX_NAME (GET_CODE (x));
4497	  break;
4498	}
4499      fputs (str, file);
4500      return;
4501
4502    case 'E':
4503      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4504      return;
4505
4506    case 'e':
4507      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4508      return;
4509
4510    case 'F':
4511      if (x == CONST0_RTX (GET_MODE (x)))
4512	str = reg_names [FR_REG (0)];
4513      else if (x == CONST1_RTX (GET_MODE (x)))
4514	str = reg_names [FR_REG (1)];
4515      else
4516	{
4517	  gcc_assert (GET_CODE (x) == REG);
4518	  str = reg_names [REGNO (x)];
4519	}
4520      fputs (str, file);
4521      return;
4522
4523    case 'I':
4524      fputs (reg_names [REGNO (x) + 1], file);
4525      return;
4526
4527    case 'J':
4528    case 'j':
4529      {
4530	unsigned int regno = REGNO (XEXP (x, 0));
4531	if (GET_CODE (x) == EQ)
4532	  regno += 1;
4533	if (code == 'j')
4534	  regno ^= 1;
4535        fputs (reg_names [regno], file);
4536      }
4537      return;
4538
4539    case 'O':
4540      if (MEM_VOLATILE_P (x))
4541	fputs(".acq", file);
4542      return;
4543
4544    case 'P':
4545      {
4546	HOST_WIDE_INT value;
4547
4548	switch (GET_CODE (XEXP (x, 0)))
4549	  {
4550	  default:
4551	    return;
4552
4553	  case POST_MODIFY:
4554	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4555	    if (GET_CODE (x) == CONST_INT)
4556	      value = INTVAL (x);
4557	    else
4558	      {
4559		gcc_assert (GET_CODE (x) == REG);
4560		fprintf (file, ", %s", reg_names[REGNO (x)]);
4561		return;
4562	      }
4563	    break;
4564
4565	  case POST_INC:
4566	    value = GET_MODE_SIZE (GET_MODE (x));
4567	    break;
4568
4569	  case POST_DEC:
4570	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4571	    break;
4572	  }
4573
4574	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4575	return;
4576      }
4577
4578    case 'Q':
4579      if (MEM_VOLATILE_P (x))
4580	fputs(".rel", file);
4581      return;
4582
4583    case 'S':
4584      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4585      return;
4586
4587    case 'T':
4588      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4589	{
4590	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4591	  return;
4592	}
4593      break;
4594
4595    case 'U':
4596      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4597	{
4598	  const char *prefix = "0x";
4599	  if (INTVAL (x) & 0x80000000)
4600	    {
4601	      fprintf (file, "0xffffffff");
4602	      prefix = "";
4603	    }
4604	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4605	  return;
4606	}
4607      break;
4608
4609    case 'X':
4610      {
4611	unsigned int regno = REGNO (x);
4612	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4613      }
4614      return;
4615
4616    case 'r':
4617      /* If this operand is the constant zero, write it as register zero.
4618	 Any register, zero, or CONST_INT value is OK here.  */
4619      if (GET_CODE (x) == REG)
4620	fputs (reg_names[REGNO (x)], file);
4621      else if (x == CONST0_RTX (GET_MODE (x)))
4622	fputs ("r0", file);
4623      else if (GET_CODE (x) == CONST_INT)
4624	output_addr_const (file, x);
4625      else
4626	output_operand_lossage ("invalid %%r value");
4627      return;
4628
4629    case 'v':
4630      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4631      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4632      break;
4633
4634    case '+':
4635      {
4636	const char *which;
4637
4638	/* For conditional branches, returns or calls, substitute
4639	   sptk, dptk, dpnt, or spnt for %s.  */
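	/* The cutoffs below are 2%, 50% and 98% of REG_BR_PROB_BASE, so a
	   branch predicted taken 99% of the time gets ".sptk" and one taken
	   30% of the time gets ".dpnt".  */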
4640	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4641	if (x)
4642	  {
4643	    int pred_val = INTVAL (XEXP (x, 0));
4644
4645	    /* Guess top and bottom 10% statically predicted.  */
4646	    if (pred_val < REG_BR_PROB_BASE / 50)
4647	      which = ".spnt";
4648	    else if (pred_val < REG_BR_PROB_BASE / 2)
4649	      which = ".dpnt";
4650	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98)
4651	      which = ".dptk";
4652	    else
4653	      which = ".sptk";
4654	  }
4655	else if (GET_CODE (current_output_insn) == CALL_INSN)
4656	  which = ".sptk";
4657	else
4658	  which = ".dptk";
4659
4660	fputs (which, file);
4661	return;
4662      }
4663
4664    case ',':
4665      x = current_insn_predicate;
4666      if (x)
4667	{
4668	  unsigned int regno = REGNO (XEXP (x, 0));
4669	  if (GET_CODE (x) == EQ)
4670	    regno += 1;
4671          fprintf (file, "(%s) ", reg_names [regno]);
4672	}
4673      return;
4674
4675    default:
4676      output_operand_lossage ("ia64_print_operand: unknown code");
4677      return;
4678    }
4679
4680  switch (GET_CODE (x))
4681    {
4682      /* This happens for the spill/restore instructions.  */
4683    case POST_INC:
4684    case POST_DEC:
4685    case POST_MODIFY:
4686      x = XEXP (x, 0);
4687      /* ... fall through ...  */
4688
4689    case REG:
4690      fputs (reg_names [REGNO (x)], file);
4691      break;
4692
4693    case MEM:
4694      {
4695	rtx addr = XEXP (x, 0);
4696	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4697	  addr = XEXP (addr, 0);
4698	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4699	break;
4700      }
4701
4702    default:
4703      output_addr_const (file, x);
4704      break;
4705    }
4706
4707  return;
4708}
4709
4710/* Compute a (partial) cost for rtx X.  Return true if the complete
4711   cost has been computed, and false if subexpressions should be
4712   scanned.  In either case, *TOTAL contains the cost result.  */
4713/* ??? This is incomplete.  */
4714
4715static bool
4716ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4717{
4718  switch (code)
4719    {
4720    case CONST_INT:
4721      switch (outer_code)
4722        {
4723        case SET:
4724	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4725	  return true;
4726        case PLUS:
4727	  if (CONST_OK_FOR_I (INTVAL (x)))
4728	    *total = 0;
4729	  else if (CONST_OK_FOR_J (INTVAL (x)))
4730	    *total = 1;
4731	  else
4732	    *total = COSTS_N_INSNS (1);
4733	  return true;
4734        default:
4735	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4736	    *total = 0;
4737	  else
4738	    *total = COSTS_N_INSNS (1);
4739	  return true;
4740	}
4741
4742    case CONST_DOUBLE:
4743      *total = COSTS_N_INSNS (1);
4744      return true;
4745
4746    case CONST:
4747    case SYMBOL_REF:
4748    case LABEL_REF:
4749      *total = COSTS_N_INSNS (3);
4750      return true;
4751
4752    case MULT:
4753      /* For multiplies wider than HImode, we have to go to the FPU,
4754         which normally involves copies.  Plus there's the latency
4755         of the multiply itself, and the latency of the instructions to
4756         transfer integer regs to FP regs.  */
4757      /* ??? Check for FP mode.  */
4758      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4759        *total = COSTS_N_INSNS (10);
4760      else
4761	*total = COSTS_N_INSNS (2);
4762      return true;
4763
4764    case PLUS:
4765    case MINUS:
4766    case ASHIFT:
4767    case ASHIFTRT:
4768    case LSHIFTRT:
4769      *total = COSTS_N_INSNS (1);
4770      return true;
4771
4772    case DIV:
4773    case UDIV:
4774    case MOD:
4775    case UMOD:
4776      /* We make divide expensive, so that divide-by-constant will be
4777         optimized to a multiply.  */
4778      *total = COSTS_N_INSNS (60);
4779      return true;
4780
4781    default:
4782      return false;
4783    }
4784}
4785
4786/* Calculate the cost of moving data from a register in class FROM to
4787   one in class TO, using MODE.  */
4788
4789int
4790ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4791			 enum reg_class to)
4792{
4793  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4794  if (to == ADDL_REGS)
4795    to = GR_REGS;
4796  if (from == ADDL_REGS)
4797    from = GR_REGS;
4798
4799  /* All costs are symmetric, so reduce cases by putting the
4800     lower number class as the destination.  */
4801  if (from < to)
4802    {
4803      enum reg_class tmp = to;
4804      to = from, from = tmp;
4805    }
4806
4807  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4808     so that we get secondary memory reloads.  Between FR_REGS,
4809     we have to make this at least as expensive as MEMORY_MOVE_COST
4810     to avoid spectacularly poor register class preferencing.  */
4811  if (mode == XFmode || mode == RFmode)
4812    {
4813      if (to != GR_REGS || from != GR_REGS)
4814        return MEMORY_MOVE_COST (mode, to, 0);
4815      else
4816	return 3;
4817    }
4818
4819  switch (to)
4820    {
4821    case PR_REGS:
4822      /* Moving between PR registers takes two insns.  */
4823      if (from == PR_REGS)
4824	return 3;
4825      /* Moving between PR and anything but GR is impossible.  */
4826      if (from != GR_REGS)
4827	return MEMORY_MOVE_COST (mode, to, 0);
4828      break;
4829
4830    case BR_REGS:
4831      /* Moving between BR and anything but GR is impossible.  */
4832      if (from != GR_REGS && from != GR_AND_BR_REGS)
4833	return MEMORY_MOVE_COST (mode, to, 0);
4834      break;
4835
4836    case AR_I_REGS:
4837    case AR_M_REGS:
4838      /* Moving between AR and anything but GR is impossible.  */
4839      if (from != GR_REGS)
4840	return MEMORY_MOVE_COST (mode, to, 0);
4841      break;
4842
4843    case GR_REGS:
4844    case FR_REGS:
4845    case FP_REGS:
4846    case GR_AND_FR_REGS:
4847    case GR_AND_BR_REGS:
4848    case ALL_REGS:
4849      break;
4850
4851    default:
4852      gcc_unreachable ();
4853    }
4854
4855  return 2;
4856}
4857
4858/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
4859   to use when copying X into that class.  */
4860
4861enum reg_class
4862ia64_preferred_reload_class (rtx x, enum reg_class class)
4863{
4864  switch (class)
4865    {
4866    case FR_REGS:
4867    case FP_REGS:
4868      /* Don't allow volatile mem reloads into floating point registers.
4869	 This is defined to force reload to choose the r/m case instead
4870	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
4871      if (MEM_P (x) && MEM_VOLATILE_P (x))
4872	return NO_REGS;
4873
4874      /* Force all unrecognized constants into the constant pool.  */
4875      if (CONSTANT_P (x))
4876	return NO_REGS;
4877      break;
4878
4879    case AR_M_REGS:
4880    case AR_I_REGS:
4881      if (!OBJECT_P (x))
4882	return NO_REGS;
4883      break;
4884
4885    default:
4886      break;
4887    }
4888
4889  return class;
4890}
4891
4892/* This function returns the register class required for a secondary
4893   register when copying between one of the registers in CLASS, and X,
4894   using MODE.  A return value of NO_REGS means that no secondary register
4895   is required.  */
4896
4897enum reg_class
4898ia64_secondary_reload_class (enum reg_class class,
4899			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4900{
4901  int regno = -1;
4902
4903  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4904    regno = true_regnum (x);
4905
4906  switch (class)
4907    {
4908    case BR_REGS:
4909    case AR_M_REGS:
4910    case AR_I_REGS:
4911      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4912	 interaction.  We end up with two pseudos with overlapping lifetimes
	 both of which are equiv to the same constant, and both of which need
4914	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4915	 changes depending on the path length, which means the qty_first_reg
4916	 check in make_regs_eqv can give different answers at different times.
4917	 At some point I'll probably need a reload_indi pattern to handle
4918	 this.
4919
4920	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4921	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4922	 non-general registers for good measure.  */
4923      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4924	return GR_REGS;
4925
4926      /* This is needed if a pseudo used as a call_operand gets spilled to a
4927	 stack slot.  */
4928      if (GET_CODE (x) == MEM)
4929	return GR_REGS;
4930      break;
4931
4932    case FR_REGS:
4933    case FP_REGS:
4934      /* Need to go through general registers to get to other class regs.  */
4935      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4936	return GR_REGS;
4937
4938      /* This can happen when a paradoxical subreg is an operand to the
4939	 muldi3 pattern.  */
4940      /* ??? This shouldn't be necessary after instruction scheduling is
4941	 enabled, because paradoxical subregs are not accepted by
4942	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4943	 stop the paradoxical subreg stupidity in the *_operand functions
4944	 in recog.c.  */
4945      if (GET_CODE (x) == MEM
4946	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4947	      || GET_MODE (x) == QImode))
4948	return GR_REGS;
4949
4950      /* This can happen because of the ior/and/etc patterns that accept FP
4951	 registers as operands.  If the third operand is a constant, then it
4952	 needs to be reloaded into a FP register.  */
4953      if (GET_CODE (x) == CONST_INT)
4954	return GR_REGS;
4955
4956      /* This can happen because of register elimination in a muldi3 insn.
4957	 E.g. `26107 * (unsigned long)&u'.  */
4958      if (GET_CODE (x) == PLUS)
4959	return GR_REGS;
4960      break;
4961
4962    case PR_REGS:
4963      /* ??? This happens if we cse/gcse a BImode value across a call,
4964	 and the function has a nonlocal goto.  This is because global
4965	 does not allocate call crossing pseudos to hard registers when
4966	 current_function_has_nonlocal_goto is true.  This is relatively
4967	 common for C++ programs that use exceptions.  To reproduce,
4968	 return NO_REGS and compile libstdc++.  */
4969      if (GET_CODE (x) == MEM)
4970	return GR_REGS;
4971
4972      /* This can happen when we take a BImode subreg of a DImode value,
4973	 and that DImode value winds up in some non-GR register.  */
4974      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
4975	return GR_REGS;
4976      break;
4977
4978    default:
4979      break;
4980    }
4981
4982  return NO_REGS;
4983}
4984
4985
4986/* Emit text to declare externally defined variables and functions, because
4987   the Intel assembler does not support undefined externals.  */
4988
4989void
4990ia64_asm_output_external (FILE *file, tree decl, const char *name)
4991{
4992  int save_referenced;
4993
4994  /* GNU as does not need anything here, but the HP linker does need
4995     something for external functions.  */
4996
4997  if (TARGET_GNU_AS
4998      && (!TARGET_HPUX_LD
4999	  || TREE_CODE (decl) != FUNCTION_DECL
5000	  || strstr (name, "__builtin_") == name))
5001    return;
5002
5003  /* ??? The Intel assembler creates a reference that needs to be satisfied by
5004     the linker when we do this, so we need to be careful not to do this for
5005     builtin functions which have no library equivalent.  Unfortunately, we
5006     can't tell here whether or not a function will actually be called by
5007     expand_expr, so we pull in library functions even if we may not need
5008     them later.  */
5009  if (! strcmp (name, "__builtin_next_arg")
5010      || ! strcmp (name, "alloca")
5011      || ! strcmp (name, "__builtin_constant_p")
5012      || ! strcmp (name, "__builtin_args_info"))
5013    return;
5014
5015  if (TARGET_HPUX_LD)
5016    ia64_hpux_add_extern_decl (decl);
5017  else
5018    {
5019      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
5020         restore it.  */
5021      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
5022      if (TREE_CODE (decl) == FUNCTION_DECL)
5023        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
5024      (*targetm.asm_out.globalize_label) (file, name);
5025      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
5026    }
5027}
5028
5029/* Parse the -mfixed-range= option string.  */
5030
5031static void
5032fix_range (const char *const_str)
5033{
5034  int i, first, last;
5035  char *str, *dash, *comma;
5036
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5038     REG2 are either register names or register numbers.  The effect
5039     of this option is to mark the registers in the range from REG1 to
5040     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5041     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
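  /* For illustration, a string such as "f32-f127" (or
     "f12-f15,f32-f127") names one or more dash-separated ranges, and
     every register in each range is marked as fixed and call-used by
     the loop below.  */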
5042
5043  i = strlen (const_str);
5044  str = (char *) alloca (i + 1);
5045  memcpy (str, const_str, i + 1);
5046
5047  while (1)
5048    {
5049      dash = strchr (str, '-');
5050      if (!dash)
5051	{
5052	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5053	  return;
5054	}
5055      *dash = '\0';
5056
5057      comma = strchr (dash + 1, ',');
5058      if (comma)
5059	*comma = '\0';
5060
5061      first = decode_reg_name (str);
5062      if (first < 0)
5063	{
5064	  warning (0, "unknown register name: %s", str);
5065	  return;
5066	}
5067
5068      last = decode_reg_name (dash + 1);
5069      if (last < 0)
5070	{
5071	  warning (0, "unknown register name: %s", dash + 1);
5072	  return;
5073	}
5074
5075      *dash = '-';
5076
5077      if (first > last)
5078	{
5079	  warning (0, "%s-%s is an empty range", str, dash + 1);
5080	  return;
5081	}
5082
5083      for (i = first; i <= last; ++i)
5084	fixed_regs[i] = call_used_regs[i] = 1;
5085
5086      if (!comma)
5087	break;
5088
5089      *comma = ',';
5090      str = comma + 1;
5091    }
5092}
5093
5094/* Implement TARGET_HANDLE_OPTION.  */
5095
5096static bool
5097ia64_handle_option (size_t code, const char *arg, int value)
5098{
5099  switch (code)
5100    {
5101    case OPT_mfixed_range_:
5102      fix_range (arg);
5103      return true;
5104
5105    case OPT_mtls_size_:
5106      if (value != 14 && value != 22 && value != 64)
5107	error ("bad value %<%s%> for -mtls-size= switch", arg);
5108      return true;
5109
5110    case OPT_mtune_:
5111      {
5112	static struct pta
5113	  {
5114	    const char *name;		/* processor name or nickname.  */
5115	    enum processor_type processor;
5116	  }
5117	const processor_alias_table[] =
5118	  {
5119	    {"itanium", PROCESSOR_ITANIUM},
5120	    {"itanium1", PROCESSOR_ITANIUM},
5121	    {"merced", PROCESSOR_ITANIUM},
5122	    {"itanium2", PROCESSOR_ITANIUM2},
5123	    {"mckinley", PROCESSOR_ITANIUM2},
5124	  };
5125	int const pta_size = ARRAY_SIZE (processor_alias_table);
5126	int i;
5127
5128	for (i = 0; i < pta_size; i++)
5129	  if (!strcmp (arg, processor_alias_table[i].name))
5130	    {
5131	      ia64_tune = processor_alias_table[i].processor;
5132	      break;
5133	    }
5134	if (i == pta_size)
5135	  error ("bad value %<%s%> for -mtune= switch", arg);
5136	return true;
5137      }
5138
5139    default:
5140      return true;
5141    }
5142}
5143
5144/* Implement OVERRIDE_OPTIONS.  */
5145
5146void
5147ia64_override_options (void)
5148{
5149  if (TARGET_AUTO_PIC)
5150    target_flags |= MASK_CONST_GP;
5151
5152  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5153    {
5154      warning (0, "not yet implemented: latency-optimized inline square root");
5155      TARGET_INLINE_SQRT = INL_MAX_THR;
5156    }
5157
5158  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5159  flag_schedule_insns_after_reload = 0;
5160
5161  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5162
5163  init_machine_status = ia64_init_machine_status;
5164}
5165
5166static struct machine_function *
5167ia64_init_machine_status (void)
5168{
5169  return ggc_alloc_cleared (sizeof (struct machine_function));
5170}
5171
5172static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5173static enum attr_type ia64_safe_type (rtx);
5174
5175static enum attr_itanium_class
5176ia64_safe_itanium_class (rtx insn)
5177{
5178  if (recog_memoized (insn) >= 0)
5179    return get_attr_itanium_class (insn);
5180  else
5181    return ITANIUM_CLASS_UNKNOWN;
5182}
5183
5184static enum attr_type
5185ia64_safe_type (rtx insn)
5186{
5187  if (recog_memoized (insn) >= 0)
5188    return get_attr_type (insn);
5189  else
5190    return TYPE_UNKNOWN;
5191}
5192
/* The following collection of routines emits instruction group stop bits as
5194   necessary to avoid dependencies.  */
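/* For background: a stop bit is written as ";;" in the assembly output
   and ends the current instruction group.  E.g., roughly,

	add r14 = r15, r16 ;;
	ld8 r17 = [r14]

   needs the stop bit between the two insns because an instruction group
   must not read a register that an earlier insn in the same group
   writes (a RAW dependency).  */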
5195
5196/* Need to track some additional registers as far as serialization is
5197   concerned so we can properly handle br.call and br.ret.  We could
5198   make these registers visible to gcc, but since these registers are
5199   never explicitly used in gcc generated code, it seems wasteful to
5200   do so (plus it would make the call and return patterns needlessly
5201   complex).  */
5202#define REG_RP		(BR_REG (0))
5203#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
5204/* This is used for volatile asms which may require a stop bit immediately
5205   before and after them.  */
5206#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
5207#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
5208#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
5209
5210/* For each register, we keep track of how it has been written in the
5211   current instruction group.
5212
5213   If a register is written unconditionally (no qualifying predicate),
5214   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5215
5216   If a register is written if its qualifying predicate P is true, we
5217   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5218   may be written again by the complement of P (P^1) and when this happens,
5219   WRITE_COUNT gets set to 2.
5220
5221   The result of this is that whenever an insn attempts to write a register
5222   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5223
5224   If a predicate register is written by a floating-point insn, we set
5225   WRITTEN_BY_FP to true.
5226
5227   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5228   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
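/* As a rough example of the complementary-predicate case above,

	cmp.eq p6, p7 = r8, r9 ;;
	(p6) mov r10 = 1
	(p7) mov r10 = 2

   writes r10 twice in one group under the complementary pair p6/p7,
   which is legal and needs no stop bit; any further write of r10 in
   that group would push WRITE_COUNT to 2 and force a barrier.  */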
5229
5230struct reg_write_state
5231{
5232  unsigned int write_count : 2;
5233  unsigned int first_pred : 16;
5234  unsigned int written_by_fp : 1;
5235  unsigned int written_by_and : 1;
5236  unsigned int written_by_or : 1;
5237};
5238
5239/* Cumulative info for the current instruction group.  */
5240struct reg_write_state rws_sum[NUM_REGS];
5241/* Info for the current instruction.  This gets copied to rws_sum after a
5242   stop bit is emitted.  */
5243struct reg_write_state rws_insn[NUM_REGS];
5244
5245/* Indicates whether this is the first instruction after a stop bit,
5246   in which case we don't need another stop bit.  Without this,
5247   ia64_variable_issue will die when scheduling an alloc.  */
5248static int first_instruction;
5249
5250/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5251   RTL for one instruction.  */
5252struct reg_flags
5253{
5254  unsigned int is_write : 1;	/* Is register being written?  */
5255  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
5256  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
5257  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
5258  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling rather than a normal call?  */
5260};
5261
5262static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5263static int rws_access_regno (int, struct reg_flags, int);
5264static int rws_access_reg (rtx, struct reg_flags, int);
5265static void update_set_flags (rtx, struct reg_flags *);
5266static int set_src_needs_barrier (rtx, struct reg_flags, int);
5267static int rtx_needs_barrier (rtx, struct reg_flags, int);
5268static void init_insn_group_barriers (void);
5269static int group_barrier_needed (rtx);
5270static int safe_group_barrier_needed (rtx);
5271
5272/* Update *RWS for REGNO, which is being written by the current instruction,
5273   with predicate PRED, and associated register flags in FLAGS.  */
5274
5275static void
5276rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5277{
5278  if (pred)
5279    rws[regno].write_count++;
5280  else
5281    rws[regno].write_count = 2;
5282  rws[regno].written_by_fp |= flags.is_fp;
5283  /* ??? Not tracking and/or across differing predicates.  */
5284  rws[regno].written_by_and = flags.is_and;
5285  rws[regno].written_by_or = flags.is_or;
5286  rws[regno].first_pred = pred;
5287}
5288
5289/* Handle an access to register REGNO of type FLAGS using predicate register
5290   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
5291   a dependency with an earlier instruction in the same group.  */
5292
5293static int
5294rws_access_regno (int regno, struct reg_flags flags, int pred)
5295{
5296  int need_barrier = 0;
5297
5298  gcc_assert (regno < NUM_REGS);
5299
5300  if (! PR_REGNO_P (regno))
5301    flags.is_and = flags.is_or = 0;
5302
5303  if (flags.is_write)
5304    {
5305      int write_count;
5306
      /* A single insn must not write the same reg multiple times.  */
5308      gcc_assert (!rws_insn[regno].write_count);
5309
5310      /* Update info for current instruction.  */
5311      rws_update (rws_insn, regno, flags, pred);
5312      write_count = rws_sum[regno].write_count;
5313
5314      switch (write_count)
5315	{
5316	case 0:
5317	  /* The register has not been written yet.  */
5318	  rws_update (rws_sum, regno, flags, pred);
5319	  break;
5320
5321	case 1:
5322	  /* The register has been written via a predicate.  If this is
5323	     not a complementary predicate, then we need a barrier.  */
5324	  /* ??? This assumes that P and P+1 are always complementary
5325	     predicates for P even.  */
5326	  if (flags.is_and && rws_sum[regno].written_by_and)
5327	    ;
5328	  else if (flags.is_or && rws_sum[regno].written_by_or)
5329	    ;
5330	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
5331	    need_barrier = 1;
5332	  rws_update (rws_sum, regno, flags, pred);
5333	  break;
5334
5335	case 2:
5336	  /* The register has been unconditionally written already.  We
5337	     need a barrier.  */
5338	  if (flags.is_and && rws_sum[regno].written_by_and)
5339	    ;
5340	  else if (flags.is_or && rws_sum[regno].written_by_or)
5341	    ;
5342	  else
5343	    need_barrier = 1;
5344	  rws_sum[regno].written_by_and = flags.is_and;
5345	  rws_sum[regno].written_by_or = flags.is_or;
5346	  break;
5347
5348	default:
5349	  gcc_unreachable ();
5350	}
5351    }
5352  else
5353    {
5354      if (flags.is_branch)
5355	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */
5358
5359	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5360	    /* RAW dependencies on branch regs are permissible as long
5361	       as the writer is a non-branch instruction.  Since we
5362	       never generate code that uses a branch register written
5363	       by a branch instruction, handling this case is
5364	       easy.  */
5365	    return 0;
5366
5367	  if (REGNO_REG_CLASS (regno) == PR_REGS
5368	      && ! rws_sum[regno].written_by_fp)
5369	    /* The predicates of a branch are available within the
5370	       same insn group as long as the predicate was written by
5371	       something other than a floating-point instruction.  */
5372	    return 0;
5373	}
5374
5375      if (flags.is_and && rws_sum[regno].written_by_and)
5376	return 0;
5377      if (flags.is_or && rws_sum[regno].written_by_or)
5378	return 0;
5379
5380      switch (rws_sum[regno].write_count)
5381	{
5382	case 0:
5383	  /* The register has not been written yet.  */
5384	  break;
5385
5386	case 1:
5387	  /* The register has been written via a predicate.  If this is
5388	     not a complementary predicate, then we need a barrier.  */
5389	  /* ??? This assumes that P and P+1 are always complementary
5390	     predicates for P even.  */
5391	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5392	    need_barrier = 1;
5393	  break;
5394
5395	case 2:
5396	  /* The register has been unconditionally written already.  We
5397	     need a barrier.  */
5398	  need_barrier = 1;
5399	  break;
5400
5401	default:
5402	  gcc_unreachable ();
5403	}
5404    }
5405
5406  return need_barrier;
5407}
5408
5409static int
5410rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5411{
5412  int regno = REGNO (reg);
5413  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5414
5415  if (n == 1)
5416    return rws_access_regno (regno, flags, pred);
5417  else
5418    {
5419      int need_barrier = 0;
5420      while (--n >= 0)
5421	need_barrier |= rws_access_regno (regno + n, flags, pred);
5422      return need_barrier;
5423    }
5424}
5425
/* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
   accordingly.  */
5428
5429static void
5430update_set_flags (rtx x, struct reg_flags *pflags)
5431{
5432  rtx src = SET_SRC (x);
5433
5434  switch (GET_CODE (src))
5435    {
5436    case CALL:
5437      return;
5438
5439    case IF_THEN_ELSE:
      /* There are three cases here:
	 (1) The destination is (pc), in which case this is a branch;
	 (2) The destination is ar.lc, in which case this is a
	     doloop_end_internal;
	 (3) The destination is an fp register, in which case this is
	     an fselect instruction.
	 In all three cases, nothing we do in this function applies.  */
5448      return;
5449
5450    default:
5451      if (COMPARISON_P (src)
5452	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5453	/* Set pflags->is_fp to 1 so that we know we're dealing
5454	   with a floating point comparison when processing the
5455	   destination of the SET.  */
5456	pflags->is_fp = 1;
5457
5458      /* Discover if this is a parallel comparison.  We only handle
5459	 and.orcm and or.andcm at present, since we must retain a
5460	 strict inverse on the predicate pair.  */
5461      else if (GET_CODE (src) == AND)
5462	pflags->is_and = 1;
5463      else if (GET_CODE (src) == IOR)
5464	pflags->is_or = 1;
5465
5466      break;
5467    }
5468}
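
/* For the parallel-compare case: the IA-64 architecture allows several
   compares of the same parallel type (e.g. the and.orcm family) to
   target the same predicate pair within one instruction group, and the
   WRITTEN_BY_AND/WRITTEN_BY_OR tracking in rws_access_regno is what
   lets us accept such same-type multiple writes without a stop bit.  */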
5469
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  */
5474
5475static int
5476set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5477{
5478  int need_barrier = 0;
5479  rtx dst;
5480  rtx src = SET_SRC (x);
5481
5482  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by the subroutine call.  */
5485    return rtx_needs_barrier (src, flags, pred);
5486  else if (SET_DEST (x) == pc_rtx)
5487    {
5488      /* X is a conditional branch.  */
5489      /* ??? This seems redundant, as the caller sets this bit for
5490	 all JUMP_INSNs.  */
5491      flags.is_branch = 1;
5492      return rtx_needs_barrier (src, flags, pred);
5493    }
5494
5495  need_barrier = rtx_needs_barrier (src, flags, pred);
5496
5497  dst = SET_DEST (x);
5498  if (GET_CODE (dst) == ZERO_EXTRACT)
5499    {
5500      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5501      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5502      dst = XEXP (dst, 0);
5503    }
5504  return need_barrier;
5505}
5506
5507/* Handle an access to rtx X of type FLAGS using predicate register
5508   PRED.  Return 1 if this access creates a dependency with an earlier
5509   instruction in the same group.  */
5510
5511static int
5512rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5513{
5514  int i, j;
5515  int is_complemented = 0;
5516  int need_barrier = 0;
5517  const char *format_ptr;
5518  struct reg_flags new_flags;
5519  rtx cond;
5520
5521  if (! x)
5522    return 0;
5523
5524  new_flags = flags;
5525
5526  switch (GET_CODE (x))
5527    {
5528    case SET:
5529      update_set_flags (x, &new_flags);
5530      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5531      if (GET_CODE (SET_SRC (x)) != CALL)
5532	{
5533	  new_flags.is_write = 1;
5534	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5535	}
5536      break;
5537
5538    case CALL:
5539      new_flags.is_write = 0;
5540      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5541
5542      /* Avoid multiple register writes, in case this is a pattern with
5543	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5544      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5545	{
5546	  new_flags.is_write = 1;
5547	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5548	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5549	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5550	}
5551      break;
5552
5553    case COND_EXEC:
5554      /* X is a predicated instruction.  */
5555
5556      cond = COND_EXEC_TEST (x);
5557      gcc_assert (!pred);
5558      need_barrier = rtx_needs_barrier (cond, flags, 0);
5559
5560      if (GET_CODE (cond) == EQ)
5561	is_complemented = 1;
5562      cond = XEXP (cond, 0);
5563      gcc_assert (GET_CODE (cond) == REG
5564		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5565      pred = REGNO (cond);
5566      if (is_complemented)
5567	++pred;
5568
5569      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5570      return need_barrier;
5571
5572    case CLOBBER:
5573    case USE:
      /* Clobber & use are for earlier compiler phases only.  */
5575      break;
5576
5577    case ASM_OPERANDS:
5578    case ASM_INPUT:
5579      /* We always emit stop bits for traditional asms.  We emit stop bits
5580	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5581      if (GET_CODE (x) != ASM_OPERANDS
5582	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5583	{
5584	  /* Avoid writing the register multiple times if we have multiple
5585	     asm outputs.  This avoids a failure in rws_access_reg.  */
5586	  if (! rws_insn[REG_VOLATILE].write_count)
5587	    {
5588	      new_flags.is_write = 1;
5589	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5590	    }
5591	  return 1;
5592	}
5593
      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
	 We cannot just fall through here since then we would be confused
	 by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate
	 a traditional asm as it would in its normal usage.  */
5598
5599      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5600	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5601	  need_barrier = 1;
5602      break;
5603
5604    case PARALLEL:
5605      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5606	{
5607	  rtx pat = XVECEXP (x, 0, i);
5608	  switch (GET_CODE (pat))
5609	    {
5610	    case SET:
5611	      update_set_flags (pat, &new_flags);
5612	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5613	      break;
5614
5615	    case USE:
5616	    case CALL:
5617	    case ASM_OPERANDS:
5618	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
5619	      break;
5620
5621	    case CLOBBER:
5622	    case RETURN:
5623	      break;
5624
5625	    default:
5626	      gcc_unreachable ();
5627	    }
5628	}
5629      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5630	{
5631	  rtx pat = XVECEXP (x, 0, i);
5632	  if (GET_CODE (pat) == SET)
5633	    {
5634	      if (GET_CODE (SET_SRC (pat)) != CALL)
5635		{
5636		  new_flags.is_write = 1;
5637		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5638						     pred);
5639		}
5640	    }
5641	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5642	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5643	}
5644      break;
5645
5646    case SUBREG:
5647      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5648      break;
5649    case REG:
5650      if (REGNO (x) == AR_UNAT_REGNUM)
5651	{
5652	  for (i = 0; i < 64; ++i)
5653	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5654	}
5655      else
5656	need_barrier = rws_access_reg (x, flags, pred);
5657      break;
5658
5659    case MEM:
5660      /* Find the regs used in memory address computation.  */
5661      new_flags.is_write = 0;
5662      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5663      break;
5664
5665    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
5666    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5667      break;
5668
5669      /* Operators with side-effects.  */
5670    case POST_INC:    case POST_DEC:
5671      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5672
5673      new_flags.is_write = 0;
5674      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5675      new_flags.is_write = 1;
5676      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5677      break;
5678
5679    case POST_MODIFY:
5680      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5681
5682      new_flags.is_write = 0;
5683      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5684      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5685      new_flags.is_write = 1;
5686      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5687      break;
5688
5689      /* Handle common unary and binary ops for efficiency.  */
5690    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5691    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5692    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5693    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5694    case NE:       case EQ:      case GE:      case GT:        case LE:
5695    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5696      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5697      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5698      break;
5699
5700    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5701    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5702    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5703    case SQRT:     case FFS:		case POPCOUNT:
5704      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5705      break;
5706
5707    case VEC_SELECT:
5708      /* VEC_SELECT's second argument is a PARALLEL with integers that
5709	 describe the elements selected.  On ia64, those integers are
5710	 always constants.  Avoid walking the PARALLEL so that we don't
5711	 get confused with "normal" parallels and then die.  */
5712      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5713      break;
5714
5715    case UNSPEC:
5716      switch (XINT (x, 1))
5717	{
5718	case UNSPEC_LTOFF_DTPMOD:
5719	case UNSPEC_LTOFF_DTPREL:
5720	case UNSPEC_DTPREL:
5721	case UNSPEC_LTOFF_TPREL:
5722	case UNSPEC_TPREL:
5723	case UNSPEC_PRED_REL_MUTEX:
5724	case UNSPEC_PIC_CALL:
5725        case UNSPEC_MF:
5726        case UNSPEC_FETCHADD_ACQ:
5727	case UNSPEC_BSP_VALUE:
5728	case UNSPEC_FLUSHRS:
5729	case UNSPEC_BUNDLE_SELECTOR:
5730          break;
5731
5732	case UNSPEC_GR_SPILL:
5733	case UNSPEC_GR_RESTORE:
5734	  {
5735	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5736	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5737
5738	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5739	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5740	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5741					      new_flags, pred);
5742	    break;
5743	  }
5744
5745	case UNSPEC_FR_SPILL:
5746	case UNSPEC_FR_RESTORE:
5747	case UNSPEC_GETF_EXP:
5748	case UNSPEC_SETF_EXP:
5749        case UNSPEC_ADDP4:
5750	case UNSPEC_FR_SQRT_RECIP_APPROX:
5751	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5752	  break;
5753
5754	case UNSPEC_FR_RECIP_APPROX:
5755	case UNSPEC_SHRP:
5756	case UNSPEC_COPYSIGN:
5757	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5758	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5759	  break;
5760
5761        case UNSPEC_CMPXCHG_ACQ:
5762	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5763	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5764	  break;
5765
5766	default:
5767	  gcc_unreachable ();
5768	}
5769      break;
5770
5771    case UNSPEC_VOLATILE:
5772      switch (XINT (x, 1))
5773	{
5774	case UNSPECV_ALLOC:
5775	  /* Alloc must always be the first instruction of a group.
5776	     We force this by always returning true.  */
5777	  /* ??? We might get better scheduling if we explicitly check for
5778	     input/local/output register dependencies, and modify the
5779	     scheduler so that alloc is always reordered to the start of
5780	     the current group.  We could then eliminate all of the
5781	     first_instruction code.  */
5782	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5783
5784	  new_flags.is_write = 1;
5785	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5786	  return 1;
5787
5788	case UNSPECV_SET_BSP:
5789	  need_barrier = 1;
5790          break;
5791
5792	case UNSPECV_BLOCKAGE:
5793	case UNSPECV_INSN_GROUP_BARRIER:
5794	case UNSPECV_BREAK:
5795	case UNSPECV_PSAC_ALL:
5796	case UNSPECV_PSAC_NORMAL:
5797	  return 0;
5798
5799	default:
5800	  gcc_unreachable ();
5801	}
5802      break;
5803
5804    case RETURN:
5805      new_flags.is_write = 0;
5806      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5807      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5808
5809      new_flags.is_write = 1;
5810      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5811      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5812      break;
5813
5814    default:
5815      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5816      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5817	switch (format_ptr[i])
5818	  {
5819	  case '0':	/* unused field */
5820	  case 'i':	/* integer */
5821	  case 'n':	/* note */
5822	  case 'w':	/* wide integer */
5823	  case 's':	/* pointer to string */
5824	  case 'S':	/* optional pointer to string */
5825	    break;
5826
5827	  case 'e':
5828	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5829	      need_barrier = 1;
5830	    break;
5831
5832	  case 'E':
5833	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5834	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5835		need_barrier = 1;
5836	    break;
5837
5838	  default:
5839	    gcc_unreachable ();
5840	  }
5841      break;
5842    }
5843  return need_barrier;
5844}
5845
5846/* Clear out the state for group_barrier_needed at the start of a
5847   sequence of insns.  */
5848
5849static void
5850init_insn_group_barriers (void)
5851{
5852  memset (rws_sum, 0, sizeof (rws_sum));
5853  first_instruction = 1;
5854}
5855
5856/* Given the current state, determine whether a group barrier (a stop bit) is
5857   necessary before INSN.  Return nonzero if so.  This modifies the state to
5858   include the effects of INSN as a side-effect.  */
5859
5860static int
5861group_barrier_needed (rtx insn)
5862{
5863  rtx pat;
5864  int need_barrier = 0;
5865  struct reg_flags flags;
5866
5867  memset (&flags, 0, sizeof (flags));
5868  switch (GET_CODE (insn))
5869    {
5870    case NOTE:
5871      break;
5872
5873    case BARRIER:
5874      /* A barrier doesn't imply an instruction group boundary.  */
5875      break;
5876
5877    case CODE_LABEL:
5878      memset (rws_insn, 0, sizeof (rws_insn));
5879      return 1;
5880
5881    case CALL_INSN:
5882      flags.is_branch = 1;
5883      flags.is_sibcall = SIBLING_CALL_P (insn);
5884      memset (rws_insn, 0, sizeof (rws_insn));
5885
5886      /* Don't bundle a call following another call.  */
5887      if ((pat = prev_active_insn (insn))
5888	  && GET_CODE (pat) == CALL_INSN)
5889	{
5890	  need_barrier = 1;
5891	  break;
5892	}
5893
5894      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5895      break;
5896
5897    case JUMP_INSN:
5898      flags.is_branch = 1;
5899
5900      /* Don't bundle a jump following a call.  */
5901      if ((pat = prev_active_insn (insn))
5902	  && GET_CODE (pat) == CALL_INSN)
5903	{
5904	  need_barrier = 1;
5905	  break;
5906	}
5907      /* FALLTHRU */
5908
5909    case INSN:
5910      if (GET_CODE (PATTERN (insn)) == USE
5911	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5912	/* Don't care about USE and CLOBBER "insns"---those are used to
5913	   indicate to the optimizer that it shouldn't get rid of
5914	   certain operations.  */
5915	break;
5916
5917      pat = PATTERN (insn);
5918
5919      /* Ug.  Hack hacks hacked elsewhere.  */
5920      switch (recog_memoized (insn))
5921	{
5922	  /* We play dependency tricks with the epilogue in order
5923	     to get proper schedules.  Undo this for dv analysis.  */
5924	case CODE_FOR_epilogue_deallocate_stack:
5925	case CODE_FOR_prologue_allocate_stack:
5926	  pat = XVECEXP (pat, 0, 0);
5927	  break;
5928
5929	  /* The pattern we use for br.cloop confuses the code above.
5930	     The second element of the vector is representative.  */
5931	case CODE_FOR_doloop_end_internal:
5932	  pat = XVECEXP (pat, 0, 1);
5933	  break;
5934
5935	  /* Doesn't generate code.  */
5936	case CODE_FOR_pred_rel_mutex:
5937	case CODE_FOR_prologue_use:
5938	  return 0;
5939
5940	default:
5941	  break;
5942	}
5943
5944      memset (rws_insn, 0, sizeof (rws_insn));
5945      need_barrier = rtx_needs_barrier (pat, flags, 0);
5946
5947      /* Check to see if the previous instruction was a volatile
5948	 asm.  */
5949      if (! need_barrier)
5950	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5951      break;
5952
5953    default:
5954      gcc_unreachable ();
5955    }
5956
5957  if (first_instruction && INSN_P (insn)
5958      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5959      && GET_CODE (PATTERN (insn)) != USE
5960      && GET_CODE (PATTERN (insn)) != CLOBBER)
5961    {
5962      need_barrier = 0;
5963      first_instruction = 0;
5964    }
5965
5966  return need_barrier;
5967}
5968
5969/* Like group_barrier_needed, but do not clobber the current state.  */
5970
5971static int
5972safe_group_barrier_needed (rtx insn)
5973{
5974  struct reg_write_state rws_saved[NUM_REGS];
5975  int saved_first_instruction;
5976  int t;
5977
5978  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5979  saved_first_instruction = first_instruction;
5980
5981  t = group_barrier_needed (insn);
5982
5983  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5984  first_instruction = saved_first_instruction;
5985
5986  return t;
5987}
5988
5989/* Scan the current function and insert stop bits as necessary to
5990   eliminate dependencies.  This function assumes that a final
5991   instruction scheduling pass has been run which has already
5992   inserted most of the necessary stop bits.  This function only
5993   inserts new ones at basic block boundaries, since these are
5994   invisible to the scheduler.  */
5995
5996static void
5997emit_insn_group_barriers (FILE *dump)
5998{
5999  rtx insn;
6000  rtx last_label = 0;
6001  int insns_since_last_label = 0;
6002
6003  init_insn_group_barriers ();
6004
6005  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6006    {
6007      if (GET_CODE (insn) == CODE_LABEL)
6008	{
6009	  if (insns_since_last_label)
6010	    last_label = insn;
6011	  insns_since_last_label = 0;
6012	}
6013      else if (GET_CODE (insn) == NOTE
6014	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6015	{
6016	  if (insns_since_last_label)
6017	    last_label = insn;
6018	  insns_since_last_label = 0;
6019	}
6020      else if (GET_CODE (insn) == INSN
6021	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6022	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6023	{
6024	  init_insn_group_barriers ();
6025	  last_label = 0;
6026	}
6027      else if (INSN_P (insn))
6028	{
6029	  insns_since_last_label = 1;
6030
6031	  if (group_barrier_needed (insn))
6032	    {
6033	      if (last_label)
6034		{
6035		  if (dump)
6036		    fprintf (dump, "Emitting stop before label %d\n",
6037			     INSN_UID (last_label));
6038		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6039		  insn = last_label;
6040
6041		  init_insn_group_barriers ();
6042		  last_label = 0;
6043		}
6044	    }
6045	}
6046    }
6047}
6048
/* Like emit_insn_group_barriers, but run when no final scheduling pass has
   been run.  In that case this function has to emit all necessary group
   barriers itself.  */
6051
6052static void
6053emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6054{
6055  rtx insn;
6056
6057  init_insn_group_barriers ();
6058
6059  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6060    {
6061      if (GET_CODE (insn) == BARRIER)
6062	{
6063	  rtx last = prev_active_insn (insn);
6064
6065	  if (! last)
6066	    continue;
6067	  if (GET_CODE (last) == JUMP_INSN
6068	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6069	    last = prev_active_insn (last);
6070	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6071	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6072
6073	  init_insn_group_barriers ();
6074	}
6075      else if (INSN_P (insn))
6076	{
6077	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6078	    init_insn_group_barriers ();
6079	  else if (group_barrier_needed (insn))
6080	    {
6081	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6082	      init_insn_group_barriers ();
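	      /* Re-scan INSN so that its own register writes are
		 recorded in the state that was just cleared.  */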
6083	      group_barrier_needed (insn);
6084	    }
6085	}
6086    }
6087}
6088
6089
6090
6091/* Instruction scheduling support.  */
6092
6093#define NR_BUNDLES 10
6094
6095/* A list of names of all available bundles.  */
6096
6097static const char *bundle_name [NR_BUNDLES] =
6098{
6099  ".mii",
6100  ".mmi",
6101  ".mfi",
6102  ".mmf",
6103#if NR_BUNDLES == 10
6104  ".bbb",
6105  ".mbb",
6106#endif
6107  ".mib",
6108  ".mmb",
6109  ".mfb",
6110  ".mlx"
6111};
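
/* Each entry above is an assembler bundle-template directive: for
   example ".mmi" asks for a bundle whose three slots hold two memory
   insns and one integer insn, while ".mlx" holds a memory insn plus an
   L+X insn (such as movl) that occupies the remaining two slots.  */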
6112
6113/* Nonzero if we should insert stop bits into the schedule.  */
6114
6115int ia64_final_schedule = 0;
6116
6117/* Codes of the corresponding queried units: */
6118
6119static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6120static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6121
6122static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6123static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6124
6125static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6126
6127/* The following variable value is an insn group barrier.  */
6128
6129static rtx dfa_stop_insn;
6130
6131/* The following variable value is the last issued insn.  */
6132
6133static rtx last_scheduled_insn;
6134
/* The following variable value is the size of the DFA state.  */
6136
6137static size_t dfa_state_size;
6138
/* The following variable value is a pointer to a DFA state used as a
   temporary variable.  */
6141
6142static state_t temp_dfa_state = NULL;
6143
/* The following variable value is the DFA state after issuing the last
   insn.  */
6146
6147static state_t prev_cycle_state = NULL;
6148
/* The following array element values are TRUE if the corresponding
   insn requires a stop bit to be added before it.  */
6151
6152static char *stops_p;
6153
/* The following variable is used to set up the array mentioned above.  */
6155
6156static int stop_before_p = 0;
6157
/* The following variable value is the length of the arrays `clocks' and
   `add_cycles'.  */
6160
6161static int clocks_length;
6162
/* The following array element values are the cycles on which the
   corresponding insn will be issued.  The array is used only for
   Itanium1.  */
6166
6167static int *clocks;
6168
/* The following array element values are the numbers of cycles that should
   be added to improve insn scheduling of MM_insns for Itanium1.  */
6171
6172static int *add_cycles;
6173
6174static rtx ia64_single_set (rtx);
6175static void ia64_emit_insn_before (rtx, rtx);
6176
6177/* Map a bundle number to its pseudo-op.  */
6178
6179const char *
6180get_bundle_name (int b)
6181{
6182  return bundle_name[b];
6183}
6184
6185
/* Return the maximum number of instructions a cpu can issue per cycle.  */
6187
6188static int
6189ia64_issue_rate (void)
6190{
6191  return 6;
6192}
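
/* The issue rate of 6 above corresponds to the two three-insn bundles
   that the Itanium processors can dispatch per clock cycle.  */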
6193
6194/* Helper function - like single_set, but look inside COND_EXEC.  */
6195
6196static rtx
6197ia64_single_set (rtx insn)
6198{
6199  rtx x = PATTERN (insn), ret;
6200  if (GET_CODE (x) == COND_EXEC)
6201    x = COND_EXEC_CODE (x);
6202  if (GET_CODE (x) == SET)
6203    return x;
6204
  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
     Although they are not classical single sets, the second set is there
     just to protect the insn from being moved past FP-relative stack
     accesses.  */
6208  switch (recog_memoized (insn))
6209    {
6210    case CODE_FOR_prologue_allocate_stack:
6211    case CODE_FOR_epilogue_deallocate_stack:
6212      ret = XVECEXP (x, 0, 0);
6213      break;
6214
6215    default:
6216      ret = single_set_2 (insn, x);
6217      break;
6218    }
6219
6220  return ret;
6221}
6222
6223/* Adjust the cost of a scheduling dependency.  Return the new cost of
6224   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
6225
6226static int
6227ia64_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
6228{
6229  enum attr_itanium_class dep_class;
6230  enum attr_itanium_class insn_class;
6231
6232  if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT)
6233    return cost;
6234
6235  insn_class = ia64_safe_itanium_class (insn);
6236  dep_class = ia64_safe_itanium_class (dep_insn);
6237  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6238      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6239    return 0;
6240
6241  return cost;
6242}
6243
6244/* Like emit_insn_before, but skip cycle_display notes.
6245   ??? When cycle display notes are implemented, update this.  */
6246
6247static void
6248ia64_emit_insn_before (rtx insn, rtx before)
6249{
6250  emit_insn_before (insn, before);
6251}
6252
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because that
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
6257
6258static void
6259ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6260{
6261  rtx insn, link, next, next_tail;
6262
6263  /* Before reload, which_alternative is not set, which means that
6264     ia64_safe_itanium_class will produce wrong results for (at least)
6265     move instructions.  */
6266  if (!reload_completed)
6267    return;
6268
6269  next_tail = NEXT_INSN (tail);
6270  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6271    if (INSN_P (insn))
6272      insn->call = 0;
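  /* INSN->call is reused here as scratch state: it was cleared above and
     is set by the loop below exactly when the insn computes an address
     consumed by a dependent load or store.  */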
6273  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6274    if (INSN_P (insn)
6275	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6276      {
6277	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6278	  {
6279	    enum attr_itanium_class c;
6280
6281	    if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6282	      continue;
6283	    next = XEXP (link, 0);
6284	    c = ia64_safe_itanium_class (next);
6285	    if ((c == ITANIUM_CLASS_ST
6286		 || c == ITANIUM_CLASS_STF)
6287		&& ia64_st_address_bypass_p (insn, next))
6288	      break;
6289	    else if ((c == ITANIUM_CLASS_LD
6290		      || c == ITANIUM_CLASS_FLD
6291		      || c == ITANIUM_CLASS_FLDP)
6292		     && ia64_ld_address_bypass_p (insn, next))
6293	      break;
6294	  }
6295	insn->call = link != 0;
6296      }
6297}
6298
6299/* We're beginning a new block.  Initialize data structures as necessary.  */
6300
6301static void
6302ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6303		 int sched_verbose ATTRIBUTE_UNUSED,
6304		 int max_ready ATTRIBUTE_UNUSED)
6305{
6306#ifdef ENABLE_CHECKING
6307  rtx insn;
6308
6309  if (reload_completed)
6310    for (insn = NEXT_INSN (current_sched_info->prev_head);
6311	 insn != current_sched_info->next_tail;
6312	 insn = NEXT_INSN (insn))
6313      gcc_assert (!SCHED_GROUP_P (insn));
6314#endif
6315  last_scheduled_insn = NULL_RTX;
6316  init_insn_group_barriers ();
6317}
6318
/* We are about to begin issuing insns for this clock cycle.
6320   Override the default sort algorithm to better slot instructions.  */
6321
6322static int
6323ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6324			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6325			int reorder_type)
6326{
6327  int n_asms;
6328  int n_ready = *pn_ready;
6329  rtx *e_ready = ready + n_ready;
6330  rtx *insnp;
6331
6332  if (sched_verbose)
6333    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6334
6335  if (reorder_type == 0)
6336    {
6337      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6338      n_asms = 0;
6339      for (insnp = ready; insnp < e_ready; insnp++)
6340	if (insnp < e_ready)
6341	  {
6342	    rtx insn = *insnp;
6343	    enum attr_type t = ia64_safe_type (insn);
6344	    if (t == TYPE_UNKNOWN)
6345	      {
6346		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6347		    || asm_noperands (PATTERN (insn)) >= 0)
6348		  {
6349		    rtx lowest = ready[n_asms];
6350		    ready[n_asms] = insn;
6351		    *insnp = lowest;
6352		    n_asms++;
6353		  }
6354		else
6355		  {
6356		    rtx highest = ready[n_ready - 1];
6357		    ready[n_ready - 1] = insn;
6358		    *insnp = highest;
6359		    return 1;
6360		  }
6361	      }
6362	  }
6363
6364      if (n_asms < n_ready)
6365	{
6366	  /* Some normal insns to process.  Skip the asms.  */
6367	  ready += n_asms;
6368	  n_ready -= n_asms;
6369	}
6370      else if (n_ready > 0)
6371	return 1;
6372    }
6373
6374  if (ia64_final_schedule)
6375    {
6376      int deleted = 0;
6377      int nr_need_stop = 0;
6378
6379      for (insnp = ready; insnp < e_ready; insnp++)
6380	if (safe_group_barrier_needed (*insnp))
6381	  nr_need_stop++;
6382
6383      if (reorder_type == 1 && n_ready == nr_need_stop)
6384	return 0;
6385      if (reorder_type == 0)
6386	return 1;
6387      insnp = e_ready;
6388      /* Move down everything that needs a stop bit, preserving
6389	 relative order.  */
6390      while (insnp-- > ready + deleted)
6391	while (insnp >= ready + deleted)
6392	  {
6393	    rtx insn = *insnp;
6394	    if (! safe_group_barrier_needed (insn))
6395	      break;
6396	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6397	    *ready = insn;
6398	    deleted++;
6399	  }
6400      n_ready -= deleted;
6401      ready += deleted;
6402    }
6403
6404  return 1;
6405}
6406
/* We are about to begin issuing insns for this clock cycle.  Override
6408   the default sort algorithm to better slot instructions.  */
6409
6410static int
6411ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6412		    int clock_var)
6413{
6414  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6415				 pn_ready, clock_var, 0);
6416}
6417
6418/* Like ia64_sched_reorder, but called after issuing each insn.
6419   Override the default sort algorithm to better slot instructions.  */
6420
6421static int
6422ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6423		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6424		     int *pn_ready, int clock_var)
6425{
6426  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6427    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6428  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6429				 clock_var, 1);
6430}
6431
6432/* We are about to issue INSN.  Return the number of insns left on the
6433   ready queue that can be issued this cycle.  */
6434
6435static int
6436ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6437		     int sched_verbose ATTRIBUTE_UNUSED,
6438		     rtx insn ATTRIBUTE_UNUSED,
6439		     int can_issue_more ATTRIBUTE_UNUSED)
6440{
6441  last_scheduled_insn = insn;
6442  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6443  if (reload_completed)
6444    {
6445      int needed = group_barrier_needed (insn);
6446
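      /* group_barrier_needed updates the RWS state as a side effect;
	 after the final schedule no new stop bit should ever be needed
	 here, hence the assertion below.  */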
6447      gcc_assert (!needed);
6448      if (GET_CODE (insn) == CALL_INSN)
6449	init_insn_group_barriers ();
6450      stops_p [INSN_UID (insn)] = stop_before_p;
6451      stop_before_p = 0;
6452    }
6453  return 1;
6454}
6455
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
6457   can be chosen.  */
6458
6459static int
6460ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6461{
6462  gcc_assert (insn  && INSN_P (insn));
6463  return (!reload_completed
6464	  || !safe_group_barrier_needed (insn));
6465}
6466
/* The following variable value is a pseudo-insn used by the DFA insn
6468   scheduler to change the DFA state when the simulated clock is
6469   increased.  */
6470
6471static rtx dfa_pre_cycle_insn;
6472
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK; *SORT_P is set to zero when the
   ready queue should not be sorted on the next clock start.  */
6476
6477static int
6478ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6479		    int clock, int *sort_p)
6480{
6481  int setup_clocks_p = FALSE;
6482
6483  gcc_assert (insn && INSN_P (insn));
6484  if ((reload_completed && safe_group_barrier_needed (insn))
6485      || (last_scheduled_insn
6486	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6487	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6488	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6489    {
6490      init_insn_group_barriers ();
6491      if (verbose && dump)
6492	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6493		 last_clock == clock ? " + cycle advance" : "");
6494      stop_before_p = 1;
6495      if (last_clock == clock)
6496	{
6497	  state_transition (curr_state, dfa_stop_insn);
6498	  if (TARGET_EARLY_STOP_BITS)
6499	    *sort_p = (last_scheduled_insn == NULL_RTX
6500		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6501	  else
6502	    *sort_p = 0;
6503	  return 1;
6504	}
6505      else if (reload_completed)
6506	setup_clocks_p = TRUE;
6507      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6508	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6509	state_reset (curr_state);
6510      else
6511	{
6512	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6513	  state_transition (curr_state, dfa_stop_insn);
6514	  state_transition (curr_state, dfa_pre_cycle_insn);
6515	  state_transition (curr_state, NULL);
6516	}
6517    }
6518  else if (reload_completed)
6519    setup_clocks_p = TRUE;
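  /* The code below records, in add_cycles, how many extra cycles an
     insn that depends on a recent MM multiply/shift result should be
     delayed when bundling for Itanium1, since such results take roughly
     four cycles to become available.  */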
6520  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6521      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6522      && asm_noperands (PATTERN (insn)) < 0)
6523    {
6524      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6525
6526      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6527	{
6528	  rtx link;
6529	  int d = -1;
6530
6531	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6532	    if (REG_NOTE_KIND (link) == 0)
6533	      {
6534		enum attr_itanium_class dep_class;
6535		rtx dep_insn = XEXP (link, 0);
6536
6537		dep_class = ia64_safe_itanium_class (dep_insn);
6538		if ((dep_class == ITANIUM_CLASS_MMMUL
6539		     || dep_class == ITANIUM_CLASS_MMSHF)
6540		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6541		    && (d < 0
6542			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6543		  d = last_clock - clocks [INSN_UID (dep_insn)];
6544	      }
6545	  if (d >= 0)
6546	    add_cycles [INSN_UID (insn)] = 3 - d;
6547	}
6548    }
6549  return 0;
6550}
6551
6552
6553
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and generating bundle
   templates).  */
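
/* For example, the three-insn sequence

	ld8 r14 = [r15]
	add r16 = r17, r18
	br.ret.sptk.many b0

   can be emitted as a single ".mib" bundle with no nops, whereas a
   sequence that matches no template gets nop insns inserted before or
   after its insns to fill the unused slots.  */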
6556
/* The following describes the state of insn bundling.  */
6558
6559struct bundle_state
6560{
6561  /* Unique bundle state number to identify them in the debugging
6562     output  */
6563  int unique_num;
6564  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
                   insn)  */
6569  int cost;     /* cost of the state in cycles */
6570  int accumulated_insns_num; /* number of all previous insns including
6571				nops.  L is considered as 2 insns */
6572  int branch_deviation; /* deviation of previous branches from 3rd slots  */
6573  struct bundle_state *next;  /* next state with the same insn_num  */
6574  struct bundle_state *originator; /* originator (previous insn state)  */
6575  /* All bundle states are in the following chain.  */
6576  struct bundle_state *allocated_states_chain;
6577  /* The DFA State after issuing the insn and the nops.  */
6578  state_t dfa_state;
6579};
6580
/* The following maps an insn number to the corresponding bundle state.  */
6582
6583static struct bundle_state **index_to_bundle_states;
6584
/* The unique number of the next bundle state.  */
6586
6587static int bundle_states_num;
6588
6589/* All allocated bundle states are in the following chain.  */
6590
6591static struct bundle_state *allocated_bundle_states_chain;
6592
6593/* All allocated but not used bundle states are in the following
6594   chain.  */
6595
6596static struct bundle_state *free_bundle_state_chain;
6597
6598
6599/* The following function returns a free bundle state.  */
6600
6601static struct bundle_state *
6602get_free_bundle_state (void)
6603{
6604  struct bundle_state *result;
6605
6606  if (free_bundle_state_chain != NULL)
6607    {
6608      result = free_bundle_state_chain;
6609      free_bundle_state_chain = result->next;
6610    }
6611  else
6612    {
6613      result = xmalloc (sizeof (struct bundle_state));
6614      result->dfa_state = xmalloc (dfa_state_size);
6615      result->allocated_states_chain = allocated_bundle_states_chain;
6616      allocated_bundle_states_chain = result;
6617    }
6618  result->unique_num = bundle_states_num++;
6619  return result;
6620
6621}
6622
/* The following function frees the given bundle state.  */
6624
6625static void
6626free_bundle_state (struct bundle_state *state)
6627{
6628  state->next = free_bundle_state_chain;
6629  free_bundle_state_chain = state;
6630}
6631
6632/* Start work with abstract data `bundle states'.  */
6633
6634static void
6635initiate_bundle_states (void)
6636{
6637  bundle_states_num = 0;
6638  free_bundle_state_chain = NULL;
6639  allocated_bundle_states_chain = NULL;
6640}
6641
6642/* Finish work with abstract data `bundle states'.  */
6643
6644static void
6645finish_bundle_states (void)
6646{
6647  struct bundle_state *curr_state, *next_state;
6648
6649  for (curr_state = allocated_bundle_states_chain;
6650       curr_state != NULL;
6651       curr_state = next_state)
6652    {
6653      next_state = curr_state->allocated_states_chain;
6654      free (curr_state->dfa_state);
6655      free (curr_state);
6656    }
6657}
6658
6659/* Hash table of the bundle states.  The key is dfa_state and insn_num
6660   of the bundle states.  */
6661
6662static htab_t bundle_state_table;
6663
/* The function returns the hash of BUNDLE_STATE.  */
6665
6666static unsigned
6667bundle_state_hash (const void *bundle_state)
6668{
6669  const struct bundle_state *state = (struct bundle_state *) bundle_state;
6670  unsigned result, i;
6671
6672  for (result = i = 0; i < dfa_state_size; i++)
6673    result += (((unsigned char *) state->dfa_state) [i]
6674	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
6675  return result + state->insn_num;
6676}
6677
6678/* The function returns nonzero if the bundle state keys are equal.  */
6679
6680static int
6681bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
6682{
6683  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
6684  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
6685
6686  return (state1->insn_num == state2->insn_num
6687	  && memcmp (state1->dfa_state, state2->dfa_state,
6688		     dfa_state_size) == 0);
6689}
6690
/* The function inserts BUNDLE_STATE into the hash table.  It returns
   nonzero if the bundle state has been inserted into the table.  The
   table keeps the best bundle state for each key.  */
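/* "Best" here means: smaller cost first, then fewer accumulated insns
   (i.e. fewer inserted nops), then smaller branch deviation; this is
   the order of the comparison below.  */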
6694
6695static int
6696insert_bundle_state (struct bundle_state *bundle_state)
6697{
6698  void **entry_ptr;
6699
6700  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
6701  if (*entry_ptr == NULL)
6702    {
6703      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
6704      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
6705      *entry_ptr = (void *) bundle_state;
6706      return TRUE;
6707    }
6708  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
6709	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
6710	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
6711		   > bundle_state->accumulated_insns_num
6712		   || (((struct bundle_state *)
6713			*entry_ptr)->accumulated_insns_num
6714		       == bundle_state->accumulated_insns_num
6715		       && ((struct bundle_state *)
6716			   *entry_ptr)->branch_deviation
6717		       > bundle_state->branch_deviation))))
6718
6719    {
6720      struct bundle_state temp;
6721
6722      temp = *(struct bundle_state *) *entry_ptr;
6723      *(struct bundle_state *) *entry_ptr = *bundle_state;
6724      ((struct bundle_state *) *entry_ptr)->next = temp.next;
6725      *bundle_state = temp;
6726    }
6727  return FALSE;
6728}
6729
6730/* Start work with the hash table.  */
6731
6732static void
6733initiate_bundle_state_table (void)
6734{
6735  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
6736				    (htab_del) 0);
6737}
6738
6739/* Finish work with the hash table.  */
6740
6741static void
6742finish_bundle_state_table (void)
6743{
6744  htab_delete (bundle_state_table);
6745}
6746
6747
6748
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
6751
6752static rtx ia64_nop;
6753
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
6757
6758static int
6759try_issue_nops (struct bundle_state *curr_state, int nops_num)
6760{
6761  int i;
6762
6763  for (i = 0; i < nops_num; i++)
6764    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
6765      {
6766	free_bundle_state (curr_state);
6767	return FALSE;
6768      }
6769  return TRUE;
6770}
6771
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
6775
6776static int
6777try_issue_insn (struct bundle_state *curr_state, rtx insn)
6778{
6779  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
6780    {
6781      free_bundle_state (curr_state);
6782      return FALSE;
6783    }
6784  return TRUE;
6785}
6786
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting from state ORIGINATOR without advancing the processor cycle.
   If TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it is successful, the function creates a new bundle state
   and inserts it into the hash table and into `index_to_bundle_states'.  */
6793
6794static void
6795issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
6796		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
6797{
6798  struct bundle_state *curr_state;
6799
6800  curr_state = get_free_bundle_state ();
6801  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
6802  curr_state->insn = insn;
6803  curr_state->insn_num = originator->insn_num + 1;
6804  curr_state->cost = originator->cost;
6805  curr_state->originator = originator;
6806  curr_state->before_nops_num = before_nops_num;
6807  curr_state->after_nops_num = 0;
6808  curr_state->accumulated_insns_num
6809    = originator->accumulated_insns_num + before_nops_num;
6810  curr_state->branch_deviation = originator->branch_deviation;
6811  gcc_assert (insn);
6812  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
6813    {
6814      gcc_assert (GET_MODE (insn) != TImode);
6815      if (!try_issue_nops (curr_state, before_nops_num))
6816	return;
6817      if (!try_issue_insn (curr_state, insn))
6818	return;
6819      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
6820      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
6821	  && curr_state->accumulated_insns_num % 3 != 0)
6822	{
6823	  free_bundle_state (curr_state);
6824	  return;
6825	}
6826    }
6827  else if (GET_MODE (insn) != TImode)
6828    {
6829      if (!try_issue_nops (curr_state, before_nops_num))
6830	return;
6831      if (!try_issue_insn (curr_state, insn))
6832	return;
6833      curr_state->accumulated_insns_num++;
6834      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
6835		  && asm_noperands (PATTERN (insn)) < 0);
6836
6837      if (ia64_safe_type (insn) == TYPE_L)
6838	curr_state->accumulated_insns_num++;
6839    }
6840  else
6841    {
6842      /* If this is an insn that must be first in a group, then don't allow
6843	 nops to be emitted before it.  Currently, alloc is the only such
6844	 supported instruction.  */
6845      /* ??? The bundling automatons should handle this for us, but they do
6846	 not yet have support for the first_insn attribute.  */
6847      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
6848	{
6849	  free_bundle_state (curr_state);
6850	  return;
6851	}
6852
6853      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
6854      state_transition (curr_state->dfa_state, NULL);
6855      curr_state->cost++;
6856      if (!try_issue_nops (curr_state, before_nops_num))
6857	return;
6858      if (!try_issue_insn (curr_state, insn))
6859	return;
6860      curr_state->accumulated_insns_num++;
6861      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6862	  || asm_noperands (PATTERN (insn)) >= 0)
6863	{
6864	  /* Finish bundle containing asm insn.  */
6865	  curr_state->after_nops_num
6866	    = 3 - curr_state->accumulated_insns_num % 3;
6867	  curr_state->accumulated_insns_num
6868	    += 3 - curr_state->accumulated_insns_num % 3;
6869	}
6870      else if (ia64_safe_type (insn) == TYPE_L)
6871	curr_state->accumulated_insns_num++;
6872    }
6873  if (ia64_safe_type (insn) == TYPE_B)
6874    curr_state->branch_deviation
6875      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
6876  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
6877    {
6878      if (!only_bundle_end_p && insert_bundle_state (curr_state))
6879	{
6880	  state_t dfa_state;
6881	  struct bundle_state *curr_state1;
6882	  struct bundle_state *allocated_states_chain;
6883
6884	  curr_state1 = get_free_bundle_state ();
6885	  dfa_state = curr_state1->dfa_state;
6886	  allocated_states_chain = curr_state1->allocated_states_chain;
6887	  *curr_state1 = *curr_state;
6888	  curr_state1->dfa_state = dfa_state;
6889	  curr_state1->allocated_states_chain = allocated_states_chain;
6890	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
6891		  dfa_state_size);
6892	  curr_state = curr_state1;
6893	}
6894      if (!try_issue_nops (curr_state,
6895			   3 - curr_state->accumulated_insns_num % 3))
6896	return;
6897      curr_state->after_nops_num
6898	= 3 - curr_state->accumulated_insns_num % 3;
6899      curr_state->accumulated_insns_num
6900	+= 3 - curr_state->accumulated_insns_num % 3;
6901    }
6902  if (!insert_bundle_state (curr_state))
6903    free_bundle_state (curr_state);
6904  return;
6905}
6906
/* The following function returns the position in the two-bundle
   window for the given STATE.  */
6909
6910static int
6911get_max_pos (state_t state)
6912{
6913  if (cpu_unit_reservation_p (state, pos_6))
6914    return 6;
6915  else if (cpu_unit_reservation_p (state, pos_5))
6916    return 5;
6917  else if (cpu_unit_reservation_p (state, pos_4))
6918    return 4;
6919  else if (cpu_unit_reservation_p (state, pos_3))
6920    return 3;
6921  else if (cpu_unit_reservation_p (state, pos_2))
6922    return 2;
6923  else if (cpu_unit_reservation_p (state, pos_1))
6924    return 1;
6925  else
6926    return 0;
6927}
6928
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with the
   two position values 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations
   also argue against the use of F-unit instructions unless they are
   really needed.  */
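
/* For reference, the codes returned below are the bundle template
   numbers used by gen_bundle_selector.  Judging from the unit names
   checked below (and from the uses of templates 0, 2 and 9 elsewhere
   in this file), the mapping is assumed to be:

	0 .mii   1 .mmi   2 .mfi   3 .mmf   4 .bbb
	5 .mbb   6 .mib   7 .mmb   8 .mfb   9 .mlx  */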
6938
6939static int
6940get_template (state_t state, int pos)
6941{
6942  switch (pos)
6943    {
6944    case 3:
6945      if (cpu_unit_reservation_p (state, _0mmi_))
6946	return 1;
6947      else if (cpu_unit_reservation_p (state, _0mii_))
6948	return 0;
6949      else if (cpu_unit_reservation_p (state, _0mmb_))
6950	return 7;
6951      else if (cpu_unit_reservation_p (state, _0mib_))
6952	return 6;
6953      else if (cpu_unit_reservation_p (state, _0mbb_))
6954	return 5;
6955      else if (cpu_unit_reservation_p (state, _0bbb_))
6956	return 4;
6957      else if (cpu_unit_reservation_p (state, _0mmf_))
6958	return 3;
6959      else if (cpu_unit_reservation_p (state, _0mfi_))
6960	return 2;
6961      else if (cpu_unit_reservation_p (state, _0mfb_))
6962	return 8;
6963      else if (cpu_unit_reservation_p (state, _0mlx_))
6964	return 9;
6965      else
6966	gcc_unreachable ();
6967    case 6:
6968      if (cpu_unit_reservation_p (state, _1mmi_))
6969	return 1;
6970      else if (cpu_unit_reservation_p (state, _1mii_))
6971	return 0;
6972      else if (cpu_unit_reservation_p (state, _1mmb_))
6973	return 7;
6974      else if (cpu_unit_reservation_p (state, _1mib_))
6975	return 6;
6976      else if (cpu_unit_reservation_p (state, _1mbb_))
6977	return 5;
6978      else if (cpu_unit_reservation_p (state, _1bbb_))
6979	return 4;
6980      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
6981	return 3;
6982      else if (cpu_unit_reservation_p (state, _1mfi_))
6983	return 2;
6984      else if (cpu_unit_reservation_p (state, _1mfb_))
6985	return 8;
6986      else if (cpu_unit_reservation_p (state, _1mlx_))
6987	return 9;
6988      else
6989	gcc_unreachable ();
6990    default:
6991      gcc_unreachable ();
6992    }
6993}
6994
/* The following function returns the first insn important for insn
   bundling, starting at INSN and before TAIL.  */
6997
6998static rtx
6999get_next_important_insn (rtx insn, rtx tail)
7000{
7001  for (; insn && insn != tail; insn = NEXT_INSN (insn))
7002    if (INSN_P (insn)
7003	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7004	&& GET_CODE (PATTERN (insn)) != USE
7005	&& GET_CODE (PATTERN (insn)) != CLOBBER)
7006      return insn;
7007  return NULL_RTX;
7008}
7009
7010/* Add a bundle selector TEMPLATE0 before INSN.  */
7011
7012static void
7013ia64_add_bundle_selector_before (int template0, rtx insn)
7014{
7015  rtx b = gen_bundle_selector (GEN_INT (template0));
7016
7017  ia64_emit_insn_before (b, insn);
7018#if NR_BUNDLES == 10
7019  if ((template0 == 4 || template0 == 5)
7020      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7021    {
7022      int i;
7023      rtx note = NULL_RTX;
7024
      /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
	 first or second slot.  If it is and has a REG_EH_REGION note,
	 copy the note to the following nops, as br.call sets rp to the
	 address of the following bundle and therefore an EH region end
	 must be on a bundle boundary.  */
7030      insn = PREV_INSN (insn);
7031      for (i = 0; i < 3; i++)
7032	{
7033	  do
7034	    insn = next_active_insn (insn);
7035	  while (GET_CODE (insn) == INSN
7036		 && get_attr_empty (insn) == EMPTY_YES);
7037	  if (GET_CODE (insn) == CALL_INSN)
7038	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7039	  else if (note)
7040	    {
7041	      int code;
7042
7043	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7044			  || code == CODE_FOR_nop_b);
7045	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7046		note = NULL_RTX;
7047	      else
7048		REG_NOTES (insn)
7049		  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7050				       REG_NOTES (insn));
7051	    }
7052	}
7053    }
7054#endif
7055}
7056
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton lets it follow
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about the
   inserted nop insns and the used templates from the automaton
   states.  The automaton only says that we can issue an insn,
   possibly inserting some nops before it and using some template.
   Therefore insn bundling in this function is implemented using a
   DFA (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes
   for insn scheduling) before/after each insn being bundled.  We know
   the start of a simulated processor cycle from insn scheduling (an
   insn starting a new cycle has TImode).

   A naive implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is
   described by a structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insns and nops for it), we reuse the already generated one.  As
   a consequence we reject decisions which cannot improve the solution
   and reduce the memory used by the algorithm.

   When we reach the end of the EBB (extended basic block), we choose
   the best insn sequence and then, moving back through the EBB,
   insert templates for the best alternative.  The templates are taken
   by querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  There is an additional forward pass through the EBB for
   the Itanium1 processor.  This pass inserts more nops to make the
   dependency between a producer insn and MMMUL/MMSHF at least 4
   cycles long.  */
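
/* A rough illustration (the operands are made up): if a cycle
   contains an M-type load and an A-type add, the forward pass may
   find that issuing them with one trailing nop under template 0
   (.mii) is the cheapest alternative, so the backward pass would
   emit something like

	.mii
	ld8 r14 = [r15]
	add r16 = r17, r18
	nop.i 0 ;;

   with the bundle_selector insn providing the template directive and
   the nop filling the unused slot.  */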
7095
7096static void
7097bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7098{
7099  struct bundle_state *curr_state, *next_state, *best_state;
7100  rtx insn, next_insn;
7101  int insn_num;
7102  int i, bundle_end_p, only_bundle_end_p, asm_p;
7103  int pos = 0, max_pos, template0, template1;
7104  rtx b;
7105  rtx nop;
7106  enum attr_type type;
7107
7108  insn_num = 0;
7109  /* Count insns in the EBB.  */
7110  for (insn = NEXT_INSN (prev_head_insn);
7111       insn && insn != tail;
7112       insn = NEXT_INSN (insn))
7113    if (INSN_P (insn))
7114      insn_num++;
7115  if (insn_num == 0)
7116    return;
7117  bundling_p = 1;
7118  dfa_clean_insn_cache ();
7119  initiate_bundle_state_table ();
7120  index_to_bundle_states = xmalloc ((insn_num + 2)
7121				    * sizeof (struct bundle_state *));
7122  /* First (forward) pass -- generation of bundle states.  */
7123  curr_state = get_free_bundle_state ();
7124  curr_state->insn = NULL;
7125  curr_state->before_nops_num = 0;
7126  curr_state->after_nops_num = 0;
7127  curr_state->insn_num = 0;
7128  curr_state->cost = 0;
7129  curr_state->accumulated_insns_num = 0;
7130  curr_state->branch_deviation = 0;
7131  curr_state->next = NULL;
7132  curr_state->originator = NULL;
7133  state_reset (curr_state->dfa_state);
7134  index_to_bundle_states [0] = curr_state;
7135  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
7137  for (insn = NEXT_INSN (prev_head_insn);
7138       insn != tail;
7139       insn = NEXT_INSN (insn))
7140    if (INSN_P (insn)
7141	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7142	    || GET_CODE (PATTERN (insn)) == USE
7143	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7144	&& GET_MODE (insn) == TImode)
7145      {
7146	PUT_MODE (insn, VOIDmode);
7147	for (next_insn = NEXT_INSN (insn);
7148	     next_insn != tail;
7149	     next_insn = NEXT_INSN (next_insn))
7150	  if (INSN_P (next_insn)
7151	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7152	      && GET_CODE (PATTERN (next_insn)) != USE
7153	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7154	    {
7155	      PUT_MODE (next_insn, TImode);
7156	      break;
7157	    }
7158      }
  /* Forward pass: generation of bundle states.  */
7160  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7161       insn != NULL_RTX;
7162       insn = next_insn)
7163    {
7164      gcc_assert (INSN_P (insn)
7165		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7166		  && GET_CODE (PATTERN (insn)) != USE
7167		  && GET_CODE (PATTERN (insn)) != CLOBBER);
7168      type = ia64_safe_type (insn);
7169      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7170      insn_num++;
7171      index_to_bundle_states [insn_num] = NULL;
7172      for (curr_state = index_to_bundle_states [insn_num - 1];
7173	   curr_state != NULL;
7174	   curr_state = next_state)
7175	{
7176	  pos = curr_state->accumulated_insns_num % 3;
7177	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is
	     always placed in a separate bundle.  */
7181	  only_bundle_end_p
7182	    = (next_insn != NULL_RTX
7183	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7184	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7185	  /* We may fill up the current bundle if it is the cycle end
7186	     without a group barrier.  */
7187	  bundle_end_p
7188	    = (only_bundle_end_p || next_insn == NULL_RTX
7189	       || (GET_MODE (next_insn) == TImode
7190		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7191	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7192	      || type == TYPE_S
7193	      /* We need to insert 2 nops for cases like M_MII.  To
7194		 guarantee issuing all insns on the same cycle for
7195		 Itanium 1, we need to issue 2 nops after the first M
7196		 insn (MnnMII where n is a nop insn).  */
7197	      || ((type == TYPE_M || type == TYPE_A)
7198		  && ia64_tune == PROCESSOR_ITANIUM
7199		  && !bundle_end_p && pos == 1))
7200	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7201				 only_bundle_end_p);
7202	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7203			       only_bundle_end_p);
7204	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7205			       only_bundle_end_p);
7206	}
7207      gcc_assert (index_to_bundle_states [insn_num]);
7208      for (curr_state = index_to_bundle_states [insn_num];
7209	   curr_state != NULL;
7210	   curr_state = curr_state->next)
7211	if (verbose >= 2 && dump)
7212	  {
	    /* This structure is taken from the generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
7217	    struct DFA_chip
7218	    {
7219	      unsigned short one_automaton_state;
7220	      unsigned short oneb_automaton_state;
7221	      unsigned short two_automaton_state;
7222	      unsigned short twob_automaton_state;
7223	    };
7224
7225	    fprintf
7226	      (dump,
7227	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7228	       curr_state->unique_num,
7229	       (curr_state->originator == NULL
7230		? -1 : curr_state->originator->unique_num),
7231	       curr_state->cost,
7232	       curr_state->before_nops_num, curr_state->after_nops_num,
7233	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
7234	       (ia64_tune == PROCESSOR_ITANIUM
7235		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7236		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7237	       INSN_UID (insn));
7238	  }
7239    }
7240
  /* We should find a solution because the second insn scheduling pass
     has found one.  */
7243  gcc_assert (index_to_bundle_states [insn_num]);
7244  /* Find a state corresponding to the best insn sequence.  */
7245  best_state = NULL;
7246  for (curr_state = index_to_bundle_states [insn_num];
7247       curr_state != NULL;
7248       curr_state = curr_state->next)
    /* We consider only the states whose last bundle is completely
       filled.  First we prefer insn sequences with minimal cost, then
       those with the fewest inserted nops, and finally those with
       branch insns placed in 3rd slots.  */
7253    if (curr_state->accumulated_insns_num % 3 == 0
7254	&& (best_state == NULL || best_state->cost > curr_state->cost
7255	    || (best_state->cost == curr_state->cost
7256		&& (curr_state->accumulated_insns_num
7257		    < best_state->accumulated_insns_num
7258		    || (curr_state->accumulated_insns_num
7259			== best_state->accumulated_insns_num
7260			&& curr_state->branch_deviation
7261			< best_state->branch_deviation)))))
7262      best_state = curr_state;
7263  /* Second (backward) pass: adding nops and templates.  */
7264  insn_num = best_state->before_nops_num;
7265  template0 = template1 = -1;
7266  for (curr_state = best_state;
7267       curr_state->originator != NULL;
7268       curr_state = curr_state->originator)
7269    {
7270      insn = curr_state->insn;
7271      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7272	       || asm_noperands (PATTERN (insn)) >= 0);
7273      insn_num++;
7274      if (verbose >= 2 && dump)
7275	{
7276	  struct DFA_chip
7277	  {
7278	    unsigned short one_automaton_state;
7279	    unsigned short oneb_automaton_state;
7280	    unsigned short two_automaton_state;
7281	    unsigned short twob_automaton_state;
7282	  };
7283
7284	  fprintf
7285	    (dump,
7286	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7287	     curr_state->unique_num,
7288	     (curr_state->originator == NULL
7289	      ? -1 : curr_state->originator->unique_num),
7290	     curr_state->cost,
7291	     curr_state->before_nops_num, curr_state->after_nops_num,
7292	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
7293	     (ia64_tune == PROCESSOR_ITANIUM
7294	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7295	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7296	     INSN_UID (insn));
7297	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
7301      max_pos = get_max_pos (curr_state->dfa_state);
7302      if (max_pos == 6
7303	  /* The following (negative template number) means that the
7304	     processor did one bundle rotation.  */
7305	  || (max_pos == 3 && template0 < 0))
7306	{
7307	  /* We are at the end of the window -- find template(s) for
7308	     its bundle(s).  */
7309	  pos = max_pos;
7310	  if (max_pos == 3)
7311	    template0 = get_template (curr_state->dfa_state, 3);
7312	  else
7313	    {
7314	      template1 = get_template (curr_state->dfa_state, 3);
7315	      template0 = get_template (curr_state->dfa_state, 6);
7316	    }
7317	}
7318      if (max_pos > 3 && template1 < 0)
7319	/* It may happen when we have the stop inside a bundle.  */
7320	{
7321	  gcc_assert (pos <= 3);
7322	  template1 = get_template (curr_state->dfa_state, 3);
7323	  pos += 3;
7324	}
7325      if (!asm_p)
7326	/* Emit nops after the current insn.  */
7327	for (i = 0; i < curr_state->after_nops_num; i++)
7328	  {
7329	    nop = gen_nop ();
7330	    emit_insn_after (nop, insn);
7331	    pos--;
7332	    gcc_assert (pos >= 0);
7333	    if (pos % 3 == 0)
7334	      {
7335		/* We are at the start of a bundle: emit the template
7336		   (it should be defined).  */
7337		gcc_assert (template0 >= 0);
7338		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two-bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (a negative value).  */
7342		template0 = template1;
7343		template1 = -1;
7344	      }
7345	  }
      /* Move the position backward in the window.  A group barrier
	 takes no slot.  An asm insn takes a whole bundle.  */
7348      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7349	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7350	  && asm_noperands (PATTERN (insn)) < 0)
7351	pos--;
      /* A long insn takes 2 slots.  */
7353      if (ia64_safe_type (insn) == TYPE_L)
7354	pos--;
7355      gcc_assert (pos >= 0);
7356      if (pos % 3 == 0
7357	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7358	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7359	  && asm_noperands (PATTERN (insn)) < 0)
7360	{
7361	  /* The current insn is at the bundle start: emit the
7362	     template.  */
7363	  gcc_assert (template0 >= 0);
7364	  ia64_add_bundle_selector_before (template0, insn);
7365	  b = PREV_INSN (insn);
7366	  insn = b;
7367	  /* See comment above in analogous place for emitting nops
7368	     after the insn.  */
7369	  template0 = template1;
7370	  template1 = -1;
7371	}
      /* Emit nops before the current insn.  */
7373      for (i = 0; i < curr_state->before_nops_num; i++)
7374	{
7375	  nop = gen_nop ();
7376	  ia64_emit_insn_before (nop, insn);
7377	  nop = PREV_INSN (insn);
7378	  insn = nop;
7379	  pos--;
7380	  gcc_assert (pos >= 0);
7381	  if (pos % 3 == 0)
7382	    {
7383	      /* See comment above in analogous place for emitting nops
7384		 after the insn.  */
7385	      gcc_assert (template0 >= 0);
7386	      ia64_add_bundle_selector_before (template0, insn);
7387	      b = PREV_INSN (insn);
7388	      insn = b;
7389	      template0 = template1;
7390	      template1 = -1;
7391	    }
7392	}
7393    }
7394  if (ia64_tune == PROCESSOR_ITANIUM)
    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
       Itanium1 has a quirk: if the distance between a producer insn
       and a dependent MM-insn is less than 4 cycles, there is an
       additional 6-cycle stall.  So we make the distance equal to 4
       cycles if it is less.  */
7400    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7401	 insn != NULL_RTX;
7402	 insn = next_insn)
7403      {
7404	gcc_assert (INSN_P (insn)
7405		    && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7406		    && GET_CODE (PATTERN (insn)) != USE
7407		    && GET_CODE (PATTERN (insn)) != CLOBBER);
7408	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7409	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
	  /* We found an MM-insn which needs additional cycles.  */
7411	  {
7412	    rtx last;
7413	    int i, j, n;
7414	    int pred_stop_p;
7415
	    /* Now we search for the template of the bundle in which
	       the MM-insn is placed and for the position of the insn
	       in the bundle (0, 1, 2).  We also check whether there
	       is a stop before the insn.  */
7420	    last = prev_active_insn (insn);
7421	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
7422	    if (pred_stop_p)
7423	      last = prev_active_insn (last);
7424	    n = 0;
7425	    for (;; last = prev_active_insn (last))
7426	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
7427		{
7428		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
7429		  if (template0 == 9)
		    /* The insn is in an MLX bundle.  Change the
		       template to MFI because we will add nops before
		       the insn; this simplifies the subsequent code a lot.  */
7433		    PATTERN (last)
7434		      = gen_bundle_selector (const2_rtx); /* -> MFI */
7435		  break;
7436		}
7437	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
7438		       && (ia64_safe_itanium_class (last)
7439			   != ITANIUM_CLASS_IGNORE))
7440		n++;
	    /* Sanity checks: the stop is not at the bundle start,
	       there are no more than 3 insns in the bundle, and the
	       MM-insn is not at the start of a bundle with template
	       MLX.  */
7445	    gcc_assert ((!pred_stop_p || n)
7446			&& n <= 2
7447			&& (template0 != 9 || !n));
	    /* Fill the rest of the original bundle with nops; the
	       MM-insn itself will end up in a new bundle created below.  */
7449	    for (j = 3 - n; j > 0; j --)
7450	      ia64_emit_insn_before (gen_nop (), insn);
	    /* This takes into account that we will add N more nops
	       before the insn later -- see the code below.  */
7453	    add_cycles [INSN_UID (insn)]--;
7454	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
7455	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7456				     insn);
7457	    if (pred_stop_p)
7458	      add_cycles [INSN_UID (insn)]--;
7459	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
7460	      {
7461		/* Insert "MII;" template.  */
7462		ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
7463				       insn);
7464		ia64_emit_insn_before (gen_nop (), insn);
7465		ia64_emit_insn_before (gen_nop (), insn);
7466		if (i > 1)
7467		  {
7468		    /* To decrease code size, we use "MI;I;"
7469		       template.  */
7470		    ia64_emit_insn_before
7471		      (gen_insn_group_barrier (GEN_INT (3)), insn);
7472		    i--;
7473		  }
7474		ia64_emit_insn_before (gen_nop (), insn);
7475		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7476				       insn);
7477	      }
7478	    /* Put the MM-insn in the same slot of a bundle with the
7479	       same template as the original one.  */
7480	    ia64_add_bundle_selector_before (template0, insn);
	    /* To put the insn in the same slot, add the necessary
	       number of nops.  */
7483	    for (j = n; j > 0; j --)
7484	      ia64_emit_insn_before (gen_nop (), insn);
	    /* Put back the stop if the original bundle had one.  */
7486	    if (pred_stop_p)
7487	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7488				     insn);
7489	  }
7490      }
7491  free (index_to_bundle_states);
7492  finish_bundle_state_table ();
7493  bundling_p = 0;
7494  dfa_clean_insn_cache ();
7495}
7496
7497/* The following function is called at the end of scheduling BB or
7498   EBB.  After reload, it inserts stop bits and does insn bundling.  */
7499
7500static void
7501ia64_sched_finish (FILE *dump, int sched_verbose)
7502{
7503  if (sched_verbose)
7504    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;

  final_emit_insn_group_barriers (dump);
  bundling (dump, sched_verbose, current_sched_info->prev_head,
	    current_sched_info->next_tail);
  if (sched_verbose && dump)
    fprintf (dump, "//    finishing %d-%d\n",
	     INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
	     INSN_UID (PREV_INSN (current_sched_info->next_tail)));
7519}
7520
7521/* The following function inserts stop bits in scheduled BB or EBB.  */
7522
7523static void
7524final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7525{
7526  rtx insn;
7527  int need_barrier_p = 0;
7528  rtx prev_insn = NULL_RTX;
7529
7530  init_insn_group_barriers ();
7531
7532  for (insn = NEXT_INSN (current_sched_info->prev_head);
7533       insn != current_sched_info->next_tail;
7534       insn = NEXT_INSN (insn))
7535    {
7536      if (GET_CODE (insn) == BARRIER)
7537	{
7538	  rtx last = prev_active_insn (insn);
7539
7540	  if (! last)
7541	    continue;
7542	  if (GET_CODE (last) == JUMP_INSN
7543	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
7544	    last = prev_active_insn (last);
7545	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7546	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7547
7548	  init_insn_group_barriers ();
7549	  need_barrier_p = 0;
7550	  prev_insn = NULL_RTX;
7551	}
7552      else if (INSN_P (insn))
7553	{
7554	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7555	    {
7556	      init_insn_group_barriers ();
7557	      need_barrier_p = 0;
7558	      prev_insn = NULL_RTX;
7559	    }
7560	  else if (need_barrier_p || group_barrier_needed (insn))
7561	    {
7562	      if (TARGET_EARLY_STOP_BITS)
7563		{
7564		  rtx last;
7565
7566		  for (last = insn;
7567		       last != current_sched_info->prev_head;
7568		       last = PREV_INSN (last))
7569		    if (INSN_P (last) && GET_MODE (last) == TImode
7570			&& stops_p [INSN_UID (last)])
7571		      break;
7572		  if (last == current_sched_info->prev_head)
7573		    last = insn;
7574		  last = prev_active_insn (last);
7575		  if (last
7576		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
7577		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
7578				     last);
7579		  init_insn_group_barriers ();
7580		  for (last = NEXT_INSN (last);
7581		       last != insn;
7582		       last = NEXT_INSN (last))
7583		    if (INSN_P (last))
7584		      group_barrier_needed (last);
7585		}
7586	      else
7587		{
7588		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
7589				    insn);
7590		  init_insn_group_barriers ();
7591		}
7592	      group_barrier_needed (insn);
7593	      prev_insn = NULL_RTX;
7594	    }
7595	  else if (recog_memoized (insn) >= 0)
7596	    prev_insn = insn;
7597	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
7598			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
7599			    || asm_noperands (PATTERN (insn)) >= 0);
7600	}
7601    }
7602}
7603
7604
7605
/* The following function returns the depth of the DFA lookahead used
   by the first cycle multipass insn scheduling; returning a nonzero
   value enables this lookahead.  */
7608
7609static int
7610ia64_first_cycle_multipass_dfa_lookahead (void)
7611{
7612  return (reload_completed ? 6 : 4);
7613}
7614
/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn'.  */
7616
7617static void
7618ia64_init_dfa_pre_cycle_insn (void)
7619{
7620  if (temp_dfa_state == NULL)
7621    {
7622      dfa_state_size = state_size ();
7623      temp_dfa_state = xmalloc (dfa_state_size);
7624      prev_cycle_state = xmalloc (dfa_state_size);
7625    }
7626  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
7627  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
7628  recog_memoized (dfa_pre_cycle_insn);
7629  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
7630  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
7631  recog_memoized (dfa_stop_insn);
7632}
7633
7634/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
7635   used by the DFA insn scheduler.  */
7636
7637static rtx
7638ia64_dfa_pre_cycle_insn (void)
7639{
7640  return dfa_pre_cycle_insn;
7641}
7642
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf). */
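
/* For example (illustrative operands only), the bypass applies when a
   "ld8 r14 = [r12]" feeds the address of a following "st8 [r14] = r32";
   it does not apply when the produced register is only the stored
   data, as in "st8 [r15] = r14", since the register is then not
   mentioned in the address.  */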
7645
7646int
7647ia64_st_address_bypass_p (rtx producer, rtx consumer)
7648{
7649  rtx dest, reg, mem;
7650
7651  gcc_assert (producer && consumer);
7652  dest = ia64_single_set (producer);
7653  gcc_assert (dest);
7654  reg = SET_DEST (dest);
7655  gcc_assert (reg);
7656  if (GET_CODE (reg) == SUBREG)
7657    reg = SUBREG_REG (reg);
7658  gcc_assert (GET_CODE (reg) == REG);
7659
7660  dest = ia64_single_set (consumer);
7661  gcc_assert (dest);
7662  mem = SET_DEST (dest);
7663  gcc_assert (mem && GET_CODE (mem) == MEM);
7664  return reg_mentioned_p (reg, mem);
7665}
7666
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld). */
7669
7670int
7671ia64_ld_address_bypass_p (rtx producer, rtx consumer)
7672{
7673  rtx dest, src, reg, mem;
7674
7675  gcc_assert (producer && consumer);
7676  dest = ia64_single_set (producer);
7677  gcc_assert (dest);
7678  reg = SET_DEST (dest);
7679  gcc_assert (reg);
7680  if (GET_CODE (reg) == SUBREG)
7681    reg = SUBREG_REG (reg);
7682  gcc_assert (GET_CODE (reg) == REG);
7683
7684  src = ia64_single_set (consumer);
7685  gcc_assert (src);
7686  mem = SET_SRC (src);
7687  gcc_assert (mem);
7688  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
7689    mem = XVECEXP (mem, 0, 0);
7690  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
7691    mem = XEXP (mem, 0);
7692
7693  /* Note that LO_SUM is used for GOT loads.  */
7694  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
7695
7696  return reg_mentioned_p (reg, mem);
7697}
7698
/* The following function returns TRUE if INSN produces an address for a
   load/store insn.  We will place such insns into an M slot because
   that decreases their latency.  */
7702
7703int
7704ia64_produce_address_p (rtx insn)
7705{
7706  return insn->call;
7707}
7708
7709
7710/* Emit pseudo-ops for the assembler to describe predicate relations.
7711   At present this assumes that we only consider predicate pairs to
7712   be mutex, and that the assembler can deduce proper values from
7713   straight-line code.  */
7714
7715static void
7716emit_predicate_relation_info (void)
7717{
7718  basic_block bb;
7719
7720  FOR_EACH_BB_REVERSE (bb)
7721    {
7722      int r;
7723      rtx head = BB_HEAD (bb);
7724
7725      /* We only need such notes at code labels.  */
7726      if (GET_CODE (head) != CODE_LABEL)
7727	continue;
7728      if (GET_CODE (NEXT_INSN (head)) == NOTE
7729	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
7730	head = NEXT_INSN (head);
7731
7732      /* Skip p0, which may be thought to be live due to (reg:DI p0)
7733	 grabbing the entire block of predicate registers.  */
7734      for (r = PR_REG (2); r < PR_REG (64); r += 2)
7735	if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
7736	  {
7737	    rtx p = gen_rtx_REG (BImode, r);
7738	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
7739	    if (head == BB_END (bb))
7740	      BB_END (bb) = n;
7741	    head = n;
7742	  }
7743    }
7744
7745  /* Look for conditional calls that do not return, and protect predicate
7746     relations around them.  Otherwise the assembler will assume the call
7747     returns, and complain about uses of call-clobbered predicates after
7748     the call.  */
7749  FOR_EACH_BB_REVERSE (bb)
7750    {
7751      rtx insn = BB_HEAD (bb);
7752
7753      while (1)
7754	{
7755	  if (GET_CODE (insn) == CALL_INSN
7756	      && GET_CODE (PATTERN (insn)) == COND_EXEC
7757	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
7758	    {
7759	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
7760	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
7761	      if (BB_HEAD (bb) == insn)
7762		BB_HEAD (bb) = b;
7763	      if (BB_END (bb) == insn)
7764		BB_END (bb) = a;
7765	    }
7766
7767	  if (insn == BB_END (bb))
7768	    break;
7769	  insn = NEXT_INSN (insn);
7770	}
7771    }
7772}
7773
7774/* Perform machine dependent operations on the rtl chain INSNS.  */
7775
7776static void
7777ia64_reorg (void)
7778{
7779  /* We are freeing block_for_insn in the toplev to keep compatibility
7780     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
7781  compute_bb_for_insn ();
7782
7783  /* If optimizing, we'll have split before scheduling.  */
7784  if (optimize == 0)
7785    split_all_insns (0);
7786
7787  /* ??? update_life_info_in_dirty_blocks fails to terminate during
7788     non-optimizing bootstrap.  */
7789  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
7790
7791  if (optimize && ia64_flag_schedule_insns2)
7792    {
7793      timevar_push (TV_SCHED2);
7794      ia64_final_schedule = 1;
7795
7796      initiate_bundle_states ();
7797      ia64_nop = make_insn_raw (gen_nop ());
7798      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
7799      recog_memoized (ia64_nop);
7800      clocks_length = get_max_uid () + 1;
7801      stops_p = xcalloc (1, clocks_length);
7802      if (ia64_tune == PROCESSOR_ITANIUM)
7803	{
7804	  clocks = xcalloc (clocks_length, sizeof (int));
7805	  add_cycles = xcalloc (clocks_length, sizeof (int));
7806	}
7807      if (ia64_tune == PROCESSOR_ITANIUM2)
7808	{
7809	  pos_1 = get_cpu_unit_code ("2_1");
7810	  pos_2 = get_cpu_unit_code ("2_2");
7811	  pos_3 = get_cpu_unit_code ("2_3");
7812	  pos_4 = get_cpu_unit_code ("2_4");
7813	  pos_5 = get_cpu_unit_code ("2_5");
7814	  pos_6 = get_cpu_unit_code ("2_6");
7815	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
7816	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
7817	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
7818	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
7819	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
7820	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
7821	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
7822	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
7823	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
7824	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
7825	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
7826	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
7827	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
7828	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
7829	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
7830	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
7831	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
7832	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
7833	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
7834	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
7835	}
7836      else
7837	{
7838	  pos_1 = get_cpu_unit_code ("1_1");
7839	  pos_2 = get_cpu_unit_code ("1_2");
7840	  pos_3 = get_cpu_unit_code ("1_3");
7841	  pos_4 = get_cpu_unit_code ("1_4");
7842	  pos_5 = get_cpu_unit_code ("1_5");
7843	  pos_6 = get_cpu_unit_code ("1_6");
7844	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
7845	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
7846	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
7847	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
7848	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
7849	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
7850	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
7851	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
7852	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
7853	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
7854	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
7855	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
7856	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
7857	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
7858	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
7859	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
7860	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
7861	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
7862	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
7863	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
7864	}
7865      schedule_ebbs (dump_file);
7866      finish_bundle_states ();
7867      if (ia64_tune == PROCESSOR_ITANIUM)
7868	{
7869	  free (add_cycles);
7870	  free (clocks);
7871	}
7872      free (stops_p);
7873      emit_insn_group_barriers (dump_file);
7874
7875      ia64_final_schedule = 0;
7876      timevar_pop (TV_SCHED2);
7877    }
7878  else
7879    emit_all_insn_group_barriers (dump_file);
7880
7881  /* A call must not be the last instruction in a function, so that the
7882     return address is still within the function, so that unwinding works
7883     properly.  Note that IA-64 differs from dwarf2 on this point.  */
7884  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
7885    {
7886      rtx insn;
7887      int saw_stop = 0;
7888
7889      insn = get_last_insn ();
7890      if (! INSN_P (insn))
7891        insn = prev_active_insn (insn);
7892      /* Skip over insns that expand to nothing.  */
7893      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
7894        {
7895	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7896	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7897	    saw_stop = 1;
7898	  insn = prev_active_insn (insn);
7899	}
7900      if (GET_CODE (insn) == CALL_INSN)
7901	{
7902	  if (! saw_stop)
7903	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7904	  emit_insn (gen_break_f ());
7905	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
7906	}
7907    }
7908
7909  emit_predicate_relation_info ();
7910
7911  if (ia64_flag_var_tracking)
7912    {
7913      timevar_push (TV_VAR_TRACKING);
7914      variable_tracking_main ();
7915      timevar_pop (TV_VAR_TRACKING);
7916    }
7917}
7918
7919/* Return true if REGNO is used by the epilogue.  */
7920
7921int
7922ia64_epilogue_uses (int regno)
7923{
7924  switch (regno)
7925    {
7926    case R_GR (1):
7927      /* With a call to a function in another module, we will write a new
7928	 value to "gp".  After returning from such a call, we need to make
7929	 sure the function restores the original gp-value, even if the
7930	 function itself does not use the gp anymore.  */
7931      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
7932
7933    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
7934    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
7935      /* For functions defined with the syscall_linkage attribute, all
7936	 input registers are marked as live at all function exits.  This
7937	 prevents the register allocator from using the input registers,
7938	 which in turn makes it possible to restart a system call after
7939	 an interrupt without having to save/restore the input registers.
7940	 This also prevents kernel data from leaking to application code.  */
7941      return lookup_attribute ("syscall_linkage",
7942	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
7943
7944    case R_BR (0):
7945      /* Conditional return patterns can't represent the use of `b0' as
7946         the return address, so we force the value live this way.  */
7947      return 1;
7948
7949    case AR_PFS_REGNUM:
7950      /* Likewise for ar.pfs, which is used by br.ret.  */
7951      return 1;
7952
7953    default:
7954      return 0;
7955    }
7956}
7957
7958/* Return true if REGNO is used by the frame unwinder.  */
7959
7960int
7961ia64_eh_uses (int regno)
7962{
7963  if (! reload_completed)
7964    return 0;
7965
7966  if (current_frame_info.reg_save_b0
7967      && regno == current_frame_info.reg_save_b0)
7968    return 1;
7969  if (current_frame_info.reg_save_pr
7970      && regno == current_frame_info.reg_save_pr)
7971    return 1;
7972  if (current_frame_info.reg_save_ar_pfs
7973      && regno == current_frame_info.reg_save_ar_pfs)
7974    return 1;
7975  if (current_frame_info.reg_save_ar_unat
7976      && regno == current_frame_info.reg_save_ar_unat)
7977    return 1;
7978  if (current_frame_info.reg_save_ar_lc
7979      && regno == current_frame_info.reg_save_ar_lc)
7980    return 1;
7981
7982  return 0;
7983}
7984
7985/* Return true if this goes in small data/bss.  */
7986
/* ??? We could also support our own long data here.  Generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger,
   but should make the code faster because there is one less load.
   This also includes incomplete types which can't go in sdata/sbss.  */
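
/* For illustration: with the test below, a small object such as
   "static int counter;" (whose size is at most ia64_section_threshold
   and which has no explicit section attribute) is treated as small
   data, while string constants, functions, and incomplete types are
   not.  */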
7991
7992static bool
7993ia64_in_small_data_p (tree exp)
7994{
7995  if (TARGET_NO_SDATA)
7996    return false;
7997
7998  /* We want to merge strings, so we never consider them small data.  */
7999  if (TREE_CODE (exp) == STRING_CST)
8000    return false;
8001
8002  /* Functions are never small data.  */
8003  if (TREE_CODE (exp) == FUNCTION_DECL)
8004    return false;
8005
8006  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8007    {
8008      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8009
8010      if (strcmp (section, ".sdata") == 0
8011	  || strncmp (section, ".sdata.", 7) == 0
8012	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8013	  || strcmp (section, ".sbss") == 0
8014	  || strncmp (section, ".sbss.", 6) == 0
8015	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8016	return true;
8017    }
8018  else
8019    {
8020      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8021
8022      /* If this is an incomplete type with size 0, then we can't put it
8023	 in sdata because it might be too big when completed.  */
8024      if (size > 0 && size <= ia64_section_threshold)
8025	return true;
8026    }
8027
8028  return false;
8029}
8030
8031/* Output assembly directives for prologue regions.  */
8032
8033/* The current basic block number.  */
8034
8035static bool last_block;
8036
8037/* True if we need a copy_state command at the start of the next block.  */
8038
8039static bool need_copy_state;
8040
8041#ifndef MAX_ARTIFICIAL_LABEL_BYTES
8042# define MAX_ARTIFICIAL_LABEL_BYTES 30
8043#endif
8044
8045/* Emit a debugging label after a call-frame-related insn.  We'd
8046   rather output the label right away, but we'd have to output it
8047   after, not before, the instruction, and the instruction has not
8048   been output yet.  So we emit the label after the insn, delete it to
8049   avoid introducing basic blocks, and mark it as preserved, such that
8050   it is still output, given that it is referenced in debug info.  */
8051
8052static const char *
8053ia64_emit_deleted_label_after_insn (rtx insn)
8054{
8055  char label[MAX_ARTIFICIAL_LABEL_BYTES];
8056  rtx lb = gen_label_rtx ();
8057  rtx label_insn = emit_label_after (lb, insn);
8058
8059  LABEL_PRESERVE_P (lb) = 1;
8060
8061  delete_insn (label_insn);
8062
8063  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8064
8065  return xstrdup (label);
8066}
8067
8068/* Define the CFA after INSN with the steady-state definition.  */
8069
8070static void
8071ia64_dwarf2out_def_steady_cfa (rtx insn)
8072{
8073  rtx fp = frame_pointer_needed
8074    ? hard_frame_pointer_rtx
8075    : stack_pointer_rtx;
8076
8077  dwarf2out_def_cfa
8078    (ia64_emit_deleted_label_after_insn (insn),
8079     REGNO (fp),
8080     ia64_initial_elimination_offset
8081     (REGNO (arg_pointer_rtx), REGNO (fp))
8082     + ARG_POINTER_CFA_OFFSET (current_function_decl));
8083}
8084
8085/* The generic dwarf2 frame debug info generator does not define a
8086   separate region for the very end of the epilogue, so refrain from
8087   doing so in the IA64-specific code as well.  */
8088
8089#define IA64_CHANGE_CFA_IN_EPILOGUE 0
8090
8091/* The function emits unwind directives for the start of an epilogue.  */
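
/* For a block that is not the last one, the directives emitted below
   look roughly like (the state number is illustrative)

	.label_state 1
	.restore sp

   and the start of the following block then receives ".body" and
   ".copy_state 1" from process_for_unwind_directive.  */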
8092
8093static void
8094process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8095{
8096  /* If this isn't the last block of the function, then we need to label the
8097     current state, and copy it back in at the start of the next block.  */
8098
8099  if (!last_block)
8100    {
8101      if (unwind)
8102	fprintf (asm_out_file, "\t.label_state %d\n",
8103		 ++cfun->machine->state_num);
8104      need_copy_state = true;
8105    }
8106
8107  if (unwind)
8108    fprintf (asm_out_file, "\t.restore sp\n");
8109  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8110    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8111		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8112}
8113
8114/* This function processes a SET pattern looking for specific patterns
8115   which result in emitting an assembly directive required for unwinding.  */
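
/* For illustration (register numbers and frame size are made up), a
   typical prologue that allocates a 32-byte memory stack and saves
   ar.pfs and rp in general registers would lead this function to emit
   unwind directives along the lines of

	.save ar.pfs, r35
	.fframe 32
	.save rp, r34

   one directive per frame-related SET processed here.  */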
8116
8117static int
8118process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8119{
8120  rtx src = SET_SRC (pat);
8121  rtx dest = SET_DEST (pat);
8122  int src_regno, dest_regno;
8123
8124  /* Look for the ALLOC insn.  */
8125  if (GET_CODE (src) == UNSPEC_VOLATILE
8126      && XINT (src, 1) == UNSPECV_ALLOC
8127      && GET_CODE (dest) == REG)
8128    {
8129      dest_regno = REGNO (dest);
8130
8131      /* If this is the final destination for ar.pfs, then this must
8132	 be the alloc in the prologue.  */
8133      if (dest_regno == current_frame_info.reg_save_ar_pfs)
8134	{
8135	  if (unwind)
8136	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8137		     ia64_dbx_register_number (dest_regno));
8138	}
8139      else
8140	{
8141	  /* This must be an alloc before a sibcall.  We must drop the
8142	     old frame info.  The easiest way to drop the old frame
8143	     info is to ensure we had a ".restore sp" directive
8144	     followed by a new prologue.  If the procedure doesn't
8145	     have a memory-stack frame, we'll issue a dummy ".restore
8146	     sp" now.  */
8147	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* If we haven't done process_epilogue () yet, do it now.  */
8149	    process_epilogue (asm_out_file, insn, unwind, frame);
8150	  if (unwind)
8151	    fprintf (asm_out_file, "\t.prologue\n");
8152	}
8153      return 1;
8154    }
8155
8156  /* Look for SP = ....  */
8157  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8158    {
8159      if (GET_CODE (src) == PLUS)
8160        {
8161	  rtx op0 = XEXP (src, 0);
8162	  rtx op1 = XEXP (src, 1);
8163
8164	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8165
8166	  if (INTVAL (op1) < 0)
8167	    {
8168	      gcc_assert (!frame_pointer_needed);
8169	      if (unwind)
8170		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8171			 -INTVAL (op1));
8172	      if (frame)
8173		ia64_dwarf2out_def_steady_cfa (insn);
8174	    }
8175	  else
8176	    process_epilogue (asm_out_file, insn, unwind, frame);
8177	}
8178      else
8179	{
8180	  gcc_assert (GET_CODE (src) == REG
8181		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
8182	  process_epilogue (asm_out_file, insn, unwind, frame);
8183	}
8184
8185      return 1;
8186    }
8187
8188  /* Register move we need to look at.  */
8189  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8190    {
8191      src_regno = REGNO (src);
8192      dest_regno = REGNO (dest);
8193
8194      switch (src_regno)
8195	{
8196	case BR_REG (0):
8197	  /* Saving return address pointer.  */
8198	  gcc_assert (dest_regno == current_frame_info.reg_save_b0);
8199	  if (unwind)
8200	    fprintf (asm_out_file, "\t.save rp, r%d\n",
8201		     ia64_dbx_register_number (dest_regno));
8202	  return 1;
8203
8204	case PR_REG (0):
8205	  gcc_assert (dest_regno == current_frame_info.reg_save_pr);
8206	  if (unwind)
8207	    fprintf (asm_out_file, "\t.save pr, r%d\n",
8208		     ia64_dbx_register_number (dest_regno));
8209	  return 1;
8210
8211	case AR_UNAT_REGNUM:
8212	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
8213	  if (unwind)
8214	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8215		     ia64_dbx_register_number (dest_regno));
8216	  return 1;
8217
8218	case AR_LC_REGNUM:
8219	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
8220	  if (unwind)
8221	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8222		     ia64_dbx_register_number (dest_regno));
8223	  return 1;
8224
8225	case STACK_POINTER_REGNUM:
8226	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8227		      && frame_pointer_needed);
8228	  if (unwind)
8229	    fprintf (asm_out_file, "\t.vframe r%d\n",
8230		     ia64_dbx_register_number (dest_regno));
8231	  if (frame)
8232	    ia64_dwarf2out_def_steady_cfa (insn);
8233	  return 1;
8234
8235	default:
8236	  /* Everything else should indicate being stored to memory.  */
8237	  gcc_unreachable ();
8238	}
8239    }
8240
8241  /* Memory store we need to look at.  */
8242  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
8243    {
8244      long off;
8245      rtx base;
8246      const char *saveop;
8247
8248      if (GET_CODE (XEXP (dest, 0)) == REG)
8249	{
8250	  base = XEXP (dest, 0);
8251	  off = 0;
8252	}
8253      else
8254	{
8255	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8256		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
8257	  base = XEXP (XEXP (dest, 0), 0);
8258	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
8259	}
8260
8261      if (base == hard_frame_pointer_rtx)
8262	{
8263	  saveop = ".savepsp";
8264	  off = - off;
8265	}
8266      else
8267	{
8268	  gcc_assert (base == stack_pointer_rtx);
8269	  saveop = ".savesp";
8270	}
8271
8272      src_regno = REGNO (src);
8273      switch (src_regno)
8274	{
8275	case BR_REG (0):
8276	  gcc_assert (!current_frame_info.reg_save_b0);
8277	  if (unwind)
8278	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
8279	  return 1;
8280
8281	case PR_REG (0):
8282	  gcc_assert (!current_frame_info.reg_save_pr);
8283	  if (unwind)
8284	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
8285	  return 1;
8286
8287	case AR_LC_REGNUM:
8288	  gcc_assert (!current_frame_info.reg_save_ar_lc);
8289	  if (unwind)
8290	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
8291	  return 1;
8292
8293	case AR_PFS_REGNUM:
8294	  gcc_assert (!current_frame_info.reg_save_ar_pfs);
8295	  if (unwind)
8296	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
8297	  return 1;
8298
8299	case AR_UNAT_REGNUM:
8300	  gcc_assert (!current_frame_info.reg_save_ar_unat);
8301	  if (unwind)
8302	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
8303	  return 1;
8304
8305	case GR_REG (4):
8306	case GR_REG (5):
8307	case GR_REG (6):
8308	case GR_REG (7):
8309	  if (unwind)
8310	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
8311		     1 << (src_regno - GR_REG (4)));
8312	  return 1;
8313
8314	case BR_REG (1):
8315	case BR_REG (2):
8316	case BR_REG (3):
8317	case BR_REG (4):
8318	case BR_REG (5):
8319	  if (unwind)
8320	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
8321		     1 << (src_regno - BR_REG (1)));
8322	  return 1;
8323
8324	case FR_REG (2):
8325	case FR_REG (3):
8326	case FR_REG (4):
8327	case FR_REG (5):
8328	  if (unwind)
8329	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
8330		     1 << (src_regno - FR_REG (2)));
8331	  return 1;
8332
8333	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8334	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8335	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8336	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8337	  if (unwind)
8338	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
8339		     1 << (src_regno - FR_REG (12)));
8340	  return 1;
8341
8342	default:
8343	  return 0;
8344	}
8345    }
8346
8347  return 0;
8348}
8349
8350
8351/* This function looks at a single insn and emits any directives
8352   required to unwind this insn.  */
8353void
8354process_for_unwind_directive (FILE *asm_out_file, rtx insn)
8355{
8356  bool unwind = (flag_unwind_tables
8357		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
8358  bool frame = dwarf2out_do_frame ();
8359
8360  if (unwind || frame)
8361    {
8362      rtx pat;
8363
8364      if (GET_CODE (insn) == NOTE
8365	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
8366	{
8367	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
8368
8369	  /* Restore unwind state from immediately before the epilogue.  */
8370	  if (need_copy_state)
8371	    {
8372	      if (unwind)
8373		{
8374		  fprintf (asm_out_file, "\t.body\n");
8375		  fprintf (asm_out_file, "\t.copy_state %d\n",
8376			   cfun->machine->state_num);
8377		}
8378	      if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8379		ia64_dwarf2out_def_steady_cfa (insn);
8380	      need_copy_state = false;
8381	    }
8382	}
8383
8384      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
8385	return;
8386
8387      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
8388      if (pat)
8389	pat = XEXP (pat, 0);
8390      else
8391	pat = PATTERN (insn);
8392
8393      switch (GET_CODE (pat))
8394        {
8395	case SET:
8396	  process_set (asm_out_file, pat, insn, unwind, frame);
8397	  break;
8398
8399	case PARALLEL:
8400	  {
8401	    int par_index;
8402	    int limit = XVECLEN (pat, 0);
8403	    for (par_index = 0; par_index < limit; par_index++)
8404	      {
8405		rtx x = XVECEXP (pat, 0, par_index);
8406		if (GET_CODE (x) == SET)
8407		  process_set (asm_out_file, x, insn, unwind, frame);
8408	      }
8409	    break;
8410	  }
8411
8412	default:
8413	  gcc_unreachable ();
8414	}
8415    }
8416}
8417
8418
8419enum ia64_builtins
8420{
8421  IA64_BUILTIN_BSP,
8422  IA64_BUILTIN_FLUSHRS
8423};
8424
8425void
8426ia64_init_builtins (void)
8427{
8428  tree fpreg_type;
8429  tree float80_type;
8430
8431  /* The __fpreg type.  */
8432  fpreg_type = make_node (REAL_TYPE);
8433  TYPE_PRECISION (fpreg_type) = 82;
8434  layout_type (fpreg_type);
8435  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
8436
8437  /* The __float80 type.  */
8438  float80_type = make_node (REAL_TYPE);
8439  TYPE_PRECISION (float80_type) = 80;
8440  layout_type (float80_type);
8441  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
8442
8443  /* The __float128 type.  */
8444  if (!TARGET_HPUX)
8445    {
8446      tree float128_type = make_node (REAL_TYPE);
8447      TYPE_PRECISION (float128_type) = 128;
8448      layout_type (float128_type);
8449      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
8450    }
8451  else
8452    /* Under HPUX, this is a synonym for "long double".  */
8453    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
8454					       "__float128");
8455
8456#define def_builtin(name, type, code)					\
8457  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,	\
8458			       NULL, NULL_TREE)
8459
8460  def_builtin ("__builtin_ia64_bsp",
8461	       build_function_type (ptr_type_node, void_list_node),
8462	       IA64_BUILTIN_BSP);
8463
8464  def_builtin ("__builtin_ia64_flushrs",
8465	       build_function_type (void_type_node, void_list_node),
8466	       IA64_BUILTIN_FLUSHRS);
8467
8468#undef def_builtin
8469}
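
/* Illustrative use of the builtins registered above (user code):

     void *backing_store = __builtin_ia64_bsp ();
     __builtin_ia64_flushrs ();

   __builtin_ia64_bsp returns the current register stack backing store
   pointer and __builtin_ia64_flushrs flushes the dirty stacked
   registers out to the backing store; both are expanded by
   ia64_expand_builtin below.  */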
8470
8471rtx
8472ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
8473		     enum machine_mode mode ATTRIBUTE_UNUSED,
8474		     int ignore ATTRIBUTE_UNUSED)
8475{
8476  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
8477  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
8478
8479  switch (fcode)
8480    {
8481    case IA64_BUILTIN_BSP:
8482      if (! target || ! register_operand (target, DImode))
8483	target = gen_reg_rtx (DImode);
8484      emit_insn (gen_bsp_value (target));
8485#ifdef POINTERS_EXTEND_UNSIGNED
8486      target = convert_memory_address (ptr_mode, target);
8487#endif
8488      return target;
8489
8490    case IA64_BUILTIN_FLUSHRS:
8491      emit_insn (gen_flushrs ());
8492      return const0_rtx;
8493
8494    default:
8495      break;
8496    }
8497
8498  return NULL_RTX;
8499}
8500
/* On HP-UX IA64, aggregate parameters are stored in the most
   significant bits of the stack slot.  */
8503
8504enum direction
8505ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
8506{
   /* Exception to the normal case for structures/unions/etc.  */
8508
8509   if (type && AGGREGATE_TYPE_P (type)
8510       && int_size_in_bytes (type) < UNITS_PER_WORD)
8511     return upward;
8512
8513   /* Fall back to the default.  */
8514   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8515}
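
/* For example, a three-byte structure is smaller than UNITS_PER_WORD,
   so the function above returns `upward' and the structure ends up in
   the most significant bytes of its stack slot, matching the HP-UX
   convention described above; larger aggregates and non-aggregates
   fall through to DEFAULT_FUNCTION_ARG_PADDING.  */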
8516
8517/* Linked list of all external functions that are to be emitted by GCC.
8518   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
8519   order to avoid putting out names that are never really used.  */
8520
8521struct extern_func_list GTY(())
8522{
8523  struct extern_func_list *next;
8524  tree decl;
8525};
8526
8527static GTY(()) struct extern_func_list *extern_func_head;
8528
8529static void
8530ia64_hpux_add_extern_decl (tree decl)
8531{
8532  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
8533
8534  p->decl = decl;
8535  p->next = extern_func_head;
8536  extern_func_head = p;
8537}
8538
8539/* Print out the list of used global functions.  */
8540
8541static void
8542ia64_hpux_file_end (void)
8543{
8544  struct extern_func_list *p;
8545
8546  for (p = extern_func_head; p; p = p->next)
8547    {
8548      tree decl = p->decl;
8549      tree id = DECL_ASSEMBLER_NAME (decl);
8550
8551      gcc_assert (id);
8552
8553      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
8554        {
8555	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
8556
8557	  TREE_ASM_WRITTEN (decl) = 1;
8558	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
8559	  fputs (TYPE_ASM_OP, asm_out_file);
8560	  assemble_name (asm_out_file, name);
8561	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
8562        }
8563    }
8564
8565  extern_func_head = 0;
8566}
8567
/* Set the SImode div/mod functions; init_integral_libfuncs only
   initializes modes of word_mode and larger.  Rename the TFmode
   libfuncs using the HPUX conventions.  __divtf3 is used for XFmode;
   we need to keep it for backward compatibility.  */
8572
8573static void
8574ia64_init_libfuncs (void)
8575{
8576  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
8577  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
8578  set_optab_libfunc (smod_optab, SImode, "__modsi3");
8579  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
8580
8581  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
8582  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
8583  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
8584  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
8585  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
8586
8587  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
8588  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
8589  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
8590  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
8591  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
8592  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
8593
8594  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
8595  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
8596  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
8597  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
8598
8599  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
8600  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
8601}
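
/* With the mappings above, a TFmode (__float128) division such as
   `a / b' becomes a call to _U_Qfdiv rather than the default __divtf3,
   and SImode division and modulus use the __divsi3 family, which
   init_integral_libfuncs leaves unset for sub-word modes.  */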
8602
8603/* Rename all the TFmode libfuncs using the HPUX conventions.  */
8604
8605static void
8606ia64_hpux_init_libfuncs (void)
8607{
8608  ia64_init_libfuncs ();
8609
8610  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
8611  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
8612  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
8613
8614  /* ia64_expand_compare uses this.  */
8615  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
8616
8617  /* These should never be used.  */
8618  set_optab_libfunc (eq_optab, TFmode, 0);
8619  set_optab_libfunc (ne_optab, TFmode, 0);
8620  set_optab_libfunc (gt_optab, TFmode, 0);
8621  set_optab_libfunc (ge_optab, TFmode, 0);
8622  set_optab_libfunc (lt_optab, TFmode, 0);
8623  set_optab_libfunc (le_optab, TFmode, 0);
8624}
8625
8626/* Rename the division and modulus functions in VMS.  */
8627
8628static void
8629ia64_vms_init_libfuncs (void)
8630{
8631  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
8632  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
8633  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
8634  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
8635  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
8636  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
8637  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
8638  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
8639}
8640
8641/* Rename the TFmode libfuncs available from soft-fp in glibc using
8642   the HPUX conventions.  */
8643
8644static void
8645ia64_sysv4_init_libfuncs (void)
8646{
8647  ia64_init_libfuncs ();
8648
8649  /* These functions are not part of the HPUX TFmode interface.  We
8650     use them instead of _U_Qfcmp, which doesn't work the way we
8651     expect.  */
8652  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
8653  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
8654  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
8655  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
8656  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
8657  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
8658
8659  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
8660     glibc doesn't have them.  */
8661}
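
/* Thus on SysV4/glibc targets a TFmode comparison such as `a < b'
   expands to a call to _U_Qflt, whereas HP-UX routes all TFmode
   comparisons through cmptf_libfunc (_U_Qfcmp) in
   ia64_hpux_init_libfuncs above.  */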
8662
8663/* Switch to the section to which we should output X.  The only thing
8664   special we do here is to honor small data.  */
8665
8666static void
8667ia64_select_rtx_section (enum machine_mode mode, rtx x,
8668			 unsigned HOST_WIDE_INT align)
8669{
8670  if (GET_MODE_SIZE (mode) > 0
8671      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
8672    sdata_section ();
8673  else
8674    default_elf_select_rtx_section (mode, x, align);
8675}
8676
8677/* It is illegal to have relocations in shared segments on AIX and HPUX.
8678   Pretend flag_pic is always set.  */
8679
8680static void
8681ia64_rwreloc_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
8682{
8683  default_elf_select_section_1 (exp, reloc, align, true);
8684}
8685
8686static void
8687ia64_rwreloc_unique_section (tree decl, int reloc)
8688{
8689  default_unique_section_1 (decl, reloc, true);
8690}
8691
8692static void
8693ia64_rwreloc_select_rtx_section (enum machine_mode mode, rtx x,
8694				 unsigned HOST_WIDE_INT align)
8695{
8696  int save_pic = flag_pic;
8697  flag_pic = 1;
8698  ia64_select_rtx_section (mode, x, align);
8699  flag_pic = save_pic;
8700}
8701
8702#ifndef TARGET_RWRELOC
8703#define TARGET_RWRELOC flag_pic
8704#endif
8705
8706static unsigned int
8707ia64_section_type_flags (tree decl, const char *name, int reloc)
8708{
8709  unsigned int flags = 0;
8710
8711  if (strcmp (name, ".sdata") == 0
8712      || strncmp (name, ".sdata.", 7) == 0
8713      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
8714      || strncmp (name, ".sdata2.", 8) == 0
8715      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
8716      || strcmp (name, ".sbss") == 0
8717      || strncmp (name, ".sbss.", 6) == 0
8718      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
8719    flags = SECTION_SMALL;
8720
8721  flags |= default_section_type_flags_1 (decl, name, reloc, TARGET_RWRELOC);
8722  return flags;
8723}
8724
/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of that structure should be passed
   in out0 rather than in r8.  */
8728
8729static bool
8730ia64_struct_retval_addr_is_first_parm_p (tree fntype)
8731{
8732  tree ret_type = TREE_TYPE (fntype);
8733
8734  /* The Itanium C++ ABI requires that out0, rather than r8, be used
8735     as the structure return address parameter, if the return value
8736     type has a non-trivial copy constructor or destructor.  It is not
8737     clear if this same convention should be used for other
8738     programming languages.  Until G++ 3.4, we incorrectly used r8 for
8739     these return values.  */
8740  return (abi_version_at_least (2)
8741	  && ret_type
8742	  && TYPE_MODE (ret_type) == BLKmode
8743	  && TREE_ADDRESSABLE (ret_type)
8744	  && strcmp (lang_hooks.name, "GNU C++") == 0);
8745}
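
/* Illustrative C++ example:

     struct S { S (const S &); ~S (); };
     struct T { virtual S f (); };

   S has a non-trivial copy constructor and destructor, so under
   -fabi-version=2 or later the address of the return slot of T::f is
   passed in out0 and `this' becomes the second parameter; see the
   this_parmno computation in ia64_output_mi_thunk below.  */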
8746
8747/* Output the assembler code for a thunk function.  THUNK_DECL is the
8748   declaration for the thunk function itself, FUNCTION is the decl for
8749   the target function.  DELTA is an immediate constant offset to be
8750   added to THIS.  If VCALL_OFFSET is nonzero, the word at
8751   *(*this + vcall_offset) should be added to THIS.  */
8752
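/* In effect the emitted thunk computes, in the notation of the
   comment above,

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(*this + VCALL_OFFSET);

   and then tail-calls FUNCTION without building a frame of its own.  */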
8753static void
8754ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
8755		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8756		      tree function)
8757{
8758  rtx this, insn, funexp;
8759  unsigned int this_parmno;
8760  unsigned int this_regno;
8761
8762  reload_completed = 1;
8763  epilogue_completed = 1;
8764  no_new_pseudos = 1;
8765  reset_block_changes ();
8766
8767  /* Set things up as ia64_expand_prologue might.  */
8768  last_scratch_gr_reg = 15;
8769
8770  memset (&current_frame_info, 0, sizeof (current_frame_info));
8771  current_frame_info.spill_cfa_off = -16;
8772  current_frame_info.n_input_regs = 1;
8773  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
8774
8775  /* Mark the end of the (empty) prologue.  */
8776  emit_note (NOTE_INSN_PROLOGUE_END);
8777
8778  /* Figure out whether "this" will be the first parameter (the
8779     typical case) or the second parameter (as happens when the
8780     virtual function returns certain class objects).  */
8781  this_parmno
8782    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
8783       ? 1 : 0);
8784  this_regno = IN_REG (this_parmno);
8785  if (!TARGET_REG_NAMES)
8786    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
8787
8788  this = gen_rtx_REG (Pmode, this_regno);
8789  if (TARGET_ILP32)
8790    {
8791      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
8792      REG_POINTER (tmp) = 1;
8793      if (delta && CONST_OK_FOR_I (delta))
8794	{
8795	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
8796	  delta = 0;
8797	}
8798      else
8799	emit_insn (gen_ptr_extend (this, tmp));
8800    }
8801
8802  /* Apply the constant offset, if required.  */
8803  if (delta)
8804    {
8805      rtx delta_rtx = GEN_INT (delta);
8806
8807      if (!CONST_OK_FOR_I (delta))
8808	{
8809	  rtx tmp = gen_rtx_REG (Pmode, 2);
8810	  emit_move_insn (tmp, delta_rtx);
8811	  delta_rtx = tmp;
8812	}
8813      emit_insn (gen_adddi3 (this, this, delta_rtx));
8814    }
8815
8816  /* Apply the offset from the vtable, if required.  */
8817  if (vcall_offset)
8818    {
8819      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
8820      rtx tmp = gen_rtx_REG (Pmode, 2);
8821
8822      if (TARGET_ILP32)
8823	{
8824	  rtx t = gen_rtx_REG (ptr_mode, 2);
8825	  REG_POINTER (t) = 1;
8826	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
8827	  if (CONST_OK_FOR_I (vcall_offset))
8828	    {
8829	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
8830						  vcall_offset_rtx));
8831	      vcall_offset = 0;
8832	    }
8833	  else
8834	    emit_insn (gen_ptr_extend (tmp, t));
8835	}
8836      else
8837	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
8838
8839      if (vcall_offset)
8840	{
8841	  if (!CONST_OK_FOR_J (vcall_offset))
8842	    {
8843	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
8844	      emit_move_insn (tmp2, vcall_offset_rtx);
8845	      vcall_offset_rtx = tmp2;
8846	    }
8847	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
8848	}
8849
8850      if (TARGET_ILP32)
8851	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
8852			gen_rtx_MEM (ptr_mode, tmp));
8853      else
8854	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
8855
8856      emit_insn (gen_adddi3 (this, this, tmp));
8857    }
8858
8859  /* Generate a tail call to the target function.  */
8860  if (! TREE_USED (function))
8861    {
8862      assemble_external (function);
8863      TREE_USED (function) = 1;
8864    }
8865  funexp = XEXP (DECL_RTL (function), 0);
8866  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8867  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
8868  insn = get_last_insn ();
8869  SIBLING_CALL_P (insn) = 1;
8870
8871  /* Code generation for calls relies on splitting.  */
8872  reload_completed = 1;
8873  epilogue_completed = 1;
8874  try_split (PATTERN (insn), insn, 0);
8875
8876  emit_barrier ();
8877
8878  /* Run just enough of rest_of_compilation to get the insns emitted.
8879     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
8881     assemble_start_function and assemble_end_function.  */
8882
8883  insn_locators_initialize ();
8884  emit_all_insn_group_barriers (NULL);
8885  insn = get_insns ();
8886  shorten_branches (insn);
8887  final_start_function (insn, file, 1);
8888  final (insn, file, 1);
8889  final_end_function ();
8890
8891  reload_completed = 0;
8892  epilogue_completed = 0;
8893  no_new_pseudos = 0;
8894}
8895
8896/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
8897
8898static rtx
8899ia64_struct_value_rtx (tree fntype,
8900		       int incoming ATTRIBUTE_UNUSED)
8901{
8902  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
8903    return NULL_RTX;
8904  return gen_rtx_REG (Pmode, GR_REG (8));
8905}
8906
8907static bool
8908ia64_scalar_mode_supported_p (enum machine_mode mode)
8909{
8910  switch (mode)
8911    {
8912    case QImode:
8913    case HImode:
8914    case SImode:
8915    case DImode:
8916    case TImode:
8917      return true;
8918
8919    case SFmode:
8920    case DFmode:
8921    case XFmode:
8922    case RFmode:
8923      return true;
8924
8925    case TFmode:
8926      return TARGET_HPUX;
8927
8928    default:
8929      return false;
8930    }
8931}
8932
8933static bool
8934ia64_vector_mode_supported_p (enum machine_mode mode)
8935{
8936  switch (mode)
8937    {
8938    case V8QImode:
8939    case V4HImode:
8940    case V2SImode:
8941      return true;
8942
8943    case V2SFmode:
8944      return true;
8945
8946    default:
8947      return false;
8948    }
8949}
8950
8951/* Implement the FUNCTION_PROFILER macro.  */
8952
8953void
8954ia64_output_function_profiler (FILE *file, int labelno)
8955{
8956  bool indirect_call;
8957
8958  /* If the function needs a static chain and the static chain
8959     register is r15, we use an indirect call so as to bypass
8960     the PLT stub in case the executable is dynamically linked,
8961     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */
8963
8964  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
8965    {
8966      gcc_assert (STATIC_CHAIN_REGNUM == 15);
8967      indirect_call = true;
8968    }
8969  else
8970    indirect_call = false;
8971
8972  if (TARGET_GNU_AS)
8973    fputs ("\t.prologue 4, r40\n", file);
8974  else
8975    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
8976  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
8977
8978  if (NO_PROFILE_COUNTERS)
8979    fputs ("\tmov out3 = r0\n", file);
8980  else
8981    {
8982      char buf[20];
8983      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
8984
8985      if (TARGET_AUTO_PIC)
8986	fputs ("\tmovl out3 = @gprel(", file);
8987      else
8988	fputs ("\taddl out3 = @ltoff(", file);
8989      assemble_name (file, buf);
8990      if (TARGET_AUTO_PIC)
8991	fputs (")\n", file);
8992      else
8993	fputs ("), r1\n", file);
8994    }
8995
8996  if (indirect_call)
8997    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
8998  fputs ("\t;;\n", file);
8999
9000  fputs ("\t.save rp, r42\n", file);
9001  fputs ("\tmov out2 = b0\n", file);
9002  if (indirect_call)
9003    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
9004  fputs ("\t.body\n", file);
9005  fputs ("\tmov out1 = r1\n", file);
9006  if (indirect_call)
9007    {
9008      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9009      fputs ("\tmov b6 = r16\n", file);
9010      fputs ("\tld8 r1 = [r14]\n", file);
9011      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9012    }
9013  else
9014    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
9015}
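
/* The sequence above passes four arguments to _mcount: out0 holds the
   caller's ar.pfs (captured by the alloc), out1 the gp (r1), out2 the
   return address (b0), and out3 refers to the LP profile counter
   label via @gprel or @ltoff addressing, or is zero when
   NO_PROFILE_COUNTERS is set.  */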
9016
9017static GTY(()) rtx mcount_func_rtx;
9018static rtx
9019gen_mcount_func_rtx (void)
9020{
9021  if (!mcount_func_rtx)
9022    mcount_func_rtx = init_one_libfunc ("_mcount");
9023  return mcount_func_rtx;
9024}
9025
9026void
9027ia64_profile_hook (int labelno)
9028{
9029  rtx label, ip;
9030
9031  if (NO_PROFILE_COUNTERS)
9032    label = const0_rtx;
9033  else
9034    {
9035      char buf[30];
9036      const char *label_name;
9037      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9038      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9039      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9040      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9041    }
9042  ip = gen_reg_rtx (Pmode);
9043  emit_insn (gen_ip_value (ip));
9044  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9045                     VOIDmode, 3,
9046		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9047		     ip, Pmode,
9048		     label, Pmode);
9049}
9050
9051/* Return the mangling of TYPE if it is an extended fundamental type.  */
9052
9053static const char *
9054ia64_mangle_fundamental_type (tree type)
9055{
  /* On HP-UX, __float128 is "long double" and is therefore already
     mangled as "e"; only mangle TFmode as "g" elsewhere.  */
9058  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9059    return "g";
9060  /* On HP-UX, "e" is not available as a mangling of __float80 so use
9061     an extended mangling.  Elsewhere, "e" is available since long
9062     double is 80 bits.  */
9063  if (TYPE_MODE (type) == XFmode)
9064    return TARGET_HPUX ? "u9__float80" : "e";
9065  if (TYPE_MODE (type) == RFmode)
9066    return "u7__fpreg";
9067  return NULL;
9068}
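
/* For example (illustrative manglings): `void f (__fpreg)' mangles as
   _Z1fu7__fpreg via the vendor extended type syntax,
   `void f (__float80)' as _Z1fe on non-HP-UX targets and as
   _Z1fu9__float80 on HP-UX, and `void f (__float128)' as _Z1fg where
   __float128 is distinct from "long double".  */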
9069
9070/* Return the diagnostic message string if conversion from FROMTYPE to
9071   TOTYPE is not allowed, NULL otherwise.  */
9072static const char *
9073ia64_invalid_conversion (tree fromtype, tree totype)
9074{
9075  /* Reject nontrivial conversion to or from __fpreg.  */
9076  if (TYPE_MODE (fromtype) == RFmode
9077      && TYPE_MODE (totype) != RFmode
9078      && TYPE_MODE (totype) != VOIDmode)
9079    return N_("invalid conversion from %<__fpreg%>");
9080  if (TYPE_MODE (totype) == RFmode
9081      && TYPE_MODE (fromtype) != RFmode)
9082    return N_("invalid conversion to %<__fpreg%>");
9083  return NULL;
9084}
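
/* For example, given `__fpreg r;', the initializer `double d = r;' is
   rejected with the first message above and `__fpreg q = 1.0;' with
   the second, while a conversion to void (e.g. `(void) r;') remains
   valid.  */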
9085
9086/* Return the diagnostic message string if the unary operation OP is
9087   not permitted on TYPE, NULL otherwise.  */
9088static const char *
9089ia64_invalid_unary_op (int op, tree type)
9090{
9091  /* Reject operations on __fpreg other than unary + or &.  */
9092  if (TYPE_MODE (type) == RFmode
9093      && op != CONVERT_EXPR
9094      && op != ADDR_EXPR)
9095    return N_("invalid operation on %<__fpreg%>");
9096  return NULL;
9097}
9098
9099/* Return the diagnostic message string if the binary operation OP is
9100   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
9101static const char *
9102ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
9103{
9104  /* Reject operations on __fpreg.  */
9105  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9106    return N_("invalid operation on %<__fpreg%>");
9107  return NULL;
9108}
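
/* For example, with `__fpreg x, y;' the expression `x + y' (or any
   other arithmetic or comparison involving __fpreg) is rejected,
   while unary plus and the address-of operator, e.g. `&x', remain
   valid per ia64_invalid_unary_op above.  */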
9109
9110#include "gt-ia64.h"
9111