1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
3   Free Software Foundation, Inc.
4   Contributed by James E. Wilson <wilson@cygnus.com> and
5		  David Mosberger <davidm@hpl.hp.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to
21the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22Boston, MA 02110-1301, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "toplev.h"
46#include "sched-int.h"
47#include "timevar.h"
48#include "target.h"
49#include "target-def.h"
50#include "tm_p.h"
51#include "hashtab.h"
52#include "langhooks.h"
53#include "cfglayout.h"
54#include "tree-gimple.h"
55#include "intl.h"
56#include "debug.h"
57#include "params.h"
58
59/* This is used for communication between ASM_OUTPUT_LABEL and
60   ASM_OUTPUT_LABELREF.  */
61int ia64_asm_output_label = 0;
62
63/* Define the information needed to generate branch and scc insns.  This is
64   stored from the compare operation.  */
65struct rtx_def * ia64_compare_op0;
66struct rtx_def * ia64_compare_op1;
67
68/* Register names for ia64_expand_prologue.  */
69static const char * const ia64_reg_numbers[96] =
70{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79  "r104","r105","r106","r107","r108","r109","r110","r111",
80  "r112","r113","r114","r115","r116","r117","r118","r119",
81  "r120","r121","r122","r123","r124","r125","r126","r127"};
82
83/* ??? These strings could be shared with REGISTER_NAMES.  */
84static const char * const ia64_input_reg_names[8] =
85{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
86
87/* ??? These strings could be shared with REGISTER_NAMES.  */
88static const char * const ia64_local_reg_names[80] =
89{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
99
100/* ??? These strings could be shared with REGISTER_NAMES.  */
101static const char * const ia64_output_reg_names[8] =
102{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
103
104/* Which cpu are we scheduling for.  */
105enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
106
107/* Determines whether we run our final scheduling pass or not.  We always
108   avoid the normal second scheduling pass.  */
109static int ia64_flag_schedule_insns2;
110
111/* Determines whether we run variable tracking in machine dependent
112   reorganization.  */
113static int ia64_flag_var_tracking;
114
115/* Variables which are this size or smaller are put in the sdata/sbss
116   sections.  */
117
118unsigned int ia64_section_threshold;
119
120/* The following variable is used by the DFA insn scheduler.  The value is
121   TRUE if we do insn bundling instead of insn scheduling.  */
122int bundling_p = 0;
123
124/* Structure to be filled in by ia64_compute_frame_size with register
125   save masks and offsets for the current function.  */
126
127struct ia64_frame_info
128{
129  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
130				   the caller's scratch area.  */
131  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
132  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
133  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
134  HARD_REG_SET mask;		/* mask of saved registers.  */
135  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
136				   registers or long-term scratches.  */
137  int n_spilled;		/* number of spilled registers.  */
138  int reg_fp;			/* register for fp.  */
139  int reg_save_b0;		/* save register for b0.  */
140  int reg_save_pr;		/* save register for prs.  */
141  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
142  int reg_save_ar_unat;		/* save register for ar.unat.  */
143  int reg_save_ar_lc;		/* save register for ar.lc.  */
144  int reg_save_gp;		/* save register for gp.  */
145  int n_input_regs;		/* number of input registers used.  */
146  int n_local_regs;		/* number of local registers used.  */
147  int n_output_regs;		/* number of output registers used.  */
148  int n_rotate_regs;		/* number of rotating registers used.  */
149
150  char need_regstk;		/* true if a .regstk directive needed.  */
151  char initialized;		/* true if the data is finalized.  */
152};
153
154/* Current frame information calculated by ia64_compute_frame_size.  */
155static struct ia64_frame_info current_frame_info;
156
157static int ia64_first_cycle_multipass_dfa_lookahead (void);
158static void ia64_dependencies_evaluation_hook (rtx, rtx);
159static void ia64_init_dfa_pre_cycle_insn (void);
160static rtx ia64_dfa_pre_cycle_insn (void);
161static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
162static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
163static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
164static void ia64_h_i_d_extended (void);
165static int ia64_mode_to_int (enum machine_mode);
166static void ia64_set_sched_flags (spec_info_t);
167static int ia64_speculate_insn (rtx, ds_t, rtx *);
168static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
169static bool ia64_needs_block_p (rtx);
170static rtx ia64_gen_check (rtx, rtx, bool);
171static int ia64_spec_check_p (rtx);
172static int ia64_spec_check_src_p (rtx);
173static rtx gen_tls_get_addr (void);
174static rtx gen_thread_pointer (void);
175static int find_gr_spill (int);
176static int next_scratch_gr_reg (void);
177static void mark_reg_gr_used_mask (rtx, void *);
178static void ia64_compute_frame_size (HOST_WIDE_INT);
179static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
180static void finish_spill_pointers (void);
181static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
182static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
183static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
184static rtx gen_movdi_x (rtx, rtx, rtx);
185static rtx gen_fr_spill_x (rtx, rtx, rtx);
186static rtx gen_fr_restore_x (rtx, rtx, rtx);
187
188static enum machine_mode hfa_element_mode (tree, bool);
189static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
190					 tree, int *, int);
191static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
192				   tree, bool);
193static bool ia64_function_ok_for_sibcall (tree, tree);
194static bool ia64_return_in_memory (tree, tree);
195static bool ia64_rtx_costs (rtx, int, int, int *);
196static void fix_range (const char *);
197static bool ia64_handle_option (size_t, const char *, int);
198static struct machine_function * ia64_init_machine_status (void);
199static void emit_insn_group_barriers (FILE *);
200static void emit_all_insn_group_barriers (FILE *);
201static void final_emit_insn_group_barriers (FILE *);
202static void emit_predicate_relation_info (void);
203static void ia64_reorg (void);
204static bool ia64_in_small_data_p (tree);
205static void process_epilogue (FILE *, rtx, bool, bool);
206static int process_set (FILE *, rtx, rtx, bool, bool);
207
208static bool ia64_assemble_integer (rtx, unsigned int, int);
209static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
210static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
211static void ia64_output_function_end_prologue (FILE *);
212
213static int ia64_issue_rate (void);
214static int ia64_adjust_cost_2 (rtx, int, rtx, int);
215static void ia64_sched_init (FILE *, int, int);
216static void ia64_sched_init_global (FILE *, int, int);
217static void ia64_sched_finish_global (FILE *, int);
218static void ia64_sched_finish (FILE *, int);
219static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
220static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
221static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
222static int ia64_variable_issue (FILE *, int, rtx, int);
223
224static struct bundle_state *get_free_bundle_state (void);
225static void free_bundle_state (struct bundle_state *);
226static void initiate_bundle_states (void);
227static void finish_bundle_states (void);
228static unsigned bundle_state_hash (const void *);
229static int bundle_state_eq_p (const void *, const void *);
230static int insert_bundle_state (struct bundle_state *);
231static void initiate_bundle_state_table (void);
232static void finish_bundle_state_table (void);
233static int try_issue_nops (struct bundle_state *, int);
234static int try_issue_insn (struct bundle_state *, rtx);
235static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
236static int get_max_pos (state_t);
237static int get_template (state_t, int);
238
239static rtx get_next_important_insn (rtx, rtx);
240static void bundling (FILE *, int, rtx, rtx);
241
242static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
243				  HOST_WIDE_INT, tree);
244static void ia64_file_start (void);
245
246static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
247static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
248static section *ia64_select_rtx_section (enum machine_mode, rtx,
249					 unsigned HOST_WIDE_INT);
250static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
251     ATTRIBUTE_UNUSED;
252static unsigned int ia64_section_type_flags (tree, const char *, int);
253static void ia64_init_libfuncs (void)
254     ATTRIBUTE_UNUSED;
255static void ia64_hpux_init_libfuncs (void)
256     ATTRIBUTE_UNUSED;
257static void ia64_sysv4_init_libfuncs (void)
258     ATTRIBUTE_UNUSED;
259static void ia64_vms_init_libfuncs (void)
260     ATTRIBUTE_UNUSED;
261
262static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
263static void ia64_encode_section_info (tree, rtx, int);
264static rtx ia64_struct_value_rtx (tree, int);
265static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
266static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
267static bool ia64_vector_mode_supported_p (enum machine_mode mode);
268static bool ia64_cannot_force_const_mem (rtx);
269static const char *ia64_mangle_fundamental_type (tree);
270static const char *ia64_invalid_conversion (tree, tree);
271static const char *ia64_invalid_unary_op (int, tree);
272static const char *ia64_invalid_binary_op (int, tree, tree);
273
274/* Table of valid machine attributes.  */
275static const struct attribute_spec ia64_attribute_table[] =
276{
277  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
278  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
279  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
280  { NULL,	       0, 0, false, false, false, NULL }
281};
282
283/* Initialize the GCC target structure.  */
284#undef TARGET_ATTRIBUTE_TABLE
285#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
286
287#undef TARGET_INIT_BUILTINS
288#define TARGET_INIT_BUILTINS ia64_init_builtins
289
290#undef TARGET_EXPAND_BUILTIN
291#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
292
293#undef TARGET_ASM_BYTE_OP
294#define TARGET_ASM_BYTE_OP "\tdata1\t"
295#undef TARGET_ASM_ALIGNED_HI_OP
296#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
297#undef TARGET_ASM_ALIGNED_SI_OP
298#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
299#undef TARGET_ASM_ALIGNED_DI_OP
300#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
301#undef TARGET_ASM_UNALIGNED_HI_OP
302#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
303#undef TARGET_ASM_UNALIGNED_SI_OP
304#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
305#undef TARGET_ASM_UNALIGNED_DI_OP
306#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
307#undef TARGET_ASM_INTEGER
308#define TARGET_ASM_INTEGER ia64_assemble_integer
309
310#undef TARGET_ASM_FUNCTION_PROLOGUE
311#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
312#undef TARGET_ASM_FUNCTION_END_PROLOGUE
313#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
314#undef TARGET_ASM_FUNCTION_EPILOGUE
315#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
316
317#undef TARGET_IN_SMALL_DATA_P
318#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
319
320#undef TARGET_SCHED_ADJUST_COST_2
321#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
322#undef TARGET_SCHED_ISSUE_RATE
323#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
324#undef TARGET_SCHED_VARIABLE_ISSUE
325#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
326#undef TARGET_SCHED_INIT
327#define TARGET_SCHED_INIT ia64_sched_init
328#undef TARGET_SCHED_FINISH
329#define TARGET_SCHED_FINISH ia64_sched_finish
330#undef TARGET_SCHED_INIT_GLOBAL
331#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
332#undef TARGET_SCHED_FINISH_GLOBAL
333#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
334#undef TARGET_SCHED_REORDER
335#define TARGET_SCHED_REORDER ia64_sched_reorder
336#undef TARGET_SCHED_REORDER2
337#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
338
339#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
340#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
341
342#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
343#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
344
345#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
346#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
347#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
348#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
349
350#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
351#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
352  ia64_first_cycle_multipass_dfa_lookahead_guard
353
354#undef TARGET_SCHED_DFA_NEW_CYCLE
355#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
356
357#undef TARGET_SCHED_H_I_D_EXTENDED
358#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
359
360#undef TARGET_SCHED_SET_SCHED_FLAGS
361#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
362
363#undef TARGET_SCHED_SPECULATE_INSN
364#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
365
366#undef TARGET_SCHED_NEEDS_BLOCK_P
367#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
368
369#undef TARGET_SCHED_GEN_CHECK
370#define TARGET_SCHED_GEN_CHECK ia64_gen_check
371
372#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
373#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
374  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
375
376#undef TARGET_FUNCTION_OK_FOR_SIBCALL
377#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
378#undef TARGET_ARG_PARTIAL_BYTES
379#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
380
381#undef TARGET_ASM_OUTPUT_MI_THUNK
382#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
383#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
384#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
385
386#undef TARGET_ASM_FILE_START
387#define TARGET_ASM_FILE_START ia64_file_start
388
389#undef TARGET_RTX_COSTS
390#define TARGET_RTX_COSTS ia64_rtx_costs
391#undef TARGET_ADDRESS_COST
392#define TARGET_ADDRESS_COST hook_int_rtx_0
393
394#undef TARGET_MACHINE_DEPENDENT_REORG
395#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
396
397#undef TARGET_ENCODE_SECTION_INFO
398#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
399
400#undef  TARGET_SECTION_TYPE_FLAGS
401#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
402
403#ifdef HAVE_AS_TLS
404#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
405#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
406#endif
407
408/* ??? ABI doesn't allow us to define this.  */
409#if 0
410#undef TARGET_PROMOTE_FUNCTION_ARGS
411#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
412#endif
413
414/* ??? ABI doesn't allow us to define this.  */
415#if 0
416#undef TARGET_PROMOTE_FUNCTION_RETURN
417#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
418#endif
419
420/* ??? Investigate.  */
421#if 0
422#undef TARGET_PROMOTE_PROTOTYPES
423#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
424#endif
425
426#undef TARGET_STRUCT_VALUE_RTX
427#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
428#undef TARGET_RETURN_IN_MEMORY
429#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
430#undef TARGET_SETUP_INCOMING_VARARGS
431#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
432#undef TARGET_STRICT_ARGUMENT_NAMING
433#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
434#undef TARGET_MUST_PASS_IN_STACK
435#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
436
437#undef TARGET_GIMPLIFY_VA_ARG_EXPR
438#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
439
440#undef TARGET_UNWIND_EMIT
441#define TARGET_UNWIND_EMIT process_for_unwind_directive
442
443#undef TARGET_SCALAR_MODE_SUPPORTED_P
444#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
445#undef TARGET_VECTOR_MODE_SUPPORTED_P
446#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
447
448/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
449   in an order different from the specified program order.  */
450#undef TARGET_RELAXED_ORDERING
451#define TARGET_RELAXED_ORDERING true
452
453#undef TARGET_DEFAULT_TARGET_FLAGS
454#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
455#undef TARGET_HANDLE_OPTION
456#define TARGET_HANDLE_OPTION ia64_handle_option
457
458#undef TARGET_CANNOT_FORCE_CONST_MEM
459#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
460
461#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
462#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
463
464#undef TARGET_INVALID_CONVERSION
465#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
466#undef TARGET_INVALID_UNARY_OP
467#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
468#undef TARGET_INVALID_BINARY_OP
469#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
470
471struct gcc_target targetm = TARGET_INITIALIZER;
472
473typedef enum
474  {
475    ADDR_AREA_NORMAL,	/* normal address area */
476    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
477  }
478ia64_addr_area;
479
480static GTY(()) tree small_ident1;
481static GTY(()) tree small_ident2;
482
483static void
484init_idents (void)
485{
486  if (small_ident1 == 0)
487    {
488      small_ident1 = get_identifier ("small");
489      small_ident2 = get_identifier ("__small__");
490    }
491}
492
493/* Retrieve the address area that has been chosen for the given decl.  */
494
495static ia64_addr_area
496ia64_get_addr_area (tree decl)
497{
498  tree model_attr;
499
500  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
501  if (model_attr)
502    {
503      tree id;
504
505      init_idents ();
506      id = TREE_VALUE (TREE_VALUE (model_attr));
507      if (id == small_ident1 || id == small_ident2)
508	return ADDR_AREA_SMALL;
509    }
510  return ADDR_AREA_NORMAL;
511}
512
513static tree
514ia64_handle_model_attribute (tree *node, tree name, tree args,
515			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
516{
517  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
518  ia64_addr_area area;
519  tree arg, decl = *node;
520
521  init_idents ();
522  arg = TREE_VALUE (args);
523  if (arg == small_ident1 || arg == small_ident2)
524    {
525      addr_area = ADDR_AREA_SMALL;
526    }
527  else
528    {
529      warning (OPT_Wattributes, "invalid argument of %qs attribute",
530	       IDENTIFIER_POINTER (name));
531      *no_add_attrs = true;
532    }
533
534  switch (TREE_CODE (decl))
535    {
536    case VAR_DECL:
537      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
538	   == FUNCTION_DECL)
539	  && !TREE_STATIC (decl))
540	{
541	  error ("%Jan address area attribute cannot be specified for "
542		 "local variables", decl);
543	  *no_add_attrs = true;
544	}
545      area = ia64_get_addr_area (decl);
546      if (area != ADDR_AREA_NORMAL && addr_area != area)
547	{
548	  error ("address area of %q+D conflicts with previous "
549		 "declaration", decl);
550	  *no_add_attrs = true;
551	}
552      break;
553
554    case FUNCTION_DECL:
555      error ("%Jaddress area attribute cannot be specified for functions",
556	     decl);
557      *no_add_attrs = true;
558      break;
559
560    default:
561      warning (OPT_Wattributes, "%qs attribute ignored",
562	       IDENTIFIER_POINTER (name));
563      *no_add_attrs = true;
564      break;
565    }
566
567  return NULL_TREE;
568}
569
570static void
571ia64_encode_addr_area (tree decl, rtx symbol)
572{
573  int flags;
574
575  flags = SYMBOL_REF_FLAGS (symbol);
576  switch (ia64_get_addr_area (decl))
577    {
578    case ADDR_AREA_NORMAL: break;
579    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
580    default: gcc_unreachable ();
581    }
582  SYMBOL_REF_FLAGS (symbol) = flags;
583}
584
585static void
586ia64_encode_section_info (tree decl, rtx rtl, int first)
587{
588  default_encode_section_info (decl, rtl, first);
589
590  /* Careful not to prod global register variables.  */
591  if (TREE_CODE (decl) == VAR_DECL
592      && GET_CODE (DECL_RTL (decl)) == MEM
593      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
594      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
595    ia64_encode_addr_area (decl, XEXP (rtl, 0));
596}
597
598/* Implement CONST_OK_FOR_LETTER_P.  */
599
600bool
601ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
602{
603  switch (c)
604    {
605    case 'I':
606      return CONST_OK_FOR_I (value);
607    case 'J':
608      return CONST_OK_FOR_J (value);
609    case 'K':
610      return CONST_OK_FOR_K (value);
611    case 'L':
612      return CONST_OK_FOR_L (value);
613    case 'M':
614      return CONST_OK_FOR_M (value);
615    case 'N':
616      return CONST_OK_FOR_N (value);
617    case 'O':
618      return CONST_OK_FOR_O (value);
619    case 'P':
620      return CONST_OK_FOR_P (value);
621    default:
622      return false;
623    }
624}
625
626/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
627
628bool
629ia64_const_double_ok_for_letter_p (rtx value, char c)
630{
631  switch (c)
632    {
633    case 'G':
634      return CONST_DOUBLE_OK_FOR_G (value);
635    default:
636      return false;
637    }
638}
639
640/* Implement EXTRA_CONSTRAINT.  */
641
642bool
643ia64_extra_constraint (rtx value, char c)
644{
645  switch (c)
646    {
647    case 'Q':
648      /* Non-volatile memory for FP_REG loads/stores.  */
649      return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
650
651    case 'R':
652      /* 1..4 for shladd arguments.  */
653      return (GET_CODE (value) == CONST_INT
654	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);
655
656    case 'S':
657      /* Non-post-inc memory for asms and other unsavory creatures.  */
658      return (GET_CODE (value) == MEM
659	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
660	      && (reload_in_progress || memory_operand (value, VOIDmode)));
661
662    case 'T':
663      /* Symbol ref to small-address-area.  */
664      return small_addr_symbolic_operand (value, VOIDmode);
665
666    case 'U':
667      /* Vector zero.  */
668      return value == CONST0_RTX (GET_MODE (value));
669
670    case 'W':
671      /* An integer vector, such that conversion to an integer yields a
672	 value appropriate for an integer 'J' constraint.  */
673      if (GET_CODE (value) == CONST_VECTOR
674	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
675	{
676	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
677	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
678	}
679      return false;
680
681    case 'Y':
682      /* A V2SF vector containing elements that satisfy 'G'.  */
683      return
684	(GET_CODE (value) == CONST_VECTOR
685	 && GET_MODE (value) == V2SFmode
686	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
687	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
688
689    default:
690      return false;
691    }
692}
693
694/* Return 1 if the operands of a move are ok.  */
695
696int
697ia64_move_ok (rtx dst, rtx src)
698{
699  /* If we're under init_recog_no_volatile, we'll not be able to use
700     memory_operand.  So check the code directly and don't worry about
701     the validity of the underlying address, which should have been
702     checked elsewhere anyway.  */
703  if (GET_CODE (dst) != MEM)
704    return 1;
705  if (GET_CODE (src) == MEM)
706    return 0;
707  if (register_operand (src, VOIDmode))
708    return 1;
709
710  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
711  if (INTEGRAL_MODE_P (GET_MODE (dst)))
712    return src == const0_rtx;
713  else
714    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
715}
716
717/* Return 1 if the operands are ok for a floating point load pair.  */
718
719int
720ia64_load_pair_ok (rtx dst, rtx src)
721{
722  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
723    return 0;
724  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
725    return 0;
726  switch (GET_CODE (XEXP (src, 0)))
727    {
728    case REG:
729    case POST_INC:
730      break;
731    case POST_DEC:
732      return 0;
733    case POST_MODIFY:
734      {
735	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
736
737	if (GET_CODE (adjust) != CONST_INT
738	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
739	  return 0;
740      }
741      break;
742    default:
743      abort ();
744    }
745  return 1;
746}
747
748int
749addp4_optimize_ok (rtx op1, rtx op2)
750{
751  return (basereg_operand (op1, GET_MODE(op1)) !=
752	  basereg_operand (op2, GET_MODE(op2)));
753}
754
755/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
756   Return the length of the field, or <= 0 on failure.  */
757
758int
759ia64_depz_field_mask (rtx rop, rtx rshift)
760{
761  unsigned HOST_WIDE_INT op = INTVAL (rop);
762  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
763
764  /* Get rid of the zero bits we're shifting in.  */
765  op >>= shift;
766
767  /* We must now have a solid block of 1's at bit 0.  */
768  return exact_log2 (op + 1);
769}
770
771/* Return the TLS model to use for ADDR.  */
772
773static enum tls_model
774tls_symbolic_operand_type (rtx addr)
775{
776  enum tls_model tls_kind = 0;
777
778  if (GET_CODE (addr) == CONST)
779    {
780      if (GET_CODE (XEXP (addr, 0)) == PLUS
781	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
782        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
783    }
784  else if (GET_CODE (addr) == SYMBOL_REF)
785    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
786
787  return tls_kind;
788}
789
790/* Return true if X is a constant that is valid for some immediate
791   field in an instruction.  */
792
793bool
794ia64_legitimate_constant_p (rtx x)
795{
796  switch (GET_CODE (x))
797    {
798    case CONST_INT:
799    case LABEL_REF:
800      return true;
801
802    case CONST_DOUBLE:
803      if (GET_MODE (x) == VOIDmode)
804	return true;
805      return CONST_DOUBLE_OK_FOR_G (x);
806
807    case CONST:
808    case SYMBOL_REF:
809      /* ??? Short term workaround for PR 28490.  We must make the code here
810	 match the code in ia64_expand_move and move_operand, even though they
811	 are both technically wrong.  */
812      if (tls_symbolic_operand_type (x) == 0)
813	{
814	  HOST_WIDE_INT addend = 0;
815	  rtx op = x;
816
817	  if (GET_CODE (op) == CONST
818	      && GET_CODE (XEXP (op, 0)) == PLUS
819	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
820	    {
821	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
822	      op = XEXP (XEXP (op, 0), 0);
823	    }
824
825          if (any_offset_symbol_operand (op, GET_MODE (op))
826              || function_operand (op, GET_MODE (op)))
827            return true;
828	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
829	    return (addend & 0x3fff) == 0;
830	  return false;
831	}
832      return false;
833
834    case CONST_VECTOR:
835      {
836	enum machine_mode mode = GET_MODE (x);
837
838	if (mode == V2SFmode)
839	  return ia64_extra_constraint (x, 'Y');
840
841	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
842		&& GET_MODE_SIZE (mode) <= 8);
843      }
844
845    default:
846      return false;
847    }
848}
849
850/* Don't allow TLS addresses to get spilled to memory.  */
851
852static bool
853ia64_cannot_force_const_mem (rtx x)
854{
855  return tls_symbolic_operand_type (x) != 0;
856}
857
858/* Expand a symbolic constant load.  */
859
860bool
861ia64_expand_load_address (rtx dest, rtx src)
862{
863  gcc_assert (GET_CODE (dest) == REG);
864
865  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
866     having to pointer-extend the value afterward.  Other forms of address
867     computation below are also more natural to compute as 64-bit quantities.
868     If we've been given an SImode destination register, change it.  */
869  if (GET_MODE (dest) != Pmode)
870    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
871
872  if (TARGET_NO_PIC)
873    return false;
874  if (small_addr_symbolic_operand (src, VOIDmode))
875    return false;
876
877  if (TARGET_AUTO_PIC)
878    emit_insn (gen_load_gprel64 (dest, src));
879  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
880    emit_insn (gen_load_fptr (dest, src));
881  else if (sdata_symbolic_operand (src, VOIDmode))
882    emit_insn (gen_load_gprel (dest, src));
883  else
884    {
885      HOST_WIDE_INT addend = 0;
886      rtx tmp;
887
888      /* We did split constant offsets in ia64_expand_move, and we did try
889	 to keep them split in move_operand, but we also allowed reload to
890	 rematerialize arbitrary constants rather than spill the value to
891	 the stack and reload it.  So we have to be prepared here to split
892	 them apart again.  */
893      if (GET_CODE (src) == CONST)
894	{
895	  HOST_WIDE_INT hi, lo;
896
897	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
898	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
899	  hi = hi - lo;
900
901	  if (lo != 0)
902	    {
903	      addend = lo;
904	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
905	    }
906	}
907
908      tmp = gen_rtx_HIGH (Pmode, src);
909      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
910      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
911
912      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
913      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
914
915      if (addend)
916	{
917	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
918	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
919	}
920    }
921
922  return true;
923}
924
925static GTY(()) rtx gen_tls_tga;
926static rtx
927gen_tls_get_addr (void)
928{
929  if (!gen_tls_tga)
930    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
931  return gen_tls_tga;
932}
933
934static GTY(()) rtx thread_pointer_rtx;
935static rtx
936gen_thread_pointer (void)
937{
938  if (!thread_pointer_rtx)
939    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
940  return thread_pointer_rtx;
941}
942
943static rtx
944ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
945			 rtx orig_op1, HOST_WIDE_INT addend)
946{
947  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
948  rtx orig_op0 = op0;
949  HOST_WIDE_INT addend_lo, addend_hi;
950
951  switch (tls_kind)
952    {
953    case TLS_MODEL_GLOBAL_DYNAMIC:
954      start_sequence ();
955
956      tga_op1 = gen_reg_rtx (Pmode);
957      emit_insn (gen_load_dtpmod (tga_op1, op1));
958
959      tga_op2 = gen_reg_rtx (Pmode);
960      emit_insn (gen_load_dtprel (tga_op2, op1));
961
962      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
963					 LCT_CONST, Pmode, 2, tga_op1,
964					 Pmode, tga_op2, Pmode);
965
966      insns = get_insns ();
967      end_sequence ();
968
969      if (GET_MODE (op0) != Pmode)
970	op0 = tga_ret;
971      emit_libcall_block (insns, op0, tga_ret, op1);
972      break;
973
974    case TLS_MODEL_LOCAL_DYNAMIC:
975      /* ??? This isn't the completely proper way to do local-dynamic
976	 If the call to __tls_get_addr is used only by a single symbol,
977	 then we should (somehow) move the dtprel to the second arg
978	 to avoid the extra add.  */
979      start_sequence ();
980
981      tga_op1 = gen_reg_rtx (Pmode);
982      emit_insn (gen_load_dtpmod (tga_op1, op1));
983
984      tga_op2 = const0_rtx;
985
986      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
987					 LCT_CONST, Pmode, 2, tga_op1,
988					 Pmode, tga_op2, Pmode);
989
990      insns = get_insns ();
991      end_sequence ();
992
993      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
994				UNSPEC_LD_BASE);
995      tmp = gen_reg_rtx (Pmode);
996      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
997
998      if (!register_operand (op0, Pmode))
999	op0 = gen_reg_rtx (Pmode);
1000      if (TARGET_TLS64)
1001	{
1002	  emit_insn (gen_load_dtprel (op0, op1));
1003	  emit_insn (gen_adddi3 (op0, tmp, op0));
1004	}
1005      else
1006	emit_insn (gen_add_dtprel (op0, op1, tmp));
1007      break;
1008
1009    case TLS_MODEL_INITIAL_EXEC:
1010      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1011      addend_hi = addend - addend_lo;
1012
1013      op1 = plus_constant (op1, addend_hi);
1014      addend = addend_lo;
1015
1016      tmp = gen_reg_rtx (Pmode);
1017      emit_insn (gen_load_tprel (tmp, op1));
1018
1019      if (!register_operand (op0, Pmode))
1020	op0 = gen_reg_rtx (Pmode);
1021      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1022      break;
1023
1024    case TLS_MODEL_LOCAL_EXEC:
1025      if (!register_operand (op0, Pmode))
1026	op0 = gen_reg_rtx (Pmode);
1027
1028      op1 = orig_op1;
1029      addend = 0;
1030      if (TARGET_TLS64)
1031	{
1032	  emit_insn (gen_load_tprel (op0, op1));
1033	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1034	}
1035      else
1036	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1037      break;
1038
1039    default:
1040      gcc_unreachable ();
1041    }
1042
1043  if (addend)
1044    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1045			       orig_op0, 1, OPTAB_DIRECT);
1046  if (orig_op0 == op0)
1047    return NULL_RTX;
1048  if (GET_MODE (orig_op0) == Pmode)
1049    return op0;
1050  return gen_lowpart (GET_MODE (orig_op0), op0);
1051}
1052
1053rtx
1054ia64_expand_move (rtx op0, rtx op1)
1055{
1056  enum machine_mode mode = GET_MODE (op0);
1057
1058  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1059    op1 = force_reg (mode, op1);
1060
1061  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1062    {
1063      HOST_WIDE_INT addend = 0;
1064      enum tls_model tls_kind;
1065      rtx sym = op1;
1066
1067      if (GET_CODE (op1) == CONST
1068	  && GET_CODE (XEXP (op1, 0)) == PLUS
1069	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1070	{
1071	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1072	  sym = XEXP (XEXP (op1, 0), 0);
1073	}
1074
1075      tls_kind = tls_symbolic_operand_type (sym);
1076      if (tls_kind)
1077	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1078
1079      if (any_offset_symbol_operand (sym, mode))
1080	addend = 0;
1081      else if (aligned_offset_symbol_operand (sym, mode))
1082	{
1083	  HOST_WIDE_INT addend_lo, addend_hi;
1084
1085	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1086	  addend_hi = addend - addend_lo;
1087
1088	  if (addend_lo != 0)
1089	    {
1090	      op1 = plus_constant (sym, addend_hi);
1091	      addend = addend_lo;
1092	    }
1093	  else
1094	    addend = 0;
1095	}
1096      else
1097	op1 = sym;
1098
1099      if (reload_completed)
1100	{
1101	  /* We really should have taken care of this offset earlier.  */
1102	  gcc_assert (addend == 0);
1103	  if (ia64_expand_load_address (op0, op1))
1104	    return NULL_RTX;
1105	}
1106
1107      if (addend)
1108	{
1109	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1110
1111	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1112
1113	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1114				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1115	  if (op0 == op1)
1116	    return NULL_RTX;
1117	}
1118    }
1119
1120  return op1;
1121}
1122
1123/* Split a move from OP1 to OP0 conditional on COND.  */
1124
1125void
1126ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1127{
1128  rtx insn, first = get_last_insn ();
1129
1130  emit_move_insn (op0, op1);
1131
1132  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1133    if (INSN_P (insn))
1134      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1135					  PATTERN (insn));
1136}
1137
1138/* Split a post-reload TImode or TFmode reference into two DImode
1139   components.  This is made extra difficult by the fact that we do
1140   not get any scratch registers to work with, because reload cannot
1141   be prevented from giving us a scratch that overlaps the register
1142   pair involved.  So instead, when addressing memory, we tweak the
1143   pointer register up and back down with POST_INCs.  Or up and not
1144   back down when we can get away with it.
1145
1146   REVERSED is true when the loads must be done in reversed order
1147   (high word first) for correctness.  DEAD is true when the pointer
1148   dies with the second insn we generate and therefore the second
1149   address must not carry a postmodify.
1150
1151   May return an insn which is to be emitted after the moves.  */
1152
1153static rtx
1154ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1155{
1156  rtx fixup = 0;
1157
1158  switch (GET_CODE (in))
1159    {
1160    case REG:
1161      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1162      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1163      break;
1164
1165    case CONST_INT:
1166    case CONST_DOUBLE:
1167      /* Cannot occur reversed.  */
1168      gcc_assert (!reversed);
1169
1170      if (GET_MODE (in) != TFmode)
1171	split_double (in, &out[0], &out[1]);
1172      else
1173	/* split_double does not understand how to split a TFmode
1174	   quantity into a pair of DImode constants.  */
1175	{
1176	  REAL_VALUE_TYPE r;
1177	  unsigned HOST_WIDE_INT p[2];
1178	  long l[4];  /* TFmode is 128 bits */
1179
1180	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1181	  real_to_target (l, &r, TFmode);
1182
1183	  if (FLOAT_WORDS_BIG_ENDIAN)
1184	    {
1185	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1186	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1187	    }
1188	  else
1189	    {
1190	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1191	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1192	    }
1193	  out[0] = GEN_INT (p[0]);
1194	  out[1] = GEN_INT (p[1]);
1195	}
1196      break;
1197
1198    case MEM:
1199      {
1200	rtx base = XEXP (in, 0);
1201	rtx offset;
1202
1203	switch (GET_CODE (base))
1204	  {
1205	  case REG:
1206	    if (!reversed)
1207	      {
1208		out[0] = adjust_automodify_address
1209		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1210		out[1] = adjust_automodify_address
1211		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1212	      }
1213	    else
1214	      {
1215		/* Reversal requires a pre-increment, which can only
1216		   be done as a separate insn.  */
1217		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1218		out[0] = adjust_automodify_address
1219		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1220		out[1] = adjust_address (in, DImode, 0);
1221	      }
1222	    break;
1223
1224	  case POST_INC:
1225	    gcc_assert (!reversed && !dead);
1226
1227	    /* Just do the increment in two steps.  */
1228	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1229	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1230	    break;
1231
1232	  case POST_DEC:
1233	    gcc_assert (!reversed && !dead);
1234
1235	    /* Add 8, subtract 24.  */
1236	    base = XEXP (base, 0);
1237	    out[0] = adjust_automodify_address
1238	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1239	    out[1] = adjust_automodify_address
1240	      (in, DImode,
1241	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1242	       8);
1243	    break;
1244
1245	  case POST_MODIFY:
1246	    gcc_assert (!reversed && !dead);
1247
1248	    /* Extract and adjust the modification.  This case is
1249	       trickier than the others, because we might have an
1250	       index register, or we might have a combined offset that
1251	       doesn't fit a signed 9-bit displacement field.  We can
1252	       assume the incoming expression is already legitimate.  */
1253	    offset = XEXP (base, 1);
1254	    base = XEXP (base, 0);
1255
1256	    out[0] = adjust_automodify_address
1257	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1258
1259	    if (GET_CODE (XEXP (offset, 1)) == REG)
1260	      {
1261		/* Can't adjust the postmodify to match.  Emit the
1262		   original, then a separate addition insn.  */
1263		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1264		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1265	      }
1266	    else
1267	      {
1268		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1269		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1270		  {
1271		    /* Again the postmodify cannot be made to match,
1272		       but in this case it's more efficient to get rid
1273		       of the postmodify entirely and fix up with an
1274		       add insn.  */
1275		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1276		    fixup = gen_adddi3
1277		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1278		  }
1279		else
1280		  {
1281		    /* Combined offset still fits in the displacement field.
1282		       (We cannot overflow it at the high end.)  */
1283		    out[1] = adjust_automodify_address
1284		      (in, DImode, gen_rtx_POST_MODIFY
1285		       (Pmode, base, gen_rtx_PLUS
1286			(Pmode, base,
1287			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1288		       8);
1289		  }
1290	      }
1291	    break;
1292
1293	  default:
1294	    gcc_unreachable ();
1295	  }
1296	break;
1297      }
1298
1299    default:
1300      gcc_unreachable ();
1301    }
1302
1303  return fixup;
1304}
1305
1306/* Split a TImode or TFmode move instruction after reload.
1307   This is used by *movtf_internal and *movti_internal.  */
1308void
1309ia64_split_tmode_move (rtx operands[])
1310{
1311  rtx in[2], out[2], insn;
1312  rtx fixup[2];
1313  bool dead = false;
1314  bool reversed = false;
1315
1316  /* It is possible for reload to decide to overwrite a pointer with
1317     the value it points to.  In that case we have to do the loads in
1318     the appropriate order so that the pointer is not destroyed too
1319     early.  Also we must not generate a postmodify for that second
1320     load, or rws_access_regno will die.  */
1321  if (GET_CODE (operands[1]) == MEM
1322      && reg_overlap_mentioned_p (operands[0], operands[1]))
1323    {
1324      rtx base = XEXP (operands[1], 0);
1325      while (GET_CODE (base) != REG)
1326	base = XEXP (base, 0);
1327
1328      if (REGNO (base) == REGNO (operands[0]))
1329	reversed = true;
1330      dead = true;
1331    }
1332  /* Another reason to do the moves in reversed order is if the first
1333     element of the target register pair is also the second element of
1334     the source register pair.  */
1335  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1336      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1337    reversed = true;
1338
1339  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1340  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1341
1342#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1343  if (GET_CODE (EXP) == MEM						\
1344      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1345	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1346	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1347    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1348					  XEXP (XEXP (EXP, 0), 0),	\
1349					  REG_NOTES (INSN))
1350
1351  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1352  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1353  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1354
1355  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1356  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1357  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1358
1359  if (fixup[0])
1360    emit_insn (fixup[0]);
1361  if (fixup[1])
1362    emit_insn (fixup[1]);
1363
1364#undef MAYBE_ADD_REG_INC_NOTE
1365}
1366
1367/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1368   through memory plus an extra GR scratch register.  Except that you can
1369   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1370   SECONDARY_RELOAD_CLASS, but not both.
1371
1372   We got into problems in the first place by allowing a construct like
1373   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1374   This solution attempts to prevent this situation from occurring.  When
1375   we see something like the above, we spill the inner register to memory.  */
1376
1377static rtx
1378spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1379{
1380  if (GET_CODE (in) == SUBREG
1381      && GET_MODE (SUBREG_REG (in)) == TImode
1382      && GET_CODE (SUBREG_REG (in)) == REG)
1383    {
1384      rtx memt = assign_stack_temp (TImode, 16, 0);
1385      emit_move_insn (memt, SUBREG_REG (in));
1386      return adjust_address (memt, mode, 0);
1387    }
1388  else if (force && GET_CODE (in) == REG)
1389    {
1390      rtx memx = assign_stack_temp (mode, 16, 0);
1391      emit_move_insn (memx, in);
1392      return memx;
1393    }
1394  else
1395    return in;
1396}
1397
1398/* Expand the movxf or movrf pattern (MODE says which) with the given
1399   OPERANDS, returning true if the pattern should then invoke
1400   DONE.  */
1401
1402bool
1403ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1404{
1405  rtx op0 = operands[0];
1406
1407  if (GET_CODE (op0) == SUBREG)
1408    op0 = SUBREG_REG (op0);
1409
1410  /* We must support XFmode loads into general registers for stdarg/vararg,
1411     unprototyped calls, and a rare case where a long double is passed as
1412     an argument after a float HFA fills the FP registers.  We split them into
1413     DImode loads for convenience.  We also need to support XFmode stores
1414     for the last case.  This case does not happen for stdarg/vararg routines,
1415     because we do a block store to memory of unnamed arguments.  */
1416
1417  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1418    {
1419      rtx out[2];
1420
1421      /* We're hoping to transform everything that deals with XFmode
1422	 quantities and GR registers early in the compiler.  */
1423      gcc_assert (!no_new_pseudos);
1424
1425      /* Struct to register can just use TImode instead.  */
1426      if ((GET_CODE (operands[1]) == SUBREG
1427	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1428	  || (GET_CODE (operands[1]) == REG
1429	      && GR_REGNO_P (REGNO (operands[1]))))
1430	{
1431	  rtx op1 = operands[1];
1432
1433	  if (GET_CODE (op1) == SUBREG)
1434	    op1 = SUBREG_REG (op1);
1435	  else
1436	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1437
1438	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1439	  return true;
1440	}
1441
1442      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1443	{
1444	  /* Don't word-swap when reading in the constant.  */
1445	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1446			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1447					   0, mode));
1448	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1449			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1450					   0, mode));
1451	  return true;
1452	}
1453
1454      /* If the quantity is in a register not known to be GR, spill it.  */
1455      if (register_operand (operands[1], mode))
1456	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1457
1458      gcc_assert (GET_CODE (operands[1]) == MEM);
1459
1460      /* Don't word-swap when reading in the value.  */
1461      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1462      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1463
1464      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1465      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1466      return true;
1467    }
1468
1469  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1470    {
1471      /* We're hoping to transform everything that deals with XFmode
1472	 quantities and GR registers early in the compiler.  */
1473      gcc_assert (!no_new_pseudos);
1474
1475      /* Op0 can't be a GR_REG here, as that case is handled above.
1476	 If op0 is a register, then we spill op1, so that we now have a
1477	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1478	 to force the spill.  */
1479      if (register_operand (operands[0], mode))
1480	{
1481	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1482	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1483	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1484	}
1485
1486      else
1487	{
1488	  rtx in[2];
1489
1490	  gcc_assert (GET_CODE (operands[0]) == MEM);
1491
1492	  /* Don't word-swap when writing out the value.  */
1493	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1494	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1495
1496	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1497	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1498	  return true;
1499	}
1500    }
1501
1502  if (!reload_in_progress && !reload_completed)
1503    {
1504      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1505
1506      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1507	{
1508	  rtx memt, memx, in = operands[1];
1509	  if (CONSTANT_P (in))
1510	    in = validize_mem (force_const_mem (mode, in));
1511	  if (GET_CODE (in) == MEM)
1512	    memt = adjust_address (in, TImode, 0);
1513	  else
1514	    {
1515	      memt = assign_stack_temp (TImode, 16, 0);
1516	      memx = adjust_address (memt, mode, 0);
1517	      emit_move_insn (memx, in);
1518	    }
1519	  emit_move_insn (op0, memt);
1520	  return true;
1521	}
1522
1523      if (!ia64_move_ok (operands[0], operands[1]))
1524	operands[1] = force_reg (mode, operands[1]);
1525    }
1526
1527  return false;
1528}
1529
1530/* Emit comparison instruction if necessary, returning the expression
1531   that holds the compare result in the proper mode.  */
1532
1533static GTY(()) rtx cmptf_libfunc;
1534
1535rtx
1536ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1537{
1538  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1539  rtx cmp;
1540
1541  /* If we have a BImode input, then we already have a compare result, and
1542     do not need to emit another comparison.  */
1543  if (GET_MODE (op0) == BImode)
1544    {
1545      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1546      cmp = op0;
1547    }
1548  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1549     magic number as its third argument, that indicates what to do.
1550     The return value is an integer to be compared against zero.  */
1551  else if (GET_MODE (op0) == TFmode)
1552    {
1553      enum qfcmp_magic {
1554	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1555	QCMP_UNORD = 2,
1556	QCMP_EQ = 4,
1557	QCMP_LT = 8,
1558	QCMP_GT = 16
1559      } magic;
1560      enum rtx_code ncode;
1561      rtx ret, insns;
1562
1563      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1564      switch (code)
1565	{
1566	  /* 1 = equal, 0 = not equal.  Equality operators do
1567	     not raise FP_INVALID when given an SNaN operand.  */
1568	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1569	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1570	  /* isunordered() from C99.  */
1571	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1572	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1573	  /* Relational operators raise FP_INVALID when given
1574	     an SNaN operand.  */
1575	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1576	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1577	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1578	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1579	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1580	     Expanders for buneq etc. weuld have to be added to ia64.md
1581	     for this to be useful.  */
1582	default: gcc_unreachable ();
1583	}
1584
1585      start_sequence ();
1586
1587      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1588				     op0, TFmode, op1, TFmode,
1589				     GEN_INT (magic), DImode);
1590      cmp = gen_reg_rtx (BImode);
1591      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1592			      gen_rtx_fmt_ee (ncode, BImode,
1593					      ret, const0_rtx)));
1594
1595      insns = get_insns ();
1596      end_sequence ();
1597
1598      emit_libcall_block (insns, cmp, cmp,
1599			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1600      code = NE;
1601    }
1602  else
1603    {
1604      cmp = gen_reg_rtx (BImode);
1605      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1606			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1607      code = NE;
1608    }
1609
1610  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1611}
1612
1613/* Generate an integral vector comparison.  Return true if the condition has
1614   been reversed, and so the sense of the comparison should be inverted.  */
1615
1616static bool
1617ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1618			    rtx dest, rtx op0, rtx op1)
1619{
1620  bool negate = false;
1621  rtx x;
1622
1623  /* Canonicalize the comparison to EQ, GT, GTU.  */
1624  switch (code)
1625    {
1626    case EQ:
1627    case GT:
1628    case GTU:
1629      break;
1630
1631    case NE:
1632    case LE:
1633    case LEU:
1634      code = reverse_condition (code);
1635      negate = true;
1636      break;
1637
1638    case GE:
1639    case GEU:
1640      code = reverse_condition (code);
1641      negate = true;
1642      /* FALLTHRU */
1643
1644    case LT:
1645    case LTU:
1646      code = swap_condition (code);
1647      x = op0, op0 = op1, op1 = x;
1648      break;
1649
1650    default:
1651      gcc_unreachable ();
1652    }
1653
1654  /* Unsigned parallel compare is not supported by the hardware.  Play some
1655     tricks to turn this into a signed comparison against 0.  */
1656  if (code == GTU)
1657    {
1658      switch (mode)
1659	{
1660	case V2SImode:
1661	  {
1662	    rtx t1, t2, mask;
1663
1664	    /* Perform a parallel modulo subtraction.  */
1665	    t1 = gen_reg_rtx (V2SImode);
1666	    emit_insn (gen_subv2si3 (t1, op0, op1));
1667
1668	    /* Extract the original sign bit of op0.  */
1669	    mask = GEN_INT (-0x80000000);
1670	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1671	    mask = force_reg (V2SImode, mask);
1672	    t2 = gen_reg_rtx (V2SImode);
1673	    emit_insn (gen_andv2si3 (t2, op0, mask));
1674
1675	    /* XOR it back into the result of the subtraction.  This results
1676	       in the sign bit set iff we saw unsigned underflow.  */
1677	    x = gen_reg_rtx (V2SImode);
1678	    emit_insn (gen_xorv2si3 (x, t1, t2));
1679
1680	    code = GT;
1681	    op0 = x;
1682	    op1 = CONST0_RTX (mode);
1683	  }
1684	  break;
1685
1686	case V8QImode:
1687	case V4HImode:
1688	  /* Perform a parallel unsigned saturating subtraction.  */
1689	  x = gen_reg_rtx (mode);
1690	  emit_insn (gen_rtx_SET (VOIDmode, x,
1691				  gen_rtx_US_MINUS (mode, op0, op1)));
1692
1693	  code = EQ;
1694	  op0 = x;
1695	  op1 = CONST0_RTX (mode);
1696	  negate = !negate;
1697	  break;
1698
1699	default:
1700	  gcc_unreachable ();
1701	}
1702    }
1703
1704  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1705  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1706
1707  return negate;
1708}
1709
1710/* Emit an integral vector conditional move.  */
1711
1712void
1713ia64_expand_vecint_cmov (rtx operands[])
1714{
1715  enum machine_mode mode = GET_MODE (operands[0]);
1716  enum rtx_code code = GET_CODE (operands[3]);
1717  bool negate;
1718  rtx cmp, x, ot, of;
1719
1720  cmp = gen_reg_rtx (mode);
1721  negate = ia64_expand_vecint_compare (code, mode, cmp,
1722				       operands[4], operands[5]);
1723
1724  ot = operands[1+negate];
1725  of = operands[2-negate];
1726
1727  if (ot == CONST0_RTX (mode))
1728    {
1729      if (of == CONST0_RTX (mode))
1730	{
1731	  emit_move_insn (operands[0], ot);
1732	  return;
1733	}
1734
1735      x = gen_rtx_NOT (mode, cmp);
1736      x = gen_rtx_AND (mode, x, of);
1737      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1738    }
1739  else if (of == CONST0_RTX (mode))
1740    {
1741      x = gen_rtx_AND (mode, cmp, ot);
1742      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1743    }
1744  else
1745    {
1746      rtx t, f;
1747
1748      t = gen_reg_rtx (mode);
1749      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1750      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1751
1752      f = gen_reg_rtx (mode);
1753      x = gen_rtx_NOT (mode, cmp);
1754      x = gen_rtx_AND (mode, x, operands[2-negate]);
1755      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1756
1757      x = gen_rtx_IOR (mode, t, f);
1758      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1759    }
1760}
1761
1762/* Emit an integral vector min or max operation.  Return true if all done.  */
1763
1764bool
1765ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1766			   rtx operands[])
1767{
1768  rtx xops[6];
1769
1770  /* These four combinations are supported directly.  */
1771  if (mode == V8QImode && (code == UMIN || code == UMAX))
1772    return false;
1773  if (mode == V4HImode && (code == SMIN || code == SMAX))
1774    return false;
1775
1776  /* This combination can be implemented with only saturating subtraction.  */
1777  if (mode == V4HImode && code == UMAX)
1778    {
1779      rtx x, tmp = gen_reg_rtx (mode);
1780
1781      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1782      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1783
1784      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1785      return true;
1786    }
1787
1788  /* Everything else implemented via vector comparisons.  */
1789  xops[0] = operands[0];
1790  xops[4] = xops[1] = operands[1];
1791  xops[5] = xops[2] = operands[2];
1792
1793  switch (code)
1794    {
1795    case UMIN:
1796      code = LTU;
1797      break;
1798    case UMAX:
1799      code = GTU;
1800      break;
1801    case SMIN:
1802      code = LT;
1803      break;
1804    case SMAX:
1805      code = GT;
1806      break;
1807    default:
1808      gcc_unreachable ();
1809    }
1810  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1811
1812  ia64_expand_vecint_cmov (xops);
1813  return true;
1814}
1815
1816/* Emit an integral vector widening sum operations.  */
1817
1818void
1819ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1820{
1821  rtx l, h, x, s;
1822  enum machine_mode wmode, mode;
1823  rtx (*unpack_l) (rtx, rtx, rtx);
1824  rtx (*unpack_h) (rtx, rtx, rtx);
1825  rtx (*plus) (rtx, rtx, rtx);
1826
1827  wmode = GET_MODE (operands[0]);
1828  mode = GET_MODE (operands[1]);
1829
1830  switch (mode)
1831    {
1832    case V8QImode:
1833      unpack_l = gen_unpack1_l;
1834      unpack_h = gen_unpack1_h;
1835      plus = gen_addv4hi3;
1836      break;
1837    case V4HImode:
1838      unpack_l = gen_unpack2_l;
1839      unpack_h = gen_unpack2_h;
1840      plus = gen_addv2si3;
1841      break;
1842    default:
1843      gcc_unreachable ();
1844    }
1845
1846  /* Fill in x with the sign extension of each element in op1.  */
1847  if (unsignedp)
1848    x = CONST0_RTX (mode);
1849  else
1850    {
1851      bool neg;
1852
1853      x = gen_reg_rtx (mode);
1854
1855      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1856					CONST0_RTX (mode));
1857      gcc_assert (!neg);
1858    }
1859
1860  l = gen_reg_rtx (wmode);
1861  h = gen_reg_rtx (wmode);
1862  s = gen_reg_rtx (wmode);
1863
1864  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1865  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1866  emit_insn (plus (s, l, operands[2]));
1867  emit_insn (plus (operands[0], h, s));
1868}
1869
1870/* Emit a signed or unsigned V8QI dot product operation.  */
1871
1872void
1873ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1874{
1875  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1876
1877  /* Fill in x1 and x2 with the sign extension of each element.  */
1878  if (unsignedp)
1879    x1 = x2 = CONST0_RTX (V8QImode);
1880  else
1881    {
1882      bool neg;
1883
1884      x1 = gen_reg_rtx (V8QImode);
1885      x2 = gen_reg_rtx (V8QImode);
1886
1887      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1888					CONST0_RTX (V8QImode));
1889      gcc_assert (!neg);
1890      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1891					CONST0_RTX (V8QImode));
1892      gcc_assert (!neg);
1893    }
1894
1895  l1 = gen_reg_rtx (V4HImode);
1896  l2 = gen_reg_rtx (V4HImode);
1897  h1 = gen_reg_rtx (V4HImode);
1898  h2 = gen_reg_rtx (V4HImode);
1899
1900  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1901  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1902  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1903  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1904
1905  p1 = gen_reg_rtx (V2SImode);
1906  p2 = gen_reg_rtx (V2SImode);
1907  p3 = gen_reg_rtx (V2SImode);
1908  p4 = gen_reg_rtx (V2SImode);
1909  emit_insn (gen_pmpy2_r (p1, l1, l2));
1910  emit_insn (gen_pmpy2_l (p2, l1, l2));
1911  emit_insn (gen_pmpy2_r (p3, h1, h2));
1912  emit_insn (gen_pmpy2_l (p4, h1, h2));
1913
1914  s1 = gen_reg_rtx (V2SImode);
1915  s2 = gen_reg_rtx (V2SImode);
1916  s3 = gen_reg_rtx (V2SImode);
1917  emit_insn (gen_addv2si3 (s1, p1, p2));
1918  emit_insn (gen_addv2si3 (s2, p3, p4));
1919  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1920  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1921}
1922
1923/* Emit the appropriate sequence for a call.  */
1924
1925void
1926ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1927		  int sibcall_p)
1928{
1929  rtx insn, b0;
1930
1931  addr = XEXP (addr, 0);
1932  addr = convert_memory_address (DImode, addr);
1933  b0 = gen_rtx_REG (DImode, R_BR (0));
1934
1935  /* ??? Should do this for functions known to bind local too.  */
1936  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1937    {
1938      if (sibcall_p)
1939	insn = gen_sibcall_nogp (addr);
1940      else if (! retval)
1941	insn = gen_call_nogp (addr, b0);
1942      else
1943	insn = gen_call_value_nogp (retval, addr, b0);
1944      insn = emit_call_insn (insn);
1945    }
1946  else
1947    {
1948      if (sibcall_p)
1949	insn = gen_sibcall_gp (addr);
1950      else if (! retval)
1951	insn = gen_call_gp (addr, b0);
1952      else
1953	insn = gen_call_value_gp (retval, addr, b0);
1954      insn = emit_call_insn (insn);
1955
1956      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1957    }
1958
1959  if (sibcall_p)
1960    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1961}
1962
1963void
1964ia64_reload_gp (void)
1965{
1966  rtx tmp;
1967
1968  if (current_frame_info.reg_save_gp)
1969    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1970  else
1971    {
1972      HOST_WIDE_INT offset;
1973
1974      offset = (current_frame_info.spill_cfa_off
1975	        + current_frame_info.spill_size);
1976      if (frame_pointer_needed)
1977        {
1978          tmp = hard_frame_pointer_rtx;
1979          offset = -offset;
1980        }
1981      else
1982        {
1983          tmp = stack_pointer_rtx;
1984          offset = current_frame_info.total_size - offset;
1985        }
1986
1987      if (CONST_OK_FOR_I (offset))
1988        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1989			       tmp, GEN_INT (offset)));
1990      else
1991        {
1992          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1993          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1994			         pic_offset_table_rtx, tmp));
1995        }
1996
1997      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
1998    }
1999
2000  emit_move_insn (pic_offset_table_rtx, tmp);
2001}
2002
2003void
2004ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2005		 rtx scratch_b, int noreturn_p, int sibcall_p)
2006{
2007  rtx insn;
2008  bool is_desc = false;
2009
2010  /* If we find we're calling through a register, then we're actually
2011     calling through a descriptor, so load up the values.  */
2012  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2013    {
2014      rtx tmp;
2015      bool addr_dead_p;
2016
2017      /* ??? We are currently constrained to *not* use peep2, because
2018	 we can legitimately change the global lifetime of the GP
2019	 (in the form of killing where previously live).  This is
2020	 because a call through a descriptor doesn't use the previous
2021	 value of the GP, while a direct call does, and we do not
2022	 commit to either form until the split here.
2023
2024	 That said, this means that we lack precise life info for
2025	 whether ADDR is dead after this call.  This is not terribly
2026	 important, since we can fix things up essentially for free
2027	 with the POST_DEC below, but it's nice to not use it when we
2028	 can immediately tell it's not necessary.  */
2029      addr_dead_p = ((noreturn_p || sibcall_p
2030		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2031					    REGNO (addr)))
2032		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2033
2034      /* Load the code address into scratch_b.  */
2035      tmp = gen_rtx_POST_INC (Pmode, addr);
2036      tmp = gen_rtx_MEM (Pmode, tmp);
2037      emit_move_insn (scratch_r, tmp);
2038      emit_move_insn (scratch_b, scratch_r);
2039
2040      /* Load the GP address.  If ADDR is not dead here, then we must
2041	 revert the change made above via the POST_INCREMENT.  */
2042      if (!addr_dead_p)
2043	tmp = gen_rtx_POST_DEC (Pmode, addr);
2044      else
2045	tmp = addr;
2046      tmp = gen_rtx_MEM (Pmode, tmp);
2047      emit_move_insn (pic_offset_table_rtx, tmp);
2048
2049      is_desc = true;
2050      addr = scratch_b;
2051    }
2052
2053  if (sibcall_p)
2054    insn = gen_sibcall_nogp (addr);
2055  else if (retval)
2056    insn = gen_call_value_nogp (retval, addr, retaddr);
2057  else
2058    insn = gen_call_nogp (addr, retaddr);
2059  emit_call_insn (insn);
2060
2061  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2062    ia64_reload_gp ();
2063}
2064
2065/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2066
2067   This differs from the generic code in that we know about the zero-extending
2068   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2069   also know that ld.acq+cmpxchg.rel equals a full barrier.
2070
2071   The loop we want to generate looks like
2072
2073	cmp_reg = mem;
2074      label:
2075        old_reg = cmp_reg;
2076	new_reg = cmp_reg op val;
2077	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2078	if (cmp_reg != old_reg)
2079	  goto label;
2080
2081   Note that we only do the plain load from memory once.  Subsequent
2082   iterations use the value loaded by the compare-and-swap pattern.  */
2083
2084void
2085ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2086		       rtx old_dst, rtx new_dst)
2087{
2088  enum machine_mode mode = GET_MODE (mem);
2089  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2090  enum insn_code icode;
2091
2092  /* Special case for using fetchadd.  */
2093  if ((mode == SImode || mode == DImode)
2094      && (code == PLUS || code == MINUS)
2095      && fetchadd_operand (val, mode))
2096    {
2097      if (code == MINUS)
2098	val = GEN_INT (-INTVAL (val));
2099
2100      if (!old_dst)
2101        old_dst = gen_reg_rtx (mode);
2102
2103      emit_insn (gen_memory_barrier ());
2104
2105      if (mode == SImode)
2106	icode = CODE_FOR_fetchadd_acq_si;
2107      else
2108	icode = CODE_FOR_fetchadd_acq_di;
2109      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2110
2111      if (new_dst)
2112	{
2113	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2114					 true, OPTAB_WIDEN);
2115	  if (new_reg != new_dst)
2116	    emit_move_insn (new_dst, new_reg);
2117	}
2118      return;
2119    }
2120
2121  /* Because of the volatile mem read, we get an ld.acq, which is the
2122     front half of the full barrier.  The end half is the cmpxchg.rel.  */
2123  gcc_assert (MEM_VOLATILE_P (mem));
2124
2125  old_reg = gen_reg_rtx (DImode);
2126  cmp_reg = gen_reg_rtx (DImode);
2127  label = gen_label_rtx ();
2128
2129  if (mode != DImode)
2130    {
2131      val = simplify_gen_subreg (DImode, val, mode, 0);
2132      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2133    }
2134  else
2135    emit_move_insn (cmp_reg, mem);
2136
2137  emit_label (label);
2138
2139  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2140  emit_move_insn (old_reg, cmp_reg);
2141  emit_move_insn (ar_ccv, cmp_reg);
2142
2143  if (old_dst)
2144    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2145
2146  new_reg = cmp_reg;
2147  if (code == NOT)
2148    {
2149      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2150      code = AND;
2151    }
2152  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2153				 true, OPTAB_DIRECT);
2154
2155  if (mode != DImode)
2156    new_reg = gen_lowpart (mode, new_reg);
2157  if (new_dst)
2158    emit_move_insn (new_dst, new_reg);
2159
2160  switch (mode)
2161    {
2162    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2163    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2164    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2165    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2166    default:
2167      gcc_unreachable ();
2168    }
2169
2170  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2171
2172  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2173}
2174
2175/* Begin the assembly file.  */
2176
2177static void
2178ia64_file_start (void)
2179{
2180  /* Variable tracking should be run after all optimizations which change order
2181     of insns.  It also needs a valid CFG.  This can't be done in
2182     ia64_override_options, because flag_var_tracking is finalized after
2183     that.  */
2184  ia64_flag_var_tracking = flag_var_tracking;
2185  flag_var_tracking = 0;
2186
2187  default_file_start ();
2188  emit_safe_across_calls ();
2189}
2190
2191void
2192emit_safe_across_calls (void)
2193{
2194  unsigned int rs, re;
2195  int out_state;
2196
2197  rs = 1;
2198  out_state = 0;
2199  while (1)
2200    {
2201      while (rs < 64 && call_used_regs[PR_REG (rs)])
2202	rs++;
2203      if (rs >= 64)
2204	break;
2205      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2206	continue;
2207      if (out_state == 0)
2208	{
2209	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2210	  out_state = 1;
2211	}
2212      else
2213	fputc (',', asm_out_file);
2214      if (re == rs + 1)
2215	fprintf (asm_out_file, "p%u", rs);
2216      else
2217	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2218      rs = re + 1;
2219    }
2220  if (out_state)
2221    fputc ('\n', asm_out_file);
2222}
2223
2224/* Helper function for ia64_compute_frame_size: find an appropriate general
2225   register to spill some special register to.  SPECIAL_SPILL_MASK contains
2226   bits in GR0 to GR31 that have already been allocated by this routine.
2227   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2228
2229static int
2230find_gr_spill (int try_locals)
2231{
2232  int regno;
2233
2234  /* If this is a leaf function, first try an otherwise unused
2235     call-clobbered register.  */
2236  if (current_function_is_leaf)
2237    {
2238      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2239	if (! regs_ever_live[regno]
2240	    && call_used_regs[regno]
2241	    && ! fixed_regs[regno]
2242	    && ! global_regs[regno]
2243	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2244	  {
2245	    current_frame_info.gr_used_mask |= 1 << regno;
2246	    return regno;
2247	  }
2248    }
2249
2250  if (try_locals)
2251    {
2252      regno = current_frame_info.n_local_regs;
2253      /* If there is a frame pointer, then we can't use loc79, because
2254	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2255	 reg_name switching code in ia64_expand_prologue.  */
2256      if (regno < (80 - frame_pointer_needed))
2257	{
2258	  current_frame_info.n_local_regs = regno + 1;
2259	  return LOC_REG (0) + regno;
2260	}
2261    }
2262
2263  /* Failed to find a general register to spill to.  Must use stack.  */
2264  return 0;
2265}
2266
2267/* In order to make for nice schedules, we try to allocate every temporary
2268   to a different register.  We must of course stay away from call-saved,
2269   fixed, and global registers.  We must also stay away from registers
2270   allocated in current_frame_info.gr_used_mask, since those include regs
2271   used all through the prologue.
2272
2273   Any register allocated here must be used immediately.  The idea is to
2274   aid scheduling, not to solve data flow problems.  */
2275
2276static int last_scratch_gr_reg;
2277
2278static int
2279next_scratch_gr_reg (void)
2280{
2281  int i, regno;
2282
2283  for (i = 0; i < 32; ++i)
2284    {
2285      regno = (last_scratch_gr_reg + i + 1) & 31;
2286      if (call_used_regs[regno]
2287	  && ! fixed_regs[regno]
2288	  && ! global_regs[regno]
2289	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2290	{
2291	  last_scratch_gr_reg = regno;
2292	  return regno;
2293	}
2294    }
2295
2296  /* There must be _something_ available.  */
2297  gcc_unreachable ();
2298}
2299
2300/* Helper function for ia64_compute_frame_size, called through
2301   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2302
2303static void
2304mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2305{
2306  unsigned int regno = REGNO (reg);
2307  if (regno < 32)
2308    {
2309      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2310      for (i = 0; i < n; ++i)
2311	current_frame_info.gr_used_mask |= 1 << (regno + i);
2312    }
2313}
2314
2315/* Returns the number of bytes offset between the frame pointer and the stack
2316   pointer for the current function.  SIZE is the number of bytes of space
2317   needed for local variables.  */
2318
2319static void
2320ia64_compute_frame_size (HOST_WIDE_INT size)
2321{
2322  HOST_WIDE_INT total_size;
2323  HOST_WIDE_INT spill_size = 0;
2324  HOST_WIDE_INT extra_spill_size = 0;
2325  HOST_WIDE_INT pretend_args_size;
2326  HARD_REG_SET mask;
2327  int n_spilled = 0;
2328  int spilled_gr_p = 0;
2329  int spilled_fr_p = 0;
2330  unsigned int regno;
2331  int i;
2332
2333  if (current_frame_info.initialized)
2334    return;
2335
2336  memset (&current_frame_info, 0, sizeof current_frame_info);
2337  CLEAR_HARD_REG_SET (mask);
2338
2339  /* Don't allocate scratches to the return register.  */
2340  diddle_return_value (mark_reg_gr_used_mask, NULL);
2341
2342  /* Don't allocate scratches to the EH scratch registers.  */
2343  if (cfun->machine->ia64_eh_epilogue_sp)
2344    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2345  if (cfun->machine->ia64_eh_epilogue_bsp)
2346    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2347
2348  /* Find the size of the register stack frame.  We have only 80 local
2349     registers, because we reserve 8 for the inputs and 8 for the
2350     outputs.  */
2351
2352  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2353     since we'll be adjusting that down later.  */
2354  regno = LOC_REG (78) + ! frame_pointer_needed;
2355  for (; regno >= LOC_REG (0); regno--)
2356    if (regs_ever_live[regno])
2357      break;
2358  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359
2360  /* For functions marked with the syscall_linkage attribute, we must mark
2361     all eight input registers as in use, so that locals aren't visible to
2362     the caller.  */
2363
2364  if (cfun->machine->n_varargs > 0
2365      || lookup_attribute ("syscall_linkage",
2366			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2367    current_frame_info.n_input_regs = 8;
2368  else
2369    {
2370      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2371	if (regs_ever_live[regno])
2372	  break;
2373      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2374    }
2375
2376  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2377    if (regs_ever_live[regno])
2378      break;
2379  i = regno - OUT_REG (0) + 1;
2380
2381#ifndef PROFILE_HOOK
2382  /* When -p profiling, we need one output register for the mcount argument.
2383     Likewise for -a profiling for the bb_init_func argument.  For -ax
2384     profiling, we need two output registers for the two bb_init_trace_func
2385     arguments.  */
2386  if (current_function_profile)
2387    i = MAX (i, 1);
2388#endif
2389  current_frame_info.n_output_regs = i;
2390
2391  /* ??? No rotating register support yet.  */
2392  current_frame_info.n_rotate_regs = 0;
2393
2394  /* Discover which registers need spilling, and how much room that
2395     will take.  Begin with floating point and general registers,
2396     which will always wind up on the stack.  */
2397
2398  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2399    if (regs_ever_live[regno] && ! call_used_regs[regno])
2400      {
2401	SET_HARD_REG_BIT (mask, regno);
2402	spill_size += 16;
2403	n_spilled += 1;
2404	spilled_fr_p = 1;
2405      }
2406
2407  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2408    if (regs_ever_live[regno] && ! call_used_regs[regno])
2409      {
2410	SET_HARD_REG_BIT (mask, regno);
2411	spill_size += 8;
2412	n_spilled += 1;
2413	spilled_gr_p = 1;
2414      }
2415
2416  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2417    if (regs_ever_live[regno] && ! call_used_regs[regno])
2418      {
2419	SET_HARD_REG_BIT (mask, regno);
2420	spill_size += 8;
2421	n_spilled += 1;
2422      }
2423
2424  /* Now come all special registers that might get saved in other
2425     general registers.  */
2426
2427  if (frame_pointer_needed)
2428    {
2429      current_frame_info.reg_fp = find_gr_spill (1);
2430      /* If we did not get a register, then we take LOC79.  This is guaranteed
2431	 to be free, even if regs_ever_live is already set, because this is
2432	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2433	 as we don't count loc79 above.  */
2434      if (current_frame_info.reg_fp == 0)
2435	{
2436	  current_frame_info.reg_fp = LOC_REG (79);
2437	  current_frame_info.n_local_regs++;
2438	}
2439    }
2440
2441  if (! current_function_is_leaf)
2442    {
2443      /* Emit a save of BR0 if we call other functions.  Do this even
2444	 if this function doesn't return, as EH depends on this to be
2445	 able to unwind the stack.  */
2446      SET_HARD_REG_BIT (mask, BR_REG (0));
2447
2448      current_frame_info.reg_save_b0 = find_gr_spill (1);
2449      if (current_frame_info.reg_save_b0 == 0)
2450	{
2451	  extra_spill_size += 8;
2452	  n_spilled += 1;
2453	}
2454
2455      /* Similarly for ar.pfs.  */
2456      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2457      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2458      if (current_frame_info.reg_save_ar_pfs == 0)
2459	{
2460	  extra_spill_size += 8;
2461	  n_spilled += 1;
2462	}
2463
2464      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2465	 registers are clobbered, so we fall back to the stack.  */
2466      current_frame_info.reg_save_gp
2467	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2468      if (current_frame_info.reg_save_gp == 0)
2469	{
2470	  SET_HARD_REG_BIT (mask, GR_REG (1));
2471	  spill_size += 8;
2472	  n_spilled += 1;
2473	}
2474    }
2475  else
2476    {
2477      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2478	{
2479	  SET_HARD_REG_BIT (mask, BR_REG (0));
2480	  extra_spill_size += 8;
2481	  n_spilled += 1;
2482	}
2483
2484      if (regs_ever_live[AR_PFS_REGNUM])
2485	{
2486	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2487	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2488	  if (current_frame_info.reg_save_ar_pfs == 0)
2489	    {
2490	      extra_spill_size += 8;
2491	      n_spilled += 1;
2492	    }
2493	}
2494    }
2495
2496  /* Unwind descriptor hackery: things are most efficient if we allocate
2497     consecutive GR save registers for RP, PFS, FP in that order. However,
2498     it is absolutely critical that FP get the only hard register that's
2499     guaranteed to be free, so we allocated it first.  If all three did
2500     happen to be allocated hard regs, and are consecutive, rearrange them
2501     into the preferred order now.  */
2502  if (current_frame_info.reg_fp != 0
2503      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2504      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2505    {
2506      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2507      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2508      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2509    }
2510
2511  /* See if we need to store the predicate register block.  */
2512  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2513    if (regs_ever_live[regno] && ! call_used_regs[regno])
2514      break;
2515  if (regno <= PR_REG (63))
2516    {
2517      SET_HARD_REG_BIT (mask, PR_REG (0));
2518      current_frame_info.reg_save_pr = find_gr_spill (1);
2519      if (current_frame_info.reg_save_pr == 0)
2520	{
2521	  extra_spill_size += 8;
2522	  n_spilled += 1;
2523	}
2524
2525      /* ??? Mark them all as used so that register renaming and such
2526	 are free to use them.  */
2527      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2528	regs_ever_live[regno] = 1;
2529    }
2530
2531  /* If we're forced to use st8.spill, we're forced to save and restore
2532     ar.unat as well.  The check for existing liveness allows inline asm
2533     to touch ar.unat.  */
2534  if (spilled_gr_p || cfun->machine->n_varargs
2535      || regs_ever_live[AR_UNAT_REGNUM])
2536    {
2537      regs_ever_live[AR_UNAT_REGNUM] = 1;
2538      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2539      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2540      if (current_frame_info.reg_save_ar_unat == 0)
2541	{
2542	  extra_spill_size += 8;
2543	  n_spilled += 1;
2544	}
2545    }
2546
2547  if (regs_ever_live[AR_LC_REGNUM])
2548    {
2549      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2550      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2551      if (current_frame_info.reg_save_ar_lc == 0)
2552	{
2553	  extra_spill_size += 8;
2554	  n_spilled += 1;
2555	}
2556    }
2557
2558  /* If we have an odd number of words of pretend arguments written to
2559     the stack, then the FR save area will be unaligned.  We round the
2560     size of this area up to keep things 16 byte aligned.  */
2561  if (spilled_fr_p)
2562    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2563  else
2564    pretend_args_size = current_function_pretend_args_size;
2565
2566  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2567		+ current_function_outgoing_args_size);
2568  total_size = IA64_STACK_ALIGN (total_size);
2569
2570  /* We always use the 16-byte scratch area provided by the caller, but
2571     if we are a leaf function, there's no one to which we need to provide
2572     a scratch area.  */
2573  if (current_function_is_leaf)
2574    total_size = MAX (0, total_size - 16);
2575
2576  current_frame_info.total_size = total_size;
2577  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2578  current_frame_info.spill_size = spill_size;
2579  current_frame_info.extra_spill_size = extra_spill_size;
2580  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2581  current_frame_info.n_spilled = n_spilled;
2582  current_frame_info.initialized = reload_completed;
2583}
2584
2585/* Compute the initial difference between the specified pair of registers.  */
2586
2587HOST_WIDE_INT
2588ia64_initial_elimination_offset (int from, int to)
2589{
2590  HOST_WIDE_INT offset;
2591
2592  ia64_compute_frame_size (get_frame_size ());
2593  switch (from)
2594    {
2595    case FRAME_POINTER_REGNUM:
2596      switch (to)
2597	{
2598	case HARD_FRAME_POINTER_REGNUM:
2599	  if (current_function_is_leaf)
2600	    offset = -current_frame_info.total_size;
2601	  else
2602	    offset = -(current_frame_info.total_size
2603		       - current_function_outgoing_args_size - 16);
2604	  break;
2605
2606	case STACK_POINTER_REGNUM:
2607	  if (current_function_is_leaf)
2608	    offset = 0;
2609	  else
2610	    offset = 16 + current_function_outgoing_args_size;
2611	  break;
2612
2613	default:
2614	  gcc_unreachable ();
2615	}
2616      break;
2617
2618    case ARG_POINTER_REGNUM:
2619      /* Arguments start above the 16 byte save area, unless stdarg
2620	 in which case we store through the 16 byte save area.  */
2621      switch (to)
2622	{
2623	case HARD_FRAME_POINTER_REGNUM:
2624	  offset = 16 - current_function_pretend_args_size;
2625	  break;
2626
2627	case STACK_POINTER_REGNUM:
2628	  offset = (current_frame_info.total_size
2629		    + 16 - current_function_pretend_args_size);
2630	  break;
2631
2632	default:
2633	  gcc_unreachable ();
2634	}
2635      break;
2636
2637    default:
2638      gcc_unreachable ();
2639    }
2640
2641  return offset;
2642}
2643
2644/* If there are more than a trivial number of register spills, we use
2645   two interleaved iterators so that we can get two memory references
2646   per insn group.
2647
2648   In order to simplify things in the prologue and epilogue expanders,
2649   we use helper functions to fix up the memory references after the
2650   fact with the appropriate offsets to a POST_MODIFY memory mode.
2651   The following data structure tracks the state of the two iterators
2652   while insns are being emitted.  */
2653
2654struct spill_fill_data
2655{
2656  rtx init_after;		/* point at which to emit initializations */
2657  rtx init_reg[2];		/* initial base register */
2658  rtx iter_reg[2];		/* the iterator registers */
2659  rtx *prev_addr[2];		/* address of last memory use */
2660  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2661  HOST_WIDE_INT prev_off[2];	/* last offset */
2662  int n_iter;			/* number of iterators in use */
2663  int next_iter;		/* next iterator to use */
2664  unsigned int save_gr_used_mask;
2665};
2666
2667static struct spill_fill_data spill_fill_data;
2668
2669static void
2670setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2671{
2672  int i;
2673
2674  spill_fill_data.init_after = get_last_insn ();
2675  spill_fill_data.init_reg[0] = init_reg;
2676  spill_fill_data.init_reg[1] = init_reg;
2677  spill_fill_data.prev_addr[0] = NULL;
2678  spill_fill_data.prev_addr[1] = NULL;
2679  spill_fill_data.prev_insn[0] = NULL;
2680  spill_fill_data.prev_insn[1] = NULL;
2681  spill_fill_data.prev_off[0] = cfa_off;
2682  spill_fill_data.prev_off[1] = cfa_off;
2683  spill_fill_data.next_iter = 0;
2684  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2685
2686  spill_fill_data.n_iter = 1 + (n_spills > 2);
2687  for (i = 0; i < spill_fill_data.n_iter; ++i)
2688    {
2689      int regno = next_scratch_gr_reg ();
2690      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2691      current_frame_info.gr_used_mask |= 1 << regno;
2692    }
2693}
2694
2695static void
2696finish_spill_pointers (void)
2697{
2698  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2699}
2700
2701static rtx
2702spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2703{
2704  int iter = spill_fill_data.next_iter;
2705  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2706  rtx disp_rtx = GEN_INT (disp);
2707  rtx mem;
2708
2709  if (spill_fill_data.prev_addr[iter])
2710    {
2711      if (CONST_OK_FOR_N (disp))
2712	{
2713	  *spill_fill_data.prev_addr[iter]
2714	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2715				   gen_rtx_PLUS (DImode,
2716						 spill_fill_data.iter_reg[iter],
2717						 disp_rtx));
2718	  REG_NOTES (spill_fill_data.prev_insn[iter])
2719	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2720				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2721	}
2722      else
2723	{
2724	  /* ??? Could use register post_modify for loads.  */
2725	  if (! CONST_OK_FOR_I (disp))
2726	    {
2727	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2728	      emit_move_insn (tmp, disp_rtx);
2729	      disp_rtx = tmp;
2730	    }
2731	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2732				 spill_fill_data.iter_reg[iter], disp_rtx));
2733	}
2734    }
2735  /* Micro-optimization: if we've created a frame pointer, it's at
2736     CFA 0, which may allow the real iterator to be initialized lower,
2737     slightly increasing parallelism.  Also, if there are few saves
2738     it may eliminate the iterator entirely.  */
2739  else if (disp == 0
2740	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2741	   && frame_pointer_needed)
2742    {
2743      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2744      set_mem_alias_set (mem, get_varargs_alias_set ());
2745      return mem;
2746    }
2747  else
2748    {
2749      rtx seq, insn;
2750
2751      if (disp == 0)
2752	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2753			 spill_fill_data.init_reg[iter]);
2754      else
2755	{
2756	  start_sequence ();
2757
2758	  if (! CONST_OK_FOR_I (disp))
2759	    {
2760	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2761	      emit_move_insn (tmp, disp_rtx);
2762	      disp_rtx = tmp;
2763	    }
2764
2765	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2766				 spill_fill_data.init_reg[iter],
2767				 disp_rtx));
2768
2769	  seq = get_insns ();
2770	  end_sequence ();
2771	}
2772
2773      /* Careful for being the first insn in a sequence.  */
2774      if (spill_fill_data.init_after)
2775	insn = emit_insn_after (seq, spill_fill_data.init_after);
2776      else
2777	{
2778	  rtx first = get_insns ();
2779	  if (first)
2780	    insn = emit_insn_before (seq, first);
2781	  else
2782	    insn = emit_insn (seq);
2783	}
2784      spill_fill_data.init_after = insn;
2785
2786      /* If DISP is 0, we may or may not have a further adjustment
2787	 afterward.  If we do, then the load/store insn may be modified
2788	 to be a post-modify.  If we don't, then this copy may be
2789	 eliminated by copyprop_hardreg_forward, which makes this
2790	 insn garbage, which runs afoul of the sanity check in
2791	 propagate_one_insn.  So mark this insn as legal to delete.  */
2792      if (disp == 0)
2793	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2794					     REG_NOTES (insn));
2795    }
2796
2797  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2798
2799  /* ??? Not all of the spills are for varargs, but some of them are.
2800     The rest of the spills belong in an alias set of their own.  But
2801     it doesn't actually hurt to include them here.  */
2802  set_mem_alias_set (mem, get_varargs_alias_set ());
2803
2804  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2805  spill_fill_data.prev_off[iter] = cfa_off;
2806
2807  if (++iter >= spill_fill_data.n_iter)
2808    iter = 0;
2809  spill_fill_data.next_iter = iter;
2810
2811  return mem;
2812}
2813
2814static void
2815do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2816	  rtx frame_reg)
2817{
2818  int iter = spill_fill_data.next_iter;
2819  rtx mem, insn;
2820
2821  mem = spill_restore_mem (reg, cfa_off);
2822  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2823  spill_fill_data.prev_insn[iter] = insn;
2824
2825  if (frame_reg)
2826    {
2827      rtx base;
2828      HOST_WIDE_INT off;
2829
2830      RTX_FRAME_RELATED_P (insn) = 1;
2831
2832      /* Don't even pretend that the unwind code can intuit its way
2833	 through a pair of interleaved post_modify iterators.  Just
2834	 provide the correct answer.  */
2835
2836      if (frame_pointer_needed)
2837	{
2838	  base = hard_frame_pointer_rtx;
2839	  off = - cfa_off;
2840	}
2841      else
2842	{
2843	  base = stack_pointer_rtx;
2844	  off = current_frame_info.total_size - cfa_off;
2845	}
2846
2847      REG_NOTES (insn)
2848	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2849		gen_rtx_SET (VOIDmode,
2850			     gen_rtx_MEM (GET_MODE (reg),
2851					  plus_constant (base, off)),
2852			     frame_reg),
2853		REG_NOTES (insn));
2854    }
2855}
2856
2857static void
2858do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2859{
2860  int iter = spill_fill_data.next_iter;
2861  rtx insn;
2862
2863  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2864				GEN_INT (cfa_off)));
2865  spill_fill_data.prev_insn[iter] = insn;
2866}
2867
2868/* Wrapper functions that discards the CONST_INT spill offset.  These
2869   exist so that we can give gr_spill/gr_fill the offset they need and
2870   use a consistent function interface.  */
2871
2872static rtx
2873gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2874{
2875  return gen_movdi (dest, src);
2876}
2877
2878static rtx
2879gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2880{
2881  return gen_fr_spill (dest, src);
2882}
2883
2884static rtx
2885gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2886{
2887  return gen_fr_restore (dest, src);
2888}
2889
2890/* Called after register allocation to add any instructions needed for the
2891   prologue.  Using a prologue insn is favored compared to putting all of the
2892   instructions in output_function_prologue(), since it allows the scheduler
2893   to intermix instructions with the saves of the caller saved registers.  In
2894   some cases, it might be necessary to emit a barrier instruction as the last
2895   insn to prevent such scheduling.
2896
2897   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2898   so that the debug info generation code can handle them properly.
2899
2900   The register save area is layed out like so:
2901   cfa+16
2902	[ varargs spill area ]
2903	[ fr register spill area ]
2904	[ br register spill area ]
2905	[ ar register spill area ]
2906	[ pr register spill area ]
2907	[ gr register spill area ] */
2908
2909/* ??? Get inefficient code when the frame size is larger than can fit in an
2910   adds instruction.  */
2911
2912void
2913ia64_expand_prologue (void)
2914{
2915  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2916  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2917  rtx reg, alt_reg;
2918
2919  ia64_compute_frame_size (get_frame_size ());
2920  last_scratch_gr_reg = 15;
2921
2922  /* If there is no epilogue, then we don't need some prologue insns.
2923     We need to avoid emitting the dead prologue insns, because flow
2924     will complain about them.  */
2925  if (optimize)
2926    {
2927      edge e;
2928      edge_iterator ei;
2929
2930      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2931	if ((e->flags & EDGE_FAKE) == 0
2932	    && (e->flags & EDGE_FALLTHRU) != 0)
2933	  break;
2934      epilogue_p = (e != NULL);
2935    }
2936  else
2937    epilogue_p = 1;
2938
2939  /* Set the local, input, and output register names.  We need to do this
2940     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2941     half.  If we use in/loc/out register names, then we get assembler errors
2942     in crtn.S because there is no alloc insn or regstk directive in there.  */
2943  if (! TARGET_REG_NAMES)
2944    {
2945      int inputs = current_frame_info.n_input_regs;
2946      int locals = current_frame_info.n_local_regs;
2947      int outputs = current_frame_info.n_output_regs;
2948
2949      for (i = 0; i < inputs; i++)
2950	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2951      for (i = 0; i < locals; i++)
2952	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2953      for (i = 0; i < outputs; i++)
2954	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2955    }
2956
2957  /* Set the frame pointer register name.  The regnum is logically loc79,
2958     but of course we'll not have allocated that many locals.  Rather than
2959     worrying about renumbering the existing rtxs, we adjust the name.  */
2960  /* ??? This code means that we can never use one local register when
2961     there is a frame pointer.  loc79 gets wasted in this case, as it is
2962     renamed to a register that will never be used.  See also the try_locals
2963     code in find_gr_spill.  */
2964  if (current_frame_info.reg_fp)
2965    {
2966      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2967      reg_names[HARD_FRAME_POINTER_REGNUM]
2968	= reg_names[current_frame_info.reg_fp];
2969      reg_names[current_frame_info.reg_fp] = tmp;
2970    }
2971
2972  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2973  if (current_frame_info.n_local_regs == 0
2974      && current_frame_info.n_output_regs == 0
2975      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2976      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2977    {
2978      /* If there is no alloc, but there are input registers used, then we
2979	 need a .regstk directive.  */
2980      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2981      ar_pfs_save_reg = NULL_RTX;
2982    }
2983  else
2984    {
2985      current_frame_info.need_regstk = 0;
2986
2987      if (current_frame_info.reg_save_ar_pfs)
2988	regno = current_frame_info.reg_save_ar_pfs;
2989      else
2990	regno = next_scratch_gr_reg ();
2991      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2992
2993      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2994				   GEN_INT (current_frame_info.n_input_regs),
2995				   GEN_INT (current_frame_info.n_local_regs),
2996				   GEN_INT (current_frame_info.n_output_regs),
2997				   GEN_INT (current_frame_info.n_rotate_regs)));
2998      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
2999    }
3000
3001  /* Set up frame pointer, stack pointer, and spill iterators.  */
3002
3003  n_varargs = cfun->machine->n_varargs;
3004  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3005			stack_pointer_rtx, 0);
3006
3007  if (frame_pointer_needed)
3008    {
3009      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3010      RTX_FRAME_RELATED_P (insn) = 1;
3011    }
3012
3013  if (current_frame_info.total_size != 0)
3014    {
3015      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3016      rtx offset;
3017
3018      if (CONST_OK_FOR_I (- current_frame_info.total_size))
3019	offset = frame_size_rtx;
3020      else
3021	{
3022	  regno = next_scratch_gr_reg ();
3023	  offset = gen_rtx_REG (DImode, regno);
3024	  emit_move_insn (offset, frame_size_rtx);
3025	}
3026
3027      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3028				    stack_pointer_rtx, offset));
3029
3030      if (! frame_pointer_needed)
3031	{
3032	  RTX_FRAME_RELATED_P (insn) = 1;
3033	  if (GET_CODE (offset) != CONST_INT)
3034	    {
3035	      REG_NOTES (insn)
3036		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3037			gen_rtx_SET (VOIDmode,
3038				     stack_pointer_rtx,
3039				     gen_rtx_PLUS (DImode,
3040						   stack_pointer_rtx,
3041						   frame_size_rtx)),
3042			REG_NOTES (insn));
3043	    }
3044	}
3045
3046      /* ??? At this point we must generate a magic insn that appears to
3047	 modify the stack pointer, the frame pointer, and all spill
3048	 iterators.  This would allow the most scheduling freedom.  For
3049	 now, just hard stop.  */
3050      emit_insn (gen_blockage ());
3051    }
3052
3053  /* Must copy out ar.unat before doing any integer spills.  */
3054  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3055    {
3056      if (current_frame_info.reg_save_ar_unat)
3057	ar_unat_save_reg
3058	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3059      else
3060	{
3061	  alt_regno = next_scratch_gr_reg ();
3062	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3063	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3064	}
3065
3066      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3067      insn = emit_move_insn (ar_unat_save_reg, reg);
3068      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3069
3070      /* Even if we're not going to generate an epilogue, we still
3071	 need to save the register so that EH works.  */
3072      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3073	emit_insn (gen_prologue_use (ar_unat_save_reg));
3074    }
3075  else
3076    ar_unat_save_reg = NULL_RTX;
3077
3078  /* Spill all varargs registers.  Do this before spilling any GR registers,
3079     since we want the UNAT bits for the GR registers to override the UNAT
3080     bits from varargs, which we don't care about.  */
3081
3082  cfa_off = -16;
3083  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3084    {
3085      reg = gen_rtx_REG (DImode, regno);
3086      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3087    }
3088
3089  /* Locate the bottom of the register save area.  */
3090  cfa_off = (current_frame_info.spill_cfa_off
3091	     + current_frame_info.spill_size
3092	     + current_frame_info.extra_spill_size);
3093
3094  /* Save the predicate register block either in a register or in memory.  */
3095  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3096    {
3097      reg = gen_rtx_REG (DImode, PR_REG (0));
3098      if (current_frame_info.reg_save_pr != 0)
3099	{
3100	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3101	  insn = emit_move_insn (alt_reg, reg);
3102
3103	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3104	     64 hard registers.  */
3105	  RTX_FRAME_RELATED_P (insn) = 1;
3106	  REG_NOTES (insn)
3107	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3108			gen_rtx_SET (VOIDmode, alt_reg, reg),
3109			REG_NOTES (insn));
3110
3111	  /* Even if we're not going to generate an epilogue, we still
3112	     need to save the register so that EH works.  */
3113	  if (! epilogue_p)
3114	    emit_insn (gen_prologue_use (alt_reg));
3115	}
3116      else
3117	{
3118	  alt_regno = next_scratch_gr_reg ();
3119	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3120	  insn = emit_move_insn (alt_reg, reg);
3121	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3122	  cfa_off -= 8;
3123	}
3124    }
3125
3126  /* Handle AR regs in numerical order.  All of them get special handling.  */
3127  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3128      && current_frame_info.reg_save_ar_unat == 0)
3129    {
3130      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3131      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3132      cfa_off -= 8;
3133    }
3134
3135  /* The alloc insn already copied ar.pfs into a general register.  The
3136     only thing we have to do now is copy that register to a stack slot
3137     if we'd not allocated a local register for the job.  */
3138  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3139      && current_frame_info.reg_save_ar_pfs == 0)
3140    {
3141      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3142      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3143      cfa_off -= 8;
3144    }
3145
3146  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3147    {
3148      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3149      if (current_frame_info.reg_save_ar_lc != 0)
3150	{
3151	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3152	  insn = emit_move_insn (alt_reg, reg);
3153	  RTX_FRAME_RELATED_P (insn) = 1;
3154
3155	  /* Even if we're not going to generate an epilogue, we still
3156	     need to save the register so that EH works.  */
3157	  if (! epilogue_p)
3158	    emit_insn (gen_prologue_use (alt_reg));
3159	}
3160      else
3161	{
3162	  alt_regno = next_scratch_gr_reg ();
3163	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3164	  emit_move_insn (alt_reg, reg);
3165	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3166	  cfa_off -= 8;
3167	}
3168    }
3169
3170  /* Save the return pointer.  */
3171  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3172    {
3173      reg = gen_rtx_REG (DImode, BR_REG (0));
3174      if (current_frame_info.reg_save_b0 != 0)
3175	{
3176	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3177	  insn = emit_move_insn (alt_reg, reg);
3178	  RTX_FRAME_RELATED_P (insn) = 1;
3179
3180	  /* Even if we're not going to generate an epilogue, we still
3181	     need to save the register so that EH works.  */
3182	  if (! epilogue_p)
3183	    emit_insn (gen_prologue_use (alt_reg));
3184	}
3185      else
3186	{
3187	  alt_regno = next_scratch_gr_reg ();
3188	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3189	  emit_move_insn (alt_reg, reg);
3190	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3191	  cfa_off -= 8;
3192	}
3193    }
3194
3195  if (current_frame_info.reg_save_gp)
3196    {
3197      insn = emit_move_insn (gen_rtx_REG (DImode,
3198					  current_frame_info.reg_save_gp),
3199			     pic_offset_table_rtx);
3200      /* We don't know for sure yet if this is actually needed, since
3201	 we've not split the PIC call patterns.  If all of the calls
3202	 are indirect, and not followed by any uses of the gp, then
3203	 this save is dead.  Allow it to go away.  */
3204      REG_NOTES (insn)
3205	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3206    }
3207
3208  /* We should now be at the base of the gr/br/fr spill area.  */
3209  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3210			  + current_frame_info.spill_size));
3211
3212  /* Spill all general registers.  */
3213  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3214    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3215      {
3216	reg = gen_rtx_REG (DImode, regno);
3217	do_spill (gen_gr_spill, reg, cfa_off, reg);
3218	cfa_off -= 8;
3219      }
3220
3221  /* Spill the rest of the BR registers.  */
3222  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3223    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3224      {
3225	alt_regno = next_scratch_gr_reg ();
3226	alt_reg = gen_rtx_REG (DImode, alt_regno);
3227	reg = gen_rtx_REG (DImode, regno);
3228	emit_move_insn (alt_reg, reg);
3229	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3230	cfa_off -= 8;
3231      }
3232
3233  /* Align the frame and spill all FR registers.  */
3234  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3235    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3236      {
3237        gcc_assert (!(cfa_off & 15));
3238	reg = gen_rtx_REG (XFmode, regno);
3239	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3240	cfa_off -= 16;
3241      }
3242
3243  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3244
3245  finish_spill_pointers ();
3246}
3247
3248/* Called after register allocation to add any instructions needed for the
3249   epilogue.  Using an epilogue insn is favored compared to putting all of the
3250   instructions in output_function_prologue(), since it allows the scheduler
3251   to intermix instructions with the saves of the caller saved registers.  In
3252   some cases, it might be necessary to emit a barrier instruction as the last
3253   insn to prevent such scheduling.  */
3254
3255void
3256ia64_expand_epilogue (int sibcall_p)
3257{
3258  rtx insn, reg, alt_reg, ar_unat_save_reg;
3259  int regno, alt_regno, cfa_off;
3260
3261  ia64_compute_frame_size (get_frame_size ());
3262
3263  /* If there is a frame pointer, then we use it instead of the stack
3264     pointer, so that the stack pointer does not need to be valid when
3265     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3266  if (frame_pointer_needed)
3267    setup_spill_pointers (current_frame_info.n_spilled,
3268			  hard_frame_pointer_rtx, 0);
3269  else
3270    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3271			  current_frame_info.total_size);
3272
3273  if (current_frame_info.total_size != 0)
3274    {
3275      /* ??? At this point we must generate a magic insn that appears to
3276         modify the spill iterators and the frame pointer.  This would
3277	 allow the most scheduling freedom.  For now, just hard stop.  */
3278      emit_insn (gen_blockage ());
3279    }
3280
3281  /* Locate the bottom of the register save area.  */
3282  cfa_off = (current_frame_info.spill_cfa_off
3283	     + current_frame_info.spill_size
3284	     + current_frame_info.extra_spill_size);
3285
3286  /* Restore the predicate registers.  */
3287  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3288    {
3289      if (current_frame_info.reg_save_pr != 0)
3290	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3291      else
3292	{
3293	  alt_regno = next_scratch_gr_reg ();
3294	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3295	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3296	  cfa_off -= 8;
3297	}
3298      reg = gen_rtx_REG (DImode, PR_REG (0));
3299      emit_move_insn (reg, alt_reg);
3300    }
3301
3302  /* Restore the application registers.  */
3303
3304  /* Load the saved unat from the stack, but do not restore it until
3305     after the GRs have been restored.  */
3306  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3307    {
3308      if (current_frame_info.reg_save_ar_unat != 0)
3309        ar_unat_save_reg
3310	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3311      else
3312	{
3313	  alt_regno = next_scratch_gr_reg ();
3314	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3315	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3316	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3317	  cfa_off -= 8;
3318	}
3319    }
3320  else
3321    ar_unat_save_reg = NULL_RTX;
3322
3323  if (current_frame_info.reg_save_ar_pfs != 0)
3324    {
3325      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3326      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3327      emit_move_insn (reg, alt_reg);
3328    }
3329  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3330    {
3331      alt_regno = next_scratch_gr_reg ();
3332      alt_reg = gen_rtx_REG (DImode, alt_regno);
3333      do_restore (gen_movdi_x, alt_reg, cfa_off);
3334      cfa_off -= 8;
3335      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3336      emit_move_insn (reg, alt_reg);
3337    }
3338
3339  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3340    {
3341      if (current_frame_info.reg_save_ar_lc != 0)
3342	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3343      else
3344	{
3345	  alt_regno = next_scratch_gr_reg ();
3346	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3347	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3348	  cfa_off -= 8;
3349	}
3350      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3351      emit_move_insn (reg, alt_reg);
3352    }
3353
3354  /* Restore the return pointer.  */
3355  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3356    {
3357      if (current_frame_info.reg_save_b0 != 0)
3358	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3359      else
3360	{
3361	  alt_regno = next_scratch_gr_reg ();
3362	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3363	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3364	  cfa_off -= 8;
3365	}
3366      reg = gen_rtx_REG (DImode, BR_REG (0));
3367      emit_move_insn (reg, alt_reg);
3368    }
3369
3370  /* We should now be at the base of the gr/br/fr spill area.  */
3371  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3372			  + current_frame_info.spill_size));
3373
3374  /* The GP may be stored on the stack in the prologue, but it's
3375     never restored in the epilogue.  Skip the stack slot.  */
3376  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3377    cfa_off -= 8;
3378
3379  /* Restore all general registers.  */
3380  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3381    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3382      {
3383	reg = gen_rtx_REG (DImode, regno);
3384	do_restore (gen_gr_restore, reg, cfa_off);
3385	cfa_off -= 8;
3386      }
3387
3388  /* Restore the branch registers.  */
3389  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3390    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3391      {
3392	alt_regno = next_scratch_gr_reg ();
3393	alt_reg = gen_rtx_REG (DImode, alt_regno);
3394	do_restore (gen_movdi_x, alt_reg, cfa_off);
3395	cfa_off -= 8;
3396	reg = gen_rtx_REG (DImode, regno);
3397	emit_move_insn (reg, alt_reg);
3398      }
3399
3400  /* Restore floating point registers.  */
3401  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3402    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3403      {
3404        gcc_assert (!(cfa_off & 15));
3405	reg = gen_rtx_REG (XFmode, regno);
3406	do_restore (gen_fr_restore_x, reg, cfa_off);
3407	cfa_off -= 16;
3408      }
3409
3410  /* Restore ar.unat for real.  */
3411  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3412    {
3413      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3414      emit_move_insn (reg, ar_unat_save_reg);
3415    }
3416
3417  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3418
3419  finish_spill_pointers ();
3420
3421  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3422    {
3423      /* ??? At this point we must generate a magic insn that appears to
3424         modify the spill iterators, the stack pointer, and the frame
3425	 pointer.  This would allow the most scheduling freedom.  For now,
3426	 just hard stop.  */
3427      emit_insn (gen_blockage ());
3428    }
3429
3430  if (cfun->machine->ia64_eh_epilogue_sp)
3431    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3432  else if (frame_pointer_needed)
3433    {
3434      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3435      RTX_FRAME_RELATED_P (insn) = 1;
3436    }
3437  else if (current_frame_info.total_size)
3438    {
3439      rtx offset, frame_size_rtx;
3440
3441      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3442      if (CONST_OK_FOR_I (current_frame_info.total_size))
3443	offset = frame_size_rtx;
3444      else
3445	{
3446	  regno = next_scratch_gr_reg ();
3447	  offset = gen_rtx_REG (DImode, regno);
3448	  emit_move_insn (offset, frame_size_rtx);
3449	}
3450
3451      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3452				    offset));
3453
3454      RTX_FRAME_RELATED_P (insn) = 1;
3455      if (GET_CODE (offset) != CONST_INT)
3456	{
3457	  REG_NOTES (insn)
3458	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3459			gen_rtx_SET (VOIDmode,
3460				     stack_pointer_rtx,
3461				     gen_rtx_PLUS (DImode,
3462						   stack_pointer_rtx,
3463						   frame_size_rtx)),
3464			REG_NOTES (insn));
3465	}
3466    }
3467
3468  if (cfun->machine->ia64_eh_epilogue_bsp)
3469    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3470
3471  if (! sibcall_p)
3472    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3473  else
3474    {
3475      int fp = GR_REG (2);
3476      /* We need a throw away register here, r0 and r1 are reserved, so r2 is the
3477	 first available call clobbered register.  If there was a frame_pointer
3478	 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM,
3479	 so we have to make sure we're using the string "r2" when emitting
3480	 the register name for the assembler.  */
3481      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3482	fp = HARD_FRAME_POINTER_REGNUM;
3483
3484      /* We must emit an alloc to force the input registers to become output
3485	 registers.  Otherwise, if the callee tries to pass its parameters
3486	 through to another call without an intervening alloc, then these
3487	 values get lost.  */
3488      /* ??? We don't need to preserve all input registers.  We only need to
3489	 preserve those input registers used as arguments to the sibling call.
3490	 It is unclear how to compute that number here.  */
3491      if (current_frame_info.n_input_regs != 0)
3492	{
3493	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3494	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3495				const0_rtx, const0_rtx,
3496				n_inputs, const0_rtx));
3497	  RTX_FRAME_RELATED_P (insn) = 1;
3498	}
3499    }
3500}
3501
3502/* Return 1 if br.ret can do all the work required to return from a
3503   function.  */
3504
3505int
3506ia64_direct_return (void)
3507{
3508  if (reload_completed && ! frame_pointer_needed)
3509    {
3510      ia64_compute_frame_size (get_frame_size ());
3511
3512      return (current_frame_info.total_size == 0
3513	      && current_frame_info.n_spilled == 0
3514	      && current_frame_info.reg_save_b0 == 0
3515	      && current_frame_info.reg_save_pr == 0
3516	      && current_frame_info.reg_save_ar_pfs == 0
3517	      && current_frame_info.reg_save_ar_unat == 0
3518	      && current_frame_info.reg_save_ar_lc == 0);
3519    }
3520  return 0;
3521}
3522
3523/* Return the magic cookie that we use to hold the return address
3524   during early compilation.  */
3525
3526rtx
3527ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3528{
3529  if (count != 0)
3530    return NULL;
3531  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3532}
3533
3534/* Split this value after reload, now that we know where the return
3535   address is saved.  */
3536
3537void
3538ia64_split_return_addr_rtx (rtx dest)
3539{
3540  rtx src;
3541
3542  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3543    {
3544      if (current_frame_info.reg_save_b0 != 0)
3545	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3546      else
3547	{
3548	  HOST_WIDE_INT off;
3549	  unsigned int regno;
3550
3551	  /* Compute offset from CFA for BR0.  */
3552	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3553	  off = (current_frame_info.spill_cfa_off
3554		 + current_frame_info.spill_size);
3555	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3556	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3557	      off -= 8;
3558
3559	  /* Convert CFA offset to a register based offset.  */
3560	  if (frame_pointer_needed)
3561	    src = hard_frame_pointer_rtx;
3562	  else
3563	    {
3564	      src = stack_pointer_rtx;
3565	      off += current_frame_info.total_size;
3566	    }
3567
3568	  /* Load address into scratch register.  */
3569	  if (CONST_OK_FOR_I (off))
3570	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3571	  else
3572	    {
3573	      emit_move_insn (dest, GEN_INT (off));
3574	      emit_insn (gen_adddi3 (dest, src, dest));
3575	    }
3576
3577	  src = gen_rtx_MEM (Pmode, dest);
3578	}
3579    }
3580  else
3581    src = gen_rtx_REG (DImode, BR_REG (0));
3582
3583  emit_move_insn (dest, src);
3584}
3585
3586int
3587ia64_hard_regno_rename_ok (int from, int to)
3588{
3589  /* Don't clobber any of the registers we reserved for the prologue.  */
3590  if (to == current_frame_info.reg_fp
3591      || to == current_frame_info.reg_save_b0
3592      || to == current_frame_info.reg_save_pr
3593      || to == current_frame_info.reg_save_ar_pfs
3594      || to == current_frame_info.reg_save_ar_unat
3595      || to == current_frame_info.reg_save_ar_lc)
3596    return 0;
3597
3598  if (from == current_frame_info.reg_fp
3599      || from == current_frame_info.reg_save_b0
3600      || from == current_frame_info.reg_save_pr
3601      || from == current_frame_info.reg_save_ar_pfs
3602      || from == current_frame_info.reg_save_ar_unat
3603      || from == current_frame_info.reg_save_ar_lc)
3604    return 0;
3605
3606  /* Don't use output registers outside the register frame.  */
3607  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3608    return 0;
3609
3610  /* Retain even/oddness on predicate register pairs.  */
3611  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3612    return (from & 1) == (to & 1);
3613
3614  return 1;
3615}
3616
3617/* Target hook for assembling integer objects.  Handle word-sized
3618   aligned objects and detect the cases when @fptr is needed.  */
3619
3620static bool
3621ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3622{
3623  if (size == POINTER_SIZE / BITS_PER_UNIT
3624      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3625      && GET_CODE (x) == SYMBOL_REF
3626      && SYMBOL_REF_FUNCTION_P (x))
3627    {
3628      static const char * const directive[2][2] = {
3629	  /* 64-bit pointer */  /* 32-bit pointer */
3630	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3631	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3632      };
3633      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3634      output_addr_const (asm_out_file, x);
3635      fputs (")\n", asm_out_file);
3636      return true;
3637    }
3638  return default_assemble_integer (x, size, aligned_p);
3639}
3640
3641/* Emit the function prologue.  */
3642
3643static void
3644ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3645{
3646  int mask, grsave, grsave_prev;
3647
3648  if (current_frame_info.need_regstk)
3649    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3650	     current_frame_info.n_input_regs,
3651	     current_frame_info.n_local_regs,
3652	     current_frame_info.n_output_regs,
3653	     current_frame_info.n_rotate_regs);
3654
3655  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3656    return;
3657
3658  /* Emit the .prologue directive.  */
3659
3660  mask = 0;
3661  grsave = grsave_prev = 0;
3662  if (current_frame_info.reg_save_b0 != 0)
3663    {
3664      mask |= 8;
3665      grsave = grsave_prev = current_frame_info.reg_save_b0;
3666    }
3667  if (current_frame_info.reg_save_ar_pfs != 0
3668      && (grsave_prev == 0
3669	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3670    {
3671      mask |= 4;
3672      if (grsave_prev == 0)
3673	grsave = current_frame_info.reg_save_ar_pfs;
3674      grsave_prev = current_frame_info.reg_save_ar_pfs;
3675    }
3676  if (current_frame_info.reg_fp != 0
3677      && (grsave_prev == 0
3678	  || current_frame_info.reg_fp == grsave_prev + 1))
3679    {
3680      mask |= 2;
3681      if (grsave_prev == 0)
3682	grsave = HARD_FRAME_POINTER_REGNUM;
3683      grsave_prev = current_frame_info.reg_fp;
3684    }
3685  if (current_frame_info.reg_save_pr != 0
3686      && (grsave_prev == 0
3687	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3688    {
3689      mask |= 1;
3690      if (grsave_prev == 0)
3691	grsave = current_frame_info.reg_save_pr;
3692    }
3693
3694  if (mask && TARGET_GNU_AS)
3695    fprintf (file, "\t.prologue %d, %d\n", mask,
3696	     ia64_dbx_register_number (grsave));
3697  else
3698    fputs ("\t.prologue\n", file);
3699
3700  /* Emit a .spill directive, if necessary, to relocate the base of
3701     the register spill area.  */
3702  if (current_frame_info.spill_cfa_off != -16)
3703    fprintf (file, "\t.spill %ld\n",
3704	     (long) (current_frame_info.spill_cfa_off
3705		     + current_frame_info.spill_size));
3706}
3707
3708/* Emit the .body directive at the scheduled end of the prologue.  */
3709
3710static void
3711ia64_output_function_end_prologue (FILE *file)
3712{
3713  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3714    return;
3715
3716  fputs ("\t.body\n", file);
3717}
3718
3719/* Emit the function epilogue.  */
3720
3721static void
3722ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3723			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3724{
3725  int i;
3726
3727  if (current_frame_info.reg_fp)
3728    {
3729      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3730      reg_names[HARD_FRAME_POINTER_REGNUM]
3731	= reg_names[current_frame_info.reg_fp];
3732      reg_names[current_frame_info.reg_fp] = tmp;
3733    }
3734  if (! TARGET_REG_NAMES)
3735    {
3736      for (i = 0; i < current_frame_info.n_input_regs; i++)
3737	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3738      for (i = 0; i < current_frame_info.n_local_regs; i++)
3739	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3740      for (i = 0; i < current_frame_info.n_output_regs; i++)
3741	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3742    }
3743
3744  current_frame_info.initialized = 0;
3745}
3746
3747int
3748ia64_dbx_register_number (int regno)
3749{
3750  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3751     from its home at loc79 to something inside the register frame.  We
3752     must perform the same renumbering here for the debug info.  */
3753  if (current_frame_info.reg_fp)
3754    {
3755      if (regno == HARD_FRAME_POINTER_REGNUM)
3756	regno = current_frame_info.reg_fp;
3757      else if (regno == current_frame_info.reg_fp)
3758	regno = HARD_FRAME_POINTER_REGNUM;
3759    }
3760
3761  if (IN_REGNO_P (regno))
3762    return 32 + regno - IN_REG (0);
3763  else if (LOC_REGNO_P (regno))
3764    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3765  else if (OUT_REGNO_P (regno))
3766    return (32 + current_frame_info.n_input_regs
3767	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3768  else
3769    return regno;
3770}
3771
3772void
3773ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3774{
3775  rtx addr_reg, eight = GEN_INT (8);
3776
3777  /* The Intel assembler requires that the global __ia64_trampoline symbol
3778     be declared explicitly */
3779  if (!TARGET_GNU_AS)
3780    {
3781      static bool declared_ia64_trampoline = false;
3782
3783      if (!declared_ia64_trampoline)
3784	{
3785	  declared_ia64_trampoline = true;
3786	  (*targetm.asm_out.globalize_label) (asm_out_file,
3787					      "__ia64_trampoline");
3788	}
3789    }
3790
3791  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3792  addr = convert_memory_address (Pmode, addr);
3793  fnaddr = convert_memory_address (Pmode, fnaddr);
3794  static_chain = convert_memory_address (Pmode, static_chain);
3795
3796  /* Load up our iterator.  */
3797  addr_reg = gen_reg_rtx (Pmode);
3798  emit_move_insn (addr_reg, addr);
3799
3800  /* The first two words are the fake descriptor:
3801     __ia64_trampoline, ADDR+16.  */
3802  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3803		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3804  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3805
3806  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3807		  copy_to_reg (plus_constant (addr, 16)));
3808  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3809
3810  /* The third word is the target descriptor.  */
3811  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3812  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3813
3814  /* The fourth word is the static chain.  */
3815  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3816}
3817
3818/* Do any needed setup for a variadic function.  CUM has not been updated
3819   for the last named argument which has type TYPE and mode MODE.
3820
3821   We generate the actual spill instructions during prologue generation.  */
3822
3823static void
3824ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3825			     tree type, int * pretend_size,
3826			     int second_time ATTRIBUTE_UNUSED)
3827{
3828  CUMULATIVE_ARGS next_cum = *cum;
3829
3830  /* Skip the current argument.  */
3831  ia64_function_arg_advance (&next_cum, mode, type, 1);
3832
3833  if (next_cum.words < MAX_ARGUMENT_SLOTS)
3834    {
3835      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3836      *pretend_size = n * UNITS_PER_WORD;
3837      cfun->machine->n_varargs = n;
3838    }
3839}
3840
3841/* Check whether TYPE is a homogeneous floating point aggregate.  If
3842   it is, return the mode of the floating point type that appears
3843   in all leafs.  If it is not, return VOIDmode.
3844
3845   An aggregate is a homogeneous floating point aggregate is if all
3846   fields/elements in it have the same floating point type (e.g,
3847   SFmode).  128-bit quad-precision floats are excluded.
3848
3849   Variable sized aggregates should never arrive here, since we should
3850   have already decided to pass them by reference.  Top-level zero-sized
3851   aggregates are excluded because our parallels crash the middle-end.  */
3852
3853static enum machine_mode
3854hfa_element_mode (tree type, bool nested)
3855{
3856  enum machine_mode element_mode = VOIDmode;
3857  enum machine_mode mode;
3858  enum tree_code code = TREE_CODE (type);
3859  int know_element_mode = 0;
3860  tree t;
3861
3862  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3863    return VOIDmode;
3864
3865  switch (code)
3866    {
3867    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3868    case BOOLEAN_TYPE:	case POINTER_TYPE:
3869    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3870    case LANG_TYPE:		case FUNCTION_TYPE:
3871      return VOIDmode;
3872
3873      /* Fortran complex types are supposed to be HFAs, so we need to handle
3874	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3875	 types though.  */
3876    case COMPLEX_TYPE:
3877      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3878	  && TYPE_MODE (type) != TCmode)
3879	return GET_MODE_INNER (TYPE_MODE (type));
3880      else
3881	return VOIDmode;
3882
3883    case REAL_TYPE:
3884      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3885	 mode if this is contained within an aggregate.  */
3886      if (nested && TYPE_MODE (type) != TFmode)
3887	return TYPE_MODE (type);
3888      else
3889	return VOIDmode;
3890
3891    case ARRAY_TYPE:
3892      return hfa_element_mode (TREE_TYPE (type), 1);
3893
3894    case RECORD_TYPE:
3895    case UNION_TYPE:
3896    case QUAL_UNION_TYPE:
3897      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3898	{
3899	  if (TREE_CODE (t) != FIELD_DECL)
3900	    continue;
3901
3902	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3903	  if (know_element_mode)
3904	    {
3905	      if (mode != element_mode)
3906		return VOIDmode;
3907	    }
3908	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3909	    return VOIDmode;
3910	  else
3911	    {
3912	      know_element_mode = 1;
3913	      element_mode = mode;
3914	    }
3915	}
3916      return element_mode;
3917
3918    default:
3919      /* If we reach here, we probably have some front-end specific type
3920	 that the backend doesn't know about.  This can happen via the
3921	 aggregate_value_p call in init_function_start.  All we can do is
3922	 ignore unknown tree types.  */
3923      return VOIDmode;
3924    }
3925
3926  return VOIDmode;
3927}
3928
3929/* Return the number of words required to hold a quantity of TYPE and MODE
3930   when passed as an argument.  */
3931static int
3932ia64_function_arg_words (tree type, enum machine_mode mode)
3933{
3934  int words;
3935
3936  if (mode == BLKmode)
3937    words = int_size_in_bytes (type);
3938  else
3939    words = GET_MODE_SIZE (mode);
3940
3941  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3942}
3943
3944/* Return the number of registers that should be skipped so the current
3945   argument (described by TYPE and WORDS) will be properly aligned.
3946
3947   Integer and float arguments larger than 8 bytes start at the next
3948   even boundary.  Aggregates larger than 8 bytes start at the next
3949   even boundary if the aggregate has 16 byte alignment.  Note that
3950   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3951   but are still to be aligned in registers.
3952
3953   ??? The ABI does not specify how to handle aggregates with
3954   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3955   all as if they had 16 byte alignment.  Such aggregates can occur
3956   only if gcc extensions are used.  */
3957static int
3958ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3959{
3960  if ((cum->words & 1) == 0)
3961    return 0;
3962
3963  if (type
3964      && TREE_CODE (type) != INTEGER_TYPE
3965      && TREE_CODE (type) != REAL_TYPE)
3966    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3967  else
3968    return words > 1;
3969}
3970
3971/* Return rtx for register where argument is passed, or zero if it is passed
3972   on the stack.  */
3973/* ??? 128-bit quad-precision floats are always passed in general
3974   registers.  */
3975
3976rtx
3977ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3978		   int named, int incoming)
3979{
3980  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3981  int words = ia64_function_arg_words (type, mode);
3982  int offset = ia64_function_arg_offset (cum, type, words);
3983  enum machine_mode hfa_mode = VOIDmode;
3984
3985  /* If all argument slots are used, then it must go on the stack.  */
3986  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3987    return 0;
3988
3989  /* Check for and handle homogeneous FP aggregates.  */
3990  if (type)
3991    hfa_mode = hfa_element_mode (type, 0);
3992
3993  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3994     and unprototyped hfas are passed specially.  */
3995  if (hfa_mode != VOIDmode && (! cum->prototype || named))
3996    {
3997      rtx loc[16];
3998      int i = 0;
3999      int fp_regs = cum->fp_regs;
4000      int int_regs = cum->words + offset;
4001      int hfa_size = GET_MODE_SIZE (hfa_mode);
4002      int byte_size;
4003      int args_byte_size;
4004
4005      /* If prototyped, pass it in FR regs then GR regs.
4006	 If not prototyped, pass it in both FR and GR regs.
4007
4008	 If this is an SFmode aggregate, then it is possible to run out of
4009	 FR regs while GR regs are still left.  In that case, we pass the
4010	 remaining part in the GR regs.  */
4011
4012      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4013	 of the argument, the last FP register, or the last argument slot.  */
4014
4015      byte_size = ((mode == BLKmode)
4016		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4017      args_byte_size = int_regs * UNITS_PER_WORD;
4018      offset = 0;
4019      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4020	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4021	{
4022	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4023				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4024							      + fp_regs)),
4025				      GEN_INT (offset));
4026	  offset += hfa_size;
4027	  args_byte_size += hfa_size;
4028	  fp_regs++;
4029	}
4030
4031      /* If no prototype, then the whole thing must go in GR regs.  */
4032      if (! cum->prototype)
4033	offset = 0;
4034      /* If this is an SFmode aggregate, then we might have some left over
4035	 that needs to go in GR regs.  */
4036      else if (byte_size != offset)
4037	int_regs += offset / UNITS_PER_WORD;
4038
4039      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4040
4041      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4042	{
4043	  enum machine_mode gr_mode = DImode;
4044	  unsigned int gr_size;
4045
4046	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4047	     then this goes in a GR reg left adjusted/little endian, right
4048	     adjusted/big endian.  */
4049	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4050	     always right adjusted/little endian.  */
4051	  if (offset & 0x4)
4052	    gr_mode = SImode;
4053	  /* If we have an even 4 byte hunk because the aggregate is a
4054	     multiple of 4 bytes in size, then this goes in a GR reg right
4055	     adjusted/little endian.  */
4056	  else if (byte_size - offset == 4)
4057	    gr_mode = SImode;
4058
4059	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4060				      gen_rtx_REG (gr_mode, (basereg
4061							     + int_regs)),
4062				      GEN_INT (offset));
4063
4064	  gr_size = GET_MODE_SIZE (gr_mode);
4065	  offset += gr_size;
4066	  if (gr_size == UNITS_PER_WORD
4067	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4068	    int_regs++;
4069	  else if (gr_size > UNITS_PER_WORD)
4070	    int_regs += gr_size / UNITS_PER_WORD;
4071	}
4072      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4073    }
4074
4075  /* Integral and aggregates go in general registers.  If we have run out of
4076     FR registers, then FP values must also go in general registers.  This can
4077     happen when we have a SFmode HFA.  */
4078  else if (mode == TFmode || mode == TCmode
4079	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4080    {
4081      int byte_size = ((mode == BLKmode)
4082                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4083      if (BYTES_BIG_ENDIAN
4084	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4085	&& byte_size < UNITS_PER_WORD
4086	&& byte_size > 0)
4087	{
4088	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4089					  gen_rtx_REG (DImode,
4090						       (basereg + cum->words
4091							+ offset)),
4092					  const0_rtx);
4093	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4094	}
4095      else
4096	return gen_rtx_REG (mode, basereg + cum->words + offset);
4097
4098    }
4099
4100  /* If there is a prototype, then FP values go in a FR register when
4101     named, and in a GR register when unnamed.  */
4102  else if (cum->prototype)
4103    {
4104      if (named)
4105	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4106      /* In big-endian mode, an anonymous SFmode value must be represented
4107         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4108	 the value into the high half of the general register.  */
4109      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4110	return gen_rtx_PARALLEL (mode,
4111		 gen_rtvec (1,
4112                   gen_rtx_EXPR_LIST (VOIDmode,
4113		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4114				      const0_rtx)));
4115      else
4116	return gen_rtx_REG (mode, basereg + cum->words + offset);
4117    }
4118  /* If there is no prototype, then FP values go in both FR and GR
4119     registers.  */
4120  else
4121    {
4122      /* See comment above.  */
4123      enum machine_mode inner_mode =
4124	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4125
4126      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4127				      gen_rtx_REG (mode, (FR_ARG_FIRST
4128							  + cum->fp_regs)),
4129				      const0_rtx);
4130      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4131				      gen_rtx_REG (inner_mode,
4132						   (basereg + cum->words
4133						    + offset)),
4134				      const0_rtx);
4135
4136      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4137    }
4138}
4139
4140/* Return number of bytes, at the beginning of the argument, that must be
4141   put in registers.  0 is the argument is entirely in registers or entirely
4142   in memory.  */
4143
4144static int
4145ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4146			tree type, bool named ATTRIBUTE_UNUSED)
4147{
4148  int words = ia64_function_arg_words (type, mode);
4149  int offset = ia64_function_arg_offset (cum, type, words);
4150
4151  /* If all argument slots are used, then it must go on the stack.  */
4152  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4153    return 0;
4154
4155  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4156     it fits within the 8 argument slots, then it goes entirely in
4157     registers.  If it extends past the last argument slot, then the rest
4158     goes on the stack.  */
4159
4160  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4161    return 0;
4162
4163  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4164}
4165
4166/* Update CUM to point after this argument.  This is patterned after
4167   ia64_function_arg.  */
4168
4169void
4170ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4171			   tree type, int named)
4172{
4173  int words = ia64_function_arg_words (type, mode);
4174  int offset = ia64_function_arg_offset (cum, type, words);
4175  enum machine_mode hfa_mode = VOIDmode;
4176
4177  /* If all arg slots are already full, then there is nothing to do.  */
4178  if (cum->words >= MAX_ARGUMENT_SLOTS)
4179    return;
4180
4181  cum->words += words + offset;
4182
4183  /* Check for and handle homogeneous FP aggregates.  */
4184  if (type)
4185    hfa_mode = hfa_element_mode (type, 0);
4186
4187  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4188     and unprototyped hfas are passed specially.  */
4189  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4190    {
4191      int fp_regs = cum->fp_regs;
4192      /* This is the original value of cum->words + offset.  */
4193      int int_regs = cum->words - words;
4194      int hfa_size = GET_MODE_SIZE (hfa_mode);
4195      int byte_size;
4196      int args_byte_size;
4197
4198      /* If prototyped, pass it in FR regs then GR regs.
4199	 If not prototyped, pass it in both FR and GR regs.
4200
4201	 If this is an SFmode aggregate, then it is possible to run out of
4202	 FR regs while GR regs are still left.  In that case, we pass the
4203	 remaining part in the GR regs.  */
4204
4205      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4206	 of the argument, the last FP register, or the last argument slot.  */
4207
4208      byte_size = ((mode == BLKmode)
4209		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4210      args_byte_size = int_regs * UNITS_PER_WORD;
4211      offset = 0;
4212      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4213	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4214	{
4215	  offset += hfa_size;
4216	  args_byte_size += hfa_size;
4217	  fp_regs++;
4218	}
4219
4220      cum->fp_regs = fp_regs;
4221    }
4222
4223  /* Integral and aggregates go in general registers.  So do TFmode FP values.
4224     If we have run out of FR registers, then other FP values must also go in
4225     general registers.  This can happen when we have a SFmode HFA.  */
4226  else if (mode == TFmode || mode == TCmode
4227           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4228    cum->int_regs = cum->words;
4229
4230  /* If there is a prototype, then FP values go in a FR register when
4231     named, and in a GR register when unnamed.  */
4232  else if (cum->prototype)
4233    {
4234      if (! named)
4235	cum->int_regs = cum->words;
4236      else
4237	/* ??? Complex types should not reach here.  */
4238	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4239    }
4240  /* If there is no prototype, then FP values go in both FR and GR
4241     registers.  */
4242  else
4243    {
4244      /* ??? Complex types should not reach here.  */
4245      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4246      cum->int_regs = cum->words;
4247    }
4248}
4249
4250/* Arguments with alignment larger than 8 bytes start at the next even
4251   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
4252   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
4253
4254int
4255ia64_function_arg_boundary (enum machine_mode mode, tree type)
4256{
4257
4258  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4259    return PARM_BOUNDARY * 2;
4260
4261  if (type)
4262    {
4263      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4264        return PARM_BOUNDARY * 2;
4265      else
4266        return PARM_BOUNDARY;
4267    }
4268
4269  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4270    return PARM_BOUNDARY * 2;
4271  else
4272    return PARM_BOUNDARY;
4273}
4274
4275/* True if it is OK to do sibling call optimization for the specified
4276   call expression EXP.  DECL will be the called function, or NULL if
4277   this is an indirect call.  */
4278static bool
4279ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4280{
4281  /* We can't perform a sibcall if the current function has the syscall_linkage
4282     attribute.  */
4283  if (lookup_attribute ("syscall_linkage",
4284			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4285    return false;
4286
4287  /* We must always return with our current GP.  This means we can
4288     only sibcall to functions defined in the current module.  */
4289  return decl && (*targetm.binds_local_p) (decl);
4290}
4291
4292
4293/* Implement va_arg.  */
4294
4295static tree
4296ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4297{
4298  /* Variable sized types are passed by reference.  */
4299  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4300    {
4301      tree ptrtype = build_pointer_type (type);
4302      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4303      return build_va_arg_indirect_ref (addr);
4304    }
4305
4306  /* Aggregate arguments with alignment larger than 8 bytes start at
4307     the next even boundary.  Integer and floating point arguments
4308     do so if they are larger than 8 bytes, whether or not they are
4309     also aligned larger than 8 bytes.  */
4310  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4311      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4312    {
4313      tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
4314		       build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4315      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4316		  build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4317      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4318      gimplify_and_add (t, pre_p);
4319    }
4320
4321  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4322}
4323
4324/* Return 1 if function return value returned in memory.  Return 0 if it is
4325   in a register.  */
4326
4327static bool
4328ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4329{
4330  enum machine_mode mode;
4331  enum machine_mode hfa_mode;
4332  HOST_WIDE_INT byte_size;
4333
4334  mode = TYPE_MODE (valtype);
4335  byte_size = GET_MODE_SIZE (mode);
4336  if (mode == BLKmode)
4337    {
4338      byte_size = int_size_in_bytes (valtype);
4339      if (byte_size < 0)
4340	return true;
4341    }
4342
4343  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
4344
4345  hfa_mode = hfa_element_mode (valtype, 0);
4346  if (hfa_mode != VOIDmode)
4347    {
4348      int hfa_size = GET_MODE_SIZE (hfa_mode);
4349
4350      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4351	return true;
4352      else
4353	return false;
4354    }
4355  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4356    return true;
4357  else
4358    return false;
4359}
4360
4361/* Return rtx for register that holds the function return value.  */
4362
4363rtx
4364ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4365{
4366  enum machine_mode mode;
4367  enum machine_mode hfa_mode;
4368
4369  mode = TYPE_MODE (valtype);
4370  hfa_mode = hfa_element_mode (valtype, 0);
4371
4372  if (hfa_mode != VOIDmode)
4373    {
4374      rtx loc[8];
4375      int i;
4376      int hfa_size;
4377      int byte_size;
4378      int offset;
4379
4380      hfa_size = GET_MODE_SIZE (hfa_mode);
4381      byte_size = ((mode == BLKmode)
4382		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4383      offset = 0;
4384      for (i = 0; offset < byte_size; i++)
4385	{
4386	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4387				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4388				      GEN_INT (offset));
4389	  offset += hfa_size;
4390	}
4391      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4392    }
4393  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4394    return gen_rtx_REG (mode, FR_ARG_FIRST);
4395  else
4396    {
4397      bool need_parallel = false;
4398
4399      /* In big-endian mode, we need to manage the layout of aggregates
4400	 in the registers so that we get the bits properly aligned in
4401	 the highpart of the registers.  */
4402      if (BYTES_BIG_ENDIAN
4403	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4404	need_parallel = true;
4405
4406      /* Something like struct S { long double x; char a[0] } is not an
4407	 HFA structure, and therefore doesn't go in fp registers.  But
4408	 the middle-end will give it XFmode anyway, and XFmode values
4409	 don't normally fit in integer registers.  So we need to smuggle
4410	 the value inside a parallel.  */
4411      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4412	need_parallel = true;
4413
4414      if (need_parallel)
4415	{
4416	  rtx loc[8];
4417	  int offset;
4418	  int bytesize;
4419	  int i;
4420
4421	  offset = 0;
4422	  bytesize = int_size_in_bytes (valtype);
4423	  /* An empty PARALLEL is invalid here, but the return value
4424	     doesn't matter for empty structs.  */
4425	  if (bytesize == 0)
4426	    return gen_rtx_REG (mode, GR_RET_FIRST);
4427	  for (i = 0; offset < bytesize; i++)
4428	    {
4429	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4430					  gen_rtx_REG (DImode,
4431						       GR_RET_FIRST + i),
4432					  GEN_INT (offset));
4433	      offset += UNITS_PER_WORD;
4434	    }
4435	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4436	}
4437
4438      return gen_rtx_REG (mode, GR_RET_FIRST);
4439    }
4440}
4441
4442/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4443   We need to emit DTP-relative relocations.  */
4444
4445static void
4446ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4447{
4448  gcc_assert (size == 4 || size == 8);
4449  if (size == 4)
4450    fputs ("\tdata4.ua\t@dtprel(", file);
4451  else
4452    fputs ("\tdata8.ua\t@dtprel(", file);
4453  output_addr_const (file, x);
4454  fputs (")", file);
4455}
4456
4457/* Print a memory address as an operand to reference that memory location.  */
4458
4459/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4460   also call this from ia64_print_operand for memory addresses.  */
4461
4462void
4463ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4464			    rtx address ATTRIBUTE_UNUSED)
4465{
4466}
4467
4468/* Print an operand to an assembler instruction.
4469   C	Swap and print a comparison operator.
4470   D	Print an FP comparison operator.
4471   E    Print 32 - constant, for SImode shifts as extract.
4472   e    Print 64 - constant, for DImode rotates.
4473   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4474        a floating point register emitted normally.
4475   I	Invert a predicate register by adding 1.
4476   J    Select the proper predicate register for a condition.
4477   j    Select the inverse predicate register for a condition.
4478   O	Append .acq for volatile load.
4479   P	Postincrement of a MEM.
4480   Q	Append .rel for volatile store.
4481   S	Shift amount for shladd instruction.
4482   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4483	for Intel assembler.
4484   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4485	for Intel assembler.
4486   X	A pair of floating point registers.
4487   r	Print register name, or constant 0 as r0.  HP compatibility for
4488	Linux kernel.
4489   v    Print vector constant value as an 8-byte integer value.  */
4490
4491void
4492ia64_print_operand (FILE * file, rtx x, int code)
4493{
4494  const char *str;
4495
4496  switch (code)
4497    {
4498    case 0:
4499      /* Handled below.  */
4500      break;
4501
4502    case 'C':
4503      {
4504	enum rtx_code c = swap_condition (GET_CODE (x));
4505	fputs (GET_RTX_NAME (c), file);
4506	return;
4507      }
4508
4509    case 'D':
4510      switch (GET_CODE (x))
4511	{
4512	case NE:
4513	  str = "neq";
4514	  break;
4515	case UNORDERED:
4516	  str = "unord";
4517	  break;
4518	case ORDERED:
4519	  str = "ord";
4520	  break;
4521	default:
4522	  str = GET_RTX_NAME (GET_CODE (x));
4523	  break;
4524	}
4525      fputs (str, file);
4526      return;
4527
4528    case 'E':
4529      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4530      return;
4531
4532    case 'e':
4533      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4534      return;
4535
4536    case 'F':
4537      if (x == CONST0_RTX (GET_MODE (x)))
4538	str = reg_names [FR_REG (0)];
4539      else if (x == CONST1_RTX (GET_MODE (x)))
4540	str = reg_names [FR_REG (1)];
4541      else
4542	{
4543	  gcc_assert (GET_CODE (x) == REG);
4544	  str = reg_names [REGNO (x)];
4545	}
4546      fputs (str, file);
4547      return;
4548
4549    case 'I':
4550      fputs (reg_names [REGNO (x) + 1], file);
4551      return;
4552
4553    case 'J':
4554    case 'j':
4555      {
4556	unsigned int regno = REGNO (XEXP (x, 0));
4557	if (GET_CODE (x) == EQ)
4558	  regno += 1;
4559	if (code == 'j')
4560	  regno ^= 1;
4561        fputs (reg_names [regno], file);
4562      }
4563      return;
4564
4565    case 'O':
4566      if (MEM_VOLATILE_P (x))
4567	fputs(".acq", file);
4568      return;
4569
4570    case 'P':
4571      {
4572	HOST_WIDE_INT value;
4573
4574	switch (GET_CODE (XEXP (x, 0)))
4575	  {
4576	  default:
4577	    return;
4578
4579	  case POST_MODIFY:
4580	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4581	    if (GET_CODE (x) == CONST_INT)
4582	      value = INTVAL (x);
4583	    else
4584	      {
4585		gcc_assert (GET_CODE (x) == REG);
4586		fprintf (file, ", %s", reg_names[REGNO (x)]);
4587		return;
4588	      }
4589	    break;
4590
4591	  case POST_INC:
4592	    value = GET_MODE_SIZE (GET_MODE (x));
4593	    break;
4594
4595	  case POST_DEC:
4596	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4597	    break;
4598	  }
4599
4600	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4601	return;
4602      }
4603
4604    case 'Q':
4605      if (MEM_VOLATILE_P (x))
4606	fputs(".rel", file);
4607      return;
4608
4609    case 'S':
4610      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4611      return;
4612
4613    case 'T':
4614      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4615	{
4616	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4617	  return;
4618	}
4619      break;
4620
4621    case 'U':
4622      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4623	{
4624	  const char *prefix = "0x";
4625	  if (INTVAL (x) & 0x80000000)
4626	    {
4627	      fprintf (file, "0xffffffff");
4628	      prefix = "";
4629	    }
4630	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4631	  return;
4632	}
4633      break;
4634
4635    case 'X':
4636      {
4637	unsigned int regno = REGNO (x);
4638	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4639      }
4640      return;
4641
4642    case 'r':
4643      /* If this operand is the constant zero, write it as register zero.
4644	 Any register, zero, or CONST_INT value is OK here.  */
4645      if (GET_CODE (x) == REG)
4646	fputs (reg_names[REGNO (x)], file);
4647      else if (x == CONST0_RTX (GET_MODE (x)))
4648	fputs ("r0", file);
4649      else if (GET_CODE (x) == CONST_INT)
4650	output_addr_const (file, x);
4651      else
4652	output_operand_lossage ("invalid %%r value");
4653      return;
4654
4655    case 'v':
4656      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4657      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4658      break;
4659
4660    case '+':
4661      {
4662	const char *which;
4663
4664	/* For conditional branches, returns or calls, substitute
4665	   sptk, dptk, dpnt, or spnt for %s.  */
4666	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4667	if (x)
4668	  {
4669	    int pred_val = INTVAL (XEXP (x, 0));
4670
4671	    /* Guess top and bottom 10% statically predicted.  */
4672	    if (pred_val < REG_BR_PROB_BASE / 50
4673		&& br_prob_note_reliable_p (x))
4674	      which = ".spnt";
4675	    else if (pred_val < REG_BR_PROB_BASE / 2)
4676	      which = ".dpnt";
4677	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4678		     || !br_prob_note_reliable_p (x))
4679	      which = ".dptk";
4680	    else
4681	      which = ".sptk";
4682	  }
4683	else if (GET_CODE (current_output_insn) == CALL_INSN)
4684	  which = ".sptk";
4685	else
4686	  which = ".dptk";
4687
4688	fputs (which, file);
4689	return;
4690      }
4691
4692    case ',':
4693      x = current_insn_predicate;
4694      if (x)
4695	{
4696	  unsigned int regno = REGNO (XEXP (x, 0));
4697	  if (GET_CODE (x) == EQ)
4698	    regno += 1;
4699          fprintf (file, "(%s) ", reg_names [regno]);
4700	}
4701      return;
4702
4703    default:
4704      output_operand_lossage ("ia64_print_operand: unknown code");
4705      return;
4706    }
4707
4708  switch (GET_CODE (x))
4709    {
4710      /* This happens for the spill/restore instructions.  */
4711    case POST_INC:
4712    case POST_DEC:
4713    case POST_MODIFY:
4714      x = XEXP (x, 0);
4715      /* ... fall through ...  */
4716
4717    case REG:
4718      fputs (reg_names [REGNO (x)], file);
4719      break;
4720
4721    case MEM:
4722      {
4723	rtx addr = XEXP (x, 0);
4724	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4725	  addr = XEXP (addr, 0);
4726	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4727	break;
4728      }
4729
4730    default:
4731      output_addr_const (file, x);
4732      break;
4733    }
4734
4735  return;
4736}
4737
4738/* Compute a (partial) cost for rtx X.  Return true if the complete
4739   cost has been computed, and false if subexpressions should be
4740   scanned.  In either case, *TOTAL contains the cost result.  */
4741/* ??? This is incomplete.  */
4742
4743static bool
4744ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4745{
4746  switch (code)
4747    {
4748    case CONST_INT:
4749      switch (outer_code)
4750        {
4751        case SET:
4752	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4753	  return true;
4754        case PLUS:
4755	  if (CONST_OK_FOR_I (INTVAL (x)))
4756	    *total = 0;
4757	  else if (CONST_OK_FOR_J (INTVAL (x)))
4758	    *total = 1;
4759	  else
4760	    *total = COSTS_N_INSNS (1);
4761	  return true;
4762        default:
4763	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4764	    *total = 0;
4765	  else
4766	    *total = COSTS_N_INSNS (1);
4767	  return true;
4768	}
4769
4770    case CONST_DOUBLE:
4771      *total = COSTS_N_INSNS (1);
4772      return true;
4773
4774    case CONST:
4775    case SYMBOL_REF:
4776    case LABEL_REF:
4777      *total = COSTS_N_INSNS (3);
4778      return true;
4779
4780    case MULT:
4781      /* For multiplies wider than HImode, we have to go to the FPU,
4782         which normally involves copies.  Plus there's the latency
4783         of the multiply itself, and the latency of the instructions to
4784         transfer integer regs to FP regs.  */
4785      /* ??? Check for FP mode.  */
4786      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4787        *total = COSTS_N_INSNS (10);
4788      else
4789	*total = COSTS_N_INSNS (2);
4790      return true;
4791
4792    case PLUS:
4793    case MINUS:
4794    case ASHIFT:
4795    case ASHIFTRT:
4796    case LSHIFTRT:
4797      *total = COSTS_N_INSNS (1);
4798      return true;
4799
4800    case DIV:
4801    case UDIV:
4802    case MOD:
4803    case UMOD:
4804      /* We make divide expensive, so that divide-by-constant will be
4805         optimized to a multiply.  */
4806      *total = COSTS_N_INSNS (60);
4807      return true;
4808
4809    default:
4810      return false;
4811    }
4812}
4813
4814/* Calculate the cost of moving data from a register in class FROM to
4815   one in class TO, using MODE.  */
4816
4817int
4818ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4819			 enum reg_class to)
4820{
4821  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4822  if (to == ADDL_REGS)
4823    to = GR_REGS;
4824  if (from == ADDL_REGS)
4825    from = GR_REGS;
4826
4827  /* All costs are symmetric, so reduce cases by putting the
4828     lower number class as the destination.  */
4829  if (from < to)
4830    {
4831      enum reg_class tmp = to;
4832      to = from, from = tmp;
4833    }
4834
4835  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4836     so that we get secondary memory reloads.  Between FR_REGS,
4837     we have to make this at least as expensive as MEMORY_MOVE_COST
4838     to avoid spectacularly poor register class preferencing.  */
4839  if (mode == XFmode || mode == RFmode)
4840    {
4841      if (to != GR_REGS || from != GR_REGS)
4842        return MEMORY_MOVE_COST (mode, to, 0);
4843      else
4844	return 3;
4845    }
4846
4847  switch (to)
4848    {
4849    case PR_REGS:
4850      /* Moving between PR registers takes two insns.  */
4851      if (from == PR_REGS)
4852	return 3;
4853      /* Moving between PR and anything but GR is impossible.  */
4854      if (from != GR_REGS)
4855	return MEMORY_MOVE_COST (mode, to, 0);
4856      break;
4857
4858    case BR_REGS:
4859      /* Moving between BR and anything but GR is impossible.  */
4860      if (from != GR_REGS && from != GR_AND_BR_REGS)
4861	return MEMORY_MOVE_COST (mode, to, 0);
4862      break;
4863
4864    case AR_I_REGS:
4865    case AR_M_REGS:
4866      /* Moving between AR and anything but GR is impossible.  */
4867      if (from != GR_REGS)
4868	return MEMORY_MOVE_COST (mode, to, 0);
4869      break;
4870
4871    case GR_REGS:
4872    case FR_REGS:
4873    case FP_REGS:
4874    case GR_AND_FR_REGS:
4875    case GR_AND_BR_REGS:
4876    case ALL_REGS:
4877      break;
4878
4879    default:
4880      gcc_unreachable ();
4881    }
4882
4883  return 2;
4884}
4885
4886/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
4887   to use when copying X into that class.  */
4888
4889enum reg_class
4890ia64_preferred_reload_class (rtx x, enum reg_class class)
4891{
4892  switch (class)
4893    {
4894    case FR_REGS:
4895    case FP_REGS:
4896      /* Don't allow volatile mem reloads into floating point registers.
4897	 This is defined to force reload to choose the r/m case instead
4898	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
4899      if (MEM_P (x) && MEM_VOLATILE_P (x))
4900	return NO_REGS;
4901
4902      /* Force all unrecognized constants into the constant pool.  */
4903      if (CONSTANT_P (x))
4904	return NO_REGS;
4905      break;
4906
4907    case AR_M_REGS:
4908    case AR_I_REGS:
4909      if (!OBJECT_P (x))
4910	return NO_REGS;
4911      break;
4912
4913    default:
4914      break;
4915    }
4916
4917  return class;
4918}
4919
4920/* This function returns the register class required for a secondary
4921   register when copying between one of the registers in CLASS, and X,
4922   using MODE.  A return value of NO_REGS means that no secondary register
4923   is required.  */
4924
4925enum reg_class
4926ia64_secondary_reload_class (enum reg_class class,
4927			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4928{
4929  int regno = -1;
4930
4931  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4932    regno = true_regnum (x);
4933
4934  switch (class)
4935    {
4936    case BR_REGS:
4937    case AR_M_REGS:
4938    case AR_I_REGS:
4939      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4940	 interaction.  We end up with two pseudos with overlapping lifetimes
4941	 both of which are equiv to the same constant, and both which need
4942	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4943	 changes depending on the path length, which means the qty_first_reg
4944	 check in make_regs_eqv can give different answers at different times.
4945	 At some point I'll probably need a reload_indi pattern to handle
4946	 this.
4947
4948	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4949	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4950	 non-general registers for good measure.  */
4951      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4952	return GR_REGS;
4953
4954      /* This is needed if a pseudo used as a call_operand gets spilled to a
4955	 stack slot.  */
4956      if (GET_CODE (x) == MEM)
4957	return GR_REGS;
4958      break;
4959
4960    case FR_REGS:
4961    case FP_REGS:
4962      /* Need to go through general registers to get to other class regs.  */
4963      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4964	return GR_REGS;
4965
4966      /* This can happen when a paradoxical subreg is an operand to the
4967	 muldi3 pattern.  */
4968      /* ??? This shouldn't be necessary after instruction scheduling is
4969	 enabled, because paradoxical subregs are not accepted by
4970	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4971	 stop the paradoxical subreg stupidity in the *_operand functions
4972	 in recog.c.  */
4973      if (GET_CODE (x) == MEM
4974	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4975	      || GET_MODE (x) == QImode))
4976	return GR_REGS;
4977
4978      /* This can happen because of the ior/and/etc patterns that accept FP
4979	 registers as operands.  If the third operand is a constant, then it
4980	 needs to be reloaded into a FP register.  */
4981      if (GET_CODE (x) == CONST_INT)
4982	return GR_REGS;
4983
4984      /* This can happen because of register elimination in a muldi3 insn.
4985	 E.g. `26107 * (unsigned long)&u'.  */
4986      if (GET_CODE (x) == PLUS)
4987	return GR_REGS;
4988      break;
4989
4990    case PR_REGS:
4991      /* ??? This happens if we cse/gcse a BImode value across a call,
4992	 and the function has a nonlocal goto.  This is because global
4993	 does not allocate call crossing pseudos to hard registers when
4994	 current_function_has_nonlocal_goto is true.  This is relatively
4995	 common for C++ programs that use exceptions.  To reproduce,
4996	 return NO_REGS and compile libstdc++.  */
4997      if (GET_CODE (x) == MEM)
4998	return GR_REGS;
4999
5000      /* This can happen when we take a BImode subreg of a DImode value,
5001	 and that DImode value winds up in some non-GR register.  */
5002      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5003	return GR_REGS;
5004      break;
5005
5006    default:
5007      break;
5008    }
5009
5010  return NO_REGS;
5011}
5012
5013
5014/* Parse the -mfixed-range= option string.  */
5015
5016static void
5017fix_range (const char *const_str)
5018{
5019  int i, first, last;
5020  char *str, *dash, *comma;
5021
5022  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5023     REG2 are either register names or register numbers.  The effect
5024     of this option is to mark the registers in the range from REG1 to
5025     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5026     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5027
5028  i = strlen (const_str);
5029  str = (char *) alloca (i + 1);
5030  memcpy (str, const_str, i + 1);
5031
5032  while (1)
5033    {
5034      dash = strchr (str, '-');
5035      if (!dash)
5036	{
5037	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5038	  return;
5039	}
5040      *dash = '\0';
5041
5042      comma = strchr (dash + 1, ',');
5043      if (comma)
5044	*comma = '\0';
5045
5046      first = decode_reg_name (str);
5047      if (first < 0)
5048	{
5049	  warning (0, "unknown register name: %s", str);
5050	  return;
5051	}
5052
5053      last = decode_reg_name (dash + 1);
5054      if (last < 0)
5055	{
5056	  warning (0, "unknown register name: %s", dash + 1);
5057	  return;
5058	}
5059
5060      *dash = '-';
5061
5062      if (first > last)
5063	{
5064	  warning (0, "%s-%s is an empty range", str, dash + 1);
5065	  return;
5066	}
5067
5068      for (i = first; i <= last; ++i)
5069	fixed_regs[i] = call_used_regs[i] = 1;
5070
5071      if (!comma)
5072	break;
5073
5074      *comma = ',';
5075      str = comma + 1;
5076    }
5077}
5078
5079/* Implement TARGET_HANDLE_OPTION.  */
5080
5081static bool
5082ia64_handle_option (size_t code, const char *arg, int value)
5083{
5084  switch (code)
5085    {
5086    case OPT_mfixed_range_:
5087      fix_range (arg);
5088      return true;
5089
5090    case OPT_mtls_size_:
5091      if (value != 14 && value != 22 && value != 64)
5092	error ("bad value %<%s%> for -mtls-size= switch", arg);
5093      return true;
5094
5095    case OPT_mtune_:
5096      {
5097	static struct pta
5098	  {
5099	    const char *name;		/* processor name or nickname.  */
5100	    enum processor_type processor;
5101	  }
5102	const processor_alias_table[] =
5103	  {
5104	    {"itanium", PROCESSOR_ITANIUM},
5105	    {"itanium1", PROCESSOR_ITANIUM},
5106	    {"merced", PROCESSOR_ITANIUM},
5107	    {"itanium2", PROCESSOR_ITANIUM2},
5108	    {"mckinley", PROCESSOR_ITANIUM2},
5109	  };
5110	int const pta_size = ARRAY_SIZE (processor_alias_table);
5111	int i;
5112
5113	for (i = 0; i < pta_size; i++)
5114	  if (!strcmp (arg, processor_alias_table[i].name))
5115	    {
5116	      ia64_tune = processor_alias_table[i].processor;
5117	      break;
5118	    }
5119	if (i == pta_size)
5120	  error ("bad value %<%s%> for -mtune= switch", arg);
5121	return true;
5122      }
5123
5124    default:
5125      return true;
5126    }
5127}
5128
5129/* Implement OVERRIDE_OPTIONS.  */
5130
5131void
5132ia64_override_options (void)
5133{
5134  if (TARGET_AUTO_PIC)
5135    target_flags |= MASK_CONST_GP;
5136
5137  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5138    {
5139      warning (0, "not yet implemented: latency-optimized inline square root");
5140      TARGET_INLINE_SQRT = INL_MAX_THR;
5141    }
5142
5143  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5144  flag_schedule_insns_after_reload = 0;
5145
5146  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5147
5148  init_machine_status = ia64_init_machine_status;
5149}
5150
5151static struct machine_function *
5152ia64_init_machine_status (void)
5153{
5154  return ggc_alloc_cleared (sizeof (struct machine_function));
5155}
5156
5157static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5158static enum attr_type ia64_safe_type (rtx);
5159
5160static enum attr_itanium_class
5161ia64_safe_itanium_class (rtx insn)
5162{
5163  if (recog_memoized (insn) >= 0)
5164    return get_attr_itanium_class (insn);
5165  else
5166    return ITANIUM_CLASS_UNKNOWN;
5167}
5168
5169static enum attr_type
5170ia64_safe_type (rtx insn)
5171{
5172  if (recog_memoized (insn) >= 0)
5173    return get_attr_type (insn);
5174  else
5175    return TYPE_UNKNOWN;
5176}
5177
5178/* The following collection of routines emit instruction group stop bits as
5179   necessary to avoid dependencies.  */
5180
5181/* Need to track some additional registers as far as serialization is
5182   concerned so we can properly handle br.call and br.ret.  We could
5183   make these registers visible to gcc, but since these registers are
5184   never explicitly used in gcc generated code, it seems wasteful to
5185   do so (plus it would make the call and return patterns needlessly
5186   complex).  */
5187#define REG_RP		(BR_REG (0))
5188#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
5189/* This is used for volatile asms which may require a stop bit immediately
5190   before and after them.  */
5191#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
5192#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
5193#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
5194
5195/* For each register, we keep track of how it has been written in the
5196   current instruction group.
5197
5198   If a register is written unconditionally (no qualifying predicate),
5199   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5200
5201   If a register is written if its qualifying predicate P is true, we
5202   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5203   may be written again by the complement of P (P^1) and when this happens,
5204   WRITE_COUNT gets set to 2.
5205
5206   The result of this is that whenever an insn attempts to write a register
5207   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5208
5209   If a predicate register is written by a floating-point insn, we set
5210   WRITTEN_BY_FP to true.
5211
5212   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5213   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
5214
5215struct reg_write_state
5216{
5217  unsigned int write_count : 2;
5218  unsigned int first_pred : 16;
5219  unsigned int written_by_fp : 1;
5220  unsigned int written_by_and : 1;
5221  unsigned int written_by_or : 1;
5222};
5223
5224/* Cumulative info for the current instruction group.  */
5225struct reg_write_state rws_sum[NUM_REGS];
5226/* Info for the current instruction.  This gets copied to rws_sum after a
5227   stop bit is emitted.  */
5228struct reg_write_state rws_insn[NUM_REGS];
5229
5230/* Indicates whether this is the first instruction after a stop bit,
5231   in which case we don't need another stop bit.  Without this,
5232   ia64_variable_issue will die when scheduling an alloc.  */
5233static int first_instruction;
5234
5235/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5236   RTL for one instruction.  */
5237struct reg_flags
5238{
5239  unsigned int is_write : 1;	/* Is register being written?  */
5240  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
5241  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
5242  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
5243  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
5244  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
5245};
5246
5247static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5248static int rws_access_regno (int, struct reg_flags, int);
5249static int rws_access_reg (rtx, struct reg_flags, int);
5250static void update_set_flags (rtx, struct reg_flags *);
5251static int set_src_needs_barrier (rtx, struct reg_flags, int);
5252static int rtx_needs_barrier (rtx, struct reg_flags, int);
5253static void init_insn_group_barriers (void);
5254static int group_barrier_needed (rtx);
5255static int safe_group_barrier_needed (rtx);
5256
5257/* Update *RWS for REGNO, which is being written by the current instruction,
5258   with predicate PRED, and associated register flags in FLAGS.  */
5259
5260static void
5261rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5262{
5263  if (pred)
5264    rws[regno].write_count++;
5265  else
5266    rws[regno].write_count = 2;
5267  rws[regno].written_by_fp |= flags.is_fp;
5268  /* ??? Not tracking and/or across differing predicates.  */
5269  rws[regno].written_by_and = flags.is_and;
5270  rws[regno].written_by_or = flags.is_or;
5271  rws[regno].first_pred = pred;
5272}
5273
5274/* Handle an access to register REGNO of type FLAGS using predicate register
5275   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
5276   a dependency with an earlier instruction in the same group.  */
5277
5278static int
5279rws_access_regno (int regno, struct reg_flags flags, int pred)
5280{
5281  int need_barrier = 0;
5282
5283  gcc_assert (regno < NUM_REGS);
5284
5285  if (! PR_REGNO_P (regno))
5286    flags.is_and = flags.is_or = 0;
5287
5288  if (flags.is_write)
5289    {
5290      int write_count;
5291
5292      /* One insn writes same reg multiple times?  */
5293      gcc_assert (!rws_insn[regno].write_count);
5294
5295      /* Update info for current instruction.  */
5296      rws_update (rws_insn, regno, flags, pred);
5297      write_count = rws_sum[regno].write_count;
5298
5299      switch (write_count)
5300	{
5301	case 0:
5302	  /* The register has not been written yet.  */
5303	  rws_update (rws_sum, regno, flags, pred);
5304	  break;
5305
5306	case 1:
5307	  /* The register has been written via a predicate.  If this is
5308	     not a complementary predicate, then we need a barrier.  */
5309	  /* ??? This assumes that P and P+1 are always complementary
5310	     predicates for P even.  */
5311	  if (flags.is_and && rws_sum[regno].written_by_and)
5312	    ;
5313	  else if (flags.is_or && rws_sum[regno].written_by_or)
5314	    ;
5315	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
5316	    need_barrier = 1;
5317	  rws_update (rws_sum, regno, flags, pred);
5318	  break;
5319
5320	case 2:
5321	  /* The register has been unconditionally written already.  We
5322	     need a barrier.  */
5323	  if (flags.is_and && rws_sum[regno].written_by_and)
5324	    ;
5325	  else if (flags.is_or && rws_sum[regno].written_by_or)
5326	    ;
5327	  else
5328	    need_barrier = 1;
5329	  rws_sum[regno].written_by_and = flags.is_and;
5330	  rws_sum[regno].written_by_or = flags.is_or;
5331	  break;
5332
5333	default:
5334	  gcc_unreachable ();
5335	}
5336    }
5337  else
5338    {
5339      if (flags.is_branch)
5340	{
5341	  /* Branches have several RAW exceptions that allow to avoid
5342	     barriers.  */
5343
5344	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5345	    /* RAW dependencies on branch regs are permissible as long
5346	       as the writer is a non-branch instruction.  Since we
5347	       never generate code that uses a branch register written
5348	       by a branch instruction, handling this case is
5349	       easy.  */
5350	    return 0;
5351
5352	  if (REGNO_REG_CLASS (regno) == PR_REGS
5353	      && ! rws_sum[regno].written_by_fp)
5354	    /* The predicates of a branch are available within the
5355	       same insn group as long as the predicate was written by
5356	       something other than a floating-point instruction.  */
5357	    return 0;
5358	}
5359
5360      if (flags.is_and && rws_sum[regno].written_by_and)
5361	return 0;
5362      if (flags.is_or && rws_sum[regno].written_by_or)
5363	return 0;
5364
5365      switch (rws_sum[regno].write_count)
5366	{
5367	case 0:
5368	  /* The register has not been written yet.  */
5369	  break;
5370
5371	case 1:
5372	  /* The register has been written via a predicate.  If this is
5373	     not a complementary predicate, then we need a barrier.  */
5374	  /* ??? This assumes that P and P+1 are always complementary
5375	     predicates for P even.  */
5376	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5377	    need_barrier = 1;
5378	  break;
5379
5380	case 2:
5381	  /* The register has been unconditionally written already.  We
5382	     need a barrier.  */
5383	  need_barrier = 1;
5384	  break;
5385
5386	default:
5387	  gcc_unreachable ();
5388	}
5389    }
5390
5391  return need_barrier;
5392}
5393
5394static int
5395rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5396{
5397  int regno = REGNO (reg);
5398  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5399
5400  if (n == 1)
5401    return rws_access_regno (regno, flags, pred);
5402  else
5403    {
5404      int need_barrier = 0;
5405      while (--n >= 0)
5406	need_barrier |= rws_access_regno (regno + n, flags, pred);
5407      return need_barrier;
5408    }
5409}
5410
5411/* Examine X, which is a SET rtx, and update the flags, the predicate, and
5412   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
5413
5414static void
5415update_set_flags (rtx x, struct reg_flags *pflags)
5416{
5417  rtx src = SET_SRC (x);
5418
5419  switch (GET_CODE (src))
5420    {
5421    case CALL:
5422      return;
5423
5424    case IF_THEN_ELSE:
5425      /* There are four cases here:
5426	 (1) The destination is (pc), in which case this is a branch,
5427	 nothing here applies.
5428	 (2) The destination is ar.lc, in which case this is a
5429	 doloop_end_internal,
5430	 (3) The destination is an fp register, in which case this is
5431	 an fselect instruction.
5432	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5433	 this is a check load.
5434	 In all cases, nothing we do in this function applies.  */
5435      return;
5436
5437    default:
5438      if (COMPARISON_P (src)
5439	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5440	/* Set pflags->is_fp to 1 so that we know we're dealing
5441	   with a floating point comparison when processing the
5442	   destination of the SET.  */
5443	pflags->is_fp = 1;
5444
5445      /* Discover if this is a parallel comparison.  We only handle
5446	 and.orcm and or.andcm at present, since we must retain a
5447	 strict inverse on the predicate pair.  */
5448      else if (GET_CODE (src) == AND)
5449	pflags->is_and = 1;
5450      else if (GET_CODE (src) == IOR)
5451	pflags->is_or = 1;
5452
5453      break;
5454    }
5455}
5456
5457/* Subroutine of rtx_needs_barrier; this function determines whether the
5458   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5459   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
5460   for this insn.  */
5461
5462static int
5463set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5464{
5465  int need_barrier = 0;
5466  rtx dst;
5467  rtx src = SET_SRC (x);
5468
5469  if (GET_CODE (src) == CALL)
5470    /* We don't need to worry about the result registers that
5471       get written by subroutine call.  */
5472    return rtx_needs_barrier (src, flags, pred);
5473  else if (SET_DEST (x) == pc_rtx)
5474    {
5475      /* X is a conditional branch.  */
5476      /* ??? This seems redundant, as the caller sets this bit for
5477	 all JUMP_INSNs.  */
5478      if (!ia64_spec_check_src_p (src))
5479	flags.is_branch = 1;
5480      return rtx_needs_barrier (src, flags, pred);
5481    }
5482
5483  if (ia64_spec_check_src_p (src))
5484    /* Avoid checking one register twice (in condition
5485       and in 'then' section) for ldc pattern.  */
5486    {
5487      gcc_assert (REG_P (XEXP (src, 2)));
5488      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5489
5490      /* We process MEM below.  */
5491      src = XEXP (src, 1);
5492    }
5493
5494  need_barrier |= rtx_needs_barrier (src, flags, pred);
5495
5496  dst = SET_DEST (x);
5497  if (GET_CODE (dst) == ZERO_EXTRACT)
5498    {
5499      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5500      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5501    }
5502  return need_barrier;
5503}
5504
5505/* Handle an access to rtx X of type FLAGS using predicate register
5506   PRED.  Return 1 if this access creates a dependency with an earlier
5507   instruction in the same group.  */
5508
5509static int
5510rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5511{
5512  int i, j;
5513  int is_complemented = 0;
5514  int need_barrier = 0;
5515  const char *format_ptr;
5516  struct reg_flags new_flags;
5517  rtx cond;
5518
5519  if (! x)
5520    return 0;
5521
5522  new_flags = flags;
5523
5524  switch (GET_CODE (x))
5525    {
5526    case SET:
5527      update_set_flags (x, &new_flags);
5528      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5529      if (GET_CODE (SET_SRC (x)) != CALL)
5530	{
5531	  new_flags.is_write = 1;
5532	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5533	}
5534      break;
5535
5536    case CALL:
5537      new_flags.is_write = 0;
5538      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5539
5540      /* Avoid multiple register writes, in case this is a pattern with
5541	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5542      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5543	{
5544	  new_flags.is_write = 1;
5545	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5546	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5547	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5548	}
5549      break;
5550
5551    case COND_EXEC:
5552      /* X is a predicated instruction.  */
5553
5554      cond = COND_EXEC_TEST (x);
5555      gcc_assert (!pred);
5556      need_barrier = rtx_needs_barrier (cond, flags, 0);
5557
5558      if (GET_CODE (cond) == EQ)
5559	is_complemented = 1;
5560      cond = XEXP (cond, 0);
5561      gcc_assert (GET_CODE (cond) == REG
5562		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5563      pred = REGNO (cond);
5564      if (is_complemented)
5565	++pred;
5566
5567      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5568      return need_barrier;
5569
5570    case CLOBBER:
5571    case USE:
5572      /* Clobber & use are for earlier compiler-phases only.  */
5573      break;
5574
5575    case ASM_OPERANDS:
5576    case ASM_INPUT:
5577      /* We always emit stop bits for traditional asms.  We emit stop bits
5578	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5579      if (GET_CODE (x) != ASM_OPERANDS
5580	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5581	{
5582	  /* Avoid writing the register multiple times if we have multiple
5583	     asm outputs.  This avoids a failure in rws_access_reg.  */
5584	  if (! rws_insn[REG_VOLATILE].write_count)
5585	    {
5586	      new_flags.is_write = 1;
5587	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5588	    }
5589	  return 1;
5590	}
5591
5592      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5593	 We cannot just fall through here since then we would be confused
5594	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
5595	 traditional asms unlike their normal usage.  */
5596
5597      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5598	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5599	  need_barrier = 1;
5600      break;
5601
5602    case PARALLEL:
5603      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5604	{
5605	  rtx pat = XVECEXP (x, 0, i);
5606	  switch (GET_CODE (pat))
5607	    {
5608	    case SET:
5609	      update_set_flags (pat, &new_flags);
5610	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5611	      break;
5612
5613	    case USE:
5614	    case CALL:
5615	    case ASM_OPERANDS:
5616	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
5617	      break;
5618
5619	    case CLOBBER:
5620	    case RETURN:
5621	      break;
5622
5623	    default:
5624	      gcc_unreachable ();
5625	    }
5626	}
5627      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5628	{
5629	  rtx pat = XVECEXP (x, 0, i);
5630	  if (GET_CODE (pat) == SET)
5631	    {
5632	      if (GET_CODE (SET_SRC (pat)) != CALL)
5633		{
5634		  new_flags.is_write = 1;
5635		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5636						     pred);
5637		}
5638	    }
5639	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5640	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5641	}
5642      break;
5643
5644    case SUBREG:
5645      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5646      break;
5647    case REG:
5648      if (REGNO (x) == AR_UNAT_REGNUM)
5649	{
5650	  for (i = 0; i < 64; ++i)
5651	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5652	}
5653      else
5654	need_barrier = rws_access_reg (x, flags, pred);
5655      break;
5656
5657    case MEM:
5658      /* Find the regs used in memory address computation.  */
5659      new_flags.is_write = 0;
5660      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5661      break;
5662
5663    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
5664    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5665      break;
5666
5667      /* Operators with side-effects.  */
5668    case POST_INC:    case POST_DEC:
5669      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5670
5671      new_flags.is_write = 0;
5672      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5673      new_flags.is_write = 1;
5674      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5675      break;
5676
5677    case POST_MODIFY:
5678      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5679
5680      new_flags.is_write = 0;
5681      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5682      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5683      new_flags.is_write = 1;
5684      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5685      break;
5686
5687      /* Handle common unary and binary ops for efficiency.  */
5688    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5689    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5690    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5691    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5692    case NE:       case EQ:      case GE:      case GT:        case LE:
5693    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5694      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5695      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5696      break;
5697
5698    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5699    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5700    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5701    case SQRT:     case FFS:		case POPCOUNT:
5702      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5703      break;
5704
5705    case VEC_SELECT:
5706      /* VEC_SELECT's second argument is a PARALLEL with integers that
5707	 describe the elements selected.  On ia64, those integers are
5708	 always constants.  Avoid walking the PARALLEL so that we don't
5709	 get confused with "normal" parallels and then die.  */
5710      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5711      break;
5712
5713    case UNSPEC:
5714      switch (XINT (x, 1))
5715	{
5716	case UNSPEC_LTOFF_DTPMOD:
5717	case UNSPEC_LTOFF_DTPREL:
5718	case UNSPEC_DTPREL:
5719	case UNSPEC_LTOFF_TPREL:
5720	case UNSPEC_TPREL:
5721	case UNSPEC_PRED_REL_MUTEX:
5722	case UNSPEC_PIC_CALL:
5723        case UNSPEC_MF:
5724        case UNSPEC_FETCHADD_ACQ:
5725	case UNSPEC_BSP_VALUE:
5726	case UNSPEC_FLUSHRS:
5727	case UNSPEC_BUNDLE_SELECTOR:
5728          break;
5729
5730	case UNSPEC_GR_SPILL:
5731	case UNSPEC_GR_RESTORE:
5732	  {
5733	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5734	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5735
5736	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5737	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5738	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5739					      new_flags, pred);
5740	    break;
5741	  }
5742
5743	case UNSPEC_FR_SPILL:
5744	case UNSPEC_FR_RESTORE:
5745	case UNSPEC_GETF_EXP:
5746	case UNSPEC_SETF_EXP:
5747        case UNSPEC_ADDP4:
5748	case UNSPEC_FR_SQRT_RECIP_APPROX:
5749	case UNSPEC_LDA:
5750	case UNSPEC_LDS:
5751	case UNSPEC_LDSA:
5752	case UNSPEC_CHKACLR:
5753        case UNSPEC_CHKS:
5754	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5755	  break;
5756
5757	case UNSPEC_FR_RECIP_APPROX:
5758	case UNSPEC_SHRP:
5759	case UNSPEC_COPYSIGN:
5760	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5761	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5762	  break;
5763
5764        case UNSPEC_CMPXCHG_ACQ:
5765	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5766	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5767	  break;
5768
5769	default:
5770	  gcc_unreachable ();
5771	}
5772      break;
5773
5774    case UNSPEC_VOLATILE:
5775      switch (XINT (x, 1))
5776	{
5777	case UNSPECV_ALLOC:
5778	  /* Alloc must always be the first instruction of a group.
5779	     We force this by always returning true.  */
5780	  /* ??? We might get better scheduling if we explicitly check for
5781	     input/local/output register dependencies, and modify the
5782	     scheduler so that alloc is always reordered to the start of
5783	     the current group.  We could then eliminate all of the
5784	     first_instruction code.  */
5785	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5786
5787	  new_flags.is_write = 1;
5788	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5789	  return 1;
5790
5791	case UNSPECV_SET_BSP:
5792	  need_barrier = 1;
5793          break;
5794
5795	case UNSPECV_BLOCKAGE:
5796	case UNSPECV_INSN_GROUP_BARRIER:
5797	case UNSPECV_BREAK:
5798	case UNSPECV_PSAC_ALL:
5799	case UNSPECV_PSAC_NORMAL:
5800	  return 0;
5801
5802	default:
5803	  gcc_unreachable ();
5804	}
5805      break;
5806
5807    case RETURN:
5808      new_flags.is_write = 0;
5809      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5810      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5811
5812      new_flags.is_write = 1;
5813      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5814      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5815      break;
5816
5817    default:
5818      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5819      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5820	switch (format_ptr[i])
5821	  {
5822	  case '0':	/* unused field */
5823	  case 'i':	/* integer */
5824	  case 'n':	/* note */
5825	  case 'w':	/* wide integer */
5826	  case 's':	/* pointer to string */
5827	  case 'S':	/* optional pointer to string */
5828	    break;
5829
5830	  case 'e':
5831	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5832	      need_barrier = 1;
5833	    break;
5834
5835	  case 'E':
5836	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5837	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5838		need_barrier = 1;
5839	    break;
5840
5841	  default:
5842	    gcc_unreachable ();
5843	  }
5844      break;
5845    }
5846  return need_barrier;
5847}
5848
5849/* Clear out the state for group_barrier_needed at the start of a
5850   sequence of insns.  */
5851
5852static void
5853init_insn_group_barriers (void)
5854{
5855  memset (rws_sum, 0, sizeof (rws_sum));
5856  first_instruction = 1;
5857}
5858
5859/* Given the current state, determine whether a group barrier (a stop bit) is
5860   necessary before INSN.  Return nonzero if so.  This modifies the state to
5861   include the effects of INSN as a side-effect.  */
5862
5863static int
5864group_barrier_needed (rtx insn)
5865{
5866  rtx pat;
5867  int need_barrier = 0;
5868  struct reg_flags flags;
5869
5870  memset (&flags, 0, sizeof (flags));
5871  switch (GET_CODE (insn))
5872    {
5873    case NOTE:
5874      break;
5875
5876    case BARRIER:
5877      /* A barrier doesn't imply an instruction group boundary.  */
5878      break;
5879
5880    case CODE_LABEL:
5881      memset (rws_insn, 0, sizeof (rws_insn));
5882      return 1;
5883
5884    case CALL_INSN:
5885      flags.is_branch = 1;
5886      flags.is_sibcall = SIBLING_CALL_P (insn);
5887      memset (rws_insn, 0, sizeof (rws_insn));
5888
5889      /* Don't bundle a call following another call.  */
5890      if ((pat = prev_active_insn (insn))
5891	  && GET_CODE (pat) == CALL_INSN)
5892	{
5893	  need_barrier = 1;
5894	  break;
5895	}
5896
5897      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5898      break;
5899
5900    case JUMP_INSN:
5901      if (!ia64_spec_check_p (insn))
5902	flags.is_branch = 1;
5903
5904      /* Don't bundle a jump following a call.  */
5905      if ((pat = prev_active_insn (insn))
5906	  && GET_CODE (pat) == CALL_INSN)
5907	{
5908	  need_barrier = 1;
5909	  break;
5910	}
5911      /* FALLTHRU */
5912
5913    case INSN:
5914      if (GET_CODE (PATTERN (insn)) == USE
5915	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5916	/* Don't care about USE and CLOBBER "insns"---those are used to
5917	   indicate to the optimizer that it shouldn't get rid of
5918	   certain operations.  */
5919	break;
5920
5921      pat = PATTERN (insn);
5922
5923      /* Ug.  Hack hacks hacked elsewhere.  */
5924      switch (recog_memoized (insn))
5925	{
5926	  /* We play dependency tricks with the epilogue in order
5927	     to get proper schedules.  Undo this for dv analysis.  */
5928	case CODE_FOR_epilogue_deallocate_stack:
5929	case CODE_FOR_prologue_allocate_stack:
5930	  pat = XVECEXP (pat, 0, 0);
5931	  break;
5932
5933	  /* The pattern we use for br.cloop confuses the code above.
5934	     The second element of the vector is representative.  */
5935	case CODE_FOR_doloop_end_internal:
5936	  pat = XVECEXP (pat, 0, 1);
5937	  break;
5938
5939	  /* Doesn't generate code.  */
5940	case CODE_FOR_pred_rel_mutex:
5941	case CODE_FOR_prologue_use:
5942	  return 0;
5943
5944	default:
5945	  break;
5946	}
5947
5948      memset (rws_insn, 0, sizeof (rws_insn));
5949      need_barrier = rtx_needs_barrier (pat, flags, 0);
5950
5951      /* Check to see if the previous instruction was a volatile
5952	 asm.  */
5953      if (! need_barrier)
5954	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
5955      break;
5956
5957    default:
5958      gcc_unreachable ();
5959    }
5960
5961  if (first_instruction && INSN_P (insn)
5962      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
5963      && GET_CODE (PATTERN (insn)) != USE
5964      && GET_CODE (PATTERN (insn)) != CLOBBER)
5965    {
5966      need_barrier = 0;
5967      first_instruction = 0;
5968    }
5969
5970  return need_barrier;
5971}
5972
5973/* Like group_barrier_needed, but do not clobber the current state.  */
5974
5975static int
5976safe_group_barrier_needed (rtx insn)
5977{
5978  struct reg_write_state rws_saved[NUM_REGS];
5979  int saved_first_instruction;
5980  int t;
5981
5982  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
5983  saved_first_instruction = first_instruction;
5984
5985  t = group_barrier_needed (insn);
5986
5987  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
5988  first_instruction = saved_first_instruction;
5989
5990  return t;
5991}
5992
5993/* Scan the current function and insert stop bits as necessary to
5994   eliminate dependencies.  This function assumes that a final
5995   instruction scheduling pass has been run which has already
5996   inserted most of the necessary stop bits.  This function only
5997   inserts new ones at basic block boundaries, since these are
5998   invisible to the scheduler.  */
5999
6000static void
6001emit_insn_group_barriers (FILE *dump)
6002{
6003  rtx insn;
6004  rtx last_label = 0;
6005  int insns_since_last_label = 0;
6006
6007  init_insn_group_barriers ();
6008
6009  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6010    {
6011      if (GET_CODE (insn) == CODE_LABEL)
6012	{
6013	  if (insns_since_last_label)
6014	    last_label = insn;
6015	  insns_since_last_label = 0;
6016	}
6017      else if (GET_CODE (insn) == NOTE
6018	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6019	{
6020	  if (insns_since_last_label)
6021	    last_label = insn;
6022	  insns_since_last_label = 0;
6023	}
6024      else if (GET_CODE (insn) == INSN
6025	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6026	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6027	{
6028	  init_insn_group_barriers ();
6029	  last_label = 0;
6030	}
6031      else if (INSN_P (insn))
6032	{
6033	  insns_since_last_label = 1;
6034
6035	  if (group_barrier_needed (insn))
6036	    {
6037	      if (last_label)
6038		{
6039		  if (dump)
6040		    fprintf (dump, "Emitting stop before label %d\n",
6041			     INSN_UID (last_label));
6042		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6043		  insn = last_label;
6044
6045		  init_insn_group_barriers ();
6046		  last_label = 0;
6047		}
6048	    }
6049	}
6050    }
6051}
6052
6053/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6054   This function has to emit all necessary group barriers.  */
6055
6056static void
6057emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6058{
6059  rtx insn;
6060
6061  init_insn_group_barriers ();
6062
6063  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6064    {
6065      if (GET_CODE (insn) == BARRIER)
6066	{
6067	  rtx last = prev_active_insn (insn);
6068
6069	  if (! last)
6070	    continue;
6071	  if (GET_CODE (last) == JUMP_INSN
6072	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6073	    last = prev_active_insn (last);
6074	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6075	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6076
6077	  init_insn_group_barriers ();
6078	}
6079      else if (INSN_P (insn))
6080	{
6081	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6082	    init_insn_group_barriers ();
6083	  else if (group_barrier_needed (insn))
6084	    {
6085	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6086	      init_insn_group_barriers ();
6087	      group_barrier_needed (insn);
6088	    }
6089	}
6090    }
6091}
6092
6093
6094
6095/* Instruction scheduling support.  */
6096
6097#define NR_BUNDLES 10
6098
6099/* A list of names of all available bundles.  */
6100
6101static const char *bundle_name [NR_BUNDLES] =
6102{
6103  ".mii",
6104  ".mmi",
6105  ".mfi",
6106  ".mmf",
6107#if NR_BUNDLES == 10
6108  ".bbb",
6109  ".mbb",
6110#endif
6111  ".mib",
6112  ".mmb",
6113  ".mfb",
6114  ".mlx"
6115};
6116
6117/* Nonzero if we should insert stop bits into the schedule.  */
6118
6119int ia64_final_schedule = 0;
6120
6121/* Codes of the corresponding queried units: */
6122
6123static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6124static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6125
6126static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6127static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6128
6129static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6130
6131/* The following variable value is an insn group barrier.  */
6132
6133static rtx dfa_stop_insn;
6134
6135/* The following variable value is the last issued insn.  */
6136
6137static rtx last_scheduled_insn;
6138
6139/* The following variable value is size of the DFA state.  */
6140
6141static size_t dfa_state_size;
6142
6143/* The following variable value is pointer to a DFA state used as
6144   temporary variable.  */
6145
6146static state_t temp_dfa_state = NULL;
6147
6148/* The following variable value is DFA state after issuing the last
6149   insn.  */
6150
6151static state_t prev_cycle_state = NULL;
6152
6153/* The following array element values are TRUE if the corresponding
6154   insn requires to add stop bits before it.  */
6155
6156static char *stops_p = NULL;
6157
6158/* The following array element values are ZERO for non-speculative
6159   instructions and hold corresponding speculation check number for
6160   speculative instructions.  */
6161static int *spec_check_no = NULL;
6162
6163/* Size of spec_check_no array.  */
6164static int max_uid = 0;
6165
6166/* The following variable is used to set up the mentioned above array.  */
6167
6168static int stop_before_p = 0;
6169
6170/* The following variable value is length of the arrays `clocks' and
6171   `add_cycles'. */
6172
6173static int clocks_length;
6174
6175/* The following array element values are cycles on which the
6176   corresponding insn will be issued.  The array is used only for
6177   Itanium1.  */
6178
6179static int *clocks;
6180
6181/* The following array element values are numbers of cycles should be
6182   added to improve insn scheduling for MM_insns for Itanium1.  */
6183
6184static int *add_cycles;
6185
6186/* The following variable value is number of data speculations in progress.  */
6187static int pending_data_specs = 0;
6188
6189static rtx ia64_single_set (rtx);
6190static void ia64_emit_insn_before (rtx, rtx);
6191
6192/* Map a bundle number to its pseudo-op.  */
6193
6194const char *
6195get_bundle_name (int b)
6196{
6197  return bundle_name[b];
6198}
6199
6200
6201/* Return the maximum number of instructions a cpu can issue.  */
6202
6203static int
6204ia64_issue_rate (void)
6205{
6206  return 6;
6207}
6208
6209/* Helper function - like single_set, but look inside COND_EXEC.  */
6210
6211static rtx
6212ia64_single_set (rtx insn)
6213{
6214  rtx x = PATTERN (insn), ret;
6215  if (GET_CODE (x) == COND_EXEC)
6216    x = COND_EXEC_CODE (x);
6217  if (GET_CODE (x) == SET)
6218    return x;
6219
6220  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
6221     Although they are not classical single set, the second set is there just
6222     to protect it from moving past FP-relative stack accesses.  */
6223  switch (recog_memoized (insn))
6224    {
6225    case CODE_FOR_prologue_allocate_stack:
6226    case CODE_FOR_epilogue_deallocate_stack:
6227      ret = XVECEXP (x, 0, 0);
6228      break;
6229
6230    default:
6231      ret = single_set_2 (insn, x);
6232      break;
6233    }
6234
6235  return ret;
6236}
6237
6238/* Adjust the cost of a scheduling dependency.
6239   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6240   COST is the current cost.  */
6241
6242static int
6243ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
6244{
6245  enum reg_note dep_type = (enum reg_note) dep_type1;
6246  enum attr_itanium_class dep_class;
6247  enum attr_itanium_class insn_class;
6248
6249  if (dep_type != REG_DEP_OUTPUT)
6250    return cost;
6251
6252  insn_class = ia64_safe_itanium_class (insn);
6253  dep_class = ia64_safe_itanium_class (dep_insn);
6254  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6255      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6256    return 0;
6257
6258  return cost;
6259}
6260
6261/* Like emit_insn_before, but skip cycle_display notes.
6262   ??? When cycle display notes are implemented, update this.  */
6263
6264static void
6265ia64_emit_insn_before (rtx insn, rtx before)
6266{
6267  emit_insn_before (insn, before);
6268}
6269
6270/* The following function marks insns who produce addresses for load
6271   and store insns.  Such insns will be placed into M slots because it
6272   decrease latency time for Itanium1 (see function
6273   `ia64_produce_address_p' and the DFA descriptions).  */
6274
6275static void
6276ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6277{
6278  rtx insn, link, next, next_tail;
6279
6280  /* Before reload, which_alternative is not set, which means that
6281     ia64_safe_itanium_class will produce wrong results for (at least)
6282     move instructions.  */
6283  if (!reload_completed)
6284    return;
6285
6286  next_tail = NEXT_INSN (tail);
6287  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6288    if (INSN_P (insn))
6289      insn->call = 0;
6290  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6291    if (INSN_P (insn)
6292	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6293      {
6294	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6295	  {
6296	    enum attr_itanium_class c;
6297
6298	    if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6299	      continue;
6300	    next = XEXP (link, 0);
6301	    c = ia64_safe_itanium_class (next);
6302	    if ((c == ITANIUM_CLASS_ST
6303		 || c == ITANIUM_CLASS_STF)
6304		&& ia64_st_address_bypass_p (insn, next))
6305	      break;
6306	    else if ((c == ITANIUM_CLASS_LD
6307		      || c == ITANIUM_CLASS_FLD
6308		      || c == ITANIUM_CLASS_FLDP)
6309		     && ia64_ld_address_bypass_p (insn, next))
6310	      break;
6311	  }
6312	insn->call = link != 0;
6313      }
6314}
6315
6316/* We're beginning a new block.  Initialize data structures as necessary.  */
6317
6318static void
6319ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6320		 int sched_verbose ATTRIBUTE_UNUSED,
6321		 int max_ready ATTRIBUTE_UNUSED)
6322{
6323#ifdef ENABLE_CHECKING
6324  rtx insn;
6325
6326  if (reload_completed)
6327    for (insn = NEXT_INSN (current_sched_info->prev_head);
6328	 insn != current_sched_info->next_tail;
6329	 insn = NEXT_INSN (insn))
6330      gcc_assert (!SCHED_GROUP_P (insn));
6331#endif
6332  last_scheduled_insn = NULL_RTX;
6333  init_insn_group_barriers ();
6334}
6335
6336/* We're beginning a scheduling pass.  Check assertion.  */
6337
6338static void
6339ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6340                        int sched_verbose ATTRIBUTE_UNUSED,
6341                        int max_ready ATTRIBUTE_UNUSED)
6342{
6343  gcc_assert (!pending_data_specs);
6344}
6345
6346/* Scheduling pass is now finished.  Free/reset static variable.  */
6347static void
6348ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6349			  int sched_verbose ATTRIBUTE_UNUSED)
6350{
6351  free (spec_check_no);
6352  spec_check_no = 0;
6353  max_uid = 0;
6354}
6355
6356/* We are about to being issuing insns for this clock cycle.
6357   Override the default sort algorithm to better slot instructions.  */
6358
6359static int
6360ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6361			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6362			int reorder_type)
6363{
6364  int n_asms;
6365  int n_ready = *pn_ready;
6366  rtx *e_ready = ready + n_ready;
6367  rtx *insnp;
6368
6369  if (sched_verbose)
6370    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6371
6372  if (reorder_type == 0)
6373    {
6374      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6375      n_asms = 0;
6376      for (insnp = ready; insnp < e_ready; insnp++)
6377	if (insnp < e_ready)
6378	  {
6379	    rtx insn = *insnp;
6380	    enum attr_type t = ia64_safe_type (insn);
6381	    if (t == TYPE_UNKNOWN)
6382	      {
6383		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6384		    || asm_noperands (PATTERN (insn)) >= 0)
6385		  {
6386		    rtx lowest = ready[n_asms];
6387		    ready[n_asms] = insn;
6388		    *insnp = lowest;
6389		    n_asms++;
6390		  }
6391		else
6392		  {
6393		    rtx highest = ready[n_ready - 1];
6394		    ready[n_ready - 1] = insn;
6395		    *insnp = highest;
6396		    return 1;
6397		  }
6398	      }
6399	  }
6400
6401      if (n_asms < n_ready)
6402	{
6403	  /* Some normal insns to process.  Skip the asms.  */
6404	  ready += n_asms;
6405	  n_ready -= n_asms;
6406	}
6407      else if (n_ready > 0)
6408	return 1;
6409    }
6410
6411  if (ia64_final_schedule)
6412    {
6413      int deleted = 0;
6414      int nr_need_stop = 0;
6415
6416      for (insnp = ready; insnp < e_ready; insnp++)
6417	if (safe_group_barrier_needed (*insnp))
6418	  nr_need_stop++;
6419
6420      if (reorder_type == 1 && n_ready == nr_need_stop)
6421	return 0;
6422      if (reorder_type == 0)
6423	return 1;
6424      insnp = e_ready;
6425      /* Move down everything that needs a stop bit, preserving
6426	 relative order.  */
6427      while (insnp-- > ready + deleted)
6428	while (insnp >= ready + deleted)
6429	  {
6430	    rtx insn = *insnp;
6431	    if (! safe_group_barrier_needed (insn))
6432	      break;
6433	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6434	    *ready = insn;
6435	    deleted++;
6436	  }
6437      n_ready -= deleted;
6438      ready += deleted;
6439    }
6440
6441  return 1;
6442}
6443
6444/* We are about to being issuing insns for this clock cycle.  Override
6445   the default sort algorithm to better slot instructions.  */
6446
6447static int
6448ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6449		    int clock_var)
6450{
6451  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6452				 pn_ready, clock_var, 0);
6453}
6454
6455/* Like ia64_sched_reorder, but called after issuing each insn.
6456   Override the default sort algorithm to better slot instructions.  */
6457
6458static int
6459ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6460		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6461		     int *pn_ready, int clock_var)
6462{
6463  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6464    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6465  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6466				 clock_var, 1);
6467}
6468
6469/* We are about to issue INSN.  Return the number of insns left on the
6470   ready queue that can be issued this cycle.  */
6471
6472static int
6473ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6474		     int sched_verbose ATTRIBUTE_UNUSED,
6475		     rtx insn ATTRIBUTE_UNUSED,
6476		     int can_issue_more ATTRIBUTE_UNUSED)
6477{
6478  if (current_sched_info->flags & DO_SPECULATION)
6479    /* Modulo scheduling does not extend h_i_d when emitting
6480       new instructions.  Deal with it.  */
6481    {
6482      if (DONE_SPEC (insn) & BEGIN_DATA)
6483	pending_data_specs++;
6484      if (CHECK_SPEC (insn) & BEGIN_DATA)
6485	pending_data_specs--;
6486    }
6487
6488  last_scheduled_insn = insn;
6489  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6490  if (reload_completed)
6491    {
6492      int needed = group_barrier_needed (insn);
6493
6494      gcc_assert (!needed);
6495      if (GET_CODE (insn) == CALL_INSN)
6496	init_insn_group_barriers ();
6497      stops_p [INSN_UID (insn)] = stop_before_p;
6498      stop_before_p = 0;
6499    }
6500  return 1;
6501}
6502
6503/* We are choosing insn from the ready queue.  Return nonzero if INSN
6504   can be chosen.  */
6505
6506static int
6507ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6508{
6509  gcc_assert (insn  && INSN_P (insn));
6510  return ((!reload_completed
6511	   || !safe_group_barrier_needed (insn))
6512	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6513}
6514
6515/* We are choosing insn from the ready queue.  Return nonzero if INSN
6516   can be chosen.  */
6517
6518static bool
6519ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
6520{
6521  gcc_assert (insn  && INSN_P (insn));
6522  /* Size of ALAT is 32.  As far as we perform conservative data speculation,
6523     we keep ALAT half-empty.  */
6524  return (pending_data_specs < 16
6525	  || !(TODO_SPEC (insn) & BEGIN_DATA));
6526}
6527
6528/* The following variable value is pseudo-insn used by the DFA insn
6529   scheduler to change the DFA state when the simulated clock is
6530   increased.  */
6531
6532static rtx dfa_pre_cycle_insn;
6533
6534/* We are about to being issuing INSN.  Return nonzero if we cannot
6535   issue it on given cycle CLOCK and return zero if we should not sort
6536   the ready queue on the next clock start.  */
6537
6538static int
6539ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6540		    int clock, int *sort_p)
6541{
6542  int setup_clocks_p = FALSE;
6543
6544  gcc_assert (insn && INSN_P (insn));
6545  if ((reload_completed && safe_group_barrier_needed (insn))
6546      || (last_scheduled_insn
6547	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6548	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6549	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6550    {
6551      init_insn_group_barriers ();
6552      if (verbose && dump)
6553	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6554		 last_clock == clock ? " + cycle advance" : "");
6555      stop_before_p = 1;
6556      if (last_clock == clock)
6557	{
6558	  state_transition (curr_state, dfa_stop_insn);
6559	  if (TARGET_EARLY_STOP_BITS)
6560	    *sort_p = (last_scheduled_insn == NULL_RTX
6561		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6562	  else
6563	    *sort_p = 0;
6564	  return 1;
6565	}
6566      else if (reload_completed)
6567	setup_clocks_p = TRUE;
6568      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6569	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6570	state_reset (curr_state);
6571      else
6572	{
6573	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6574	  state_transition (curr_state, dfa_stop_insn);
6575	  state_transition (curr_state, dfa_pre_cycle_insn);
6576	  state_transition (curr_state, NULL);
6577	}
6578    }
6579  else if (reload_completed)
6580    setup_clocks_p = TRUE;
6581  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6582      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6583      && asm_noperands (PATTERN (insn)) < 0)
6584    {
6585      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6586
6587      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6588	{
6589	  rtx link;
6590	  int d = -1;
6591
6592	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6593	    if (REG_NOTE_KIND (link) == 0)
6594	      {
6595		enum attr_itanium_class dep_class;
6596		rtx dep_insn = XEXP (link, 0);
6597
6598		dep_class = ia64_safe_itanium_class (dep_insn);
6599		if ((dep_class == ITANIUM_CLASS_MMMUL
6600		     || dep_class == ITANIUM_CLASS_MMSHF)
6601		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6602		    && (d < 0
6603			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6604		  d = last_clock - clocks [INSN_UID (dep_insn)];
6605	      }
6606	  if (d >= 0)
6607	    add_cycles [INSN_UID (insn)] = 3 - d;
6608	}
6609    }
6610  return 0;
6611}
6612
6613/* Implement targetm.sched.h_i_d_extended hook.
6614   Extend internal data structures.  */
6615static void
6616ia64_h_i_d_extended (void)
6617{
6618  if (current_sched_info->flags & DO_SPECULATION)
6619    {
6620      int new_max_uid = get_max_uid () + 1;
6621
6622      spec_check_no = xrecalloc (spec_check_no, new_max_uid,
6623				 max_uid, sizeof (*spec_check_no));
6624      max_uid = new_max_uid;
6625    }
6626
6627  if (stops_p != NULL)
6628    {
6629      int new_clocks_length = get_max_uid () + 1;
6630
6631      stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6632
6633      if (ia64_tune == PROCESSOR_ITANIUM)
6634	{
6635	  clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
6636			      sizeof (int));
6637	  add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
6638				  sizeof (int));
6639	}
6640
6641      clocks_length = new_clocks_length;
6642    }
6643}
6644
6645/* Constants that help mapping 'enum machine_mode' to int.  */
6646enum SPEC_MODES
6647  {
6648    SPEC_MODE_INVALID = -1,
6649    SPEC_MODE_FIRST = 0,
6650    SPEC_MODE_FOR_EXTEND_FIRST = 1,
6651    SPEC_MODE_FOR_EXTEND_LAST = 3,
6652    SPEC_MODE_LAST = 8
6653  };
6654
6655/* Return index of the MODE.  */
6656static int
6657ia64_mode_to_int (enum machine_mode mode)
6658{
6659  switch (mode)
6660    {
6661    case BImode: return 0; /* SPEC_MODE_FIRST  */
6662    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
6663    case HImode: return 2;
6664    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
6665    case DImode: return 4;
6666    case SFmode: return 5;
6667    case DFmode: return 6;
6668    case XFmode: return 7;
6669    case TImode:
6670      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
6671	 mentioned in itanium[12].md.  Predicate fp_register_operand also
6672	 needs to be defined.  Bottom line: better disable for now.  */
6673      return SPEC_MODE_INVALID;
6674    default:     return SPEC_MODE_INVALID;
6675    }
6676}
6677
6678/* Provide information about speculation capabilities.  */
6679static void
6680ia64_set_sched_flags (spec_info_t spec_info)
6681{
6682  unsigned int *flags = &(current_sched_info->flags);
6683
6684  if (*flags & SCHED_RGN
6685      || *flags & SCHED_EBB)
6686    {
6687      int mask = 0;
6688
6689      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
6690	  || (mflag_sched_ar_data_spec && reload_completed))
6691	{
6692	  mask |= BEGIN_DATA;
6693
6694	  if ((mflag_sched_br_in_data_spec && !reload_completed)
6695	      || (mflag_sched_ar_in_data_spec && reload_completed))
6696	    mask |= BE_IN_DATA;
6697	}
6698
6699      if (mflag_sched_control_spec)
6700	{
6701	  mask |= BEGIN_CONTROL;
6702
6703	  if (mflag_sched_in_control_spec)
6704	    mask |= BE_IN_CONTROL;
6705	}
6706
6707      gcc_assert (*flags & USE_GLAT);
6708
6709      if (mask)
6710	{
6711	  *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;
6712
6713	  spec_info->mask = mask;
6714	  spec_info->flags = 0;
6715
6716	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6717	    spec_info->flags |= PREFER_NON_DATA_SPEC;
6718
6719	  if ((mask & CONTROL_SPEC)
6720	      && mflag_sched_prefer_non_control_spec_insns)
6721	    spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6722
6723	  if (mflag_sched_spec_verbose)
6724	    {
6725	      if (sched_verbose >= 1)
6726		spec_info->dump = sched_dump;
6727	      else
6728		spec_info->dump = stderr;
6729	    }
6730	  else
6731	    spec_info->dump = 0;
6732
6733	  if (mflag_sched_count_spec_in_critical_path)
6734	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6735	}
6736    }
6737}
6738
6739/* Implement targetm.sched.speculate_insn hook.
6740   Check if the INSN can be TS speculative.
6741   If 'no' - return -1.
6742   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6743   If current pattern of the INSN already provides TS speculation, return 0.  */
6744static int
6745ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6746{
6747  rtx pat, reg, mem, mem_reg;
6748  int mode_no, gen_p = 1;
6749  bool extend_p;
6750
6751  gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6752
6753  pat = PATTERN (insn);
6754
6755  if (GET_CODE (pat) == COND_EXEC)
6756    pat = COND_EXEC_CODE (pat);
6757
6758  /* This should be a SET ...  */
6759  if (GET_CODE (pat) != SET)
6760    return -1;
6761
6762  reg = SET_DEST (pat);
6763  /* ... to the general/fp register ...  */
6764  if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
6765    return -1;
6766
6767  /* ... from the mem ...  */
6768  mem = SET_SRC (pat);
6769
6770  /* ... that can, possibly, be a zero_extend ...  */
6771  if (GET_CODE (mem) == ZERO_EXTEND)
6772    {
6773      mem = XEXP (mem, 0);
6774      extend_p = true;
6775    }
6776  else
6777    extend_p = false;
6778
6779  /* ... or a speculative load.  */
6780  if (GET_CODE (mem) == UNSPEC)
6781    {
6782      int code;
6783
6784      code = XINT (mem, 1);
6785      if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6786	return -1;
6787
6788      if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6789	  || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6790	  || code == UNSPEC_LDSA)
6791	gen_p = 0;
6792
6793      mem = XVECEXP (mem, 0, 0);
6794      gcc_assert (MEM_P (mem));
6795    }
6796
6797  /* Source should be a mem ...  */
6798  if (!MEM_P (mem))
6799    return -1;
6800
6801  /* ... addressed by a register.  */
6802  mem_reg = XEXP (mem, 0);
6803  if (!REG_P (mem_reg))
6804    return -1;
6805
6806  /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
6807     will always be DImode.  */
6808  mode_no = ia64_mode_to_int (GET_MODE (mem));
6809
6810  if (mode_no == SPEC_MODE_INVALID
6811      || (extend_p
6812	  && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6813	       && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6814    return -1;
6815
6816  extract_insn_cached (insn);
6817  gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
6818
6819  *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6820
6821  return gen_p;
6822}
6823
6824enum
6825  {
6826    /* Offset to reach ZERO_EXTEND patterns.  */
6827    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6828    /* Number of patterns for each speculation mode.  */
6829    SPEC_N = (SPEC_MODE_LAST
6830              + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6831  };
6832
6833enum SPEC_GEN_LD_MAP
6834  {
6835    /* Offset to ld.a patterns.  */
6836    SPEC_GEN_A = 0 * SPEC_N,
6837    /* Offset to ld.s patterns.  */
6838    SPEC_GEN_S = 1 * SPEC_N,
6839    /* Offset to ld.sa patterns.  */
6840    SPEC_GEN_SA = 2 * SPEC_N,
6841    /* Offset to ld.sa patterns.  For this patterns corresponding ld.c will
6842       mutate to chk.s.  */
6843    SPEC_GEN_SA_FOR_S = 3 * SPEC_N
6844  };
6845
6846/* These offsets are used to get (4 * SPEC_N).  */
6847enum SPEC_GEN_CHECK_OFFSET
6848  {
6849    SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
6850    SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
6851  };
6852
6853/* If GEN_P is true, calculate the index of needed speculation check and return
6854   speculative pattern for INSN with speculative mode TS, machine mode
6855   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
6856   If GEN_P is false, just calculate the index of needed speculation check.  */
6857static rtx
6858ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
6859{
6860  rtx pat, new_pat;
6861  int load_no;
6862  int shift = 0;
6863
6864  static rtx (* const gen_load[]) (rtx, rtx) = {
6865    gen_movbi_advanced,
6866    gen_movqi_advanced,
6867    gen_movhi_advanced,
6868    gen_movsi_advanced,
6869    gen_movdi_advanced,
6870    gen_movsf_advanced,
6871    gen_movdf_advanced,
6872    gen_movxf_advanced,
6873    gen_movti_advanced,
6874    gen_zero_extendqidi2_advanced,
6875    gen_zero_extendhidi2_advanced,
6876    gen_zero_extendsidi2_advanced,
6877
6878    gen_movbi_speculative,
6879    gen_movqi_speculative,
6880    gen_movhi_speculative,
6881    gen_movsi_speculative,
6882    gen_movdi_speculative,
6883    gen_movsf_speculative,
6884    gen_movdf_speculative,
6885    gen_movxf_speculative,
6886    gen_movti_speculative,
6887    gen_zero_extendqidi2_speculative,
6888    gen_zero_extendhidi2_speculative,
6889    gen_zero_extendsidi2_speculative,
6890
6891    gen_movbi_speculative_advanced,
6892    gen_movqi_speculative_advanced,
6893    gen_movhi_speculative_advanced,
6894    gen_movsi_speculative_advanced,
6895    gen_movdi_speculative_advanced,
6896    gen_movsf_speculative_advanced,
6897    gen_movdf_speculative_advanced,
6898    gen_movxf_speculative_advanced,
6899    gen_movti_speculative_advanced,
6900    gen_zero_extendqidi2_speculative_advanced,
6901    gen_zero_extendhidi2_speculative_advanced,
6902    gen_zero_extendsidi2_speculative_advanced,
6903
6904    gen_movbi_speculative_advanced,
6905    gen_movqi_speculative_advanced,
6906    gen_movhi_speculative_advanced,
6907    gen_movsi_speculative_advanced,
6908    gen_movdi_speculative_advanced,
6909    gen_movsf_speculative_advanced,
6910    gen_movdf_speculative_advanced,
6911    gen_movxf_speculative_advanced,
6912    gen_movti_speculative_advanced,
6913    gen_zero_extendqidi2_speculative_advanced,
6914    gen_zero_extendhidi2_speculative_advanced,
6915    gen_zero_extendsidi2_speculative_advanced
6916  };
6917
6918  load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
6919
6920  if (ts & BEGIN_DATA)
6921    {
6922      /* We don't need recovery because even if this is ld.sa
6923	 ALAT entry will be allocated only if NAT bit is set to zero.
6924	 So it is enough to use ld.c here.  */
6925
6926      if (ts & BEGIN_CONTROL)
6927	{
6928	  load_no += SPEC_GEN_SA;
6929
6930	  if (!mflag_sched_ldc)
6931	    shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
6932	}
6933      else
6934	{
6935	  load_no += SPEC_GEN_A;
6936
6937	  if (!mflag_sched_ldc)
6938	    shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
6939	}
6940    }
6941  else if (ts & BEGIN_CONTROL)
6942    {
6943      /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
6944      if (!mflag_control_ldc)
6945	load_no += SPEC_GEN_S;
6946      else
6947	{
6948	  gcc_assert (mflag_sched_ldc);
6949	  load_no += SPEC_GEN_SA_FOR_S;
6950	}
6951    }
6952  else
6953    gcc_unreachable ();
6954
6955  /* Set the desired check index.  We add '1', because zero element in this
6956     array means, that instruction with such uid is non-speculative.  */
6957  spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
6958
6959  if (!gen_p)
6960    return 0;
6961
6962  new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
6963			       copy_rtx (recog_data.operand[1]));
6964
6965  pat = PATTERN (insn);
6966  if (GET_CODE (pat) == COND_EXEC)
6967    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
6968				 (COND_EXEC_TEST (pat)), new_pat);
6969
6970  return new_pat;
6971}
6972
6973/* Offset to branchy checks.  */
6974enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
6975
6976/* Return nonzero, if INSN needs branchy recovery check.  */
6977static bool
6978ia64_needs_block_p (rtx insn)
6979{
6980  int check_no;
6981
6982  check_no = spec_check_no[INSN_UID(insn)] - 1;
6983  gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
6984
6985  return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
6986	  || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
6987}
6988
6989/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
6990   If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
6991   Otherwise, generate a simple check.  */
6992static rtx
6993ia64_gen_check (rtx insn, rtx label, bool mutate_p)
6994{
6995  rtx op1, pat, check_pat;
6996
6997  static rtx (* const gen_check[]) (rtx, rtx) = {
6998    gen_movbi_clr,
6999    gen_movqi_clr,
7000    gen_movhi_clr,
7001    gen_movsi_clr,
7002    gen_movdi_clr,
7003    gen_movsf_clr,
7004    gen_movdf_clr,
7005    gen_movxf_clr,
7006    gen_movti_clr,
7007    gen_zero_extendqidi2_clr,
7008    gen_zero_extendhidi2_clr,
7009    gen_zero_extendsidi2_clr,
7010
7011    gen_speculation_check_bi,
7012    gen_speculation_check_qi,
7013    gen_speculation_check_hi,
7014    gen_speculation_check_si,
7015    gen_speculation_check_di,
7016    gen_speculation_check_sf,
7017    gen_speculation_check_df,
7018    gen_speculation_check_xf,
7019    gen_speculation_check_ti,
7020    gen_speculation_check_di,
7021    gen_speculation_check_di,
7022    gen_speculation_check_di,
7023
7024    gen_movbi_clr,
7025    gen_movqi_clr,
7026    gen_movhi_clr,
7027    gen_movsi_clr,
7028    gen_movdi_clr,
7029    gen_movsf_clr,
7030    gen_movdf_clr,
7031    gen_movxf_clr,
7032    gen_movti_clr,
7033    gen_zero_extendqidi2_clr,
7034    gen_zero_extendhidi2_clr,
7035    gen_zero_extendsidi2_clr,
7036
7037    gen_movbi_clr,
7038    gen_movqi_clr,
7039    gen_movhi_clr,
7040    gen_movsi_clr,
7041    gen_movdi_clr,
7042    gen_movsf_clr,
7043    gen_movdf_clr,
7044    gen_movxf_clr,
7045    gen_movti_clr,
7046    gen_zero_extendqidi2_clr,
7047    gen_zero_extendhidi2_clr,
7048    gen_zero_extendsidi2_clr,
7049
7050    gen_advanced_load_check_clr_bi,
7051    gen_advanced_load_check_clr_qi,
7052    gen_advanced_load_check_clr_hi,
7053    gen_advanced_load_check_clr_si,
7054    gen_advanced_load_check_clr_di,
7055    gen_advanced_load_check_clr_sf,
7056    gen_advanced_load_check_clr_df,
7057    gen_advanced_load_check_clr_xf,
7058    gen_advanced_load_check_clr_ti,
7059    gen_advanced_load_check_clr_di,
7060    gen_advanced_load_check_clr_di,
7061    gen_advanced_load_check_clr_di,
7062
7063    /* Following checks are generated during mutation.  */
7064    gen_advanced_load_check_clr_bi,
7065    gen_advanced_load_check_clr_qi,
7066    gen_advanced_load_check_clr_hi,
7067    gen_advanced_load_check_clr_si,
7068    gen_advanced_load_check_clr_di,
7069    gen_advanced_load_check_clr_sf,
7070    gen_advanced_load_check_clr_df,
7071    gen_advanced_load_check_clr_xf,
7072    gen_advanced_load_check_clr_ti,
7073    gen_advanced_load_check_clr_di,
7074    gen_advanced_load_check_clr_di,
7075    gen_advanced_load_check_clr_di,
7076
7077    0,0,0,0,0,0,0,0,0,0,0,0,
7078
7079    gen_advanced_load_check_clr_bi,
7080    gen_advanced_load_check_clr_qi,
7081    gen_advanced_load_check_clr_hi,
7082    gen_advanced_load_check_clr_si,
7083    gen_advanced_load_check_clr_di,
7084    gen_advanced_load_check_clr_sf,
7085    gen_advanced_load_check_clr_df,
7086    gen_advanced_load_check_clr_xf,
7087    gen_advanced_load_check_clr_ti,
7088    gen_advanced_load_check_clr_di,
7089    gen_advanced_load_check_clr_di,
7090    gen_advanced_load_check_clr_di,
7091
7092    gen_speculation_check_bi,
7093    gen_speculation_check_qi,
7094    gen_speculation_check_hi,
7095    gen_speculation_check_si,
7096    gen_speculation_check_di,
7097    gen_speculation_check_sf,
7098    gen_speculation_check_df,
7099    gen_speculation_check_xf,
7100    gen_speculation_check_ti,
7101    gen_speculation_check_di,
7102    gen_speculation_check_di,
7103    gen_speculation_check_di
7104  };
7105
7106  extract_insn_cached (insn);
7107
7108  if (label)
7109    {
7110      gcc_assert (mutate_p || ia64_needs_block_p (insn));
7111      op1 = label;
7112    }
7113  else
7114    {
7115      gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7116      op1 = copy_rtx (recog_data.operand[1]);
7117    }
7118
7119  if (mutate_p)
7120    /* INSN is ld.c.
7121       Find the speculation check number by searching for original
7122       speculative load in the RESOLVED_DEPS list of INSN.
7123       As long as patterns are unique for each instruction, this can be
7124       accomplished by matching ORIG_PAT fields.  */
7125    {
7126      rtx link;
7127      int check_no = 0;
7128      rtx orig_pat = ORIG_PAT (insn);
7129
7130      for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
7131	{
7132	  rtx x = XEXP (link, 0);
7133
7134	  if (ORIG_PAT (x) == orig_pat)
7135	    check_no = spec_check_no[INSN_UID (x)];
7136	}
7137      gcc_assert (check_no);
7138
7139      spec_check_no[INSN_UID (insn)] = (check_no
7140					+ SPEC_GEN_CHECK_MUTATION_OFFSET);
7141    }
7142
7143  check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7144	       (copy_rtx (recog_data.operand[0]), op1));
7145
7146  pat = PATTERN (insn);
7147  if (GET_CODE (pat) == COND_EXEC)
7148    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7149				   check_pat);
7150
7151  return check_pat;
7152}
7153
7154/* Return nonzero, if X is branchy recovery check.  */
7155static int
7156ia64_spec_check_p (rtx x)
7157{
7158  x = PATTERN (x);
7159  if (GET_CODE (x) == COND_EXEC)
7160    x = COND_EXEC_CODE (x);
7161  if (GET_CODE (x) == SET)
7162    return ia64_spec_check_src_p (SET_SRC (x));
7163  return 0;
7164}
7165
7166/* Return nonzero, if SRC belongs to recovery check.  */
7167static int
7168ia64_spec_check_src_p (rtx src)
7169{
7170  if (GET_CODE (src) == IF_THEN_ELSE)
7171    {
7172      rtx t;
7173
7174      t = XEXP (src, 0);
7175      if (GET_CODE (t) == NE)
7176	{
7177	  t = XEXP (t, 0);
7178
7179	  if (GET_CODE (t) == UNSPEC)
7180	    {
7181	      int code;
7182
7183	      code = XINT (t, 1);
7184
7185	      if (code == UNSPEC_CHKACLR
7186		  || code == UNSPEC_CHKS
7187		  || code == UNSPEC_LDCCLR)
7188		{
7189		  gcc_assert (code != 0);
7190		  return code;
7191		}
7192	    }
7193	}
7194    }
7195  return 0;
7196}
7197
7198
7199/* The following page contains abstract data `bundle states' which are
7200   used for bundling insns (inserting nops and template generation).  */
7201
7202/* The following describes state of insn bundling.  */
7203
7204struct bundle_state
7205{
7206  /* Unique bundle state number to identify them in the debugging
7207     output  */
7208  int unique_num;
7209  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
7210  /* number nops before and after the insn  */
7211  short before_nops_num, after_nops_num;
7212  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7213                   insn */
7214  int cost;     /* cost of the state in cycles */
7215  int accumulated_insns_num; /* number of all previous insns including
7216				nops.  L is considered as 2 insns */
7217  int branch_deviation; /* deviation of previous branches from 3rd slots  */
7218  struct bundle_state *next;  /* next state with the same insn_num  */
7219  struct bundle_state *originator; /* originator (previous insn state)  */
7220  /* All bundle states are in the following chain.  */
7221  struct bundle_state *allocated_states_chain;
7222  /* The DFA State after issuing the insn and the nops.  */
7223  state_t dfa_state;
7224};
7225
7226/* The following is map insn number to the corresponding bundle state.  */
7227
7228static struct bundle_state **index_to_bundle_states;
7229
7230/* The unique number of next bundle state.  */
7231
7232static int bundle_states_num;
7233
7234/* All allocated bundle states are in the following chain.  */
7235
7236static struct bundle_state *allocated_bundle_states_chain;
7237
7238/* All allocated but not used bundle states are in the following
7239   chain.  */
7240
7241static struct bundle_state *free_bundle_state_chain;
7242
7243
7244/* The following function returns a free bundle state.  */
7245
7246static struct bundle_state *
7247get_free_bundle_state (void)
7248{
7249  struct bundle_state *result;
7250
7251  if (free_bundle_state_chain != NULL)
7252    {
7253      result = free_bundle_state_chain;
7254      free_bundle_state_chain = result->next;
7255    }
7256  else
7257    {
7258      result = xmalloc (sizeof (struct bundle_state));
7259      result->dfa_state = xmalloc (dfa_state_size);
7260      result->allocated_states_chain = allocated_bundle_states_chain;
7261      allocated_bundle_states_chain = result;
7262    }
7263  result->unique_num = bundle_states_num++;
7264  return result;
7265
7266}
7267
7268/* The following function frees given bundle state.  */
7269
7270static void
7271free_bundle_state (struct bundle_state *state)
7272{
7273  state->next = free_bundle_state_chain;
7274  free_bundle_state_chain = state;
7275}
7276
7277/* Start work with abstract data `bundle states'.  */
7278
7279static void
7280initiate_bundle_states (void)
7281{
7282  bundle_states_num = 0;
7283  free_bundle_state_chain = NULL;
7284  allocated_bundle_states_chain = NULL;
7285}
7286
7287/* Finish work with abstract data `bundle states'.  */
7288
7289static void
7290finish_bundle_states (void)
7291{
7292  struct bundle_state *curr_state, *next_state;
7293
7294  for (curr_state = allocated_bundle_states_chain;
7295       curr_state != NULL;
7296       curr_state = next_state)
7297    {
7298      next_state = curr_state->allocated_states_chain;
7299      free (curr_state->dfa_state);
7300      free (curr_state);
7301    }
7302}
7303
7304/* Hash table of the bundle states.  The key is dfa_state and insn_num
7305   of the bundle states.  */
7306
7307static htab_t bundle_state_table;
7308
7309/* The function returns hash of BUNDLE_STATE.  */
7310
7311static unsigned
7312bundle_state_hash (const void *bundle_state)
7313{
7314  const struct bundle_state *state = (struct bundle_state *) bundle_state;
7315  unsigned result, i;
7316
7317  for (result = i = 0; i < dfa_state_size; i++)
7318    result += (((unsigned char *) state->dfa_state) [i]
7319	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7320  return result + state->insn_num;
7321}
7322
7323/* The function returns nonzero if the bundle state keys are equal.  */
7324
7325static int
7326bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7327{
7328  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
7329  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
7330
7331  return (state1->insn_num == state2->insn_num
7332	  && memcmp (state1->dfa_state, state2->dfa_state,
7333		     dfa_state_size) == 0);
7334}
7335
7336/* The function inserts the BUNDLE_STATE into the hash table.  The
7337   function returns nonzero if the bundle has been inserted into the
7338   table.  The table contains the best bundle state with given key.  */
7339
7340static int
7341insert_bundle_state (struct bundle_state *bundle_state)
7342{
7343  void **entry_ptr;
7344
7345  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7346  if (*entry_ptr == NULL)
7347    {
7348      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7349      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7350      *entry_ptr = (void *) bundle_state;
7351      return TRUE;
7352    }
7353  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7354	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7355	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7356		   > bundle_state->accumulated_insns_num
7357		   || (((struct bundle_state *)
7358			*entry_ptr)->accumulated_insns_num
7359		       == bundle_state->accumulated_insns_num
7360		       && ((struct bundle_state *)
7361			   *entry_ptr)->branch_deviation
7362		       > bundle_state->branch_deviation))))
7363
7364    {
7365      struct bundle_state temp;
7366
7367      temp = *(struct bundle_state *) *entry_ptr;
7368      *(struct bundle_state *) *entry_ptr = *bundle_state;
7369      ((struct bundle_state *) *entry_ptr)->next = temp.next;
7370      *bundle_state = temp;
7371    }
7372  return FALSE;
7373}
7374
7375/* Start work with the hash table.  */
7376
7377static void
7378initiate_bundle_state_table (void)
7379{
7380  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7381				    (htab_del) 0);
7382}
7383
7384/* Finish work with the hash table.  */
7385
7386static void
7387finish_bundle_state_table (void)
7388{
7389  htab_delete (bundle_state_table);
7390}
7391
7392
7393
7394/* The following variable is a insn `nop' used to check bundle states
7395   with different number of inserted nops.  */
7396
7397static rtx ia64_nop;
7398
7399/* The following function tries to issue NOPS_NUM nops for the current
7400   state without advancing processor cycle.  If it failed, the
7401   function returns FALSE and frees the current state.  */
7402
7403static int
7404try_issue_nops (struct bundle_state *curr_state, int nops_num)
7405{
7406  int i;
7407
7408  for (i = 0; i < nops_num; i++)
7409    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7410      {
7411	free_bundle_state (curr_state);
7412	return FALSE;
7413      }
7414  return TRUE;
7415}
7416
7417/* The following function tries to issue INSN for the current
7418   state without advancing processor cycle.  If it failed, the
7419   function returns FALSE and frees the current state.  */
7420
7421static int
7422try_issue_insn (struct bundle_state *curr_state, rtx insn)
7423{
7424  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7425    {
7426      free_bundle_state (curr_state);
7427      return FALSE;
7428    }
7429  return TRUE;
7430}
7431
7432/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7433   starting with ORIGINATOR without advancing processor cycle.  If
7434   TRY_BUNDLE_END_P is TRUE, the function also/only (if
7435   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
7436   If it was successful, the function creates new bundle state and
7437   insert into the hash table and into `index_to_bundle_states'.  */
7438
7439static void
7440issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7441		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
7442{
7443  struct bundle_state *curr_state;
7444
7445  curr_state = get_free_bundle_state ();
7446  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7447  curr_state->insn = insn;
7448  curr_state->insn_num = originator->insn_num + 1;
7449  curr_state->cost = originator->cost;
7450  curr_state->originator = originator;
7451  curr_state->before_nops_num = before_nops_num;
7452  curr_state->after_nops_num = 0;
7453  curr_state->accumulated_insns_num
7454    = originator->accumulated_insns_num + before_nops_num;
7455  curr_state->branch_deviation = originator->branch_deviation;
7456  gcc_assert (insn);
7457  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
7458    {
7459      gcc_assert (GET_MODE (insn) != TImode);
7460      if (!try_issue_nops (curr_state, before_nops_num))
7461	return;
7462      if (!try_issue_insn (curr_state, insn))
7463	return;
7464      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7465      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7466	  && curr_state->accumulated_insns_num % 3 != 0)
7467	{
7468	  free_bundle_state (curr_state);
7469	  return;
7470	}
7471    }
7472  else if (GET_MODE (insn) != TImode)
7473    {
7474      if (!try_issue_nops (curr_state, before_nops_num))
7475	return;
7476      if (!try_issue_insn (curr_state, insn))
7477	return;
7478      curr_state->accumulated_insns_num++;
7479      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7480		  && asm_noperands (PATTERN (insn)) < 0);
7481
7482      if (ia64_safe_type (insn) == TYPE_L)
7483	curr_state->accumulated_insns_num++;
7484    }
7485  else
7486    {
7487      /* If this is an insn that must be first in a group, then don't allow
7488	 nops to be emitted before it.  Currently, alloc is the only such
7489	 supported instruction.  */
7490      /* ??? The bundling automatons should handle this for us, but they do
7491	 not yet have support for the first_insn attribute.  */
7492      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7493	{
7494	  free_bundle_state (curr_state);
7495	  return;
7496	}
7497
7498      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7499      state_transition (curr_state->dfa_state, NULL);
7500      curr_state->cost++;
7501      if (!try_issue_nops (curr_state, before_nops_num))
7502	return;
7503      if (!try_issue_insn (curr_state, insn))
7504	return;
7505      curr_state->accumulated_insns_num++;
7506      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7507	  || asm_noperands (PATTERN (insn)) >= 0)
7508	{
7509	  /* Finish bundle containing asm insn.  */
7510	  curr_state->after_nops_num
7511	    = 3 - curr_state->accumulated_insns_num % 3;
7512	  curr_state->accumulated_insns_num
7513	    += 3 - curr_state->accumulated_insns_num % 3;
7514	}
7515      else if (ia64_safe_type (insn) == TYPE_L)
7516	curr_state->accumulated_insns_num++;
7517    }
7518  if (ia64_safe_type (insn) == TYPE_B)
7519    curr_state->branch_deviation
7520      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7521  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7522    {
7523      if (!only_bundle_end_p && insert_bundle_state (curr_state))
7524	{
7525	  state_t dfa_state;
7526	  struct bundle_state *curr_state1;
7527	  struct bundle_state *allocated_states_chain;
7528
7529	  curr_state1 = get_free_bundle_state ();
7530	  dfa_state = curr_state1->dfa_state;
7531	  allocated_states_chain = curr_state1->allocated_states_chain;
7532	  *curr_state1 = *curr_state;
7533	  curr_state1->dfa_state = dfa_state;
7534	  curr_state1->allocated_states_chain = allocated_states_chain;
7535	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7536		  dfa_state_size);
7537	  curr_state = curr_state1;
7538	}
7539      if (!try_issue_nops (curr_state,
7540			   3 - curr_state->accumulated_insns_num % 3))
7541	return;
7542      curr_state->after_nops_num
7543	= 3 - curr_state->accumulated_insns_num % 3;
7544      curr_state->accumulated_insns_num
7545	+= 3 - curr_state->accumulated_insns_num % 3;
7546    }
7547  if (!insert_bundle_state (curr_state))
7548    free_bundle_state (curr_state);
7549  return;
7550}
7551
7552/* The following function returns position in the two window bundle
7553   for given STATE.  */
7554
7555static int
7556get_max_pos (state_t state)
7557{
7558  if (cpu_unit_reservation_p (state, pos_6))
7559    return 6;
7560  else if (cpu_unit_reservation_p (state, pos_5))
7561    return 5;
7562  else if (cpu_unit_reservation_p (state, pos_4))
7563    return 4;
7564  else if (cpu_unit_reservation_p (state, pos_3))
7565    return 3;
7566  else if (cpu_unit_reservation_p (state, pos_2))
7567    return 2;
7568  else if (cpu_unit_reservation_p (state, pos_1))
7569    return 1;
7570  else
7571    return 0;
7572}
7573
7574/* The function returns code of a possible template for given position
7575   and state.  The function should be called only with 2 values of
7576   position equal to 3 or 6.  We avoid generating F NOPs by putting
7577   templates containing F insns at the end of the template search
7578   because undocumented anomaly in McKinley derived cores which can
7579   cause stalls if an F-unit insn (including a NOP) is issued within a
7580   six-cycle window after reading certain application registers (such
7581   as ar.bsp).  Furthermore, power-considerations also argue against
7582   the use of F-unit instructions unless they're really needed.  */
7583
7584static int
7585get_template (state_t state, int pos)
7586{
7587  switch (pos)
7588    {
7589    case 3:
7590      if (cpu_unit_reservation_p (state, _0mmi_))
7591	return 1;
7592      else if (cpu_unit_reservation_p (state, _0mii_))
7593	return 0;
7594      else if (cpu_unit_reservation_p (state, _0mmb_))
7595	return 7;
7596      else if (cpu_unit_reservation_p (state, _0mib_))
7597	return 6;
7598      else if (cpu_unit_reservation_p (state, _0mbb_))
7599	return 5;
7600      else if (cpu_unit_reservation_p (state, _0bbb_))
7601	return 4;
7602      else if (cpu_unit_reservation_p (state, _0mmf_))
7603	return 3;
7604      else if (cpu_unit_reservation_p (state, _0mfi_))
7605	return 2;
7606      else if (cpu_unit_reservation_p (state, _0mfb_))
7607	return 8;
7608      else if (cpu_unit_reservation_p (state, _0mlx_))
7609	return 9;
7610      else
7611	gcc_unreachable ();
7612    case 6:
7613      if (cpu_unit_reservation_p (state, _1mmi_))
7614	return 1;
7615      else if (cpu_unit_reservation_p (state, _1mii_))
7616	return 0;
7617      else if (cpu_unit_reservation_p (state, _1mmb_))
7618	return 7;
7619      else if (cpu_unit_reservation_p (state, _1mib_))
7620	return 6;
7621      else if (cpu_unit_reservation_p (state, _1mbb_))
7622	return 5;
7623      else if (cpu_unit_reservation_p (state, _1bbb_))
7624	return 4;
7625      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7626	return 3;
7627      else if (cpu_unit_reservation_p (state, _1mfi_))
7628	return 2;
7629      else if (cpu_unit_reservation_p (state, _1mfb_))
7630	return 8;
7631      else if (cpu_unit_reservation_p (state, _1mlx_))
7632	return 9;
7633      else
7634	gcc_unreachable ();
7635    default:
7636      gcc_unreachable ();
7637    }
7638}
7639
7640/* The following function returns an insn important for insn bundling
7641   followed by INSN and before TAIL.  */
7642
7643static rtx
7644get_next_important_insn (rtx insn, rtx tail)
7645{
7646  for (; insn && insn != tail; insn = NEXT_INSN (insn))
7647    if (INSN_P (insn)
7648	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7649	&& GET_CODE (PATTERN (insn)) != USE
7650	&& GET_CODE (PATTERN (insn)) != CLOBBER)
7651      return insn;
7652  return NULL_RTX;
7653}
7654
7655/* Add a bundle selector TEMPLATE0 before INSN.  */
7656
7657static void
7658ia64_add_bundle_selector_before (int template0, rtx insn)
7659{
7660  rtx b = gen_bundle_selector (GEN_INT (template0));
7661
7662  ia64_emit_insn_before (b, insn);
7663#if NR_BUNDLES == 10
7664  if ((template0 == 4 || template0 == 5)
7665      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7666    {
7667      int i;
7668      rtx note = NULL_RTX;
7669
7670      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
7671	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
7672	 to following nops, as br.call sets rp to the address of following
7673	 bundle and therefore an EH region end must be on a bundle
7674	 boundary.  */
7675      insn = PREV_INSN (insn);
7676      for (i = 0; i < 3; i++)
7677	{
7678	  do
7679	    insn = next_active_insn (insn);
7680	  while (GET_CODE (insn) == INSN
7681		 && get_attr_empty (insn) == EMPTY_YES);
7682	  if (GET_CODE (insn) == CALL_INSN)
7683	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7684	  else if (note)
7685	    {
7686	      int code;
7687
7688	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7689			  || code == CODE_FOR_nop_b);
7690	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7691		note = NULL_RTX;
7692	      else
7693		REG_NOTES (insn)
7694		  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7695				       REG_NOTES (insn));
7696	    }
7697	}
7698    }
7699#endif
7700}
7701
7702/* The following function does insn bundling.  Bundling means
7703   inserting templates and nop insns to fit insn groups into permitted
7704   templates.  Instruction scheduling uses NDFA (non-deterministic
7705   finite automata) encoding informations about the templates and the
7706   inserted nops.  Nondeterminism of the automata permits follows
7707   all possible insn sequences very fast.
7708
7709   Unfortunately it is not possible to get information about inserting
7710   nop insns and used templates from the automata states.  The
7711   automata only says that we can issue an insn possibly inserting
7712   some nops before it and using some template.  Therefore insn
7713   bundling in this function is implemented by using DFA
7714   (deterministic finite automata).  We follow all possible insn
7715   sequences by inserting 0-2 nops (that is what the NDFA describe for
7716   insn scheduling) before/after each insn being bundled.  We know the
7717   start of simulated processor cycle from insn scheduling (insn
7718   starting a new cycle has TImode).
7719
7720   Simple implementation of insn bundling would create enormous
7721   number of possible insn sequences satisfying information about new
7722   cycle ticks taken from the insn scheduling.  To make the algorithm
7723   practical we use dynamic programming.  Each decision (about
7724   inserting nops and implicitly about previous decisions) is described
7725   by structure bundle_state (see above).  If we generate the same
7726   bundle state (key is automaton state after issuing the insns and
7727   nops for it), we reuse already generated one.  As consequence we
7728   reject some decisions which cannot improve the solution and
7729   reduce memory for the algorithm.
7730
7731   When we reach the end of EBB (extended basic block), we choose the
7732   best sequence and then, moving back in EBB, insert templates for
7733   the best alternative.  The templates are taken from querying
7734   automaton state for each insn in chosen bundle states.
7735
7736   So the algorithm makes two (forward and backward) passes through
7737   EBB.  There is an additional forward pass through EBB for Itanium1
7738   processor.  This pass inserts more nops to make dependency between
7739   a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
7740
7741static void
7742bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7743{
7744  struct bundle_state *curr_state, *next_state, *best_state;
7745  rtx insn, next_insn;
7746  int insn_num;
7747  int i, bundle_end_p, only_bundle_end_p, asm_p;
7748  int pos = 0, max_pos, template0, template1;
7749  rtx b;
7750  rtx nop;
7751  enum attr_type type;
7752
7753  insn_num = 0;
7754  /* Count insns in the EBB.  */
7755  for (insn = NEXT_INSN (prev_head_insn);
7756       insn && insn != tail;
7757       insn = NEXT_INSN (insn))
7758    if (INSN_P (insn))
7759      insn_num++;
7760  if (insn_num == 0)
7761    return;
7762  bundling_p = 1;
7763  dfa_clean_insn_cache ();
7764  initiate_bundle_state_table ();
7765  index_to_bundle_states = xmalloc ((insn_num + 2)
7766				    * sizeof (struct bundle_state *));
7767  /* First (forward) pass -- generation of bundle states.  */
7768  curr_state = get_free_bundle_state ();
7769  curr_state->insn = NULL;
7770  curr_state->before_nops_num = 0;
7771  curr_state->after_nops_num = 0;
7772  curr_state->insn_num = 0;
7773  curr_state->cost = 0;
7774  curr_state->accumulated_insns_num = 0;
7775  curr_state->branch_deviation = 0;
7776  curr_state->next = NULL;
7777  curr_state->originator = NULL;
7778  state_reset (curr_state->dfa_state);
7779  index_to_bundle_states [0] = curr_state;
7780  insn_num = 0;
7781  /* Shift cycle mark if it is put on insn which could be ignored.  */
7782  for (insn = NEXT_INSN (prev_head_insn);
7783       insn != tail;
7784       insn = NEXT_INSN (insn))
7785    if (INSN_P (insn)
7786	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7787	    || GET_CODE (PATTERN (insn)) == USE
7788	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7789	&& GET_MODE (insn) == TImode)
7790      {
7791	PUT_MODE (insn, VOIDmode);
7792	for (next_insn = NEXT_INSN (insn);
7793	     next_insn != tail;
7794	     next_insn = NEXT_INSN (next_insn))
7795	  if (INSN_P (next_insn)
7796	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7797	      && GET_CODE (PATTERN (next_insn)) != USE
7798	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7799	    {
7800	      PUT_MODE (next_insn, TImode);
7801	      break;
7802	    }
7803      }
7804  /* Forward pass: generation of bundle states.  */
7805  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7806       insn != NULL_RTX;
7807       insn = next_insn)
7808    {
7809      gcc_assert (INSN_P (insn)
7810		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7811		  && GET_CODE (PATTERN (insn)) != USE
7812		  && GET_CODE (PATTERN (insn)) != CLOBBER);
7813      type = ia64_safe_type (insn);
7814      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7815      insn_num++;
7816      index_to_bundle_states [insn_num] = NULL;
7817      for (curr_state = index_to_bundle_states [insn_num - 1];
7818	   curr_state != NULL;
7819	   curr_state = next_state)
7820	{
7821	  pos = curr_state->accumulated_insns_num % 3;
7822	  next_state = curr_state->next;
7823	  /* We must fill up the current bundle in order to start a
7824	     subsequent asm insn in a new bundle.  Asm insn is always
7825	     placed in a separate bundle.  */
7826	  only_bundle_end_p
7827	    = (next_insn != NULL_RTX
7828	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7829	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7830	  /* We may fill up the current bundle if it is the cycle end
7831	     without a group barrier.  */
7832	  bundle_end_p
7833	    = (only_bundle_end_p || next_insn == NULL_RTX
7834	       || (GET_MODE (next_insn) == TImode
7835		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7836	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7837	      || type == TYPE_S
7838	      /* We need to insert 2 nops for cases like M_MII.  To
7839		 guarantee issuing all insns on the same cycle for
7840		 Itanium 1, we need to issue 2 nops after the first M
7841		 insn (MnnMII where n is a nop insn).  */
7842	      || ((type == TYPE_M || type == TYPE_A)
7843		  && ia64_tune == PROCESSOR_ITANIUM
7844		  && !bundle_end_p && pos == 1))
7845	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7846				 only_bundle_end_p);
7847	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7848			       only_bundle_end_p);
7849	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7850			       only_bundle_end_p);
7851	}
7852      gcc_assert (index_to_bundle_states [insn_num]);
7853      for (curr_state = index_to_bundle_states [insn_num];
7854	   curr_state != NULL;
7855	   curr_state = curr_state->next)
7856	if (verbose >= 2 && dump)
7857	  {
7858	    /* This structure is taken from generated code of the
7859	       pipeline hazard recognizer (see file insn-attrtab.c).
7860	       Please don't forget to change the structure if a new
7861	       automaton is added to .md file.  */
7862	    struct DFA_chip
7863	    {
7864	      unsigned short one_automaton_state;
7865	      unsigned short oneb_automaton_state;
7866	      unsigned short two_automaton_state;
7867	      unsigned short twob_automaton_state;
7868	    };
7869
7870	    fprintf
7871	      (dump,
7872	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7873	       curr_state->unique_num,
7874	       (curr_state->originator == NULL
7875		? -1 : curr_state->originator->unique_num),
7876	       curr_state->cost,
7877	       curr_state->before_nops_num, curr_state->after_nops_num,
7878	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
7879	       (ia64_tune == PROCESSOR_ITANIUM
7880		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7881		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7882	       INSN_UID (insn));
7883	  }
7884    }
7885
7886  /* We should find a solution because the 2nd insn scheduling has
7887     found one.  */
7888  gcc_assert (index_to_bundle_states [insn_num]);
7889  /* Find a state corresponding to the best insn sequence.  */
7890  best_state = NULL;
7891  for (curr_state = index_to_bundle_states [insn_num];
7892       curr_state != NULL;
7893       curr_state = curr_state->next)
7894    /* We are just looking at the states with fully filled up last
7895       bundle.  The first we prefer insn sequences with minimal cost
7896       then with minimal inserted nops and finally with branch insns
7897       placed in the 3rd slots.  */
7898    if (curr_state->accumulated_insns_num % 3 == 0
7899	&& (best_state == NULL || best_state->cost > curr_state->cost
7900	    || (best_state->cost == curr_state->cost
7901		&& (curr_state->accumulated_insns_num
7902		    < best_state->accumulated_insns_num
7903		    || (curr_state->accumulated_insns_num
7904			== best_state->accumulated_insns_num
7905			&& curr_state->branch_deviation
7906			< best_state->branch_deviation)))))
7907      best_state = curr_state;
7908  /* Second (backward) pass: adding nops and templates.  */
7909  insn_num = best_state->before_nops_num;
7910  template0 = template1 = -1;
7911  for (curr_state = best_state;
7912       curr_state->originator != NULL;
7913       curr_state = curr_state->originator)
7914    {
7915      insn = curr_state->insn;
7916      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7917	       || asm_noperands (PATTERN (insn)) >= 0);
7918      insn_num++;
7919      if (verbose >= 2 && dump)
7920	{
7921	  struct DFA_chip
7922	  {
7923	    unsigned short one_automaton_state;
7924	    unsigned short oneb_automaton_state;
7925	    unsigned short two_automaton_state;
7926	    unsigned short twob_automaton_state;
7927	  };
7928
7929	  fprintf
7930	    (dump,
7931	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7932	     curr_state->unique_num,
7933	     (curr_state->originator == NULL
7934	      ? -1 : curr_state->originator->unique_num),
7935	     curr_state->cost,
7936	     curr_state->before_nops_num, curr_state->after_nops_num,
7937	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
7938	     (ia64_tune == PROCESSOR_ITANIUM
7939	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7940	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7941	     INSN_UID (insn));
7942	}
7943      /* Find the position in the current bundle window.  The window can
7944	 contain at most two bundles.  Two bundle window means that
7945	 the processor will make two bundle rotation.  */
7946      max_pos = get_max_pos (curr_state->dfa_state);
7947      if (max_pos == 6
7948	  /* The following (negative template number) means that the
7949	     processor did one bundle rotation.  */
7950	  || (max_pos == 3 && template0 < 0))
7951	{
7952	  /* We are at the end of the window -- find template(s) for
7953	     its bundle(s).  */
7954	  pos = max_pos;
7955	  if (max_pos == 3)
7956	    template0 = get_template (curr_state->dfa_state, 3);
7957	  else
7958	    {
7959	      template1 = get_template (curr_state->dfa_state, 3);
7960	      template0 = get_template (curr_state->dfa_state, 6);
7961	    }
7962	}
7963      if (max_pos > 3 && template1 < 0)
7964	/* It may happen when we have the stop inside a bundle.  */
7965	{
7966	  gcc_assert (pos <= 3);
7967	  template1 = get_template (curr_state->dfa_state, 3);
7968	  pos += 3;
7969	}
7970      if (!asm_p)
7971	/* Emit nops after the current insn.  */
7972	for (i = 0; i < curr_state->after_nops_num; i++)
7973	  {
7974	    nop = gen_nop ();
7975	    emit_insn_after (nop, insn);
7976	    pos--;
7977	    gcc_assert (pos >= 0);
7978	    if (pos % 3 == 0)
7979	      {
7980		/* We are at the start of a bundle: emit the template
7981		   (it should be defined).  */
7982		gcc_assert (template0 >= 0);
7983		ia64_add_bundle_selector_before (template0, nop);
7984		/* If we have two bundle window, we make one bundle
7985		   rotation.  Otherwise template0 will be undefined
7986		   (negative value).  */
7987		template0 = template1;
7988		template1 = -1;
7989	      }
7990	  }
7991      /* Move the position backward in the window.  Group barrier has
7992	 no slot.  Asm insn takes all bundle.  */
7993      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
7994	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
7995	  && asm_noperands (PATTERN (insn)) < 0)
7996	pos--;
7997      /* Long insn takes 2 slots.  */
7998      if (ia64_safe_type (insn) == TYPE_L)
7999	pos--;
8000      gcc_assert (pos >= 0);
8001      if (pos % 3 == 0
8002	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8003	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8004	  && asm_noperands (PATTERN (insn)) < 0)
8005	{
8006	  /* The current insn is at the bundle start: emit the
8007	     template.  */
8008	  gcc_assert (template0 >= 0);
8009	  ia64_add_bundle_selector_before (template0, insn);
8010	  b = PREV_INSN (insn);
8011	  insn = b;
8012	  /* See comment above in analogous place for emitting nops
8013	     after the insn.  */
8014	  template0 = template1;
8015	  template1 = -1;
8016	}
8017      /* Emit nops after the current insn.  */
8018      for (i = 0; i < curr_state->before_nops_num; i++)
8019	{
8020	  nop = gen_nop ();
8021	  ia64_emit_insn_before (nop, insn);
8022	  nop = PREV_INSN (insn);
8023	  insn = nop;
8024	  pos--;
8025	  gcc_assert (pos >= 0);
8026	  if (pos % 3 == 0)
8027	    {
8028	      /* See comment above in analogous place for emitting nops
8029		 after the insn.  */
8030	      gcc_assert (template0 >= 0);
8031	      ia64_add_bundle_selector_before (template0, insn);
8032	      b = PREV_INSN (insn);
8033	      insn = b;
8034	      template0 = template1;
8035	      template1 = -1;
8036	    }
8037	}
8038    }
8039  if (ia64_tune == PROCESSOR_ITANIUM)
8040    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8041       Itanium1 has a strange design, if the distance between an insn
8042       and dependent MM-insn is less 4 then we have a 6 additional
8043       cycles stall.  So we make the distance equal to 4 cycles if it
8044       is less.  */
8045    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8046	 insn != NULL_RTX;
8047	 insn = next_insn)
8048      {
8049	gcc_assert (INSN_P (insn)
8050		    && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8051		    && GET_CODE (PATTERN (insn)) != USE
8052		    && GET_CODE (PATTERN (insn)) != CLOBBER);
8053	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8054	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8055	  /* We found a MM-insn which needs additional cycles.  */
8056	  {
8057	    rtx last;
8058	    int i, j, n;
8059	    int pred_stop_p;
8060
8061	    /* Now we are searching for a template of the bundle in
8062	       which the MM-insn is placed and the position of the
8063	       insn in the bundle (0, 1, 2).  Also we are searching
8064	       for that there is a stop before the insn.  */
8065	    last = prev_active_insn (insn);
8066	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8067	    if (pred_stop_p)
8068	      last = prev_active_insn (last);
8069	    n = 0;
8070	    for (;; last = prev_active_insn (last))
8071	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
8072		{
8073		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8074		  if (template0 == 9)
8075		    /* The insn is in MLX bundle.  Change the template
8076		       onto MFI because we will add nops before the
8077		       insn.  It simplifies subsequent code a lot.  */
8078		    PATTERN (last)
8079		      = gen_bundle_selector (const2_rtx); /* -> MFI */
8080		  break;
8081		}
8082	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8083		       && (ia64_safe_itanium_class (last)
8084			   != ITANIUM_CLASS_IGNORE))
8085		n++;
8086	    /* Some check of correctness: the stop is not at the
8087	       bundle start, there are no more 3 insns in the bundle,
8088	       and the MM-insn is not at the start of bundle with
8089	       template MLX.  */
8090	    gcc_assert ((!pred_stop_p || n)
8091			&& n <= 2
8092			&& (template0 != 9 || !n));
8093	    /* Put nops after the insn in the bundle.  */
8094	    for (j = 3 - n; j > 0; j --)
8095	      ia64_emit_insn_before (gen_nop (), insn);
8096	    /* It takes into account that we will add more N nops
8097	       before the insn lately -- please see code below.  */
8098	    add_cycles [INSN_UID (insn)]--;
8099	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8100	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8101				     insn);
8102	    if (pred_stop_p)
8103	      add_cycles [INSN_UID (insn)]--;
8104	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8105	      {
8106		/* Insert "MII;" template.  */
8107		ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8108				       insn);
8109		ia64_emit_insn_before (gen_nop (), insn);
8110		ia64_emit_insn_before (gen_nop (), insn);
8111		if (i > 1)
8112		  {
8113		    /* To decrease code size, we use "MI;I;"
8114		       template.  */
8115		    ia64_emit_insn_before
8116		      (gen_insn_group_barrier (GEN_INT (3)), insn);
8117		    i--;
8118		  }
8119		ia64_emit_insn_before (gen_nop (), insn);
8120		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8121				       insn);
8122	      }
8123	    /* Put the MM-insn in the same slot of a bundle with the
8124	       same template as the original one.  */
8125	    ia64_add_bundle_selector_before (template0, insn);
8126	    /* To put the insn in the same slot, add necessary number
8127	       of nops.  */
8128	    for (j = n; j > 0; j --)
8129	      ia64_emit_insn_before (gen_nop (), insn);
8130	    /* Put the stop if the original bundle had it.  */
8131	    if (pred_stop_p)
8132	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8133				     insn);
8134	  }
8135      }
8136  free (index_to_bundle_states);
8137  finish_bundle_state_table ();
8138  bundling_p = 0;
8139  dfa_clean_insn_cache ();
8140}
8141
8142/* The following function is called at the end of scheduling BB or
8143   EBB.  After reload, it inserts stop bits and does insn bundling.  */
8144
8145static void
8146ia64_sched_finish (FILE *dump, int sched_verbose)
8147{
8148  if (sched_verbose)
8149    fprintf (dump, "// Finishing schedule.\n");
8150  if (!reload_completed)
8151    return;
8152  if (reload_completed)
8153    {
8154      final_emit_insn_group_barriers (dump);
8155      bundling (dump, sched_verbose, current_sched_info->prev_head,
8156		current_sched_info->next_tail);
8157      if (sched_verbose && dump)
8158	fprintf (dump, "//    finishing %d-%d\n",
8159		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8160		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8161
8162      return;
8163    }
8164}
8165
8166/* The following function inserts stop bits in scheduled BB or EBB.  */
8167
8168static void
8169final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8170{
8171  rtx insn;
8172  int need_barrier_p = 0;
8173  rtx prev_insn = NULL_RTX;
8174
8175  init_insn_group_barriers ();
8176
8177  for (insn = NEXT_INSN (current_sched_info->prev_head);
8178       insn != current_sched_info->next_tail;
8179       insn = NEXT_INSN (insn))
8180    {
8181      if (GET_CODE (insn) == BARRIER)
8182	{
8183	  rtx last = prev_active_insn (insn);
8184
8185	  if (! last)
8186	    continue;
8187	  if (GET_CODE (last) == JUMP_INSN
8188	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8189	    last = prev_active_insn (last);
8190	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8191	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8192
8193	  init_insn_group_barriers ();
8194	  need_barrier_p = 0;
8195	  prev_insn = NULL_RTX;
8196	}
8197      else if (INSN_P (insn))
8198	{
8199	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8200	    {
8201	      init_insn_group_barriers ();
8202	      need_barrier_p = 0;
8203	      prev_insn = NULL_RTX;
8204	    }
8205	  else if (need_barrier_p || group_barrier_needed (insn))
8206	    {
8207	      if (TARGET_EARLY_STOP_BITS)
8208		{
8209		  rtx last;
8210
8211		  for (last = insn;
8212		       last != current_sched_info->prev_head;
8213		       last = PREV_INSN (last))
8214		    if (INSN_P (last) && GET_MODE (last) == TImode
8215			&& stops_p [INSN_UID (last)])
8216		      break;
8217		  if (last == current_sched_info->prev_head)
8218		    last = insn;
8219		  last = prev_active_insn (last);
8220		  if (last
8221		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8222		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8223				     last);
8224		  init_insn_group_barriers ();
8225		  for (last = NEXT_INSN (last);
8226		       last != insn;
8227		       last = NEXT_INSN (last))
8228		    if (INSN_P (last))
8229		      group_barrier_needed (last);
8230		}
8231	      else
8232		{
8233		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8234				    insn);
8235		  init_insn_group_barriers ();
8236		}
8237	      group_barrier_needed (insn);
8238	      prev_insn = NULL_RTX;
8239	    }
8240	  else if (recog_memoized (insn) >= 0)
8241	    prev_insn = insn;
8242	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
8243			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
8244			    || asm_noperands (PATTERN (insn)) >= 0);
8245	}
8246    }
8247}
8248
8249
8250
8251/* If the following function returns TRUE, we will use the DFA
8252   insn scheduler.  */
8253
8254static int
8255ia64_first_cycle_multipass_dfa_lookahead (void)
8256{
8257  return (reload_completed ? 6 : 4);
8258}
8259
8260/* The following function initiates variable `dfa_pre_cycle_insn'.  */
8261
8262static void
8263ia64_init_dfa_pre_cycle_insn (void)
8264{
8265  if (temp_dfa_state == NULL)
8266    {
8267      dfa_state_size = state_size ();
8268      temp_dfa_state = xmalloc (dfa_state_size);
8269      prev_cycle_state = xmalloc (dfa_state_size);
8270    }
8271  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8272  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8273  recog_memoized (dfa_pre_cycle_insn);
8274  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8275  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8276  recog_memoized (dfa_stop_insn);
8277}
8278
8279/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8280   used by the DFA insn scheduler.  */
8281
8282static rtx
8283ia64_dfa_pre_cycle_insn (void)
8284{
8285  return dfa_pre_cycle_insn;
8286}
8287
8288/* The following function returns TRUE if PRODUCER (of type ilog or
8289   ld) produces address for CONSUMER (of type st or stf). */
8290
8291int
8292ia64_st_address_bypass_p (rtx producer, rtx consumer)
8293{
8294  rtx dest, reg, mem;
8295
8296  gcc_assert (producer && consumer);
8297  dest = ia64_single_set (producer);
8298  gcc_assert (dest);
8299  reg = SET_DEST (dest);
8300  gcc_assert (reg);
8301  if (GET_CODE (reg) == SUBREG)
8302    reg = SUBREG_REG (reg);
8303  gcc_assert (GET_CODE (reg) == REG);
8304
8305  dest = ia64_single_set (consumer);
8306  gcc_assert (dest);
8307  mem = SET_DEST (dest);
8308  gcc_assert (mem && GET_CODE (mem) == MEM);
8309  return reg_mentioned_p (reg, mem);
8310}
8311
8312/* The following function returns TRUE if PRODUCER (of type ilog or
8313   ld) produces address for CONSUMER (of type ld or fld). */
8314
8315int
8316ia64_ld_address_bypass_p (rtx producer, rtx consumer)
8317{
8318  rtx dest, src, reg, mem;
8319
8320  gcc_assert (producer && consumer);
8321  dest = ia64_single_set (producer);
8322  gcc_assert (dest);
8323  reg = SET_DEST (dest);
8324  gcc_assert (reg);
8325  if (GET_CODE (reg) == SUBREG)
8326    reg = SUBREG_REG (reg);
8327  gcc_assert (GET_CODE (reg) == REG);
8328
8329  src = ia64_single_set (consumer);
8330  gcc_assert (src);
8331  mem = SET_SRC (src);
8332  gcc_assert (mem);
8333
8334  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8335    mem = XVECEXP (mem, 0, 0);
8336  else if (GET_CODE (mem) == IF_THEN_ELSE)
8337    /* ??? Is this bypass necessary for ld.c?  */
8338    {
8339      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8340      mem = XEXP (mem, 1);
8341    }
8342
8343  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8344    mem = XEXP (mem, 0);
8345
8346  if (GET_CODE (mem) == UNSPEC)
8347    {
8348      int c = XINT (mem, 1);
8349
8350      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8351      mem = XVECEXP (mem, 0, 0);
8352    }
8353
8354  /* Note that LO_SUM is used for GOT loads.  */
8355  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
8356
8357  return reg_mentioned_p (reg, mem);
8358}
8359
8360/* The following function returns TRUE if INSN produces address for a
8361   load/store insn.  We will place such insns into M slot because it
8362   decreases its latency time.  */
8363
8364int
8365ia64_produce_address_p (rtx insn)
8366{
8367  return insn->call;
8368}
8369
8370
8371/* Emit pseudo-ops for the assembler to describe predicate relations.
8372   At present this assumes that we only consider predicate pairs to
8373   be mutex, and that the assembler can deduce proper values from
8374   straight-line code.  */
8375
8376static void
8377emit_predicate_relation_info (void)
8378{
8379  basic_block bb;
8380
8381  FOR_EACH_BB_REVERSE (bb)
8382    {
8383      int r;
8384      rtx head = BB_HEAD (bb);
8385
8386      /* We only need such notes at code labels.  */
8387      if (GET_CODE (head) != CODE_LABEL)
8388	continue;
8389      if (GET_CODE (NEXT_INSN (head)) == NOTE
8390	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
8391	head = NEXT_INSN (head);
8392
8393      /* Skip p0, which may be thought to be live due to (reg:DI p0)
8394	 grabbing the entire block of predicate registers.  */
8395      for (r = PR_REG (2); r < PR_REG (64); r += 2)
8396	if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
8397	  {
8398	    rtx p = gen_rtx_REG (BImode, r);
8399	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
8400	    if (head == BB_END (bb))
8401	      BB_END (bb) = n;
8402	    head = n;
8403	  }
8404    }
8405
8406  /* Look for conditional calls that do not return, and protect predicate
8407     relations around them.  Otherwise the assembler will assume the call
8408     returns, and complain about uses of call-clobbered predicates after
8409     the call.  */
8410  FOR_EACH_BB_REVERSE (bb)
8411    {
8412      rtx insn = BB_HEAD (bb);
8413
8414      while (1)
8415	{
8416	  if (GET_CODE (insn) == CALL_INSN
8417	      && GET_CODE (PATTERN (insn)) == COND_EXEC
8418	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8419	    {
8420	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8421	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
8422	      if (BB_HEAD (bb) == insn)
8423		BB_HEAD (bb) = b;
8424	      if (BB_END (bb) == insn)
8425		BB_END (bb) = a;
8426	    }
8427
8428	  if (insn == BB_END (bb))
8429	    break;
8430	  insn = NEXT_INSN (insn);
8431	}
8432    }
8433}
8434
8435/* Perform machine dependent operations on the rtl chain INSNS.  */
8436
8437static void
8438ia64_reorg (void)
8439{
8440  /* We are freeing block_for_insn in the toplev to keep compatibility
8441     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
8442  compute_bb_for_insn ();
8443
8444  /* If optimizing, we'll have split before scheduling.  */
8445  if (optimize == 0)
8446    split_all_insns (0);
8447
8448  /* ??? update_life_info_in_dirty_blocks fails to terminate during
8449     non-optimizing bootstrap.  */
8450  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
8451
8452  if (optimize && ia64_flag_schedule_insns2)
8453    {
8454      timevar_push (TV_SCHED2);
8455      ia64_final_schedule = 1;
8456
8457      initiate_bundle_states ();
8458      ia64_nop = make_insn_raw (gen_nop ());
8459      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8460      recog_memoized (ia64_nop);
8461      clocks_length = get_max_uid () + 1;
8462      stops_p = xcalloc (1, clocks_length);
8463      if (ia64_tune == PROCESSOR_ITANIUM)
8464	{
8465	  clocks = xcalloc (clocks_length, sizeof (int));
8466	  add_cycles = xcalloc (clocks_length, sizeof (int));
8467	}
8468      if (ia64_tune == PROCESSOR_ITANIUM2)
8469	{
8470	  pos_1 = get_cpu_unit_code ("2_1");
8471	  pos_2 = get_cpu_unit_code ("2_2");
8472	  pos_3 = get_cpu_unit_code ("2_3");
8473	  pos_4 = get_cpu_unit_code ("2_4");
8474	  pos_5 = get_cpu_unit_code ("2_5");
8475	  pos_6 = get_cpu_unit_code ("2_6");
8476	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
8477	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8478	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8479	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8480	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8481	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8482	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
8483	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8484	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8485	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8486	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
8487	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8488	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8489	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8490	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8491	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8492	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
8493	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8494	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8495	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8496	}
8497      else
8498	{
8499	  pos_1 = get_cpu_unit_code ("1_1");
8500	  pos_2 = get_cpu_unit_code ("1_2");
8501	  pos_3 = get_cpu_unit_code ("1_3");
8502	  pos_4 = get_cpu_unit_code ("1_4");
8503	  pos_5 = get_cpu_unit_code ("1_5");
8504	  pos_6 = get_cpu_unit_code ("1_6");
8505	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
8506	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8507	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8508	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8509	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8510	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8511	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
8512	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8513	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8514	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8515	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
8516	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8517	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8518	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8519	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8520	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8521	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
8522	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8523	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8524	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8525	}
8526      schedule_ebbs ();
8527      finish_bundle_states ();
8528      if (ia64_tune == PROCESSOR_ITANIUM)
8529	{
8530	  free (add_cycles);
8531	  free (clocks);
8532	}
8533      free (stops_p);
8534      stops_p = NULL;
8535      emit_insn_group_barriers (dump_file);
8536
8537      ia64_final_schedule = 0;
8538      timevar_pop (TV_SCHED2);
8539    }
8540  else
8541    emit_all_insn_group_barriers (dump_file);
8542
8543  /* A call must not be the last instruction in a function, so that the
8544     return address is still within the function, so that unwinding works
8545     properly.  Note that IA-64 differs from dwarf2 on this point.  */
8546  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8547    {
8548      rtx insn;
8549      int saw_stop = 0;
8550
8551      insn = get_last_insn ();
8552      if (! INSN_P (insn))
8553        insn = prev_active_insn (insn);
8554      /* Skip over insns that expand to nothing.  */
8555      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8556        {
8557	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8558	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8559	    saw_stop = 1;
8560	  insn = prev_active_insn (insn);
8561	}
8562      if (GET_CODE (insn) == CALL_INSN)
8563	{
8564	  if (! saw_stop)
8565	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8566	  emit_insn (gen_break_f ());
8567	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8568	}
8569    }
8570
8571  emit_predicate_relation_info ();
8572
8573  if (ia64_flag_var_tracking)
8574    {
8575      timevar_push (TV_VAR_TRACKING);
8576      variable_tracking_main ();
8577      timevar_pop (TV_VAR_TRACKING);
8578    }
8579}
8580
8581/* Return true if REGNO is used by the epilogue.  */
8582
8583int
8584ia64_epilogue_uses (int regno)
8585{
8586  switch (regno)
8587    {
8588    case R_GR (1):
8589      /* With a call to a function in another module, we will write a new
8590	 value to "gp".  After returning from such a call, we need to make
8591	 sure the function restores the original gp-value, even if the
8592	 function itself does not use the gp anymore.  */
8593      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
8594
8595    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8596    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8597      /* For functions defined with the syscall_linkage attribute, all
8598	 input registers are marked as live at all function exits.  This
8599	 prevents the register allocator from using the input registers,
8600	 which in turn makes it possible to restart a system call after
8601	 an interrupt without having to save/restore the input registers.
8602	 This also prevents kernel data from leaking to application code.  */
8603      return lookup_attribute ("syscall_linkage",
8604	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8605
8606    case R_BR (0):
8607      /* Conditional return patterns can't represent the use of `b0' as
8608         the return address, so we force the value live this way.  */
8609      return 1;
8610
8611    case AR_PFS_REGNUM:
8612      /* Likewise for ar.pfs, which is used by br.ret.  */
8613      return 1;
8614
8615    default:
8616      return 0;
8617    }
8618}
8619
8620/* Return true if REGNO is used by the frame unwinder.  */
8621
8622int
8623ia64_eh_uses (int regno)
8624{
8625  if (! reload_completed)
8626    return 0;
8627
8628  if (current_frame_info.reg_save_b0
8629      && regno == current_frame_info.reg_save_b0)
8630    return 1;
8631  if (current_frame_info.reg_save_pr
8632      && regno == current_frame_info.reg_save_pr)
8633    return 1;
8634  if (current_frame_info.reg_save_ar_pfs
8635      && regno == current_frame_info.reg_save_ar_pfs)
8636    return 1;
8637  if (current_frame_info.reg_save_ar_unat
8638      && regno == current_frame_info.reg_save_ar_unat)
8639    return 1;
8640  if (current_frame_info.reg_save_ar_lc
8641      && regno == current_frame_info.reg_save_ar_lc)
8642    return 1;
8643
8644  return 0;
8645}
8646
8647/* Return true if this goes in small data/bss.  */
8648
8649/* ??? We could also support own long data here.  Generating movl/add/ld8
8650   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
8651   code faster because there is one less load.  This also includes incomplete
8652   types which can't go in sdata/sbss.  */
8653
8654static bool
8655ia64_in_small_data_p (tree exp)
8656{
8657  if (TARGET_NO_SDATA)
8658    return false;
8659
8660  /* We want to merge strings, so we never consider them small data.  */
8661  if (TREE_CODE (exp) == STRING_CST)
8662    return false;
8663
8664  /* Functions are never small data.  */
8665  if (TREE_CODE (exp) == FUNCTION_DECL)
8666    return false;
8667
8668  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8669    {
8670      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8671
8672      if (strcmp (section, ".sdata") == 0
8673	  || strncmp (section, ".sdata.", 7) == 0
8674	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8675	  || strcmp (section, ".sbss") == 0
8676	  || strncmp (section, ".sbss.", 6) == 0
8677	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8678	return true;
8679    }
8680  else
8681    {
8682      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8683
8684      /* If this is an incomplete type with size 0, then we can't put it
8685	 in sdata because it might be too big when completed.  */
8686      if (size > 0 && size <= ia64_section_threshold)
8687	return true;
8688    }
8689
8690  return false;
8691}
8692
8693/* Output assembly directives for prologue regions.  */
8694
8695/* The current basic block number.  */
8696
8697static bool last_block;
8698
8699/* True if we need a copy_state command at the start of the next block.  */
8700
8701static bool need_copy_state;
8702
8703#ifndef MAX_ARTIFICIAL_LABEL_BYTES
8704# define MAX_ARTIFICIAL_LABEL_BYTES 30
8705#endif
8706
8707/* Emit a debugging label after a call-frame-related insn.  We'd
8708   rather output the label right away, but we'd have to output it
8709   after, not before, the instruction, and the instruction has not
8710   been output yet.  So we emit the label after the insn, delete it to
8711   avoid introducing basic blocks, and mark it as preserved, such that
8712   it is still output, given that it is referenced in debug info.  */
8713
8714static const char *
8715ia64_emit_deleted_label_after_insn (rtx insn)
8716{
8717  char label[MAX_ARTIFICIAL_LABEL_BYTES];
8718  rtx lb = gen_label_rtx ();
8719  rtx label_insn = emit_label_after (lb, insn);
8720
8721  LABEL_PRESERVE_P (lb) = 1;
8722
8723  delete_insn (label_insn);
8724
8725  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8726
8727  return xstrdup (label);
8728}
8729
8730/* Define the CFA after INSN with the steady-state definition.  */
8731
8732static void
8733ia64_dwarf2out_def_steady_cfa (rtx insn)
8734{
8735  rtx fp = frame_pointer_needed
8736    ? hard_frame_pointer_rtx
8737    : stack_pointer_rtx;
8738
8739  dwarf2out_def_cfa
8740    (ia64_emit_deleted_label_after_insn (insn),
8741     REGNO (fp),
8742     ia64_initial_elimination_offset
8743     (REGNO (arg_pointer_rtx), REGNO (fp))
8744     + ARG_POINTER_CFA_OFFSET (current_function_decl));
8745}
8746
8747/* The generic dwarf2 frame debug info generator does not define a
8748   separate region for the very end of the epilogue, so refrain from
8749   doing so in the IA64-specific code as well.  */
8750
8751#define IA64_CHANGE_CFA_IN_EPILOGUE 0
8752
8753/* The function emits unwind directives for the start of an epilogue.  */
8754
8755static void
8756process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8757{
8758  /* If this isn't the last block of the function, then we need to label the
8759     current state, and copy it back in at the start of the next block.  */
8760
8761  if (!last_block)
8762    {
8763      if (unwind)
8764	fprintf (asm_out_file, "\t.label_state %d\n",
8765		 ++cfun->machine->state_num);
8766      need_copy_state = true;
8767    }
8768
8769  if (unwind)
8770    fprintf (asm_out_file, "\t.restore sp\n");
8771  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8772    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8773		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8774}
8775
8776/* This function processes a SET pattern looking for specific patterns
8777   which result in emitting an assembly directive required for unwinding.  */
8778
8779static int
8780process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8781{
8782  rtx src = SET_SRC (pat);
8783  rtx dest = SET_DEST (pat);
8784  int src_regno, dest_regno;
8785
8786  /* Look for the ALLOC insn.  */
8787  if (GET_CODE (src) == UNSPEC_VOLATILE
8788      && XINT (src, 1) == UNSPECV_ALLOC
8789      && GET_CODE (dest) == REG)
8790    {
8791      dest_regno = REGNO (dest);
8792
8793      /* If this is the final destination for ar.pfs, then this must
8794	 be the alloc in the prologue.  */
8795      if (dest_regno == current_frame_info.reg_save_ar_pfs)
8796	{
8797	  if (unwind)
8798	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8799		     ia64_dbx_register_number (dest_regno));
8800	}
8801      else
8802	{
8803	  /* This must be an alloc before a sibcall.  We must drop the
8804	     old frame info.  The easiest way to drop the old frame
8805	     info is to ensure we had a ".restore sp" directive
8806	     followed by a new prologue.  If the procedure doesn't
8807	     have a memory-stack frame, we'll issue a dummy ".restore
8808	     sp" now.  */
8809	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
8810	    /* if haven't done process_epilogue() yet, do it now */
8811	    process_epilogue (asm_out_file, insn, unwind, frame);
8812	  if (unwind)
8813	    fprintf (asm_out_file, "\t.prologue\n");
8814	}
8815      return 1;
8816    }
8817
8818  /* Look for SP = ....  */
8819  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8820    {
8821      if (GET_CODE (src) == PLUS)
8822        {
8823	  rtx op0 = XEXP (src, 0);
8824	  rtx op1 = XEXP (src, 1);
8825
8826	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8827
8828	  if (INTVAL (op1) < 0)
8829	    {
8830	      gcc_assert (!frame_pointer_needed);
8831	      if (unwind)
8832		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8833			 -INTVAL (op1));
8834	      if (frame)
8835		ia64_dwarf2out_def_steady_cfa (insn);
8836	    }
8837	  else
8838	    process_epilogue (asm_out_file, insn, unwind, frame);
8839	}
8840      else
8841	{
8842	  gcc_assert (GET_CODE (src) == REG
8843		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
8844	  process_epilogue (asm_out_file, insn, unwind, frame);
8845	}
8846
8847      return 1;
8848    }
8849
8850  /* Register move we need to look at.  */
8851  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8852    {
8853      src_regno = REGNO (src);
8854      dest_regno = REGNO (dest);
8855
8856      switch (src_regno)
8857	{
8858	case BR_REG (0):
8859	  /* Saving return address pointer.  */
8860	  gcc_assert (dest_regno == current_frame_info.reg_save_b0);
8861	  if (unwind)
8862	    fprintf (asm_out_file, "\t.save rp, r%d\n",
8863		     ia64_dbx_register_number (dest_regno));
8864	  return 1;
8865
8866	case PR_REG (0):
8867	  gcc_assert (dest_regno == current_frame_info.reg_save_pr);
8868	  if (unwind)
8869	    fprintf (asm_out_file, "\t.save pr, r%d\n",
8870		     ia64_dbx_register_number (dest_regno));
8871	  return 1;
8872
8873	case AR_UNAT_REGNUM:
8874	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
8875	  if (unwind)
8876	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8877		     ia64_dbx_register_number (dest_regno));
8878	  return 1;
8879
8880	case AR_LC_REGNUM:
8881	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
8882	  if (unwind)
8883	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8884		     ia64_dbx_register_number (dest_regno));
8885	  return 1;
8886
8887	case STACK_POINTER_REGNUM:
8888	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8889		      && frame_pointer_needed);
8890	  if (unwind)
8891	    fprintf (asm_out_file, "\t.vframe r%d\n",
8892		     ia64_dbx_register_number (dest_regno));
8893	  if (frame)
8894	    ia64_dwarf2out_def_steady_cfa (insn);
8895	  return 1;
8896
8897	default:
8898	  /* Everything else should indicate being stored to memory.  */
8899	  gcc_unreachable ();
8900	}
8901    }
8902
8903  /* Memory store we need to look at.  */
8904  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
8905    {
8906      long off;
8907      rtx base;
8908      const char *saveop;
8909
8910      if (GET_CODE (XEXP (dest, 0)) == REG)
8911	{
8912	  base = XEXP (dest, 0);
8913	  off = 0;
8914	}
8915      else
8916	{
8917	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8918		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
8919	  base = XEXP (XEXP (dest, 0), 0);
8920	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
8921	}
8922
8923      if (base == hard_frame_pointer_rtx)
8924	{
8925	  saveop = ".savepsp";
8926	  off = - off;
8927	}
8928      else
8929	{
8930	  gcc_assert (base == stack_pointer_rtx);
8931	  saveop = ".savesp";
8932	}
8933
8934      src_regno = REGNO (src);
8935      switch (src_regno)
8936	{
8937	case BR_REG (0):
8938	  gcc_assert (!current_frame_info.reg_save_b0);
8939	  if (unwind)
8940	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
8941	  return 1;
8942
8943	case PR_REG (0):
8944	  gcc_assert (!current_frame_info.reg_save_pr);
8945	  if (unwind)
8946	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
8947	  return 1;
8948
8949	case AR_LC_REGNUM:
8950	  gcc_assert (!current_frame_info.reg_save_ar_lc);
8951	  if (unwind)
8952	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
8953	  return 1;
8954
8955	case AR_PFS_REGNUM:
8956	  gcc_assert (!current_frame_info.reg_save_ar_pfs);
8957	  if (unwind)
8958	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
8959	  return 1;
8960
8961	case AR_UNAT_REGNUM:
8962	  gcc_assert (!current_frame_info.reg_save_ar_unat);
8963	  if (unwind)
8964	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
8965	  return 1;
8966
8967	case GR_REG (4):
8968	case GR_REG (5):
8969	case GR_REG (6):
8970	case GR_REG (7):
8971	  if (unwind)
8972	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
8973		     1 << (src_regno - GR_REG (4)));
8974	  return 1;
8975
8976	case BR_REG (1):
8977	case BR_REG (2):
8978	case BR_REG (3):
8979	case BR_REG (4):
8980	case BR_REG (5):
8981	  if (unwind)
8982	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
8983		     1 << (src_regno - BR_REG (1)));
8984	  return 1;
8985
8986	case FR_REG (2):
8987	case FR_REG (3):
8988	case FR_REG (4):
8989	case FR_REG (5):
8990	  if (unwind)
8991	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
8992		     1 << (src_regno - FR_REG (2)));
8993	  return 1;
8994
8995	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
8996	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
8997	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
8998	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
8999	  if (unwind)
9000	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9001		     1 << (src_regno - FR_REG (12)));
9002	  return 1;
9003
9004	default:
9005	  return 0;
9006	}
9007    }
9008
9009  return 0;
9010}
9011
9012
9013/* This function looks at a single insn and emits any directives
9014   required to unwind this insn.  */
9015void
9016process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9017{
9018  bool unwind = (flag_unwind_tables
9019		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9020  bool frame = dwarf2out_do_frame ();
9021
9022  if (unwind || frame)
9023    {
9024      rtx pat;
9025
9026      if (GET_CODE (insn) == NOTE
9027	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
9028	{
9029	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9030
9031	  /* Restore unwind state from immediately before the epilogue.  */
9032	  if (need_copy_state)
9033	    {
9034	      if (unwind)
9035		{
9036		  fprintf (asm_out_file, "\t.body\n");
9037		  fprintf (asm_out_file, "\t.copy_state %d\n",
9038			   cfun->machine->state_num);
9039		}
9040	      if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9041		ia64_dwarf2out_def_steady_cfa (insn);
9042	      need_copy_state = false;
9043	    }
9044	}
9045
9046      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9047	return;
9048
9049      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9050      if (pat)
9051	pat = XEXP (pat, 0);
9052      else
9053	pat = PATTERN (insn);
9054
9055      switch (GET_CODE (pat))
9056        {
9057	case SET:
9058	  process_set (asm_out_file, pat, insn, unwind, frame);
9059	  break;
9060
9061	case PARALLEL:
9062	  {
9063	    int par_index;
9064	    int limit = XVECLEN (pat, 0);
9065	    for (par_index = 0; par_index < limit; par_index++)
9066	      {
9067		rtx x = XVECEXP (pat, 0, par_index);
9068		if (GET_CODE (x) == SET)
9069		  process_set (asm_out_file, x, insn, unwind, frame);
9070	      }
9071	    break;
9072	  }
9073
9074	default:
9075	  gcc_unreachable ();
9076	}
9077    }
9078}
9079
9080
9081enum ia64_builtins
9082{
9083  IA64_BUILTIN_BSP,
9084  IA64_BUILTIN_FLUSHRS
9085};
9086
9087void
9088ia64_init_builtins (void)
9089{
9090  tree fpreg_type;
9091  tree float80_type;
9092
9093  /* The __fpreg type.  */
9094  fpreg_type = make_node (REAL_TYPE);
9095  TYPE_PRECISION (fpreg_type) = 82;
9096  layout_type (fpreg_type);
9097  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9098
9099  /* The __float80 type.  */
9100  float80_type = make_node (REAL_TYPE);
9101  TYPE_PRECISION (float80_type) = 80;
9102  layout_type (float80_type);
9103  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9104
9105  /* The __float128 type.  */
9106  if (!TARGET_HPUX)
9107    {
9108      tree float128_type = make_node (REAL_TYPE);
9109      TYPE_PRECISION (float128_type) = 128;
9110      layout_type (float128_type);
9111      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9112    }
9113  else
9114    /* Under HPUX, this is a synonym for "long double".  */
9115    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9116					       "__float128");
9117
9118#define def_builtin(name, type, code)					\
9119  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,	\
9120			       NULL, NULL_TREE)
9121
9122  def_builtin ("__builtin_ia64_bsp",
9123	       build_function_type (ptr_type_node, void_list_node),
9124	       IA64_BUILTIN_BSP);
9125
9126  def_builtin ("__builtin_ia64_flushrs",
9127	       build_function_type (void_type_node, void_list_node),
9128	       IA64_BUILTIN_FLUSHRS);
9129
9130#undef def_builtin
9131}
9132
9133rtx
9134ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9135		     enum machine_mode mode ATTRIBUTE_UNUSED,
9136		     int ignore ATTRIBUTE_UNUSED)
9137{
9138  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9139  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9140
9141  switch (fcode)
9142    {
9143    case IA64_BUILTIN_BSP:
9144      if (! target || ! register_operand (target, DImode))
9145	target = gen_reg_rtx (DImode);
9146      emit_insn (gen_bsp_value (target));
9147#ifdef POINTERS_EXTEND_UNSIGNED
9148      target = convert_memory_address (ptr_mode, target);
9149#endif
9150      return target;
9151
9152    case IA64_BUILTIN_FLUSHRS:
9153      emit_insn (gen_flushrs ());
9154      return const0_rtx;
9155
9156    default:
9157      break;
9158    }
9159
9160  return NULL_RTX;
9161}
9162
9163/* For the HP-UX IA64 aggregate parameters are passed stored in the
9164   most significant bits of the stack slot.  */
9165
9166enum direction
9167ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
9168{
9169   /* Exception to normal case for structures/unions/etc.  */
9170
9171   if (type && AGGREGATE_TYPE_P (type)
9172       && int_size_in_bytes (type) < UNITS_PER_WORD)
9173     return upward;
9174
9175   /* Fall back to the default.  */
9176   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9177}
9178
9179/* Emit text to declare externally defined variables and functions, because
9180   the Intel assembler does not support undefined externals.  */
9181
9182void
9183ia64_asm_output_external (FILE *file, tree decl, const char *name)
9184{
9185  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
9186     set in order to avoid putting out names that are never really
9187     used. */
9188  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
9189    {
9190      /* maybe_assemble_visibility will return 1 if the assembler
9191	 visibility directive is outputed.  */
9192      int need_visibility = ((*targetm.binds_local_p) (decl)
9193			     && maybe_assemble_visibility (decl));
9194
9195      /* GNU as does not need anything here, but the HP linker does
9196	 need something for external functions.  */
9197      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
9198	  && TREE_CODE (decl) == FUNCTION_DECL)
9199	{
9200	  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
9201	  (*targetm.asm_out.globalize_label) (file, name);
9202	}
9203      else if (need_visibility && !TARGET_GNU_AS)
9204	(*targetm.asm_out.globalize_label) (file, name);
9205    }
9206}
9207
9208/* Set SImode div/mod functions, init_integral_libfuncs only initializes
9209   modes of word_mode and larger.  Rename the TFmode libfuncs using the
9210   HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
9211   backward compatibility. */
9212
9213static void
9214ia64_init_libfuncs (void)
9215{
9216  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9217  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9218  set_optab_libfunc (smod_optab, SImode, "__modsi3");
9219  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
9220
9221  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9222  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9223  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9224  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9225  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9226
9227  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9228  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9229  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9230  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9231  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9232  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9233
9234  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9235  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
9236  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
9237  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9238  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9239
9240  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9241  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
9242  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
9243  /* HP-UX 11.23 libc does not have a function for unsigned
9244     SImode-to-TFmode conversion.  */
9245  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
9246}
9247
9248/* Rename all the TFmode libfuncs using the HPUX conventions.  */
9249
9250static void
9251ia64_hpux_init_libfuncs (void)
9252{
9253  ia64_init_libfuncs ();
9254
9255  /* The HP SI millicode division and mod functions expect DI arguments.
9256     By turning them off completely we avoid using both libgcc and the
9257     non-standard millicode routines and use the HP DI millicode routines
9258     instead.  */
9259
9260  set_optab_libfunc (sdiv_optab, SImode, 0);
9261  set_optab_libfunc (udiv_optab, SImode, 0);
9262  set_optab_libfunc (smod_optab, SImode, 0);
9263  set_optab_libfunc (umod_optab, SImode, 0);
9264
9265  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
9266  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
9267  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
9268  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
9269
9270  /* HP-UX libc has TF min/max/abs routines in it.  */
9271  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
9272  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
9273  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
9274
9275  /* ia64_expand_compare uses this.  */
9276  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
9277
9278  /* These should never be used.  */
9279  set_optab_libfunc (eq_optab, TFmode, 0);
9280  set_optab_libfunc (ne_optab, TFmode, 0);
9281  set_optab_libfunc (gt_optab, TFmode, 0);
9282  set_optab_libfunc (ge_optab, TFmode, 0);
9283  set_optab_libfunc (lt_optab, TFmode, 0);
9284  set_optab_libfunc (le_optab, TFmode, 0);
9285}
9286
9287/* Rename the division and modulus functions in VMS.  */
9288
9289static void
9290ia64_vms_init_libfuncs (void)
9291{
9292  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9293  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9294  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9295  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9296  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9297  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9298  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9299  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9300}
9301
9302/* Rename the TFmode libfuncs available from soft-fp in glibc using
9303   the HPUX conventions.  */
9304
9305static void
9306ia64_sysv4_init_libfuncs (void)
9307{
9308  ia64_init_libfuncs ();
9309
9310  /* These functions are not part of the HPUX TFmode interface.  We
9311     use them instead of _U_Qfcmp, which doesn't work the way we
9312     expect.  */
9313  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
9314  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
9315  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
9316  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
9317  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
9318  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
9319
9320  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9321     glibc doesn't have them.  */
9322}
9323
9324/* For HPUX, it is illegal to have relocations in shared segments.  */
9325
9326static int
9327ia64_hpux_reloc_rw_mask (void)
9328{
9329  return 3;
9330}
9331
9332/* For others, relax this so that relocations to local data goes in
9333   read-only segments, but we still cannot allow global relocations
9334   in read-only segments.  */
9335
9336static int
9337ia64_reloc_rw_mask (void)
9338{
9339  return flag_pic ? 3 : 2;
9340}
9341
9342/* Return the section to use for X.  The only special thing we do here
9343   is to honor small data.  */
9344
9345static section *
9346ia64_select_rtx_section (enum machine_mode mode, rtx x,
9347			 unsigned HOST_WIDE_INT align)
9348{
9349  if (GET_MODE_SIZE (mode) > 0
9350      && GET_MODE_SIZE (mode) <= ia64_section_threshold
9351      && !TARGET_NO_SDATA)
9352    return sdata_section;
9353  else
9354    return default_elf_select_rtx_section (mode, x, align);
9355}
9356
9357static unsigned int
9358ia64_section_type_flags (tree decl, const char *name, int reloc)
9359{
9360  unsigned int flags = 0;
9361
9362  if (strcmp (name, ".sdata") == 0
9363      || strncmp (name, ".sdata.", 7) == 0
9364      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9365      || strncmp (name, ".sdata2.", 8) == 0
9366      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
9367      || strcmp (name, ".sbss") == 0
9368      || strncmp (name, ".sbss.", 6) == 0
9369      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9370    flags = SECTION_SMALL;
9371
9372  flags |= default_section_type_flags (decl, name, reloc);
9373  return flags;
9374}
9375
9376/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9377   structure type and that the address of that type should be passed
9378   in out0, rather than in r8.  */
9379
9380static bool
9381ia64_struct_retval_addr_is_first_parm_p (tree fntype)
9382{
9383  tree ret_type = TREE_TYPE (fntype);
9384
9385  /* The Itanium C++ ABI requires that out0, rather than r8, be used
9386     as the structure return address parameter, if the return value
9387     type has a non-trivial copy constructor or destructor.  It is not
9388     clear if this same convention should be used for other
9389     programming languages.  Until G++ 3.4, we incorrectly used r8 for
9390     these return values.  */
9391  return (abi_version_at_least (2)
9392	  && ret_type
9393	  && TYPE_MODE (ret_type) == BLKmode
9394	  && TREE_ADDRESSABLE (ret_type)
9395	  && strcmp (lang_hooks.name, "GNU C++") == 0);
9396}
9397
9398/* Output the assembler code for a thunk function.  THUNK_DECL is the
9399   declaration for the thunk function itself, FUNCTION is the decl for
9400   the target function.  DELTA is an immediate constant offset to be
9401   added to THIS.  If VCALL_OFFSET is nonzero, the word at
9402   *(*this + vcall_offset) should be added to THIS.  */
9403
9404static void
9405ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9406		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9407		      tree function)
9408{
9409  rtx this, insn, funexp;
9410  unsigned int this_parmno;
9411  unsigned int this_regno;
9412
9413  reload_completed = 1;
9414  epilogue_completed = 1;
9415  no_new_pseudos = 1;
9416  reset_block_changes ();
9417
9418  /* Set things up as ia64_expand_prologue might.  */
9419  last_scratch_gr_reg = 15;
9420
9421  memset (&current_frame_info, 0, sizeof (current_frame_info));
9422  current_frame_info.spill_cfa_off = -16;
9423  current_frame_info.n_input_regs = 1;
9424  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
9425
9426  /* Mark the end of the (empty) prologue.  */
9427  emit_note (NOTE_INSN_PROLOGUE_END);
9428
9429  /* Figure out whether "this" will be the first parameter (the
9430     typical case) or the second parameter (as happens when the
9431     virtual function returns certain class objects).  */
9432  this_parmno
9433    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
9434       ? 1 : 0);
9435  this_regno = IN_REG (this_parmno);
9436  if (!TARGET_REG_NAMES)
9437    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
9438
9439  this = gen_rtx_REG (Pmode, this_regno);
9440  if (TARGET_ILP32)
9441    {
9442      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
9443      REG_POINTER (tmp) = 1;
9444      if (delta && CONST_OK_FOR_I (delta))
9445	{
9446	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
9447	  delta = 0;
9448	}
9449      else
9450	emit_insn (gen_ptr_extend (this, tmp));
9451    }
9452
9453  /* Apply the constant offset, if required.  */
9454  if (delta)
9455    {
9456      rtx delta_rtx = GEN_INT (delta);
9457
9458      if (!CONST_OK_FOR_I (delta))
9459	{
9460	  rtx tmp = gen_rtx_REG (Pmode, 2);
9461	  emit_move_insn (tmp, delta_rtx);
9462	  delta_rtx = tmp;
9463	}
9464      emit_insn (gen_adddi3 (this, this, delta_rtx));
9465    }
9466
9467  /* Apply the offset from the vtable, if required.  */
9468  if (vcall_offset)
9469    {
9470      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9471      rtx tmp = gen_rtx_REG (Pmode, 2);
9472
9473      if (TARGET_ILP32)
9474	{
9475	  rtx t = gen_rtx_REG (ptr_mode, 2);
9476	  REG_POINTER (t) = 1;
9477	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
9478	  if (CONST_OK_FOR_I (vcall_offset))
9479	    {
9480	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
9481						  vcall_offset_rtx));
9482	      vcall_offset = 0;
9483	    }
9484	  else
9485	    emit_insn (gen_ptr_extend (tmp, t));
9486	}
9487      else
9488	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
9489
9490      if (vcall_offset)
9491	{
9492	  if (!CONST_OK_FOR_J (vcall_offset))
9493	    {
9494	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
9495	      emit_move_insn (tmp2, vcall_offset_rtx);
9496	      vcall_offset_rtx = tmp2;
9497	    }
9498	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
9499	}
9500
9501      if (TARGET_ILP32)
9502	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
9503			gen_rtx_MEM (ptr_mode, tmp));
9504      else
9505	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
9506
9507      emit_insn (gen_adddi3 (this, this, tmp));
9508    }
9509
9510  /* Generate a tail call to the target function.  */
9511  if (! TREE_USED (function))
9512    {
9513      assemble_external (function);
9514      TREE_USED (function) = 1;
9515    }
9516  funexp = XEXP (DECL_RTL (function), 0);
9517  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9518  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
9519  insn = get_last_insn ();
9520  SIBLING_CALL_P (insn) = 1;
9521
9522  /* Code generation for calls relies on splitting.  */
9523  reload_completed = 1;
9524  epilogue_completed = 1;
9525  try_split (PATTERN (insn), insn, 0);
9526
9527  emit_barrier ();
9528
9529  /* Run just enough of rest_of_compilation to get the insns emitted.
9530     There's not really enough bulk here to make other passes such as
9531     instruction scheduling worth while.  Note that use_thunk calls
9532     assemble_start_function and assemble_end_function.  */
9533
9534  insn_locators_initialize ();
9535  emit_all_insn_group_barriers (NULL);
9536  insn = get_insns ();
9537  shorten_branches (insn);
9538  final_start_function (insn, file, 1);
9539  final (insn, file, 1);
9540  final_end_function ();
9541
9542  reload_completed = 0;
9543  epilogue_completed = 0;
9544  no_new_pseudos = 0;
9545}
9546
9547/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9548
9549static rtx
9550ia64_struct_value_rtx (tree fntype,
9551		       int incoming ATTRIBUTE_UNUSED)
9552{
9553  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
9554    return NULL_RTX;
9555  return gen_rtx_REG (Pmode, GR_REG (8));
9556}
9557
9558static bool
9559ia64_scalar_mode_supported_p (enum machine_mode mode)
9560{
9561  switch (mode)
9562    {
9563    case QImode:
9564    case HImode:
9565    case SImode:
9566    case DImode:
9567    case TImode:
9568      return true;
9569
9570    case SFmode:
9571    case DFmode:
9572    case XFmode:
9573    case RFmode:
9574      return true;
9575
9576    case TFmode:
9577      return TARGET_HPUX;
9578
9579    default:
9580      return false;
9581    }
9582}
9583
9584static bool
9585ia64_vector_mode_supported_p (enum machine_mode mode)
9586{
9587  switch (mode)
9588    {
9589    case V8QImode:
9590    case V4HImode:
9591    case V2SImode:
9592      return true;
9593
9594    case V2SFmode:
9595      return true;
9596
9597    default:
9598      return false;
9599    }
9600}
9601
9602/* Implement the FUNCTION_PROFILER macro.  */
9603
9604void
9605ia64_output_function_profiler (FILE *file, int labelno)
9606{
9607  bool indirect_call;
9608
9609  /* If the function needs a static chain and the static chain
9610     register is r15, we use an indirect call so as to bypass
9611     the PLT stub in case the executable is dynamically linked,
9612     because the stub clobbers r15 as per 5.3.6 of the psABI.
9613     We don't need to do that in non canonical PIC mode.  */
9614
9615  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
9616    {
9617      gcc_assert (STATIC_CHAIN_REGNUM == 15);
9618      indirect_call = true;
9619    }
9620  else
9621    indirect_call = false;
9622
9623  if (TARGET_GNU_AS)
9624    fputs ("\t.prologue 4, r40\n", file);
9625  else
9626    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
9627  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
9628
9629  if (NO_PROFILE_COUNTERS)
9630    fputs ("\tmov out3 = r0\n", file);
9631  else
9632    {
9633      char buf[20];
9634      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9635
9636      if (TARGET_AUTO_PIC)
9637	fputs ("\tmovl out3 = @gprel(", file);
9638      else
9639	fputs ("\taddl out3 = @ltoff(", file);
9640      assemble_name (file, buf);
9641      if (TARGET_AUTO_PIC)
9642	fputs (")\n", file);
9643      else
9644	fputs ("), r1\n", file);
9645    }
9646
9647  if (indirect_call)
9648    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
9649  fputs ("\t;;\n", file);
9650
9651  fputs ("\t.save rp, r42\n", file);
9652  fputs ("\tmov out2 = b0\n", file);
9653  if (indirect_call)
9654    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
9655  fputs ("\t.body\n", file);
9656  fputs ("\tmov out1 = r1\n", file);
9657  if (indirect_call)
9658    {
9659      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9660      fputs ("\tmov b6 = r16\n", file);
9661      fputs ("\tld8 r1 = [r14]\n", file);
9662      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9663    }
9664  else
9665    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
9666}
9667
9668static GTY(()) rtx mcount_func_rtx;
9669static rtx
9670gen_mcount_func_rtx (void)
9671{
9672  if (!mcount_func_rtx)
9673    mcount_func_rtx = init_one_libfunc ("_mcount");
9674  return mcount_func_rtx;
9675}
9676
9677void
9678ia64_profile_hook (int labelno)
9679{
9680  rtx label, ip;
9681
9682  if (NO_PROFILE_COUNTERS)
9683    label = const0_rtx;
9684  else
9685    {
9686      char buf[30];
9687      const char *label_name;
9688      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9689      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9690      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9691      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9692    }
9693  ip = gen_reg_rtx (Pmode);
9694  emit_insn (gen_ip_value (ip));
9695  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9696                     VOIDmode, 3,
9697		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9698		     ip, Pmode,
9699		     label, Pmode);
9700}
9701
9702/* Return the mangling of TYPE if it is an extended fundamental type.  */
9703
9704static const char *
9705ia64_mangle_fundamental_type (tree type)
9706{
9707  /* On HP-UX, "long double" is mangled as "e" so __float128 is
9708     mangled as "e".  */
9709  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9710    return "g";
9711  /* On HP-UX, "e" is not available as a mangling of __float80 so use
9712     an extended mangling.  Elsewhere, "e" is available since long
9713     double is 80 bits.  */
9714  if (TYPE_MODE (type) == XFmode)
9715    return TARGET_HPUX ? "u9__float80" : "e";
9716  if (TYPE_MODE (type) == RFmode)
9717    return "u7__fpreg";
9718  return NULL;
9719}
9720
9721/* Return the diagnostic message string if conversion from FROMTYPE to
9722   TOTYPE is not allowed, NULL otherwise.  */
9723static const char *
9724ia64_invalid_conversion (tree fromtype, tree totype)
9725{
9726  /* Reject nontrivial conversion to or from __fpreg.  */
9727  if (TYPE_MODE (fromtype) == RFmode
9728      && TYPE_MODE (totype) != RFmode
9729      && TYPE_MODE (totype) != VOIDmode)
9730    return N_("invalid conversion from %<__fpreg%>");
9731  if (TYPE_MODE (totype) == RFmode
9732      && TYPE_MODE (fromtype) != RFmode)
9733    return N_("invalid conversion to %<__fpreg%>");
9734  return NULL;
9735}
9736
9737/* Return the diagnostic message string if the unary operation OP is
9738   not permitted on TYPE, NULL otherwise.  */
9739static const char *
9740ia64_invalid_unary_op (int op, tree type)
9741{
9742  /* Reject operations on __fpreg other than unary + or &.  */
9743  if (TYPE_MODE (type) == RFmode
9744      && op != CONVERT_EXPR
9745      && op != ADDR_EXPR)
9746    return N_("invalid operation on %<__fpreg%>");
9747  return NULL;
9748}
9749
9750/* Return the diagnostic message string if the binary operation OP is
9751   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
9752static const char *
9753ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
9754{
9755  /* Reject operations on __fpreg.  */
9756  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9757    return N_("invalid operation on %<__fpreg%>");
9758  return NULL;
9759}
9760
9761/* Implement overriding of the optimization options.  */
9762void
9763ia64_optimization_options (int level ATTRIBUTE_UNUSED,
9764                           int size ATTRIBUTE_UNUSED)
9765{
9766  /* Let the scheduler form additional regions.  */
9767  set_param_value ("max-sched-extend-regions-iters", 2);
9768}
9769
9770#include "gt-ia64.h"
9771