1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999-2015 Free Software Foundation, Inc.
3   Contributed by James E. Wilson <wilson@cygnus.com> and
4		  David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "rtl.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "stringpool.h"
39#include "stor-layout.h"
40#include "calls.h"
41#include "varasm.h"
42#include "regs.h"
43#include "hard-reg-set.h"
44#include "insn-config.h"
45#include "conditions.h"
46#include "output.h"
47#include "insn-attr.h"
48#include "flags.h"
49#include "recog.h"
50#include "hashtab.h"
51#include "function.h"
52#include "statistics.h"
53#include "real.h"
54#include "fixed-value.h"
55#include "expmed.h"
56#include "dojump.h"
57#include "explow.h"
58#include "emit-rtl.h"
59#include "stmt.h"
60#include "expr.h"
61#include "insn-codes.h"
62#include "optabs.h"
63#include "except.h"
64#include "ggc.h"
65#include "predict.h"
66#include "dominance.h"
67#include "cfg.h"
68#include "cfgrtl.h"
69#include "cfganal.h"
70#include "lcm.h"
71#include "cfgbuild.h"
72#include "cfgcleanup.h"
73#include "basic-block.h"
74#include "libfuncs.h"
75#include "diagnostic-core.h"
76#include "sched-int.h"
77#include "timevar.h"
78#include "target.h"
79#include "target-def.h"
80#include "common/common-target.h"
81#include "tm_p.h"
82#include "hash-table.h"
83#include "langhooks.h"
84#include "tree-ssa-alias.h"
85#include "internal-fn.h"
86#include "gimple-fold.h"
87#include "tree-eh.h"
88#include "gimple-expr.h"
89#include "is-a.h"
90#include "gimple.h"
91#include "gimplify.h"
92#include "intl.h"
93#include "df.h"
94#include "debug.h"
95#include "params.h"
96#include "dbgcnt.h"
97#include "tm-constrs.h"
98#include "sel-sched.h"
99#include "reload.h"
100#include "opts.h"
101#include "dumpfile.h"
102#include "builtins.h"
103
104/* This is used for communication between ASM_OUTPUT_LABEL and
105   ASM_OUTPUT_LABELREF.  */
106int ia64_asm_output_label = 0;
107
108/* Register names for ia64_expand_prologue.  */
109static const char * const ia64_reg_numbers[96] =
110{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
111  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
112  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
113  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
114  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
115  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
116  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
117  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
118  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
119  "r104","r105","r106","r107","r108","r109","r110","r111",
120  "r112","r113","r114","r115","r116","r117","r118","r119",
121  "r120","r121","r122","r123","r124","r125","r126","r127"};
122
123/* ??? These strings could be shared with REGISTER_NAMES.  */
124static const char * const ia64_input_reg_names[8] =
125{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
126
127/* ??? These strings could be shared with REGISTER_NAMES.  */
128static const char * const ia64_local_reg_names[80] =
129{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
130  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
131  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
132  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
133  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
134  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
135  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
136  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
137  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
138  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
139
140/* ??? These strings could be shared with REGISTER_NAMES.  */
141static const char * const ia64_output_reg_names[8] =
142{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
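
/* The stacked registers r32-r127 can also be referred to with the in0-in7,
   loc0-loc79 and out0-out7 aliases used below; the assembler derives those
   aliases from the function's .regstk directive (cf. the need_regstk field
   of struct ia64_frame_info).  */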
143
144/* Variables which are this size or smaller are put in the sdata/sbss
145   sections.  */
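
/* A global that fits under this threshold is placed in .sdata/.sbss and can
   then be addressed gp-relative via an @gprel relocation (see the
   gen_load_gprel use in ia64_expand_load_address below) instead of an
   @ltoff load from the GOT.  */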
146
147unsigned int ia64_section_threshold;
148
149/* The following variable is used by the DFA insn scheduler.  The value is
150   TRUE if we do insn bundling instead of insn scheduling.  */
151int bundling_p = 0;
152
153enum ia64_frame_regs
154{
155   reg_fp,
156   reg_save_b0,
157   reg_save_pr,
158   reg_save_ar_pfs,
159   reg_save_ar_unat,
160   reg_save_ar_lc,
161   reg_save_gp,
162   number_of_ia64_frame_regs
163};
164
165/* Structure to be filled in by ia64_compute_frame_size with register
166   save masks and offsets for the current function.  */
167
168struct ia64_frame_info
169{
170  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
171				   the caller's scratch area.  */
172  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
173  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
174  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
175  HARD_REG_SET mask;		/* mask of saved registers.  */
176  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
177				   registers or long-term scratches.  */
178  int n_spilled;		/* number of spilled registers.  */
179  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
180  int n_input_regs;		/* number of input registers used.  */
181  int n_local_regs;		/* number of local registers used.  */
182  int n_output_regs;		/* number of output registers used.  */
183  int n_rotate_regs;		/* number of rotating registers used.  */
184
  char need_regstk;		/* true if a .regstk directive is needed.  */
186  char initialized;		/* true if the data is finalized.  */
187};
188
189/* Current frame information calculated by ia64_compute_frame_size.  */
190static struct ia64_frame_info current_frame_info;
191/* The actual registers that are emitted.  */
192static int emitted_frame_related_regs[number_of_ia64_frame_regs];
193
194static int ia64_first_cycle_multipass_dfa_lookahead (void);
195static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
196static void ia64_init_dfa_pre_cycle_insn (void);
197static rtx ia64_dfa_pre_cycle_insn (void);
198static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
199static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
200static void ia64_h_i_d_extended (void);
201static void * ia64_alloc_sched_context (void);
202static void ia64_init_sched_context (void *, bool);
203static void ia64_set_sched_context (void *);
204static void ia64_clear_sched_context (void *);
205static void ia64_free_sched_context (void *);
206static int ia64_mode_to_int (machine_mode);
207static void ia64_set_sched_flags (spec_info_t);
208static ds_t ia64_get_insn_spec_ds (rtx_insn *);
209static ds_t ia64_get_insn_checked_ds (rtx_insn *);
210static bool ia64_skip_rtx_p (const_rtx);
211static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
212static bool ia64_needs_block_p (ds_t);
213static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
214static int ia64_spec_check_p (rtx);
215static int ia64_spec_check_src_p (rtx);
216static rtx gen_tls_get_addr (void);
217static rtx gen_thread_pointer (void);
218static int find_gr_spill (enum ia64_frame_regs, int);
219static int next_scratch_gr_reg (void);
220static void mark_reg_gr_used_mask (rtx, void *);
221static void ia64_compute_frame_size (HOST_WIDE_INT);
222static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
223static void finish_spill_pointers (void);
224static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
225static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
226static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
227static rtx gen_movdi_x (rtx, rtx, rtx);
228static rtx gen_fr_spill_x (rtx, rtx, rtx);
229static rtx gen_fr_restore_x (rtx, rtx, rtx);
230
231static void ia64_option_override (void);
232static bool ia64_can_eliminate (const int, const int);
233static machine_mode hfa_element_mode (const_tree, bool);
234static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
235					 tree, int *, int);
236static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
237				   tree, bool);
238static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
239				const_tree, bool, bool);
240static rtx ia64_function_arg (cumulative_args_t, machine_mode,
241			      const_tree, bool);
242static rtx ia64_function_incoming_arg (cumulative_args_t,
243				       machine_mode, const_tree, bool);
244static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
245				       const_tree, bool);
246static unsigned int ia64_function_arg_boundary (machine_mode,
247						const_tree);
248static bool ia64_function_ok_for_sibcall (tree, tree);
249static bool ia64_return_in_memory (const_tree, const_tree);
250static rtx ia64_function_value (const_tree, const_tree, bool);
251static rtx ia64_libcall_value (machine_mode, const_rtx);
252static bool ia64_function_value_regno_p (const unsigned int);
253static int ia64_register_move_cost (machine_mode, reg_class_t,
254                                    reg_class_t);
255static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
256				  bool);
257static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
258static int ia64_unspec_may_trap_p (const_rtx, unsigned);
259static void fix_range (const char *);
260static struct machine_function * ia64_init_machine_status (void);
261static void emit_insn_group_barriers (FILE *);
262static void emit_all_insn_group_barriers (FILE *);
263static void final_emit_insn_group_barriers (FILE *);
264static void emit_predicate_relation_info (void);
265static void ia64_reorg (void);
266static bool ia64_in_small_data_p (const_tree);
267static void process_epilogue (FILE *, rtx, bool, bool);
268
269static bool ia64_assemble_integer (rtx, unsigned int, int);
270static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
271static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
272static void ia64_output_function_end_prologue (FILE *);
273
274static void ia64_print_operand (FILE *, rtx, int);
275static void ia64_print_operand_address (FILE *, rtx);
276static bool ia64_print_operand_punct_valid_p (unsigned char code);
277
278static int ia64_issue_rate (void);
279static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
280static void ia64_sched_init (FILE *, int, int);
281static void ia64_sched_init_global (FILE *, int, int);
282static void ia64_sched_finish_global (FILE *, int);
283static void ia64_sched_finish (FILE *, int);
284static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
285static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
286static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
287static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
288
289static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
290static void ia64_asm_emit_except_personality (rtx);
291static void ia64_asm_init_sections (void);
292
293static enum unwind_info_type ia64_debug_unwind_info (void);
294
295static struct bundle_state *get_free_bundle_state (void);
296static void free_bundle_state (struct bundle_state *);
297static void initiate_bundle_states (void);
298static void finish_bundle_states (void);
299static int insert_bundle_state (struct bundle_state *);
300static void initiate_bundle_state_table (void);
301static void finish_bundle_state_table (void);
302static int try_issue_nops (struct bundle_state *, int);
303static int try_issue_insn (struct bundle_state *, rtx);
304static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
305				 int, int);
306static int get_max_pos (state_t);
307static int get_template (state_t, int);
308
309static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
310static bool important_for_bundling_p (rtx_insn *);
311static bool unknown_for_bundling_p (rtx_insn *);
312static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
313
314static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
315				  HOST_WIDE_INT, tree);
316static void ia64_file_start (void);
317static void ia64_globalize_decl_name (FILE *, tree);
318
319static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
320static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
321static section *ia64_select_rtx_section (machine_mode, rtx,
322					 unsigned HOST_WIDE_INT);
323static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
324     ATTRIBUTE_UNUSED;
325static unsigned int ia64_section_type_flags (tree, const char *, int);
326static void ia64_init_libfuncs (void)
327     ATTRIBUTE_UNUSED;
328static void ia64_hpux_init_libfuncs (void)
329     ATTRIBUTE_UNUSED;
330static void ia64_sysv4_init_libfuncs (void)
331     ATTRIBUTE_UNUSED;
332static void ia64_vms_init_libfuncs (void)
333     ATTRIBUTE_UNUSED;
334static void ia64_soft_fp_init_libfuncs (void)
335     ATTRIBUTE_UNUSED;
336static bool ia64_vms_valid_pointer_mode (machine_mode mode)
337     ATTRIBUTE_UNUSED;
338static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
339     ATTRIBUTE_UNUSED;
340
341static bool ia64_attribute_takes_identifier_p (const_tree);
342static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
343static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
344static void ia64_encode_section_info (tree, rtx, int);
345static rtx ia64_struct_value_rtx (tree, int);
346static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
347static bool ia64_scalar_mode_supported_p (machine_mode mode);
348static bool ia64_vector_mode_supported_p (machine_mode mode);
349static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
350static bool ia64_legitimate_constant_p (machine_mode, rtx);
351static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
352static bool ia64_cannot_force_const_mem (machine_mode, rtx);
353static const char *ia64_mangle_type (const_tree);
354static const char *ia64_invalid_conversion (const_tree, const_tree);
355static const char *ia64_invalid_unary_op (int, const_tree);
356static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
357static machine_mode ia64_c_mode_for_suffix (char);
358static void ia64_trampoline_init (rtx, tree, rtx);
359static void ia64_override_options_after_change (void);
360static bool ia64_member_type_forces_blk (const_tree, machine_mode);
361
362static tree ia64_builtin_decl (unsigned, bool);
363
364static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
365static machine_mode ia64_get_reg_raw_mode (int regno);
366static section * ia64_hpux_function_section (tree, enum node_frequency,
367					     bool, bool);
368
369static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
370					      const unsigned char *sel);
371
372#define MAX_VECT_LEN	8
373
374struct expand_vec_perm_d
375{
376  rtx target, op0, op1;
377  unsigned char perm[MAX_VECT_LEN];
378  machine_mode vmode;
379  unsigned char nelt;
380  bool one_operand_p;
381  bool testing_p;
382};
383
384static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
385
386
387/* Table of valid machine attributes.  */
388static const struct attribute_spec ia64_attribute_table[] =
389{
390  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
391       affects_type_identity } */
392  { "syscall_linkage", 0, 0, false, true,  true,  NULL, false },
393  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute,
394    false },
395#if TARGET_ABI_OPEN_VMS
396  { "common_object",   1, 1, true, false, false,
397    ia64_vms_common_object_attribute, false },
398#endif
399  { "version_id",      1, 1, true, false, false,
400    ia64_handle_version_id_attribute, false },
401  { NULL,	       0, 0, false, false, false, NULL, false }
402};
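
/* The "model" attribute above is what implements, e.g.,
     static int counter __attribute__ ((model ("small")));
   placing COUNTER in the small address area so that it can be addressed
   with addl; see ia64_handle_model_attribute and ia64_get_addr_area
   below.  */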
403
404/* Initialize the GCC target structure.  */
405#undef TARGET_ATTRIBUTE_TABLE
406#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
407
408#undef TARGET_INIT_BUILTINS
409#define TARGET_INIT_BUILTINS ia64_init_builtins
410
411#undef TARGET_EXPAND_BUILTIN
412#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
413
414#undef TARGET_BUILTIN_DECL
415#define TARGET_BUILTIN_DECL ia64_builtin_decl
416
417#undef TARGET_ASM_BYTE_OP
418#define TARGET_ASM_BYTE_OP "\tdata1\t"
419#undef TARGET_ASM_ALIGNED_HI_OP
420#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
421#undef TARGET_ASM_ALIGNED_SI_OP
422#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
423#undef TARGET_ASM_ALIGNED_DI_OP
424#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
425#undef TARGET_ASM_UNALIGNED_HI_OP
426#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
427#undef TARGET_ASM_UNALIGNED_SI_OP
428#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
429#undef TARGET_ASM_UNALIGNED_DI_OP
430#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
431#undef TARGET_ASM_INTEGER
432#define TARGET_ASM_INTEGER ia64_assemble_integer
433
434#undef TARGET_OPTION_OVERRIDE
435#define TARGET_OPTION_OVERRIDE ia64_option_override
436
437#undef TARGET_ASM_FUNCTION_PROLOGUE
438#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
439#undef TARGET_ASM_FUNCTION_END_PROLOGUE
440#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
441#undef TARGET_ASM_FUNCTION_EPILOGUE
442#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
443
444#undef TARGET_PRINT_OPERAND
445#define TARGET_PRINT_OPERAND ia64_print_operand
446#undef TARGET_PRINT_OPERAND_ADDRESS
447#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
448#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
449#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
450
451#undef TARGET_IN_SMALL_DATA_P
452#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
453
454#undef TARGET_SCHED_ADJUST_COST_2
455#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
456#undef TARGET_SCHED_ISSUE_RATE
457#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
458#undef TARGET_SCHED_VARIABLE_ISSUE
459#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
460#undef TARGET_SCHED_INIT
461#define TARGET_SCHED_INIT ia64_sched_init
462#undef TARGET_SCHED_FINISH
463#define TARGET_SCHED_FINISH ia64_sched_finish
464#undef TARGET_SCHED_INIT_GLOBAL
465#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
466#undef TARGET_SCHED_FINISH_GLOBAL
467#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
468#undef TARGET_SCHED_REORDER
469#define TARGET_SCHED_REORDER ia64_sched_reorder
470#undef TARGET_SCHED_REORDER2
471#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
472
473#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
474#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
475
476#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
477#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
478
479#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
480#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
481#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
482#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
483
484#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
485#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
486  ia64_first_cycle_multipass_dfa_lookahead_guard
487
488#undef TARGET_SCHED_DFA_NEW_CYCLE
489#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
490
491#undef TARGET_SCHED_H_I_D_EXTENDED
492#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
493
494#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
495#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
496
497#undef TARGET_SCHED_INIT_SCHED_CONTEXT
498#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
499
500#undef TARGET_SCHED_SET_SCHED_CONTEXT
501#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
502
503#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
504#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
505
506#undef TARGET_SCHED_FREE_SCHED_CONTEXT
507#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
508
509#undef TARGET_SCHED_SET_SCHED_FLAGS
510#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
511
512#undef TARGET_SCHED_GET_INSN_SPEC_DS
513#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
514
515#undef TARGET_SCHED_GET_INSN_CHECKED_DS
516#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
517
518#undef TARGET_SCHED_SPECULATE_INSN
519#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
520
521#undef TARGET_SCHED_NEEDS_BLOCK_P
522#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
523
524#undef TARGET_SCHED_GEN_SPEC_CHECK
525#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
526
527#undef TARGET_SCHED_SKIP_RTX_P
528#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
529
530#undef TARGET_FUNCTION_OK_FOR_SIBCALL
531#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
532#undef TARGET_ARG_PARTIAL_BYTES
533#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
534#undef TARGET_FUNCTION_ARG
535#define TARGET_FUNCTION_ARG ia64_function_arg
536#undef TARGET_FUNCTION_INCOMING_ARG
537#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
538#undef TARGET_FUNCTION_ARG_ADVANCE
539#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
540#undef TARGET_FUNCTION_ARG_BOUNDARY
541#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
542
543#undef TARGET_ASM_OUTPUT_MI_THUNK
544#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
545#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
546#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
547
548#undef TARGET_ASM_FILE_START
549#define TARGET_ASM_FILE_START ia64_file_start
550
551#undef TARGET_ASM_GLOBALIZE_DECL_NAME
552#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
553
554#undef TARGET_REGISTER_MOVE_COST
555#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
556#undef TARGET_MEMORY_MOVE_COST
557#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
558#undef TARGET_RTX_COSTS
559#define TARGET_RTX_COSTS ia64_rtx_costs
560#undef TARGET_ADDRESS_COST
561#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
562
563#undef TARGET_UNSPEC_MAY_TRAP_P
564#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
565
566#undef TARGET_MACHINE_DEPENDENT_REORG
567#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
568
569#undef TARGET_ENCODE_SECTION_INFO
570#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
571
572#undef  TARGET_SECTION_TYPE_FLAGS
573#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
574
575#ifdef HAVE_AS_TLS
576#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
577#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
578#endif
579
580/* ??? Investigate.  */
581#if 0
582#undef TARGET_PROMOTE_PROTOTYPES
583#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
584#endif
585
586#undef TARGET_FUNCTION_VALUE
587#define TARGET_FUNCTION_VALUE ia64_function_value
588#undef TARGET_LIBCALL_VALUE
589#define TARGET_LIBCALL_VALUE ia64_libcall_value
590#undef TARGET_FUNCTION_VALUE_REGNO_P
591#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
592
593#undef TARGET_STRUCT_VALUE_RTX
594#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
595#undef TARGET_RETURN_IN_MEMORY
596#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
597#undef TARGET_SETUP_INCOMING_VARARGS
598#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
599#undef TARGET_STRICT_ARGUMENT_NAMING
600#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
601#undef TARGET_MUST_PASS_IN_STACK
602#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
603#undef TARGET_GET_RAW_RESULT_MODE
604#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
605#undef TARGET_GET_RAW_ARG_MODE
606#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
607
608#undef TARGET_MEMBER_TYPE_FORCES_BLK
609#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
610
611#undef TARGET_GIMPLIFY_VA_ARG_EXPR
612#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
613
614#undef TARGET_ASM_UNWIND_EMIT
615#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
616#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
617#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
618#undef TARGET_ASM_INIT_SECTIONS
619#define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
620
621#undef TARGET_DEBUG_UNWIND_INFO
622#define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
623
624#undef TARGET_SCALAR_MODE_SUPPORTED_P
625#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
626#undef TARGET_VECTOR_MODE_SUPPORTED_P
627#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
628
629#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
630#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
631  ia64_libgcc_floating_mode_supported_p
632
633/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
634   in an order different from the specified program order.  */
635#undef TARGET_RELAXED_ORDERING
636#define TARGET_RELAXED_ORDERING true
637
638#undef TARGET_LEGITIMATE_CONSTANT_P
639#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
640#undef TARGET_LEGITIMATE_ADDRESS_P
641#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
642
643#undef TARGET_CANNOT_FORCE_CONST_MEM
644#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
645
646#undef TARGET_MANGLE_TYPE
647#define TARGET_MANGLE_TYPE ia64_mangle_type
648
649#undef TARGET_INVALID_CONVERSION
650#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
651#undef TARGET_INVALID_UNARY_OP
652#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
653#undef TARGET_INVALID_BINARY_OP
654#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
655
656#undef TARGET_C_MODE_FOR_SUFFIX
657#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
658
659#undef TARGET_CAN_ELIMINATE
660#define TARGET_CAN_ELIMINATE ia64_can_eliminate
661
662#undef TARGET_TRAMPOLINE_INIT
663#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
664
665#undef TARGET_CAN_USE_DOLOOP_P
666#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
667#undef TARGET_INVALID_WITHIN_DOLOOP
668#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
669
670#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
671#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
672
673#undef TARGET_PREFERRED_RELOAD_CLASS
674#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
675
676#undef TARGET_DELAY_SCHED2
677#define TARGET_DELAY_SCHED2 true
678
679/* Variable tracking should be run after all optimizations which
680   change order of insns.  It also needs a valid CFG.  */
681#undef TARGET_DELAY_VARTRACK
682#define TARGET_DELAY_VARTRACK true
683
684#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
685#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
686
687#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
688#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
689
690struct gcc_target targetm = TARGET_INITIALIZER;
691
692/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
693   identifier as an argument, so the front end shouldn't look it up.  */
694
695static bool
696ia64_attribute_takes_identifier_p (const_tree attr_id)
697{
698  if (is_attribute_p ("model", attr_id))
699    return true;
700#if TARGET_ABI_OPEN_VMS
701  if (is_attribute_p ("common_object", attr_id))
702    return true;
703#endif
704  return false;
705}
706
707typedef enum
708  {
709    ADDR_AREA_NORMAL,	/* normal address area */
710    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
711  }
712ia64_addr_area;
713
714static GTY(()) tree small_ident1;
715static GTY(()) tree small_ident2;
716
717static void
718init_idents (void)
719{
720  if (small_ident1 == 0)
721    {
722      small_ident1 = get_identifier ("small");
723      small_ident2 = get_identifier ("__small__");
724    }
725}
726
727/* Retrieve the address area that has been chosen for the given decl.  */
728
729static ia64_addr_area
730ia64_get_addr_area (tree decl)
731{
732  tree model_attr;
733
734  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
735  if (model_attr)
736    {
737      tree id;
738
739      init_idents ();
740      id = TREE_VALUE (TREE_VALUE (model_attr));
741      if (id == small_ident1 || id == small_ident2)
742	return ADDR_AREA_SMALL;
743    }
744  return ADDR_AREA_NORMAL;
745}
746
747static tree
748ia64_handle_model_attribute (tree *node, tree name, tree args,
749			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
750{
751  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
752  ia64_addr_area area;
753  tree arg, decl = *node;
754
755  init_idents ();
756  arg = TREE_VALUE (args);
757  if (arg == small_ident1 || arg == small_ident2)
758    {
759      addr_area = ADDR_AREA_SMALL;
760    }
761  else
762    {
763      warning (OPT_Wattributes, "invalid argument of %qE attribute",
764	       name);
765      *no_add_attrs = true;
766    }
767
768  switch (TREE_CODE (decl))
769    {
770    case VAR_DECL:
771      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
772	   == FUNCTION_DECL)
773	  && !TREE_STATIC (decl))
774	{
775	  error_at (DECL_SOURCE_LOCATION (decl),
776		    "an address area attribute cannot be specified for "
777		    "local variables");
778	  *no_add_attrs = true;
779	}
780      area = ia64_get_addr_area (decl);
781      if (area != ADDR_AREA_NORMAL && addr_area != area)
782	{
783	  error ("address area of %q+D conflicts with previous "
784		 "declaration", decl);
785	  *no_add_attrs = true;
786	}
787      break;
788
789    case FUNCTION_DECL:
790      error_at (DECL_SOURCE_LOCATION (decl),
791		"address area attribute cannot be specified for "
792		"functions");
793      *no_add_attrs = true;
794      break;
795
796    default:
797      warning (OPT_Wattributes, "%qE attribute ignored",
798	       name);
799      *no_add_attrs = true;
800      break;
801    }
802
803  return NULL_TREE;
804}
805
/* Part of the low level implementation of DEC Ada pragma Common_Object,
   which enables the shared use of variables stored in overlaid linker
   areas corresponding to the use of Fortran COMMON.  */
809
810static tree
811ia64_vms_common_object_attribute (tree *node, tree name, tree args,
812				  int flags ATTRIBUTE_UNUSED,
813				  bool *no_add_attrs)
814{
815    tree decl = *node;
816    tree id;
817
818    gcc_assert (DECL_P (decl));
819
820    DECL_COMMON (decl) = 1;
821    id = TREE_VALUE (args);
822    if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
823      {
824	error ("%qE attribute requires a string constant argument", name);
825	*no_add_attrs = true;
826	return NULL_TREE;
827      }
828    return NULL_TREE;
829}
830
831/* Part of the low level implementation of DEC Ada pragma Common_Object.  */
832
833void
834ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
835				     unsigned HOST_WIDE_INT size,
836				     unsigned int align)
837{
838  tree attr = DECL_ATTRIBUTES (decl);
839
840  if (attr)
841    attr = lookup_attribute ("common_object", attr);
842  if (attr)
843    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *common_name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
        common_name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
        common_name = TREE_STRING_POINTER (id);
      else
        abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", common_name);
855    }
856  else
857    fprintf (file, "%s", COMMON_ASM_OP);
858
859  /*  Code from elfos.h.  */
860  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
           size, align / BITS_PER_UNIT);
863
864  fputc ('\n', file);
865}
866
867static void
868ia64_encode_addr_area (tree decl, rtx symbol)
869{
870  int flags;
871
872  flags = SYMBOL_REF_FLAGS (symbol);
873  switch (ia64_get_addr_area (decl))
874    {
875    case ADDR_AREA_NORMAL: break;
876    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
877    default: gcc_unreachable ();
878    }
879  SYMBOL_REF_FLAGS (symbol) = flags;
880}
881
882static void
883ia64_encode_section_info (tree decl, rtx rtl, int first)
884{
885  default_encode_section_info (decl, rtl, first);
886
887  /* Careful not to prod global register variables.  */
888  if (TREE_CODE (decl) == VAR_DECL
889      && GET_CODE (DECL_RTL (decl)) == MEM
890      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
891      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
892    ia64_encode_addr_area (decl, XEXP (rtl, 0));
893}
894
895/* Return 1 if the operands of a move are ok.  */
896
897int
898ia64_move_ok (rtx dst, rtx src)
899{
900  /* If we're under init_recog_no_volatile, we'll not be able to use
901     memory_operand.  So check the code directly and don't worry about
902     the validity of the underlying address, which should have been
903     checked elsewhere anyway.  */
904  if (GET_CODE (dst) != MEM)
905    return 1;
906  if (GET_CODE (src) == MEM)
907    return 0;
908  if (register_operand (src, VOIDmode))
909    return 1;
910
  /* Otherwise, this must be a constant, and it must be 0, 0.0 or 1.0.  */
912  if (INTEGRAL_MODE_P (GET_MODE (dst)))
913    return src == const0_rtx;
914  else
915    return satisfies_constraint_G (src);
916}
917
918/* Return 1 if the operands are ok for a floating point load pair.  */
919
920int
921ia64_load_pair_ok (rtx dst, rtx src)
922{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg:TI f30), so we must
     also return false for it.  */
926  if (GET_CODE (dst) != REG
927      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
928    return 0;
929  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
930    return 0;
931  switch (GET_CODE (XEXP (src, 0)))
932    {
933    case REG:
934    case POST_INC:
935      break;
936    case POST_DEC:
937      return 0;
938    case POST_MODIFY:
939      {
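	/* The floating-point load-pair instructions can only update the
	   base by the size of the data being loaded, so any other
	   POST_MODIFY adjustment cannot be matched.  */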
940	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
941
942	if (GET_CODE (adjust) != CONST_INT
943	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
944	  return 0;
945      }
946      break;
947    default:
948      abort ();
949    }
950  return 1;
951}
952
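/* Used by the addp4 patterns: return nonzero if exactly one of OP1 and
   OP2 is a base register (basereg_operand).  */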
953int
954addp4_optimize_ok (rtx op1, rtx op2)
955{
  return (basereg_operand (op1, GET_MODE (op1))
	  != basereg_operand (op2, GET_MODE (op2)));
958}
959
960/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
961   Return the length of the field, or <= 0 on failure.  */
962
963int
964ia64_depz_field_mask (rtx rop, rtx rshift)
965{
966  unsigned HOST_WIDE_INT op = INTVAL (rop);
967  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
968
969  /* Get rid of the zero bits we're shifting in.  */
970  op >>= shift;
971
972  /* We must now have a solid block of 1's at bit 0.  */
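  /* For example, ROP = 0x1f8 and RSHIFT = 3: op becomes 0x3f and
     exact_log2 (0x40) is 6, the dep.z field length.  A non-contiguous
     mask such as 0x2f8 yields 0x5f, and exact_log2 (0x60) is -1.  */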
973  return exact_log2 (op + 1);
974}
975
976/* Return the TLS model to use for ADDR.  */
977
978static enum tls_model
979tls_symbolic_operand_type (rtx addr)
980{
981  enum tls_model tls_kind = TLS_MODEL_NONE;
982
983  if (GET_CODE (addr) == CONST)
984    {
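      /* Look through (const (plus (symbol_ref ...) (const_int ...)))
	 to get at the SYMBOL_REF whose TLS model we want.  */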
985      if (GET_CODE (XEXP (addr, 0)) == PLUS
986	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
987        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
988    }
989  else if (GET_CODE (addr) == SYMBOL_REF)
990    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
991
992  return tls_kind;
993}
994
995/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
996   as a base register.  */
997
998static inline bool
999ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
1000{
1001  if (strict
1002      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
1003    return true;
1004  else if (!strict
1005	   && (GENERAL_REGNO_P (REGNO (reg))
1006	       || !HARD_REGISTER_P (reg)))
1007    return true;
1008  else
1009    return false;
1010}
1011
1012static bool
1013ia64_legitimate_address_reg (const_rtx reg, bool strict)
1014{
1015  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1016      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1017	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1018    return true;
1019
1020  return false;
1021}
1022
1023static bool
1024ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1025{
1026  if (GET_CODE (disp) == PLUS
1027      && rtx_equal_p (reg, XEXP (disp, 0))
1028      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1029	  || (CONST_INT_P (XEXP (disp, 1))
1030	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1031    return true;
1032
1033  return false;
1034}
1035
1036/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
1037
1038static bool
1039ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1040			   rtx x, bool strict)
1041{
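  /* IA-64 memory accesses are register-indirect only, optionally with a
     post-increment, post-decrement or post-modify of the base register;
     there is no reg+offset or reg+reg addressing mode, which is why only
     these forms are accepted here.  */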
1042  if (ia64_legitimate_address_reg (x, strict))
1043    return true;
1044  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1045	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1046	   && XEXP (x, 0) != arg_pointer_rtx)
1047    return true;
1048  else if (GET_CODE (x) == POST_MODIFY
1049	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1050	   && XEXP (x, 0) != arg_pointer_rtx
1051	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1052    return true;
1053  else
1054    return false;
1055}
1056
1057/* Return true if X is a constant that is valid for some immediate
1058   field in an instruction.  */
1059
1060static bool
1061ia64_legitimate_constant_p (machine_mode mode, rtx x)
1062{
1063  switch (GET_CODE (x))
1064    {
1065    case CONST_INT:
1066    case LABEL_REF:
1067      return true;
1068
1069    case CONST_DOUBLE:
1070      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1071	return true;
1072      return satisfies_constraint_G (x);
1073
1074    case CONST:
1075    case SYMBOL_REF:
1076      /* ??? Short term workaround for PR 28490.  We must make the code here
1077	 match the code in ia64_expand_move and move_operand, even though they
1078	 are both technically wrong.  */
1079      if (tls_symbolic_operand_type (x) == 0)
1080	{
1081	  HOST_WIDE_INT addend = 0;
1082	  rtx op = x;
1083
1084	  if (GET_CODE (op) == CONST
1085	      && GET_CODE (XEXP (op, 0)) == PLUS
1086	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1087	    {
1088	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
1089	      op = XEXP (XEXP (op, 0), 0);
1090	    }
1091
1092          if (any_offset_symbol_operand (op, mode)
1093              || function_operand (op, mode))
1094            return true;
1095	  if (aligned_offset_symbol_operand (op, mode))
1096	    return (addend & 0x3fff) == 0;
1097	  return false;
1098	}
1099      return false;
1100
1101    case CONST_VECTOR:
1102      if (mode == V2SFmode)
1103	return satisfies_constraint_Y (x);
1104
1105      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1106	      && GET_MODE_SIZE (mode) <= 8);
1107
1108    default:
1109      return false;
1110    }
1111}
1112
1113/* Don't allow TLS addresses to get spilled to memory.  */
1114
1115static bool
1116ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1117{
1118  if (mode == RFmode)
1119    return true;
1120  return tls_symbolic_operand_type (x) != 0;
1121}
1122
1123/* Expand a symbolic constant load.  */
1124
1125bool
1126ia64_expand_load_address (rtx dest, rtx src)
1127{
1128  gcc_assert (GET_CODE (dest) == REG);
1129
  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
1131     having to pointer-extend the value afterward.  Other forms of address
1132     computation below are also more natural to compute as 64-bit quantities.
1133     If we've been given an SImode destination register, change it.  */
1134  if (GET_MODE (dest) != Pmode)
1135    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1136			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
1137
1138  if (TARGET_NO_PIC)
1139    return false;
1140  if (small_addr_symbolic_operand (src, VOIDmode))
1141    return false;
1142
1143  if (TARGET_AUTO_PIC)
1144    emit_insn (gen_load_gprel64 (dest, src));
1145  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1146    emit_insn (gen_load_fptr (dest, src));
1147  else if (sdata_symbolic_operand (src, VOIDmode))
1148    emit_insn (gen_load_gprel (dest, src));
1149  else if (local_symbolic_operand64 (src, VOIDmode))
1150    {
1151      /* We want to use @gprel rather than @ltoff relocations for local
1152	 symbols:
1153	  - @gprel does not require dynamic linker
1154	  - and does not use .sdata section
1155	 https://gcc.gnu.org/bugzilla/60465 */
1156      emit_insn (gen_load_gprel64 (dest, src));
1157    }
1158  else
1159    {
1160      HOST_WIDE_INT addend = 0;
1161      rtx tmp;
1162
1163      /* We did split constant offsets in ia64_expand_move, and we did try
1164	 to keep them split in move_operand, but we also allowed reload to
1165	 rematerialize arbitrary constants rather than spill the value to
1166	 the stack and reload it.  So we have to be prepared here to split
1167	 them apart again.  */
1168      if (GET_CODE (src) == CONST)
1169	{
1170	  HOST_WIDE_INT hi, lo;
1171
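	  /* ((x & 0x3fff) ^ 0x2000) - 0x2000 sign-extends the low 14 bits
	     of X.  E.g. an offset of 0x6000 splits into lo = -0x2000 and
	     hi = 0x8000; LO then fits a signed 14-bit add immediate and HI
	     is a multiple of 0x4000, matching the (addend & 0x3fff) == 0
	     check in ia64_legitimate_constant_p.  */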
1172	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
1173	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1174	  hi = hi - lo;
1175
1176	  if (lo != 0)
1177	    {
1178	      addend = lo;
1179	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1180	    }
1181	}
1182
1183      tmp = gen_rtx_HIGH (Pmode, src);
1184      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1185      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1186
1187      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1188      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1189
1190      if (addend)
1191	{
1192	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1193	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
1194	}
1195    }
1196
1197  return true;
1198}
1199
1200static GTY(()) rtx gen_tls_tga;
1201static rtx
1202gen_tls_get_addr (void)
1203{
1204  if (!gen_tls_tga)
1205    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1206  return gen_tls_tga;
1207}
1208
1209static GTY(()) rtx thread_pointer_rtx;
1210static rtx
1211gen_thread_pointer (void)
1212{
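  /* r13 (tp) is the thread pointer register in the IA-64 software
     conventions.  */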
1213  if (!thread_pointer_rtx)
1214    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1215  return thread_pointer_rtx;
1216}
1217
1218static rtx
1219ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1220			 rtx orig_op1, HOST_WIDE_INT addend)
1221{
1222  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1223  rtx_insn *insns;
1224  rtx orig_op0 = op0;
1225  HOST_WIDE_INT addend_lo, addend_hi;
1226
1227  switch (tls_kind)
1228    {
1229    case TLS_MODEL_GLOBAL_DYNAMIC:
1230      start_sequence ();
1231
1232      tga_op1 = gen_reg_rtx (Pmode);
1233      emit_insn (gen_load_dtpmod (tga_op1, op1));
1234
1235      tga_op2 = gen_reg_rtx (Pmode);
1236      emit_insn (gen_load_dtprel (tga_op2, op1));
1237
1238      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1239					 LCT_CONST, Pmode, 2, tga_op1,
1240					 Pmode, tga_op2, Pmode);
1241
1242      insns = get_insns ();
1243      end_sequence ();
1244
1245      if (GET_MODE (op0) != Pmode)
1246	op0 = tga_ret;
1247      emit_libcall_block (insns, op0, tga_ret, op1);
1248      break;
1249
1250    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
1255      start_sequence ();
1256
1257      tga_op1 = gen_reg_rtx (Pmode);
1258      emit_insn (gen_load_dtpmod (tga_op1, op1));
1259
1260      tga_op2 = const0_rtx;
1261
1262      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1263					 LCT_CONST, Pmode, 2, tga_op1,
1264					 Pmode, tga_op2, Pmode);
1265
1266      insns = get_insns ();
1267      end_sequence ();
1268
1269      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1270				UNSPEC_LD_BASE);
1271      tmp = gen_reg_rtx (Pmode);
1272      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1273
1274      if (!register_operand (op0, Pmode))
1275	op0 = gen_reg_rtx (Pmode);
1276      if (TARGET_TLS64)
1277	{
1278	  emit_insn (gen_load_dtprel (op0, op1));
1279	  emit_insn (gen_adddi3 (op0, tmp, op0));
1280	}
1281      else
1282	emit_insn (gen_add_dtprel (op0, op1, tmp));
1283      break;
1284
1285    case TLS_MODEL_INITIAL_EXEC:
1286      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1287      addend_hi = addend - addend_lo;
1288
1289      op1 = plus_constant (Pmode, op1, addend_hi);
1290      addend = addend_lo;
1291
1292      tmp = gen_reg_rtx (Pmode);
1293      emit_insn (gen_load_tprel (tmp, op1));
1294
1295      if (!register_operand (op0, Pmode))
1296	op0 = gen_reg_rtx (Pmode);
1297      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1298      break;
1299
1300    case TLS_MODEL_LOCAL_EXEC:
1301      if (!register_operand (op0, Pmode))
1302	op0 = gen_reg_rtx (Pmode);
1303
1304      op1 = orig_op1;
1305      addend = 0;
1306      if (TARGET_TLS64)
1307	{
1308	  emit_insn (gen_load_tprel (op0, op1));
1309	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1310	}
1311      else
1312	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1313      break;
1314
1315    default:
1316      gcc_unreachable ();
1317    }
1318
1319  if (addend)
1320    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1321			       orig_op0, 1, OPTAB_DIRECT);
1322  if (orig_op0 == op0)
1323    return NULL_RTX;
1324  if (GET_MODE (orig_op0) == Pmode)
1325    return op0;
1326  return gen_lowpart (GET_MODE (orig_op0), op0);
1327}
1328
1329rtx
1330ia64_expand_move (rtx op0, rtx op1)
1331{
1332  machine_mode mode = GET_MODE (op0);
1333
1334  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1335    op1 = force_reg (mode, op1);
1336
1337  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1338    {
1339      HOST_WIDE_INT addend = 0;
1340      enum tls_model tls_kind;
1341      rtx sym = op1;
1342
1343      if (GET_CODE (op1) == CONST
1344	  && GET_CODE (XEXP (op1, 0)) == PLUS
1345	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1346	{
1347	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1348	  sym = XEXP (XEXP (op1, 0), 0);
1349	}
1350
1351      tls_kind = tls_symbolic_operand_type (sym);
1352      if (tls_kind)
1353	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1354
1355      if (any_offset_symbol_operand (sym, mode))
1356	addend = 0;
1357      else if (aligned_offset_symbol_operand (sym, mode))
1358	{
1359	  HOST_WIDE_INT addend_lo, addend_hi;
1360
1361	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1362	  addend_hi = addend - addend_lo;
1363
1364	  if (addend_lo != 0)
1365	    {
1366	      op1 = plus_constant (mode, sym, addend_hi);
1367	      addend = addend_lo;
1368	    }
1369	  else
1370	    addend = 0;
1371	}
1372      else
1373	op1 = sym;
1374
1375      if (reload_completed)
1376	{
1377	  /* We really should have taken care of this offset earlier.  */
1378	  gcc_assert (addend == 0);
1379	  if (ia64_expand_load_address (op0, op1))
1380	    return NULL_RTX;
1381	}
1382
1383      if (addend)
1384	{
1385	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1386
1387	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1388
1389	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1390				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1391	  if (op0 == op1)
1392	    return NULL_RTX;
1393	}
1394    }
1395
1396  return op1;
1397}
1398
1399/* Split a move from OP1 to OP0 conditional on COND.  */
1400
1401void
1402ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1403{
1404  rtx_insn *insn, *first = get_last_insn ();
1405
1406  emit_move_insn (op0, op1);
1407
1408  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1409    if (INSN_P (insn))
1410      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1411					  PATTERN (insn));
1412}
1413
1414/* Split a post-reload TImode or TFmode reference into two DImode
1415   components.  This is made extra difficult by the fact that we do
1416   not get any scratch registers to work with, because reload cannot
1417   be prevented from giving us a scratch that overlaps the register
1418   pair involved.  So instead, when addressing memory, we tweak the
1419   pointer register up and back down with POST_INCs.  Or up and not
1420   back down when we can get away with it.
1421
1422   REVERSED is true when the loads must be done in reversed order
1423   (high word first) for correctness.  DEAD is true when the pointer
1424   dies with the second insn we generate and therefore the second
1425   address must not carry a postmodify.
1426
1427   May return an insn which is to be emitted after the moves.  */
1428
1429static rtx
1430ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1431{
1432  rtx fixup = 0;
1433
1434  switch (GET_CODE (in))
1435    {
1436    case REG:
1437      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1438      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1439      break;
1440
1441    case CONST_INT:
1442    case CONST_DOUBLE:
1443      /* Cannot occur reversed.  */
1444      gcc_assert (!reversed);
1445
1446      if (GET_MODE (in) != TFmode)
1447	split_double (in, &out[0], &out[1]);
1448      else
1449	/* split_double does not understand how to split a TFmode
1450	   quantity into a pair of DImode constants.  */
1451	{
1452	  REAL_VALUE_TYPE r;
1453	  unsigned HOST_WIDE_INT p[2];
1454	  long l[4];  /* TFmode is 128 bits */
1455
1456	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1457	  real_to_target (l, &r, TFmode);
1458
1459	  if (FLOAT_WORDS_BIG_ENDIAN)
1460	    {
1461	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1462	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1463	    }
1464	  else
1465	    {
1466	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1467	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1468	    }
1469	  out[0] = GEN_INT (p[0]);
1470	  out[1] = GEN_INT (p[1]);
1471	}
1472      break;
1473
1474    case MEM:
1475      {
1476	rtx base = XEXP (in, 0);
1477	rtx offset;
1478
1479	switch (GET_CODE (base))
1480	  {
1481	  case REG:
1482	    if (!reversed)
1483	      {
1484		out[0] = adjust_automodify_address
1485		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1486		out[1] = adjust_automodify_address
1487		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1488	      }
1489	    else
1490	      {
1491		/* Reversal requires a pre-increment, which can only
1492		   be done as a separate insn.  */
1493		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1494		out[0] = adjust_automodify_address
1495		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1496		out[1] = adjust_address (in, DImode, 0);
1497	      }
1498	    break;
1499
1500	  case POST_INC:
1501	    gcc_assert (!reversed && !dead);
1502
1503	    /* Just do the increment in two steps.  */
1504	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1505	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1506	    break;
1507
1508	  case POST_DEC:
1509	    gcc_assert (!reversed && !dead);
1510
1511	    /* Add 8, subtract 24.  */
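	    /* The original POST_DEC decrements the base by 16, the size of
	       the TImode/TFmode access.  The first half post-increments by
	       8, so the second half uses a post-modify of -24 to get the
	       same net adjustment of -16.  */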
1512	    base = XEXP (base, 0);
1513	    out[0] = adjust_automodify_address
1514	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1515	    out[1] = adjust_automodify_address
1516	      (in, DImode,
1517	       gen_rtx_POST_MODIFY (Pmode, base,
1518				    plus_constant (Pmode, base, -24)),
1519	       8);
1520	    break;
1521
1522	  case POST_MODIFY:
1523	    gcc_assert (!reversed && !dead);
1524
1525	    /* Extract and adjust the modification.  This case is
1526	       trickier than the others, because we might have an
1527	       index register, or we might have a combined offset that
1528	       doesn't fit a signed 9-bit displacement field.  We can
1529	       assume the incoming expression is already legitimate.  */
1530	    offset = XEXP (base, 1);
1531	    base = XEXP (base, 0);
1532
1533	    out[0] = adjust_automodify_address
1534	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1535
1536	    if (GET_CODE (XEXP (offset, 1)) == REG)
1537	      {
1538		/* Can't adjust the postmodify to match.  Emit the
1539		   original, then a separate addition insn.  */
1540		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1541		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1542	      }
1543	    else
1544	      {
1545		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1546		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1547		  {
1548		    /* Again the postmodify cannot be made to match,
1549		       but in this case it's more efficient to get rid
1550		       of the postmodify entirely and fix up with an
1551		       add insn.  */
1552		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1553		    fixup = gen_adddi3
1554		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1555		  }
1556		else
1557		  {
1558		    /* Combined offset still fits in the displacement field.
1559		       (We cannot overflow it at the high end.)  */
1560		    out[1] = adjust_automodify_address
1561		      (in, DImode, gen_rtx_POST_MODIFY
1562		       (Pmode, base, gen_rtx_PLUS
1563			(Pmode, base,
1564			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1565		       8);
1566		  }
1567	      }
1568	    break;
1569
1570	  default:
1571	    gcc_unreachable ();
1572	  }
1573	break;
1574      }
1575
1576    default:
1577      gcc_unreachable ();
1578    }
1579
1580  return fixup;
1581}
1582
1583/* Split a TImode or TFmode move instruction after reload.
1584   This is used by *movtf_internal and *movti_internal.  */
1585void
1586ia64_split_tmode_move (rtx operands[])
1587{
1588  rtx in[2], out[2], insn;
1589  rtx fixup[2];
1590  bool dead = false;
1591  bool reversed = false;
1592
1593  /* It is possible for reload to decide to overwrite a pointer with
1594     the value it points to.  In that case we have to do the loads in
1595     the appropriate order so that the pointer is not destroyed too
1596     early.  Also we must not generate a postmodify for that second
1597     load, or rws_access_regno will die.  And we must not generate a
1598     postmodify for the second load if the destination register
1599     overlaps with the base register.  */
1600  if (GET_CODE (operands[1]) == MEM
1601      && reg_overlap_mentioned_p (operands[0], operands[1]))
1602    {
1603      rtx base = XEXP (operands[1], 0);
1604      while (GET_CODE (base) != REG)
1605	base = XEXP (base, 0);
1606
1607      if (REGNO (base) == REGNO (operands[0]))
1608	reversed = true;
1609
1610      if (refers_to_regno_p (REGNO (operands[0]),
1611			     REGNO (operands[0])+2,
1612			     base, 0))
1613	dead = true;
1614    }
1615  /* Another reason to do the moves in reversed order is if the first
1616     element of the target register pair is also the second element of
1617     the source register pair.  */
1618  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1619      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1620    reversed = true;
1621
1622  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1623  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1624
1625#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1626  if (GET_CODE (EXP) == MEM						\
1627      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1628	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1629	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1630    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1631
1632  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1633  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1634  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1635
1636  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1637  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1638  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1639
1640  if (fixup[0])
1641    emit_insn (fixup[0]);
1642  if (fixup[1])
1643    emit_insn (fixup[1]);
1644
1645#undef MAYBE_ADD_REG_INC_NOTE
1646}
1647
1648/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1649   through memory plus an extra GR scratch register.  Except that you can
1650   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1651   SECONDARY_RELOAD_CLASS, but not both.
1652
1653   We got into problems in the first place by allowing a construct like
1654   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1655   This solution attempts to prevent this situation from occurring.  When
1656   we see something like the above, we spill the inner register to memory.  */
1657
1658static rtx
1659spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1660{
1661  if (GET_CODE (in) == SUBREG
1662      && GET_MODE (SUBREG_REG (in)) == TImode
1663      && GET_CODE (SUBREG_REG (in)) == REG)
1664    {
1665      rtx memt = assign_stack_temp (TImode, 16);
1666      emit_move_insn (memt, SUBREG_REG (in));
1667      return adjust_address (memt, mode, 0);
1668    }
1669  else if (force && GET_CODE (in) == REG)
1670    {
1671      rtx memx = assign_stack_temp (mode, 16);
1672      emit_move_insn (memx, in);
1673      return memx;
1674    }
1675  else
1676    return in;
1677}
1678
1679/* Expand the movxf or movrf pattern (MODE says which) with the given
1680   OPERANDS, returning true if the pattern should then invoke
1681   DONE.  */
1682
1683bool
1684ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1685{
1686  rtx op0 = operands[0];
1687
1688  if (GET_CODE (op0) == SUBREG)
1689    op0 = SUBREG_REG (op0);
1690
1691  /* We must support XFmode loads into general registers for stdarg/vararg,
1692     unprototyped calls, and a rare case where a long double is passed as
1693     an argument after a float HFA fills the FP registers.  We split them into
1694     DImode loads for convenience.  We also need to support XFmode stores
1695     for the last case.  This case does not happen for stdarg/vararg routines,
1696     because we do a block store to memory of unnamed arguments.  */
1697
1698  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1699    {
1700      rtx out[2];
1701
1702      /* We're hoping to transform everything that deals with XFmode
1703	 quantities and GR registers early in the compiler.  */
1704      gcc_assert (can_create_pseudo_p ());
1705
1706      /* Struct to register can just use TImode instead.  */
1707      if ((GET_CODE (operands[1]) == SUBREG
1708	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1709	  || (GET_CODE (operands[1]) == REG
1710	      && GR_REGNO_P (REGNO (operands[1]))))
1711	{
1712	  rtx op1 = operands[1];
1713
1714	  if (GET_CODE (op1) == SUBREG)
1715	    op1 = SUBREG_REG (op1);
1716	  else
1717	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1718
1719	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1720	  return true;
1721	}
1722
1723      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1724	{
1725	  /* Don't word-swap when reading in the constant.  */
1726	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1727			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1728					   0, mode));
1729	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1730			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1731					   0, mode));
1732	  return true;
1733	}
1734
1735      /* If the quantity is in a register not known to be GR, spill it.  */
1736      if (register_operand (operands[1], mode))
1737	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1738
1739      gcc_assert (GET_CODE (operands[1]) == MEM);
1740
1741      /* Don't word-swap when reading in the value.  */
1742      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1743      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1744
1745      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1746      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1747      return true;
1748    }
1749
1750  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1751    {
1752      /* We're hoping to transform everything that deals with XFmode
1753	 quantities and GR registers early in the compiler.  */
1754      gcc_assert (can_create_pseudo_p ());
1755
1756      /* Op0 can't be a GR_REG here, as that case is handled above.
1757	 If op0 is a register, then we spill op1, so that we now have a
1758	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1759	 to force the spill.  */
1760      if (register_operand (operands[0], mode))
1761	{
1762	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1763	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1764	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1765	}
1766
1767      else
1768	{
1769	  rtx in[2];
1770
1771	  gcc_assert (GET_CODE (operands[0]) == MEM);
1772
1773	  /* Don't word-swap when writing out the value.  */
1774	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1775	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1776
1777	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1778	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1779	  return true;
1780	}
1781    }
1782
1783  if (!reload_in_progress && !reload_completed)
1784    {
1785      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1786
1787      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1788	{
1789	  rtx memt, memx, in = operands[1];
1790	  if (CONSTANT_P (in))
1791	    in = validize_mem (force_const_mem (mode, in));
1792	  if (GET_CODE (in) == MEM)
1793	    memt = adjust_address (in, TImode, 0);
1794	  else
1795	    {
1796	      memt = assign_stack_temp (TImode, 16);
1797	      memx = adjust_address (memt, mode, 0);
1798	      emit_move_insn (memx, in);
1799	    }
1800	  emit_move_insn (op0, memt);
1801	  return true;
1802	}
1803
1804      if (!ia64_move_ok (operands[0], operands[1]))
1805	operands[1] = force_reg (mode, operands[1]);
1806    }
1807
1808  return false;
1809}
1810
1811/* Emit a comparison instruction if necessary, replacing *EXPR, *OP0 and *OP1
1812   with the expression that holds the compare result (in VOIDmode).  */
1813
1814static GTY(()) rtx cmptf_libfunc;
1815
1816void
1817ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1818{
1819  enum rtx_code code = GET_CODE (*expr);
1820  rtx cmp;
1821
1822  /* If we have a BImode input, then we already have a compare result, and
1823     do not need to emit another comparison.  */
1824  if (GET_MODE (*op0) == BImode)
1825    {
1826      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1827      cmp = *op0;
1828    }
1829  /* The HPUX TFmode compare requires a library call to _U_Qfcmp, which takes
1830     a magic number as its third argument indicating what to do.  The return
1831     value is an integer to be compared against zero.  */
1832  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1833    {
1834      enum qfcmp_magic {
1835	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
1836	QCMP_UNORD = 2,
1837	QCMP_EQ = 4,
1838	QCMP_LT = 8,
1839	QCMP_GT = 16
1840      };
1841      int magic;
1842      enum rtx_code ncode;
1843      rtx ret, insns;
1844
1845      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1846      switch (code)
1847	{
1848	  /* 1 = equal, 0 = not equal.  Equality operators do
1849	     not raise FP_INVALID when given a NaN operand.  */
1850	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1851	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1852	  /* isunordered() from C99.  */
1853	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1854	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1855	  /* Relational operators raise FP_INVALID when given
1856	     a NaN operand.  */
1857	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1858	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1859	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1860	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1861          /* Unordered relational operators do not raise FP_INVALID
1862	     when given a NaN operand.  */
1863	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
1864	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1865	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
1866	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1867	  /* Not supported.  */
1868	case UNEQ:
1869	case LTGT:
1870	default: gcc_unreachable ();
1871	}
1872
1873      start_sequence ();
1874
1875      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1876				     *op0, TFmode, *op1, TFmode,
1877				     GEN_INT (magic), DImode);
1878      cmp = gen_reg_rtx (BImode);
1879      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1880			      gen_rtx_fmt_ee (ncode, BImode,
1881					      ret, const0_rtx)));
1882
1883      insns = get_insns ();
1884      end_sequence ();
1885
1886      emit_libcall_block (insns, cmp, cmp,
1887			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1888      code = NE;
1889    }
1890  else
1891    {
1892      cmp = gen_reg_rtx (BImode);
1893      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1894			      gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1895      code = NE;
1896    }
1897
1898  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1899  *op0 = cmp;
1900  *op1 = const0_rtx;
1901}
1902
1903/* Generate an integral vector comparison.  Return true if the condition has
1904   been reversed, and so the sense of the comparison should be inverted.  */
1905
1906static bool
1907ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1908			    rtx dest, rtx op0, rtx op1)
1909{
1910  bool negate = false;
1911  rtx x;
1912
1913  /* Canonicalize the comparison to EQ, GT, GTU.  */
1914  switch (code)
1915    {
1916    case EQ:
1917    case GT:
1918    case GTU:
1919      break;
1920
1921    case NE:
1922    case LE:
1923    case LEU:
1924      code = reverse_condition (code);
1925      negate = true;
1926      break;
1927
1928    case GE:
1929    case GEU:
1930      code = reverse_condition (code);
1931      negate = true;
1932      /* FALLTHRU */
1933
1934    case LT:
1935    case LTU:
1936      code = swap_condition (code);
1937      x = op0, op0 = op1, op1 = x;
1938      break;
1939
1940    default:
1941      gcc_unreachable ();
1942    }
1943
1944  /* Unsigned parallel compare is not supported by the hardware.  Play some
1945     tricks to turn this into a signed comparison against 0.  */
1946  if (code == GTU)
1947    {
1948      switch (mode)
1949	{
1950	case V2SImode:
1951	  {
1952	    rtx t1, t2, mask;
1953
1954	    /* Subtract (-(INT_MAX) - 1), i.e. INT_MIN, from both operands
1955	       to make them signed.  */
1956	    mask = gen_int_mode (0x80000000, SImode);
1957	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1958	    mask = force_reg (mode, mask);
1959	    t1 = gen_reg_rtx (mode);
1960	    emit_insn (gen_subv2si3 (t1, op0, mask));
1961	    t2 = gen_reg_rtx (mode);
1962	    emit_insn (gen_subv2si3 (t2, op1, mask));
1963	    op0 = t1;
1964	    op1 = t2;
1965	    code = GT;
1966	  }
1967	  break;
1968
1969	case V8QImode:
1970	case V4HImode:
1971	  /* Perform a parallel unsigned saturating subtraction.  */
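	  /* a >u b exactly when the saturating difference a -us b is
	     nonzero, so test the US_MINUS result for equality with zero
	     and invert the final sense of the comparison.  */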
1972	  x = gen_reg_rtx (mode);
1973	  emit_insn (gen_rtx_SET (VOIDmode, x,
1974				  gen_rtx_US_MINUS (mode, op0, op1)));
1975
1976	  code = EQ;
1977	  op0 = x;
1978	  op1 = CONST0_RTX (mode);
1979	  negate = !negate;
1980	  break;
1981
1982	default:
1983	  gcc_unreachable ();
1984	}
1985    }
1986
1987  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1988  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1989
1990  return negate;
1991}
1992
1993/* Emit an integral vector conditional move.  */
1994
1995void
1996ia64_expand_vecint_cmov (rtx operands[])
1997{
1998  machine_mode mode = GET_MODE (operands[0]);
1999  enum rtx_code code = GET_CODE (operands[3]);
2000  bool negate;
2001  rtx cmp, x, ot, of;
2002
2003  cmp = gen_reg_rtx (mode);
2004  negate = ia64_expand_vecint_compare (code, mode, cmp,
2005				       operands[4], operands[5]);
2006
2007  ot = operands[1+negate];
2008  of = operands[2-negate];
2009
2010  if (ot == CONST0_RTX (mode))
2011    {
2012      if (of == CONST0_RTX (mode))
2013	{
2014	  emit_move_insn (operands[0], ot);
2015	  return;
2016	}
2017
2018      x = gen_rtx_NOT (mode, cmp);
2019      x = gen_rtx_AND (mode, x, of);
2020      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2021    }
2022  else if (of == CONST0_RTX (mode))
2023    {
2024      x = gen_rtx_AND (mode, cmp, ot);
2025      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2026    }
2027  else
2028    {
2029      rtx t, f;
2030
2031      t = gen_reg_rtx (mode);
2032      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2033      emit_insn (gen_rtx_SET (VOIDmode, t, x));
2034
2035      f = gen_reg_rtx (mode);
2036      x = gen_rtx_NOT (mode, cmp);
2037      x = gen_rtx_AND (mode, x, operands[2-negate]);
2038      emit_insn (gen_rtx_SET (VOIDmode, f, x));
2039
2040      x = gen_rtx_IOR (mode, t, f);
2041      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
2042    }
2043}
2044
2045/* Emit an integral vector min or max operation.  Return true if all done.  */
2046
2047bool
2048ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2049			   rtx operands[])
2050{
2051  rtx xops[6];
2052
2053  /* These four combinations are supported directly.  */
2054  if (mode == V8QImode && (code == UMIN || code == UMAX))
2055    return false;
2056  if (mode == V4HImode && (code == SMIN || code == SMAX))
2057    return false;
2058
2059  /* This combination can be implemented with only saturating subtraction.  */
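  /* It relies on the identity umax (a, b) == (a -us b) + b, since the
     saturating difference is a - b when a > b and zero otherwise.  */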
2060  if (mode == V4HImode && code == UMAX)
2061    {
2062      rtx x, tmp = gen_reg_rtx (mode);
2063
2064      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2065      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
2066
2067      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2068      return true;
2069    }
2070
2071  /* Everything else implemented via vector comparisons.  */
2072  xops[0] = operands[0];
2073  xops[4] = xops[1] = operands[1];
2074  xops[5] = xops[2] = operands[2];
2075
2076  switch (code)
2077    {
2078    case UMIN:
2079      code = LTU;
2080      break;
2081    case UMAX:
2082      code = GTU;
2083      break;
2084    case SMIN:
2085      code = LT;
2086      break;
2087    case SMAX:
2088      code = GT;
2089      break;
2090    default:
2091      gcc_unreachable ();
2092    }
2093  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2094
2095  ia64_expand_vecint_cmov (xops);
2096  return true;
2097}
2098
2099/* The vectors LO and HI each contain N halves of a double-wide vector.
2100   Reassemble either the first N/2 or the second N/2 elements.  */
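/* For example, for V8QImode with HIGHP false the permutation built below is
   { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. element i of the first operand
   interleaved with element i of the second, for i = 0 .. 3.  */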
2101
2102void
2103ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2104{
2105  machine_mode vmode = GET_MODE (lo);
2106  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2107  struct expand_vec_perm_d d;
2108  bool ok;
2109
2110  d.target = gen_lowpart (vmode, out);
2111  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2112  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2113  d.vmode = vmode;
2114  d.nelt = nelt;
2115  d.one_operand_p = false;
2116  d.testing_p = false;
2117
2118  high = (highp ? nelt / 2 : 0);
2119  for (i = 0; i < nelt / 2; ++i)
2120    {
2121      d.perm[i * 2] = i + high;
2122      d.perm[i * 2 + 1] = i + high + nelt;
2123    }
2124
2125  ok = ia64_expand_vec_perm_const_1 (&d);
2126  gcc_assert (ok);
2127}
2128
2129/* Return a vector of the sign-extension of VEC.  */
2130
2131static rtx
2132ia64_unpack_sign (rtx vec, bool unsignedp)
2133{
2134  machine_mode mode = GET_MODE (vec);
2135  rtx zero = CONST0_RTX (mode);
2136
2137  if (unsignedp)
2138    return zero;
2139  else
2140    {
2141      rtx sign = gen_reg_rtx (mode);
2142      bool neg;
2143
2144      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2145      gcc_assert (!neg);
2146
2147      return sign;
2148    }
2149}
2150
2151/* Emit an integral vector unpack operation.  */
2152
2153void
2154ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2155{
2156  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2157  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2158}
2159
2160/* Emit an integral vector widening sum operation.  */
2161
2162void
2163ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2164{
2165  machine_mode wmode;
2166  rtx l, h, t, sign;
2167
2168  sign = ia64_unpack_sign (operands[1], unsignedp);
2169
2170  wmode = GET_MODE (operands[0]);
2171  l = gen_reg_rtx (wmode);
2172  h = gen_reg_rtx (wmode);
2173
2174  ia64_unpack_assemble (l, operands[1], sign, false);
2175  ia64_unpack_assemble (h, operands[1], sign, true);
2176
2177  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2178  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2179  if (t != operands[0])
2180    emit_move_insn (operands[0], t);
2181}
2182
2183/* Emit the appropriate sequence for a call.  */
2184
2185void
2186ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2187		  int sibcall_p)
2188{
2189  rtx insn, b0;
2190
2191  addr = XEXP (addr, 0);
2192  addr = convert_memory_address (DImode, addr);
2193  b0 = gen_rtx_REG (DImode, R_BR (0));
2194
2195  /* ??? Should do this for functions known to bind local too.  */
2196  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2197    {
2198      if (sibcall_p)
2199	insn = gen_sibcall_nogp (addr);
2200      else if (! retval)
2201	insn = gen_call_nogp (addr, b0);
2202      else
2203	insn = gen_call_value_nogp (retval, addr, b0);
2204      insn = emit_call_insn (insn);
2205    }
2206  else
2207    {
2208      if (sibcall_p)
2209	insn = gen_sibcall_gp (addr);
2210      else if (! retval)
2211	insn = gen_call_gp (addr, b0);
2212      else
2213	insn = gen_call_value_gp (retval, addr, b0);
2214      insn = emit_call_insn (insn);
2215
2216      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2217    }
2218
2219  if (sibcall_p)
2220    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2221
2222  if (TARGET_ABI_OPEN_VMS)
2223    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2224	     gen_rtx_REG (DImode, GR_REG (25)));
2225}
2226
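/* Record that code using the hard register assigned to frame register R has
   been emitted; the assignment may no longer change after this point.  */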
2227static void
2228reg_emitted (enum ia64_frame_regs r)
2229{
2230  if (emitted_frame_related_regs[r] == 0)
2231    emitted_frame_related_regs[r] = current_frame_info.r[r];
2232  else
2233    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2234}
2235
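/* Return the hard register assigned to frame register R, marking the
   assignment as emitted.  */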
2236static int
2237get_reg (enum ia64_frame_regs r)
2238{
2239  reg_emitted (r);
2240  return current_frame_info.r[r];
2241}
2242
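/* Return true if hard register REGNO has already been committed to as the
   save register for one of the special frame registers.  */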
2243static bool
2244is_emitted (int regno)
2245{
2246  unsigned int r;
2247
2248  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2249    if (emitted_frame_related_regs[r] == regno)
2250      return true;
2251  return false;
2252}
2253
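/* Reload the GP register (r1) after a call that may have clobbered it,
   either from the general register in which it was saved or from its
   memory save slot in the frame.  */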
2254void
2255ia64_reload_gp (void)
2256{
2257  rtx tmp;
2258
2259  if (current_frame_info.r[reg_save_gp])
2260    {
2261      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2262    }
2263  else
2264    {
2265      HOST_WIDE_INT offset;
2266      rtx offset_r;
2267
2268      offset = (current_frame_info.spill_cfa_off
2269	        + current_frame_info.spill_size);
2270      if (frame_pointer_needed)
2271        {
2272          tmp = hard_frame_pointer_rtx;
2273          offset = -offset;
2274        }
2275      else
2276        {
2277          tmp = stack_pointer_rtx;
2278          offset = current_frame_info.total_size - offset;
2279        }
2280
2281      offset_r = GEN_INT (offset);
2282      if (satisfies_constraint_I (offset_r))
2283        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2284      else
2285        {
2286          emit_move_insn (pic_offset_table_rtx, offset_r);
2287          emit_insn (gen_adddi3 (pic_offset_table_rtx,
2288			         pic_offset_table_rtx, tmp));
2289        }
2290
2291      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2292    }
2293
2294  emit_move_insn (pic_offset_table_rtx, tmp);
2295}
2296
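/* Split a call insn after reload.  If ADDR is a general register, the call
   goes through a function descriptor: load the entry point into SCRATCH_B
   (via SCRATCH_R) and the new GP from the descriptor, then emit the branch
   and reload our own GP afterwards when necessary.  */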
2297void
2298ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2299		 rtx scratch_b, int noreturn_p, int sibcall_p)
2300{
2301  rtx insn;
2302  bool is_desc = false;
2303
2304  /* If we find we're calling through a register, then we're actually
2305     calling through a descriptor, so load up the values.  */
2306  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2307    {
2308      rtx tmp;
2309      bool addr_dead_p;
2310
2311      /* ??? We are currently constrained to *not* use peep2, because
2312	 we can legitimately change the global lifetime of the GP
2313	 (in the form of killing where previously live).  This is
2314	 because a call through a descriptor doesn't use the previous
2315	 value of the GP, while a direct call does, and we do not
2316	 commit to either form until the split here.
2317
2318	 That said, this means that we lack precise life info for
2319	 whether ADDR is dead after this call.  This is not terribly
2320	 important, since we can fix things up essentially for free
2321	 with the POST_DEC below, but it's nice to not use it when we
2322	 can immediately tell it's not necessary.  */
2323      addr_dead_p = ((noreturn_p || sibcall_p
2324		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2325					    REGNO (addr)))
2326		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2327
2328      /* Load the code address into scratch_b.  */
2329      tmp = gen_rtx_POST_INC (Pmode, addr);
2330      tmp = gen_rtx_MEM (Pmode, tmp);
2331      emit_move_insn (scratch_r, tmp);
2332      emit_move_insn (scratch_b, scratch_r);
2333
2334      /* Load the GP address.  If ADDR is not dead here, then we must
2335	 revert the change made above via the POST_INCREMENT.  */
2336      if (!addr_dead_p)
2337	tmp = gen_rtx_POST_DEC (Pmode, addr);
2338      else
2339	tmp = addr;
2340      tmp = gen_rtx_MEM (Pmode, tmp);
2341      emit_move_insn (pic_offset_table_rtx, tmp);
2342
2343      is_desc = true;
2344      addr = scratch_b;
2345    }
2346
2347  if (sibcall_p)
2348    insn = gen_sibcall_nogp (addr);
2349  else if (retval)
2350    insn = gen_call_value_nogp (retval, addr, retaddr);
2351  else
2352    insn = gen_call_nogp (addr, retaddr);
2353  emit_call_insn (insn);
2354
2355  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2356    ia64_reload_gp ();
2357}
2358
2359/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2360
2361   This differs from the generic code in that we know about the zero-extending
2362   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2363   also know that ld.acq+cmpxchg.rel equals a full barrier.
2364
2365   The loop we want to generate looks like
2366
2367	cmp_reg = mem;
2368      label:
2369        old_reg = cmp_reg;
2370	new_reg = cmp_reg op val;
2371	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2372	if (cmp_reg != old_reg)
2373	  goto label;
2374
2375   Note that we only do the plain load from memory once.  Subsequent
2376   iterations use the value loaded by the compare-and-swap pattern.  */
2377
2378void
2379ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2380		       rtx old_dst, rtx new_dst, enum memmodel model)
2381{
2382  machine_mode mode = GET_MODE (mem);
2383  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2384  enum insn_code icode;
2385
2386  /* Special case for using fetchadd.  */
2387  if ((mode == SImode || mode == DImode)
2388      && (code == PLUS || code == MINUS)
2389      && fetchadd_operand (val, mode))
2390    {
2391      if (code == MINUS)
2392	val = GEN_INT (-INTVAL (val));
2393
2394      if (!old_dst)
2395        old_dst = gen_reg_rtx (mode);
2396
2397      switch (model)
2398	{
2399	case MEMMODEL_ACQ_REL:
2400	case MEMMODEL_SEQ_CST:
2401	case MEMMODEL_SYNC_SEQ_CST:
2402	  emit_insn (gen_memory_barrier ());
2403	  /* FALLTHRU */
2404	case MEMMODEL_RELAXED:
2405	case MEMMODEL_ACQUIRE:
2406	case MEMMODEL_SYNC_ACQUIRE:
2407	case MEMMODEL_CONSUME:
2408	  if (mode == SImode)
2409	    icode = CODE_FOR_fetchadd_acq_si;
2410	  else
2411	    icode = CODE_FOR_fetchadd_acq_di;
2412	  break;
2413	case MEMMODEL_RELEASE:
2414	case MEMMODEL_SYNC_RELEASE:
2415	  if (mode == SImode)
2416	    icode = CODE_FOR_fetchadd_rel_si;
2417	  else
2418	    icode = CODE_FOR_fetchadd_rel_di;
2419	  break;
2420
2421	default:
2422	  gcc_unreachable ();
2423	}
2424
2425      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2426
2427      if (new_dst)
2428	{
2429	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2430					 true, OPTAB_WIDEN);
2431	  if (new_reg != new_dst)
2432	    emit_move_insn (new_dst, new_reg);
2433	}
2434      return;
2435    }
2436
2437  /* Because of the volatile mem read, we get an ld.acq, which is the
2438     front half of the full barrier.  The end half is the cmpxchg.rel.
2439     For relaxed and release memory models, we don't need this.  But we
2440     also don't bother trying to prevent it either.  */
2441  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2442	      || MEM_VOLATILE_P (mem));
2443
2444  old_reg = gen_reg_rtx (DImode);
2445  cmp_reg = gen_reg_rtx (DImode);
2446  label = gen_label_rtx ();
2447
2448  if (mode != DImode)
2449    {
2450      val = simplify_gen_subreg (DImode, val, mode, 0);
2451      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2452    }
2453  else
2454    emit_move_insn (cmp_reg, mem);
2455
2456  emit_label (label);
2457
2458  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2459  emit_move_insn (old_reg, cmp_reg);
2460  emit_move_insn (ar_ccv, cmp_reg);
2461
2462  if (old_dst)
2463    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2464
2465  new_reg = cmp_reg;
2466  if (code == NOT)
2467    {
2468      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2469				     true, OPTAB_DIRECT);
2470      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2471    }
2472  else
2473    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2474				   true, OPTAB_DIRECT);
2475
2476  if (mode != DImode)
2477    new_reg = gen_lowpart (mode, new_reg);
2478  if (new_dst)
2479    emit_move_insn (new_dst, new_reg);
2480
2481  switch (model)
2482    {
2483    case MEMMODEL_RELAXED:
2484    case MEMMODEL_ACQUIRE:
2485    case MEMMODEL_SYNC_ACQUIRE:
2486    case MEMMODEL_CONSUME:
2487      switch (mode)
2488	{
2489	case QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2490	case HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2491	case SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2492	case DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2493	default:
2494	  gcc_unreachable ();
2495	}
2496      break;
2497
2498    case MEMMODEL_RELEASE:
2499    case MEMMODEL_SYNC_RELEASE:
2500    case MEMMODEL_ACQ_REL:
2501    case MEMMODEL_SEQ_CST:
2502    case MEMMODEL_SYNC_SEQ_CST:
2503      switch (mode)
2504	{
2505	case QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2506	case HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2507	case SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2508	case DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2509	default:
2510	  gcc_unreachable ();
2511	}
2512      break;
2513
2514    default:
2515      gcc_unreachable ();
2516    }
2517
2518  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2519
2520  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2521}
2522
2523/* Begin the assembly file.  */
2524
2525static void
2526ia64_file_start (void)
2527{
2528  default_file_start ();
2529  emit_safe_across_calls ();
2530}
2531
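/* Emit a .pred.safe_across_calls directive covering each range of predicate
   registers that is not call-used and is therefore preserved across
   calls.  */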
2532void
2533emit_safe_across_calls (void)
2534{
2535  unsigned int rs, re;
2536  int out_state;
2537
2538  rs = 1;
2539  out_state = 0;
2540  while (1)
2541    {
2542      while (rs < 64 && call_used_regs[PR_REG (rs)])
2543	rs++;
2544      if (rs >= 64)
2545	break;
2546      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2547	continue;
2548      if (out_state == 0)
2549	{
2550	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2551	  out_state = 1;
2552	}
2553      else
2554	fputc (',', asm_out_file);
2555      if (re == rs + 1)
2556	fprintf (asm_out_file, "p%u", rs);
2557      else
2558	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2559      rs = re + 1;
2560    }
2561  if (out_state)
2562    fputc ('\n', asm_out_file);
2563}
2564
2565/* Globalize a declaration.  */
2566
2567static void
2568ia64_globalize_decl_name (FILE * stream, tree decl)
2569{
2570  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2571  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2572  if (version_attr)
2573    {
2574      tree v = TREE_VALUE (TREE_VALUE (version_attr));
2575      const char *p = TREE_STRING_POINTER (v);
2576      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2577    }
2578  targetm.asm_out.globalize_label (stream, name);
2579  if (TREE_CODE (decl) == FUNCTION_DECL)
2580    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2581}
2582
2583/* Helper function for ia64_compute_frame_size: find an appropriate general
2584   register to spill the special frame register R to.  Registers already
2585   allocated by this routine are tracked in current_frame_info.gr_used_mask.
2586   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2587
2588static int
2589find_gr_spill (enum ia64_frame_regs r, int try_locals)
2590{
2591  int regno;
2592
2593  if (emitted_frame_related_regs[r] != 0)
2594    {
2595      regno = emitted_frame_related_regs[r];
2596      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2597	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2598        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2599      else if (crtl->is_leaf
2600               && regno >= GR_REG (1) && regno <= GR_REG (31))
2601        current_frame_info.gr_used_mask |= 1 << regno;
2602
2603      return regno;
2604    }
2605
2606  /* If this is a leaf function, first try an otherwise unused
2607     call-clobbered register.  */
2608  if (crtl->is_leaf)
2609    {
2610      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2611	if (! df_regs_ever_live_p (regno)
2612	    && call_used_regs[regno]
2613	    && ! fixed_regs[regno]
2614	    && ! global_regs[regno]
2615	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2616            && ! is_emitted (regno))
2617	  {
2618	    current_frame_info.gr_used_mask |= 1 << regno;
2619	    return regno;
2620	  }
2621    }
2622
2623  if (try_locals)
2624    {
2625      regno = current_frame_info.n_local_regs;
2626      /* If there is a frame pointer, then we can't use loc79, because
2627	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2628	 reg_name switching code in ia64_expand_prologue.  */
2629      while (regno < (80 - frame_pointer_needed))
2630	if (! is_emitted (LOC_REG (regno++)))
2631	  {
2632	    current_frame_info.n_local_regs = regno;
2633	    return LOC_REG (regno - 1);
2634	  }
2635    }
2636
2637  /* Failed to find a general register to spill to.  Must use stack.  */
2638  return 0;
2639}
2640
2641/* In order to make for nice schedules, we try to allocate every temporary
2642   to a different register.  We must of course stay away from call-saved,
2643   fixed, and global registers.  We must also stay away from registers
2644   allocated in current_frame_info.gr_used_mask, since those include regs
2645   used all through the prologue.
2646
2647   Any register allocated here must be used immediately.  The idea is to
2648   aid scheduling, not to solve data flow problems.  */
2649
2650static int last_scratch_gr_reg;
2651
2652static int
2653next_scratch_gr_reg (void)
2654{
2655  int i, regno;
2656
2657  for (i = 0; i < 32; ++i)
2658    {
2659      regno = (last_scratch_gr_reg + i + 1) & 31;
2660      if (call_used_regs[regno]
2661	  && ! fixed_regs[regno]
2662	  && ! global_regs[regno]
2663	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2664	{
2665	  last_scratch_gr_reg = regno;
2666	  return regno;
2667	}
2668    }
2669
2670  /* There must be _something_ available.  */
2671  gcc_unreachable ();
2672}
2673
2674/* Helper function for ia64_compute_frame_size, called through
2675   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2676
2677static void
2678mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2679{
2680  unsigned int regno = REGNO (reg);
2681  if (regno < 32)
2682    {
2683      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2684      for (i = 0; i < n; ++i)
2685	current_frame_info.gr_used_mask |= 1 << (regno + i);
2686    }
2687}
2688
2689
2690/* Compute the size and layout of the stack frame for the current function,
2691   recording it in current_frame_info.  SIZE is the number of bytes of space
2692   needed for local variables.  */
2693
2694static void
2695ia64_compute_frame_size (HOST_WIDE_INT size)
2696{
2697  HOST_WIDE_INT total_size;
2698  HOST_WIDE_INT spill_size = 0;
2699  HOST_WIDE_INT extra_spill_size = 0;
2700  HOST_WIDE_INT pretend_args_size;
2701  HARD_REG_SET mask;
2702  int n_spilled = 0;
2703  int spilled_gr_p = 0;
2704  int spilled_fr_p = 0;
2705  unsigned int regno;
2706  int min_regno;
2707  int max_regno;
2708  int i;
2709
2710  if (current_frame_info.initialized)
2711    return;
2712
2713  memset (&current_frame_info, 0, sizeof current_frame_info);
2714  CLEAR_HARD_REG_SET (mask);
2715
2716  /* Don't allocate scratches to the return register.  */
2717  diddle_return_value (mark_reg_gr_used_mask, NULL);
2718
2719  /* Don't allocate scratches to the EH scratch registers.  */
2720  if (cfun->machine->ia64_eh_epilogue_sp)
2721    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2722  if (cfun->machine->ia64_eh_epilogue_bsp)
2723    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2724
2725  /* Static stack checking uses r2 and r3.  */
2726  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
2727    current_frame_info.gr_used_mask |= 0xc;
2728
2729  /* Find the size of the register stack frame.  We have only 80 local
2730     registers, because we reserve 8 for the inputs and 8 for the
2731     outputs.  */
2732
2733  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2734     since we'll be adjusting that down later.  */
2735  regno = LOC_REG (78) + ! frame_pointer_needed;
2736  for (; regno >= LOC_REG (0); regno--)
2737    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2738      break;
2739  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2740
2741  /* For functions marked with the syscall_linkage attribute, we must mark
2742     all eight input registers as in use, so that locals aren't visible to
2743     the caller.  */
2744
2745  if (cfun->machine->n_varargs > 0
2746      || lookup_attribute ("syscall_linkage",
2747			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2748    current_frame_info.n_input_regs = 8;
2749  else
2750    {
2751      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2752	if (df_regs_ever_live_p (regno))
2753	  break;
2754      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2755    }
2756
2757  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2758    if (df_regs_ever_live_p (regno))
2759      break;
2760  i = regno - OUT_REG (0) + 1;
2761
2762#ifndef PROFILE_HOOK
2763  /* When -p profiling, we need one output register for the mcount argument.
2764     Likewise for -a profiling for the bb_init_func argument.  For -ax
2765     profiling, we need two output registers for the two bb_init_trace_func
2766     arguments.  */
2767  if (crtl->profile)
2768    i = MAX (i, 1);
2769#endif
2770  current_frame_info.n_output_regs = i;
2771
2772  /* ??? No rotating register support yet.  */
2773  current_frame_info.n_rotate_regs = 0;
2774
2775  /* Discover which registers need spilling, and how much room that
2776     will take.  Begin with floating point and general registers,
2777     which will always wind up on the stack.  */
2778
2779  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2780    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2781      {
2782	SET_HARD_REG_BIT (mask, regno);
2783	spill_size += 16;
2784	n_spilled += 1;
2785	spilled_fr_p = 1;
2786      }
2787
2788  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2789    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2790      {
2791	SET_HARD_REG_BIT (mask, regno);
2792	spill_size += 8;
2793	n_spilled += 1;
2794	spilled_gr_p = 1;
2795      }
2796
2797  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2798    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2799      {
2800	SET_HARD_REG_BIT (mask, regno);
2801	spill_size += 8;
2802	n_spilled += 1;
2803      }
2804
2805  /* Now come all special registers that might get saved in other
2806     general registers.  */
2807
2808  if (frame_pointer_needed)
2809    {
2810      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2811      /* If we did not get a register, then we take LOC79.  This is guaranteed
2812	 to be free, even if regs_ever_live is already set, because this is
2813	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2814	 as we don't count loc79 above.  */
2815      if (current_frame_info.r[reg_fp] == 0)
2816	{
2817	  current_frame_info.r[reg_fp] = LOC_REG (79);
2818	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2819	}
2820    }
2821
2822  if (! crtl->is_leaf)
2823    {
2824      /* Emit a save of BR0 if we call other functions.  Do this even
2825	 if this function doesn't return, as EH depends on this to be
2826	 able to unwind the stack.  */
2827      SET_HARD_REG_BIT (mask, BR_REG (0));
2828
2829      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2830      if (current_frame_info.r[reg_save_b0] == 0)
2831	{
2832	  extra_spill_size += 8;
2833	  n_spilled += 1;
2834	}
2835
2836      /* Similarly for ar.pfs.  */
2837      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2838      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2839      if (current_frame_info.r[reg_save_ar_pfs] == 0)
2840	{
2841	  extra_spill_size += 8;
2842	  n_spilled += 1;
2843	}
2844
2845      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2846	 registers are clobbered, so we fall back to the stack.  */
2847      current_frame_info.r[reg_save_gp]
2848	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2849      if (current_frame_info.r[reg_save_gp] == 0)
2850	{
2851	  SET_HARD_REG_BIT (mask, GR_REG (1));
2852	  spill_size += 8;
2853	  n_spilled += 1;
2854	}
2855    }
2856  else
2857    {
2858      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2859	{
2860	  SET_HARD_REG_BIT (mask, BR_REG (0));
2861	  extra_spill_size += 8;
2862	  n_spilled += 1;
2863	}
2864
2865      if (df_regs_ever_live_p (AR_PFS_REGNUM))
2866	{
2867	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2868 	  current_frame_info.r[reg_save_ar_pfs]
2869            = find_gr_spill (reg_save_ar_pfs, 1);
2870	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2871	    {
2872	      extra_spill_size += 8;
2873	      n_spilled += 1;
2874	    }
2875	}
2876    }
2877
2878  /* Unwind descriptor hackery: things are most efficient if we allocate
2879     consecutive GR save registers for RP, PFS, FP in that order. However,
2880     it is absolutely critical that FP get the only hard register that's
2881     guaranteed to be free, so we allocated it first.  If all three did
2882     happen to be allocated hard regs, and are consecutive, rearrange them
2883     into the preferred order now.
2884
2885     If we have already emitted code for any of those registers,
2886     then it's already too late to change.  */
2887  min_regno = MIN (current_frame_info.r[reg_fp],
2888		   MIN (current_frame_info.r[reg_save_b0],
2889			current_frame_info.r[reg_save_ar_pfs]));
2890  max_regno = MAX (current_frame_info.r[reg_fp],
2891		   MAX (current_frame_info.r[reg_save_b0],
2892			current_frame_info.r[reg_save_ar_pfs]));
2893  if (min_regno > 0
2894      && min_regno + 2 == max_regno
2895      && (current_frame_info.r[reg_fp] == min_regno + 1
2896	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2897	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2898      && (emitted_frame_related_regs[reg_save_b0] == 0
2899	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2900      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2901	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2902      && (emitted_frame_related_regs[reg_fp] == 0
2903	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2904    {
2905      current_frame_info.r[reg_save_b0] = min_regno;
2906      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2907      current_frame_info.r[reg_fp] = min_regno + 2;
2908    }
2909
2910  /* See if we need to store the predicate register block.  */
2911  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2912    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2913      break;
2914  if (regno <= PR_REG (63))
2915    {
2916      SET_HARD_REG_BIT (mask, PR_REG (0));
2917      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2918      if (current_frame_info.r[reg_save_pr] == 0)
2919	{
2920	  extra_spill_size += 8;
2921	  n_spilled += 1;
2922	}
2923
2924      /* ??? Mark them all as used so that register renaming and such
2925	 are free to use them.  */
2926      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2927	df_set_regs_ever_live (regno, true);
2928    }
2929
2930  /* If we're forced to use st8.spill, we're forced to save and restore
2931     ar.unat as well.  The check for existing liveness allows inline asm
2932     to touch ar.unat.  */
2933  if (spilled_gr_p || cfun->machine->n_varargs
2934      || df_regs_ever_live_p (AR_UNAT_REGNUM))
2935    {
2936      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2937      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2938      current_frame_info.r[reg_save_ar_unat]
2939        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2940      if (current_frame_info.r[reg_save_ar_unat] == 0)
2941	{
2942	  extra_spill_size += 8;
2943	  n_spilled += 1;
2944	}
2945    }
2946
2947  if (df_regs_ever_live_p (AR_LC_REGNUM))
2948    {
2949      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2950      current_frame_info.r[reg_save_ar_lc]
2951        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2952      if (current_frame_info.r[reg_save_ar_lc] == 0)
2953	{
2954	  extra_spill_size += 8;
2955	  n_spilled += 1;
2956	}
2957    }
2958
2959  /* If we have an odd number of words of pretend arguments written to
2960     the stack, then the FR save area will be unaligned.  We round the
2961     size of this area up to keep things 16 byte aligned.  */
2962  if (spilled_fr_p)
2963    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2964  else
2965    pretend_args_size = crtl->args.pretend_args_size;
2966
2967  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2968		+ crtl->outgoing_args_size);
2969  total_size = IA64_STACK_ALIGN (total_size);
2970
2971  /* We always use the 16-byte scratch area provided by the caller, but
2972     if we are a leaf function, there's no one to which we need to provide
2973     a scratch area.  However, if the function allocates dynamic stack space,
2974     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2975     so we need to cope.  */
2976  if (crtl->is_leaf && !cfun->calls_alloca)
2977    total_size = MAX (0, total_size - 16);
2978
2979  current_frame_info.total_size = total_size;
2980  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2981  current_frame_info.spill_size = spill_size;
2982  current_frame_info.extra_spill_size = extra_spill_size;
2983  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2984  current_frame_info.n_spilled = n_spilled;
2985  current_frame_info.initialized = reload_completed;
2986}
2987
2988/* Worker function for TARGET_CAN_ELIMINATE.  */
2989
2990bool
2991ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2992{
2993  return (to == BR_REG (0) ? crtl->is_leaf : true);
2994}
2995
2996/* Compute the initial difference between the specified pair of registers.  */
2997
2998HOST_WIDE_INT
2999ia64_initial_elimination_offset (int from, int to)
3000{
3001  HOST_WIDE_INT offset;
3002
3003  ia64_compute_frame_size (get_frame_size ());
3004  switch (from)
3005    {
3006    case FRAME_POINTER_REGNUM:
3007      switch (to)
3008	{
3009	case HARD_FRAME_POINTER_REGNUM:
3010	  offset = -current_frame_info.total_size;
3011	  if (!crtl->is_leaf || cfun->calls_alloca)
3012	    offset += 16 + crtl->outgoing_args_size;
3013	  break;
3014
3015	case STACK_POINTER_REGNUM:
3016	  offset = 0;
3017	  if (!crtl->is_leaf || cfun->calls_alloca)
3018	    offset += 16 + crtl->outgoing_args_size;
3019	  break;
3020
3021	default:
3022	  gcc_unreachable ();
3023	}
3024      break;
3025
3026    case ARG_POINTER_REGNUM:
3027      /* Arguments start above the 16 byte save area, unless stdarg
3028	 in which case we store through the 16 byte save area.  */
3029      switch (to)
3030	{
3031	case HARD_FRAME_POINTER_REGNUM:
3032	  offset = 16 - crtl->args.pretend_args_size;
3033	  break;
3034
3035	case STACK_POINTER_REGNUM:
3036	  offset = (current_frame_info.total_size
3037		    + 16 - crtl->args.pretend_args_size);
3038	  break;
3039
3040	default:
3041	  gcc_unreachable ();
3042	}
3043      break;
3044
3045    default:
3046      gcc_unreachable ();
3047    }
3048
3049  return offset;
3050}
3051
3052/* If there are more than a trivial number of register spills, we use
3053   two interleaved iterators so that we can get two memory references
3054   per insn group.
3055
3056   In order to simplify things in the prologue and epilogue expanders,
3057   we use helper functions to fix up the memory references after the
3058   fact with the appropriate offsets to a POST_MODIFY memory mode.
3059   The following data structure tracks the state of the two iterators
3060   while insns are being emitted.  */
3061
3062struct spill_fill_data
3063{
3064  rtx_insn *init_after;		/* point at which to emit initializations */
3065  rtx init_reg[2];		/* initial base register */
3066  rtx iter_reg[2];		/* the iterator registers */
3067  rtx *prev_addr[2];		/* address of last memory use */
3068  rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
3069  HOST_WIDE_INT prev_off[2];	/* last offset */
3070  int n_iter;			/* number of iterators in use */
3071  int next_iter;		/* next iterator to use */
3072  unsigned int save_gr_used_mask;
3073};
3074
3075static struct spill_fill_data spill_fill_data;
3076
3077static void
3078setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3079{
3080  int i;
3081
3082  spill_fill_data.init_after = get_last_insn ();
3083  spill_fill_data.init_reg[0] = init_reg;
3084  spill_fill_data.init_reg[1] = init_reg;
3085  spill_fill_data.prev_addr[0] = NULL;
3086  spill_fill_data.prev_addr[1] = NULL;
3087  spill_fill_data.prev_insn[0] = NULL;
3088  spill_fill_data.prev_insn[1] = NULL;
3089  spill_fill_data.prev_off[0] = cfa_off;
3090  spill_fill_data.prev_off[1] = cfa_off;
3091  spill_fill_data.next_iter = 0;
3092  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3093
3094  spill_fill_data.n_iter = 1 + (n_spills > 2);
3095  for (i = 0; i < spill_fill_data.n_iter; ++i)
3096    {
3097      int regno = next_scratch_gr_reg ();
3098      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3099      current_frame_info.gr_used_mask |= 1 << regno;
3100    }
3101}
3102
3103static void
3104finish_spill_pointers (void)
3105{
3106  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3107}
3108
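/* Return a MEM addressing the save slot for REG at CFA offset CFA_OFF, using
   the next spill iterator in round-robin order.  When the displacement fits,
   the iterator's previous memory reference is rewritten into a POST_MODIFY
   so that the iterator advances for free.  */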
3109static rtx
3110spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3111{
3112  int iter = spill_fill_data.next_iter;
3113  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3114  rtx disp_rtx = GEN_INT (disp);
3115  rtx mem;
3116
3117  if (spill_fill_data.prev_addr[iter])
3118    {
3119      if (satisfies_constraint_N (disp_rtx))
3120	{
3121	  *spill_fill_data.prev_addr[iter]
3122	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3123				   gen_rtx_PLUS (DImode,
3124						 spill_fill_data.iter_reg[iter],
3125						 disp_rtx));
3126	  add_reg_note (spill_fill_data.prev_insn[iter],
3127			REG_INC, spill_fill_data.iter_reg[iter]);
3128	}
3129      else
3130	{
3131	  /* ??? Could use register post_modify for loads.  */
3132	  if (!satisfies_constraint_I (disp_rtx))
3133	    {
3134	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3135	      emit_move_insn (tmp, disp_rtx);
3136	      disp_rtx = tmp;
3137	    }
3138	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3139				 spill_fill_data.iter_reg[iter], disp_rtx));
3140	}
3141    }
3142  /* Micro-optimization: if we've created a frame pointer, it's at
3143     CFA 0, which may allow the real iterator to be initialized lower,
3144     slightly increasing parallelism.  Also, if there are few saves
3145     it may eliminate the iterator entirely.  */
3146  else if (disp == 0
3147	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3148	   && frame_pointer_needed)
3149    {
3150      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3151      set_mem_alias_set (mem, get_varargs_alias_set ());
3152      return mem;
3153    }
3154  else
3155    {
3156      rtx seq;
3157      rtx_insn *insn;
3158
3159      if (disp == 0)
3160	seq = gen_movdi (spill_fill_data.iter_reg[iter],
3161			 spill_fill_data.init_reg[iter]);
3162      else
3163	{
3164	  start_sequence ();
3165
3166	  if (!satisfies_constraint_I (disp_rtx))
3167	    {
3168	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3169	      emit_move_insn (tmp, disp_rtx);
3170	      disp_rtx = tmp;
3171	    }
3172
3173	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3174				 spill_fill_data.init_reg[iter],
3175				 disp_rtx));
3176
3177	  seq = get_insns ();
3178	  end_sequence ();
3179	}
3180
3181      /* Careful: init_after is NULL if nothing has been emitted yet.  */
3182      if (spill_fill_data.init_after)
3183	insn = emit_insn_after (seq, spill_fill_data.init_after);
3184      else
3185	{
3186	  rtx_insn *first = get_insns ();
3187	  if (first)
3188	    insn = emit_insn_before (seq, first);
3189	  else
3190	    insn = emit_insn (seq);
3191	}
3192      spill_fill_data.init_after = insn;
3193    }
3194
3195  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3196
3197  /* ??? Not all of the spills are for varargs, but some of them are.
3198     The rest of the spills belong in an alias set of their own.  But
3199     it doesn't actually hurt to include them here.  */
3200  set_mem_alias_set (mem, get_varargs_alias_set ());
3201
3202  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3203  spill_fill_data.prev_off[iter] = cfa_off;
3204
3205  if (++iter >= spill_fill_data.n_iter)
3206    iter = 0;
3207  spill_fill_data.next_iter = iter;
3208
3209  return mem;
3210}
3211
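/* Emit a spill of REG to its save slot at CFA_OFF using MOVE_FN.  If
   FRAME_REG is nonnull, mark the insn as frame related and attach a
   REG_CFA_OFFSET note describing the save for the unwinder.  */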
3212static void
3213do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3214	  rtx frame_reg)
3215{
3216  int iter = spill_fill_data.next_iter;
3217  rtx mem;
3218  rtx_insn *insn;
3219
3220  mem = spill_restore_mem (reg, cfa_off);
3221  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3222  spill_fill_data.prev_insn[iter] = insn;
3223
3224  if (frame_reg)
3225    {
3226      rtx base;
3227      HOST_WIDE_INT off;
3228
3229      RTX_FRAME_RELATED_P (insn) = 1;
3230
3231      /* Don't even pretend that the unwind code can intuit its way
3232	 through a pair of interleaved post_modify iterators.  Just
3233	 provide the correct answer.  */
3234
3235      if (frame_pointer_needed)
3236	{
3237	  base = hard_frame_pointer_rtx;
3238	  off = - cfa_off;
3239	}
3240      else
3241	{
3242	  base = stack_pointer_rtx;
3243	  off = current_frame_info.total_size - cfa_off;
3244	}
3245
3246      add_reg_note (insn, REG_CFA_OFFSET,
3247		    gen_rtx_SET (VOIDmode,
3248				 gen_rtx_MEM (GET_MODE (reg),
3249					      plus_constant (Pmode,
3250							     base, off)),
3251				 frame_reg));
3252    }
3253}
3254
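/* Emit a restore of REG from its save slot at CFA_OFF using MOVE_FN.  */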
3255static void
3256do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3257{
3258  int iter = spill_fill_data.next_iter;
3259  rtx_insn *insn;
3260
3261  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3262				GEN_INT (cfa_off)));
3263  spill_fill_data.prev_insn[iter] = insn;
3264}
3265
3266/* Wrapper functions that discard the CONST_INT spill offset.  These
3267   exist so that we can give gr_spill/gr_fill the offset they need and
3268   use a consistent function interface.  */
3269
3270static rtx
3271gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3272{
3273  return gen_movdi (dest, src);
3274}
3275
3276static rtx
3277gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3278{
3279  return gen_fr_spill (dest, src);
3280}
3281
3282static rtx
3283gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3284{
3285  return gen_fr_restore (dest, src);
3286}
3287
3288#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3289
3290/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
3291#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
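/* N stacked registers take N 8-byte slots in the backing store, plus extra
   slots for the RSE's NaT collections; the (N)/63 + 1 term is presumably a
   conservative upper bound on the number of such collection slots.  */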
3292
3293/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3294   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
3295   is the size of the backing store.  ??? This clobbers r2 and r3.  */
3296
3297static void
3298ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3299			     int bs_size)
3300{
3301  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3302  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3303  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3304
3305  /* On the IA-64 there is a second stack in memory, namely the Backing Store
3306     of the Register Stack Engine.  We also need to probe it after checking
3307     that the 2 stacks don't overlap.  */
3308  emit_insn (gen_bsp_value (r3));
3309  emit_move_insn (r2, GEN_INT (-(first + size)));
3310
3311  /* Compare current value of BSP and SP registers.  */
3312  emit_insn (gen_rtx_SET (VOIDmode, p6,
3313			  gen_rtx_fmt_ee (LTU, BImode,
3314					  r3, stack_pointer_rtx)));
3315
3316  /* Compute the address of the probe for the Backing Store (which grows
3317     towards higher addresses).  We probe only at the first offset of
3318     the next page because some OSes (e.g. Linux/ia64) only extend the
3319     backing store when this specific address is hit (but generate a SEGV
3320     on other addresses).  Page size is the worst case (4KB).  The reserve
3321     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3322     Also compute the address of the last probe for the memory stack
3323     (which grows towards lower addresses).  */
3324  emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
3325  emit_insn (gen_rtx_SET (VOIDmode, r2,
3326			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3327
3328  /* Compare them and raise SEGV if the former has topped the latter.  */
3329  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3330				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3331				gen_rtx_SET (VOIDmode, p6,
3332					     gen_rtx_fmt_ee (GEU, BImode,
3333							     r3, r2))));
3334  emit_insn (gen_rtx_SET (VOIDmode,
3335			  gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3336						const0_rtx),
3337			  const0_rtx));
3338  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3339				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3340				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3341						 GEN_INT (11))));
3342
3343  /* Probe the Backing Store if necessary.  */
3344  if (bs_size > 0)
3345    emit_stack_probe (r3);
3346
3347  /* Probe the memory stack if necessary.  */
3348  if (size == 0)
3349    ;
3350
3351  /* See if we have a constant small number of probes to generate.  If so,
3352     that's the easy case.  */
3353  else if (size <= PROBE_INTERVAL)
3354    emit_stack_probe (r2);
3355
3356  /* The run-time loop is made up of 8 insns in the generic case while this
3357     compile-time loop is made up of 5+2*(n-2) insns for n intervals (9 for n = 4).  */
3358  else if (size <= 4 * PROBE_INTERVAL)
3359    {
3360      HOST_WIDE_INT i;
3361
3362      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3363      emit_insn (gen_rtx_SET (VOIDmode, r2,
3364			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3365      emit_stack_probe (r2);
3366
3367      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3368	 it exceeds SIZE.  If only two probes are needed, this will not
3369	 generate any code.  Then probe at FIRST + SIZE.  */
3370      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3371	{
3372	  emit_insn (gen_rtx_SET (VOIDmode, r2,
3373				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3374	  emit_stack_probe (r2);
3375	}
3376
3377      emit_insn (gen_rtx_SET (VOIDmode, r2,
3378			      plus_constant (Pmode, r2,
3379					     (i - PROBE_INTERVAL) - size)));
3380      emit_stack_probe (r2);
3381    }
3382
3383  /* Otherwise, do the same as above, but in a loop.  Note that we must be
3384     extra careful with variables wrapping around because we might be at
3385     the very top (or the very bottom) of the address space and we have
3386     to be able to handle this case properly; in particular, we use an
3387     equality test for the loop condition.  */
3388  else
3389    {
3390      HOST_WIDE_INT rounded_size;
3391
3392      emit_move_insn (r2, GEN_INT (-first));
3393
3394
3395      /* Step 1: round SIZE to the previous multiple of the interval.  */
3396
3397      rounded_size = size & -PROBE_INTERVAL;
3398
3399
3400      /* Step 2: compute initial and final value of the loop counter.  */
3401
3402      /* TEST_ADDR = SP + FIRST.  */
3403      emit_insn (gen_rtx_SET (VOIDmode, r2,
3404			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3405
3406      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
3407      if (rounded_size > (1 << 21))
3408	{
3409	  emit_move_insn (r3, GEN_INT (-rounded_size));
3410	  emit_insn (gen_rtx_SET (VOIDmode, r3, gen_rtx_PLUS (Pmode, r2, r3)));
3411	}
3412      else
3413        emit_insn (gen_rtx_SET (VOIDmode, r3,
3414				gen_rtx_PLUS (Pmode, r2,
3415					      GEN_INT (-rounded_size))));
3416
3417
3418      /* Step 3: the loop
3419
3420	 while (TEST_ADDR != LAST_ADDR)
3421	   {
3422	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3423	     probe at TEST_ADDR
3424	   }
3425
3426	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3427	 until it is equal to ROUNDED_SIZE.  */
3428
3429      emit_insn (gen_probe_stack_range (r2, r2, r3));
3430
3431
3432      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3433	 that SIZE is equal to ROUNDED_SIZE.  */
3434
3435      /* TEMP = SIZE - ROUNDED_SIZE.  */
3436      if (size != rounded_size)
3437	{
3438	  emit_insn (gen_rtx_SET (VOIDmode, r2,
3439				  plus_constant (Pmode, r2,
3440						 rounded_size - size)));
3441	  emit_stack_probe (r2);
3442	}
3443    }
3444
3445  /* Make sure nothing is scheduled before we are done.  */
3446  emit_insn (gen_blockage ());
3447}
3448
3449/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
3450   absolute addresses.  */
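/* As a rough sketch, the sequence printed below looks like this (register
   numbers, the probe step and the label spelling are illustrative only and
   depend on the operands, PROBE_INTERVAL and the target macros):

	.LPSRL0:
	cmp.eq p6, p7 = r2, r3
	(p6) br.cond.dpnt .LPSRE0
	addl r2 = -4096, r2
	;;
	probe.w.fault r2, 0
	br .LPSRL0
	.LPSRE0:
*/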
3451
3452const char *
3453output_probe_stack_range (rtx reg1, rtx reg2)
3454{
3455  static int labelno = 0;
3456  char loop_lab[32], end_lab[32];
3457  rtx xops[3];
3458
3459  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
3460  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
3461
3462  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3463
3464  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
3465  xops[0] = reg1;
3466  xops[1] = reg2;
3467  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3468  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3469  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
3470  assemble_name_raw (asm_out_file, end_lab);
3471  fputc ('\n', asm_out_file);
3472
3473  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
3474  xops[1] = GEN_INT (-PROBE_INTERVAL);
3475  output_asm_insn ("addl %0 = %1, %0", xops);
3476  fputs ("\t;;\n", asm_out_file);
3477
3478  /* Probe at TEST_ADDR and branch.  */
3479  output_asm_insn ("probe.w.fault %0, 0", xops);
3480  fprintf (asm_out_file, "\tbr ");
3481  assemble_name_raw (asm_out_file, loop_lab);
3482  fputc ('\n', asm_out_file);
3483
3484  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
3485
3486  return "";
3487}
3488
3489/* Called after register allocation to add any instructions needed for the
3490   prologue.  Using a prologue insn is favored compared to putting all of the
3491   instructions in output_function_prologue(), since it allows the scheduler
3492   to intermix instructions with the saves of the caller saved registers.  In
3493   some cases, it might be necessary to emit a barrier instruction as the last
3494   insn to prevent such scheduling.
3495
3496   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3497   so that the debug info generation code can handle them properly.
3498
3499   The register save area is laid out like so:
3500   cfa+16
3501	[ varargs spill area ]
3502	[ fr register spill area ]
3503	[ br register spill area ]
3504	[ ar register spill area ]
3505	[ pr register spill area ]
3506	[ gr register spill area ] */
3507
3508/* ??? We generate inefficient code when the frame size is larger than can
3509   fit in an adds instruction.  */
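/* Purely for orientation, a small non-leaf frame typically expands to
   something like the following (registers, immediates and scheduling are
   illustrative only and depend on current_frame_info):

	alloc r34 = ar.pfs, 2, 3, 4, 0
	adds r12 = -32, r12
	mov r35 = b0
*/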
3510
3511void
3512ia64_expand_prologue (void)
3513{
3514  rtx_insn *insn;
3515  rtx ar_pfs_save_reg, ar_unat_save_reg;
3516  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3517  rtx reg, alt_reg;
3518
3519  ia64_compute_frame_size (get_frame_size ());
3520  last_scratch_gr_reg = 15;
3521
3522  if (flag_stack_usage_info)
3523    current_function_static_stack_size = current_frame_info.total_size;
3524
3525  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
3526    {
3527      HOST_WIDE_INT size = current_frame_info.total_size;
3528      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3529					  + current_frame_info.n_local_regs);
3530
3531      if (crtl->is_leaf && !cfun->calls_alloca)
3532	{
3533	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
3534	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
3535					 size - STACK_CHECK_PROTECT,
3536					 bs_size);
3537	  else if (size + bs_size > STACK_CHECK_PROTECT)
3538	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
3539	}
3540      else if (size + bs_size > 0)
3541	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
3542    }
3543
3544  if (dump_file)
3545    {
3546      fprintf (dump_file, "ia64 frame related registers "
3547               "recorded in current_frame_info.r[]:\n");
3548#define PRINTREG(a) if (current_frame_info.r[a]) \
3549        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3550      PRINTREG(reg_fp);
3551      PRINTREG(reg_save_b0);
3552      PRINTREG(reg_save_pr);
3553      PRINTREG(reg_save_ar_pfs);
3554      PRINTREG(reg_save_ar_unat);
3555      PRINTREG(reg_save_ar_lc);
3556      PRINTREG(reg_save_gp);
3557#undef PRINTREG
3558    }
3559
3560  /* If there is no epilogue, then we don't need some prologue insns.
3561     We need to avoid emitting the dead prologue insns, because flow
3562     will complain about them.  */
3563  if (optimize)
3564    {
3565      edge e;
3566      edge_iterator ei;
3567
3568      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3569	if ((e->flags & EDGE_FAKE) == 0
3570	    && (e->flags & EDGE_FALLTHRU) != 0)
3571	  break;
3572      epilogue_p = (e != NULL);
3573    }
3574  else
3575    epilogue_p = 1;
3576
3577  /* Set the local, input, and output register names.  We need to do this
3578     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3579     half.  If we use in/loc/out register names, then we get assembler errors
3580     in crtn.S because there is no alloc insn or regstk directive in there.  */
3581  if (! TARGET_REG_NAMES)
3582    {
3583      int inputs = current_frame_info.n_input_regs;
3584      int locals = current_frame_info.n_local_regs;
3585      int outputs = current_frame_info.n_output_regs;
3586
3587      for (i = 0; i < inputs; i++)
3588	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3589      for (i = 0; i < locals; i++)
3590	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3591      for (i = 0; i < outputs; i++)
3592	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3593    }
3594
3595  /* Set the frame pointer register name.  The regnum is logically loc79,
3596     but of course we'll not have allocated that many locals.  Rather than
3597     worrying about renumbering the existing rtxs, we adjust the name.  */
3598  /* ??? This code means that we can never use one local register when
3599     there is a frame pointer.  loc79 gets wasted in this case, as it is
3600     renamed to a register that will never be used.  See also the try_locals
3601     code in find_gr_spill.  */
3602  if (current_frame_info.r[reg_fp])
3603    {
3604      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3605      reg_names[HARD_FRAME_POINTER_REGNUM]
3606	= reg_names[current_frame_info.r[reg_fp]];
3607      reg_names[current_frame_info.r[reg_fp]] = tmp;
3608    }
3609
3610  /* We don't need an alloc instruction if we've used no outputs or locals.  */
3611  if (current_frame_info.n_local_regs == 0
3612      && current_frame_info.n_output_regs == 0
3613      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3614      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3615    {
3616      /* If there is no alloc, but there are input registers used, then we
3617	 need a .regstk directive.  */
3618      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3619      ar_pfs_save_reg = NULL_RTX;
3620    }
3621  else
3622    {
3623      current_frame_info.need_regstk = 0;
3624
3625      if (current_frame_info.r[reg_save_ar_pfs])
3626        {
3627	  regno = current_frame_info.r[reg_save_ar_pfs];
3628	  reg_emitted (reg_save_ar_pfs);
3629	}
3630      else
3631	regno = next_scratch_gr_reg ();
3632      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3633
3634      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3635				   GEN_INT (current_frame_info.n_input_regs),
3636				   GEN_INT (current_frame_info.n_local_regs),
3637				   GEN_INT (current_frame_info.n_output_regs),
3638				   GEN_INT (current_frame_info.n_rotate_regs)));
3639      if (current_frame_info.r[reg_save_ar_pfs])
3640	{
3641	  RTX_FRAME_RELATED_P (insn) = 1;
3642	  add_reg_note (insn, REG_CFA_REGISTER,
3643			gen_rtx_SET (VOIDmode,
3644				     ar_pfs_save_reg,
3645				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3646	}
3647    }
3648
3649  /* Set up frame pointer, stack pointer, and spill iterators.  */
3650
3651  n_varargs = cfun->machine->n_varargs;
3652  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3653			stack_pointer_rtx, 0);
3654
3655  if (frame_pointer_needed)
3656    {
3657      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3658      RTX_FRAME_RELATED_P (insn) = 1;
3659
3660      /* Force the unwind info to recognize this as defining a new CFA,
3661	 rather than some temp register setup.  */
3662      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3663    }
3664
3665  if (current_frame_info.total_size != 0)
3666    {
3667      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3668      rtx offset;
3669
3670      if (satisfies_constraint_I (frame_size_rtx))
3671	offset = frame_size_rtx;
3672      else
3673	{
3674	  regno = next_scratch_gr_reg ();
3675	  offset = gen_rtx_REG (DImode, regno);
3676	  emit_move_insn (offset, frame_size_rtx);
3677	}
3678
3679      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3680				    stack_pointer_rtx, offset));
3681
3682      if (! frame_pointer_needed)
3683	{
3684	  RTX_FRAME_RELATED_P (insn) = 1;
3685	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3686			gen_rtx_SET (VOIDmode,
3687				     stack_pointer_rtx,
3688				     gen_rtx_PLUS (DImode,
3689						   stack_pointer_rtx,
3690						   frame_size_rtx)));
3691	}
3692
3693      /* ??? At this point we must generate a magic insn that appears to
3694	 modify the stack pointer, the frame pointer, and all spill
3695	 iterators.  This would allow the most scheduling freedom.  For
3696	 now, just hard stop.  */
3697      emit_insn (gen_blockage ());
3698    }
3699
3700  /* Must copy out ar.unat before doing any integer spills.  */
3701  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3702    {
3703      if (current_frame_info.r[reg_save_ar_unat])
3704        {
3705	  ar_unat_save_reg
3706	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3707	  reg_emitted (reg_save_ar_unat);
3708	}
3709      else
3710	{
3711	  alt_regno = next_scratch_gr_reg ();
3712	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3713	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3714	}
3715
3716      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3717      insn = emit_move_insn (ar_unat_save_reg, reg);
3718      if (current_frame_info.r[reg_save_ar_unat])
3719	{
3720	  RTX_FRAME_RELATED_P (insn) = 1;
3721	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3722	}
3723
3724      /* Even if we're not going to generate an epilogue, we still
3725	 need to save the register so that EH works.  */
3726      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3727	emit_insn (gen_prologue_use (ar_unat_save_reg));
3728    }
3729  else
3730    ar_unat_save_reg = NULL_RTX;
3731
3732  /* Spill all varargs registers.  Do this before spilling any GR registers,
3733     since we want the UNAT bits for the GR registers to override the UNAT
3734     bits from varargs, which we don't care about.  */
3735
3736  cfa_off = -16;
3737  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3738    {
3739      reg = gen_rtx_REG (DImode, regno);
3740      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3741    }
3742
3743  /* Locate the bottom of the register save area.  */
3744  cfa_off = (current_frame_info.spill_cfa_off
3745	     + current_frame_info.spill_size
3746	     + current_frame_info.extra_spill_size);
3747
3748  /* Save the predicate register block either in a register or in memory.  */
3749  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3750    {
3751      reg = gen_rtx_REG (DImode, PR_REG (0));
3752      if (current_frame_info.r[reg_save_pr] != 0)
3753	{
3754	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3755	  reg_emitted (reg_save_pr);
3756	  insn = emit_move_insn (alt_reg, reg);
3757
3758	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3759	     64 hard registers.  */
3760	  RTX_FRAME_RELATED_P (insn) = 1;
3761	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3762
3763	  /* Even if we're not going to generate an epilogue, we still
3764	     need to save the register so that EH works.  */
3765	  if (! epilogue_p)
3766	    emit_insn (gen_prologue_use (alt_reg));
3767	}
3768      else
3769	{
3770	  alt_regno = next_scratch_gr_reg ();
3771	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3772	  insn = emit_move_insn (alt_reg, reg);
3773	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3774	  cfa_off -= 8;
3775	}
3776    }
3777
3778  /* Handle AR regs in numerical order.  All of them get special handling.  */
3779  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3780      && current_frame_info.r[reg_save_ar_unat] == 0)
3781    {
3782      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3783      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3784      cfa_off -= 8;
3785    }
3786
3787  /* The alloc insn already copied ar.pfs into a general register.  The
3788     only thing we have to do now is copy that register to a stack slot
3789     if we'd not allocated a local register for the job.  */
3790  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3791      && current_frame_info.r[reg_save_ar_pfs] == 0)
3792    {
3793      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3794      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3795      cfa_off -= 8;
3796    }
3797
3798  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3799    {
3800      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3801      if (current_frame_info.r[reg_save_ar_lc] != 0)
3802	{
3803	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3804	  reg_emitted (reg_save_ar_lc);
3805	  insn = emit_move_insn (alt_reg, reg);
3806	  RTX_FRAME_RELATED_P (insn) = 1;
3807	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3808
3809	  /* Even if we're not going to generate an epilogue, we still
3810	     need to save the register so that EH works.  */
3811	  if (! epilogue_p)
3812	    emit_insn (gen_prologue_use (alt_reg));
3813	}
3814      else
3815	{
3816	  alt_regno = next_scratch_gr_reg ();
3817	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3818	  emit_move_insn (alt_reg, reg);
3819	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3820	  cfa_off -= 8;
3821	}
3822    }
3823
3824  /* Save the return pointer.  */
3825  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3826    {
3827      reg = gen_rtx_REG (DImode, BR_REG (0));
3828      if (current_frame_info.r[reg_save_b0] != 0)
3829	{
3830          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3831          reg_emitted (reg_save_b0);
3832	  insn = emit_move_insn (alt_reg, reg);
3833	  RTX_FRAME_RELATED_P (insn) = 1;
3834	  add_reg_note (insn, REG_CFA_REGISTER,
3835			gen_rtx_SET (VOIDmode, alt_reg, pc_rtx));
3836
3837	  /* Even if we're not going to generate an epilogue, we still
3838	     need to save the register so that EH works.  */
3839	  if (! epilogue_p)
3840	    emit_insn (gen_prologue_use (alt_reg));
3841	}
3842      else
3843	{
3844	  alt_regno = next_scratch_gr_reg ();
3845	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3846	  emit_move_insn (alt_reg, reg);
3847	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3848	  cfa_off -= 8;
3849	}
3850    }
3851
3852  if (current_frame_info.r[reg_save_gp])
3853    {
3854      reg_emitted (reg_save_gp);
3855      insn = emit_move_insn (gen_rtx_REG (DImode,
3856					  current_frame_info.r[reg_save_gp]),
3857			     pic_offset_table_rtx);
3858    }
3859
3860  /* We should now be at the base of the gr/br/fr spill area.  */
3861  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3862			  + current_frame_info.spill_size));
3863
3864  /* Spill all general registers.  */
3865  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3866    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3867      {
3868	reg = gen_rtx_REG (DImode, regno);
3869	do_spill (gen_gr_spill, reg, cfa_off, reg);
3870	cfa_off -= 8;
3871      }
3872
3873  /* Spill the rest of the BR registers.  */
3874  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3875    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3876      {
3877	alt_regno = next_scratch_gr_reg ();
3878	alt_reg = gen_rtx_REG (DImode, alt_regno);
3879	reg = gen_rtx_REG (DImode, regno);
3880	emit_move_insn (alt_reg, reg);
3881	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3882	cfa_off -= 8;
3883      }
3884
3885  /* Align the frame and spill all FR registers.  */
3886  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3887    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3888      {
3889        gcc_assert (!(cfa_off & 15));
3890	reg = gen_rtx_REG (XFmode, regno);
3891	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3892	cfa_off -= 16;
3893      }
3894
3895  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3896
3897  finish_spill_pointers ();
3898}
3899
3900/* Output the textual info surrounding the prologue.  */
3901
3902void
3903ia64_start_function (FILE *file, const char *fnname,
3904		     tree decl ATTRIBUTE_UNUSED)
3905{
3906#if TARGET_ABI_OPEN_VMS
3907  vms_start_function (fnname);
3908#endif
3909
3910  fputs ("\t.proc ", file);
3911  assemble_name (file, fnname);
3912  fputc ('\n', file);
3913  ASM_OUTPUT_LABEL (file, fnname);
3914}
3915
3916/* Called after register allocation to add any instructions needed for the
3917   epilogue.  Using an epilogue insn is favored compared to putting all of the
3918   instructions in output_function_epilogue(), since it allows the scheduler
3919   to intermix instructions with the restores of the caller saved registers.  In
3920   some cases, it might be necessary to emit a barrier instruction as the last
3921   insn to prevent such scheduling.  */
3922
3923void
3924ia64_expand_epilogue (int sibcall_p)
3925{
3926  rtx_insn *insn;
3927  rtx reg, alt_reg, ar_unat_save_reg;
3928  int regno, alt_regno, cfa_off;
3929
3930  ia64_compute_frame_size (get_frame_size ());
3931
3932  /* If there is a frame pointer, then we use it instead of the stack
3933     pointer, so that the stack pointer does not need to be valid when
3934     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3935  if (frame_pointer_needed)
3936    setup_spill_pointers (current_frame_info.n_spilled,
3937			  hard_frame_pointer_rtx, 0);
3938  else
3939    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3940			  current_frame_info.total_size);
3941
3942  if (current_frame_info.total_size != 0)
3943    {
3944      /* ??? At this point we must generate a magic insn that appears to
3945         modify the spill iterators and the frame pointer.  This would
3946	 allow the most scheduling freedom.  For now, just hard stop.  */
3947      emit_insn (gen_blockage ());
3948    }
3949
3950  /* Locate the bottom of the register save area.  */
3951  cfa_off = (current_frame_info.spill_cfa_off
3952	     + current_frame_info.spill_size
3953	     + current_frame_info.extra_spill_size);
3954
3955  /* Restore the predicate registers.  */
3956  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3957    {
3958      if (current_frame_info.r[reg_save_pr] != 0)
3959        {
3960	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3961	  reg_emitted (reg_save_pr);
3962	}
3963      else
3964	{
3965	  alt_regno = next_scratch_gr_reg ();
3966	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3967	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3968	  cfa_off -= 8;
3969	}
3970      reg = gen_rtx_REG (DImode, PR_REG (0));
3971      emit_move_insn (reg, alt_reg);
3972    }
3973
3974  /* Restore the application registers.  */
3975
3976  /* Load the saved unat from the stack, but do not restore it until
3977     after the GRs have been restored.  */
3978  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3979    {
3980      if (current_frame_info.r[reg_save_ar_unat] != 0)
3981        {
3982          ar_unat_save_reg
3983	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3984	  reg_emitted (reg_save_ar_unat);
3985	}
3986      else
3987	{
3988	  alt_regno = next_scratch_gr_reg ();
3989	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3990	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3991	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3992	  cfa_off -= 8;
3993	}
3994    }
3995  else
3996    ar_unat_save_reg = NULL_RTX;
3997
3998  if (current_frame_info.r[reg_save_ar_pfs] != 0)
3999    {
4000      reg_emitted (reg_save_ar_pfs);
4001      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
4002      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
4003      emit_move_insn (reg, alt_reg);
4004    }
4005  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
4006    {
4007      alt_regno = next_scratch_gr_reg ();
4008      alt_reg = gen_rtx_REG (DImode, alt_regno);
4009      do_restore (gen_movdi_x, alt_reg, cfa_off);
4010      cfa_off -= 8;
4011      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
4012      emit_move_insn (reg, alt_reg);
4013    }
4014
4015  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
4016    {
4017      if (current_frame_info.r[reg_save_ar_lc] != 0)
4018        {
4019	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
4020          reg_emitted (reg_save_ar_lc);
4021	}
4022      else
4023	{
4024	  alt_regno = next_scratch_gr_reg ();
4025	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4026	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4027	  cfa_off -= 8;
4028	}
4029      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4030      emit_move_insn (reg, alt_reg);
4031    }
4032
4033  /* Restore the return pointer.  */
4034  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4035    {
4036      if (current_frame_info.r[reg_save_b0] != 0)
4037        {
4038         alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4039         reg_emitted (reg_save_b0);
4040        }
4041      else
4042	{
4043	  alt_regno = next_scratch_gr_reg ();
4044	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4045	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4046	  cfa_off -= 8;
4047	}
4048      reg = gen_rtx_REG (DImode, BR_REG (0));
4049      emit_move_insn (reg, alt_reg);
4050    }
4051
4052  /* We should now be at the base of the gr/br/fr spill area.  */
4053  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4054			  + current_frame_info.spill_size));
4055
4056  /* The GP may be stored on the stack in the prologue, but it's
4057     never restored in the epilogue.  Skip the stack slot.  */
4058  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4059    cfa_off -= 8;
4060
4061  /* Restore all general registers.  */
4062  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4063    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4064      {
4065	reg = gen_rtx_REG (DImode, regno);
4066	do_restore (gen_gr_restore, reg, cfa_off);
4067	cfa_off -= 8;
4068      }
4069
4070  /* Restore the branch registers.  */
4071  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4072    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4073      {
4074	alt_regno = next_scratch_gr_reg ();
4075	alt_reg = gen_rtx_REG (DImode, alt_regno);
4076	do_restore (gen_movdi_x, alt_reg, cfa_off);
4077	cfa_off -= 8;
4078	reg = gen_rtx_REG (DImode, regno);
4079	emit_move_insn (reg, alt_reg);
4080      }
4081
4082  /* Restore floating point registers.  */
4083  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4084    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4085      {
4086        gcc_assert (!(cfa_off & 15));
4087	reg = gen_rtx_REG (XFmode, regno);
4088	do_restore (gen_fr_restore_x, reg, cfa_off);
4089	cfa_off -= 16;
4090      }
4091
4092  /* Restore ar.unat for real.  */
4093  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4094    {
4095      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4096      emit_move_insn (reg, ar_unat_save_reg);
4097    }
4098
4099  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4100
4101  finish_spill_pointers ();
4102
4103  if (current_frame_info.total_size
4104      || cfun->machine->ia64_eh_epilogue_sp
4105      || frame_pointer_needed)
4106    {
4107      /* ??? At this point we must generate a magic insn that appears to
4108         modify the spill iterators, the stack pointer, and the frame
4109	 pointer.  This would allow the most scheduling freedom.  For now,
4110	 just hard stop.  */
4111      emit_insn (gen_blockage ());
4112    }
4113
4114  if (cfun->machine->ia64_eh_epilogue_sp)
4115    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4116  else if (frame_pointer_needed)
4117    {
4118      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4119      RTX_FRAME_RELATED_P (insn) = 1;
4120      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4121    }
4122  else if (current_frame_info.total_size)
4123    {
4124      rtx offset, frame_size_rtx;
4125
4126      frame_size_rtx = GEN_INT (current_frame_info.total_size);
4127      if (satisfies_constraint_I (frame_size_rtx))
4128	offset = frame_size_rtx;
4129      else
4130	{
4131	  regno = next_scratch_gr_reg ();
4132	  offset = gen_rtx_REG (DImode, regno);
4133	  emit_move_insn (offset, frame_size_rtx);
4134	}
4135
4136      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4137				    offset));
4138
4139      RTX_FRAME_RELATED_P (insn) = 1;
4140      add_reg_note (insn, REG_CFA_ADJUST_CFA,
4141		    gen_rtx_SET (VOIDmode,
4142				 stack_pointer_rtx,
4143				 gen_rtx_PLUS (DImode,
4144					       stack_pointer_rtx,
4145					       frame_size_rtx)));
4146    }
4147
4148  if (cfun->machine->ia64_eh_epilogue_bsp)
4149    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4150
4151  if (! sibcall_p)
4152    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4153  else
4154    {
4155      int fp = GR_REG (2);
4156      /* We need a throw-away register here; r0 and r1 are reserved,
4157	 so r2 is the first available call-clobbered register.  If
4158	 there was a frame_pointer register, we may have swapped the
4159	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4160	 sure we're using the string "r2" when emitting the register
4161	 name for the assembler.  */
4162      if (current_frame_info.r[reg_fp]
4163          && current_frame_info.r[reg_fp] == GR_REG (2))
4164	fp = HARD_FRAME_POINTER_REGNUM;
4165
4166      /* We must emit an alloc to force the input registers to become output
4167	 registers.  Otherwise, if the callee tries to pass its parameters
4168	 through to another call without an intervening alloc, then these
4169	 values get lost.  */
4170      /* ??? We don't need to preserve all input registers.  We only need to
4171	 preserve those input registers used as arguments to the sibling call.
4172	 It is unclear how to compute that number here.  */
4173      if (current_frame_info.n_input_regs != 0)
4174	{
4175	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4176
4177	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4178				const0_rtx, const0_rtx,
4179				n_inputs, const0_rtx));
4180	  RTX_FRAME_RELATED_P (insn) = 1;
4181
4182	  /* ??? We need to mark the alloc as frame-related so that it gets
4183	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4184	     But there's nothing dwarf2 related to be done wrt the register
4185	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
4186	     the empty parallel means dwarf2out will not see anything.  */
4187	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4188			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4189	}
4190    }
4191}
4192
4193/* Return 1 if br.ret can do all the work required to return from a
4194   function.  */
4195
4196int
4197ia64_direct_return (void)
4198{
4199  if (reload_completed && ! frame_pointer_needed)
4200    {
4201      ia64_compute_frame_size (get_frame_size ());
4202
4203      return (current_frame_info.total_size == 0
4204	      && current_frame_info.n_spilled == 0
4205	      && current_frame_info.r[reg_save_b0] == 0
4206	      && current_frame_info.r[reg_save_pr] == 0
4207	      && current_frame_info.r[reg_save_ar_pfs] == 0
4208	      && current_frame_info.r[reg_save_ar_unat] == 0
4209	      && current_frame_info.r[reg_save_ar_lc] == 0);
4210    }
4211  return 0;
4212}
4213
4214/* Return the magic cookie that we use to hold the return address
4215   during early compilation.  */
4216
4217rtx
4218ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4219{
4220  if (count != 0)
4221    return NULL;
4222  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4223}
4224
4225/* Split this value after reload, now that we know where the return
4226   address is saved.  */
4227
4228void
4229ia64_split_return_addr_rtx (rtx dest)
4230{
4231  rtx src;
4232
4233  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4234    {
4235      if (current_frame_info.r[reg_save_b0] != 0)
4236        {
4237	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4238	  reg_emitted (reg_save_b0);
4239	}
4240      else
4241	{
4242	  HOST_WIDE_INT off;
4243	  unsigned int regno;
4244	  rtx off_r;
4245
4246	  /* Compute offset from CFA for BR0.  */
4247	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
4248	  off = (current_frame_info.spill_cfa_off
4249		 + current_frame_info.spill_size);
4250	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4251	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4252	      off -= 8;
4253
4254	  /* Convert CFA offset to a register based offset.  */
4255	  if (frame_pointer_needed)
4256	    src = hard_frame_pointer_rtx;
4257	  else
4258	    {
4259	      src = stack_pointer_rtx;
4260	      off += current_frame_info.total_size;
4261	    }
4262
4263	  /* Load address into scratch register.  */
4264	  off_r = GEN_INT (off);
4265	  if (satisfies_constraint_I (off_r))
4266	    emit_insn (gen_adddi3 (dest, src, off_r));
4267	  else
4268	    {
4269	      emit_move_insn (dest, off_r);
4270	      emit_insn (gen_adddi3 (dest, src, dest));
4271	    }
4272
4273	  src = gen_rtx_MEM (Pmode, dest);
4274	}
4275    }
4276  else
4277    src = gen_rtx_REG (DImode, BR_REG (0));
4278
4279  emit_move_insn (dest, src);
4280}
4281
4282int
4283ia64_hard_regno_rename_ok (int from, int to)
4284{
4285  /* Don't clobber any of the registers we reserved for the prologue.  */
4286  unsigned int r;
4287
4288  for (r = reg_fp; r <= reg_save_ar_lc; r++)
4289    if (to == current_frame_info.r[r]
4290        || from == current_frame_info.r[r]
4291        || to == emitted_frame_related_regs[r]
4292        || from == emitted_frame_related_regs[r])
4293      return 0;
4294
4295  /* Don't use output registers outside the register frame.  */
4296  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4297    return 0;
4298
4299  /* Retain even/oddness on predicate register pairs.  */
4300  if (PR_REGNO_P (from) && PR_REGNO_P (to))
4301    return (from & 1) == (to & 1);
4302
4303  return 1;
4304}
4305
4306/* Target hook for assembling integer objects.  Handle word-sized
4307   aligned objects and detect the cases when @fptr is needed.  */
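/* For example, a pointer-sized, aligned reference to a function symbol foo
   is emitted as "data8 @fptr(foo)" (or "data4.ua @fptr(foo)" for an
   unaligned 32-bit pointer), so that a function descriptor is referenced
   instead of the raw code address.  */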
4308
4309static bool
4310ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4311{
4312  if (size == POINTER_SIZE / BITS_PER_UNIT
4313      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4314      && GET_CODE (x) == SYMBOL_REF
4315      && SYMBOL_REF_FUNCTION_P (x))
4316    {
4317      static const char * const directive[2][2] = {
4318	  /* 64-bit pointer */  /* 32-bit pointer */
4319	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
4320	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
4321      };
4322      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4323      output_addr_const (asm_out_file, x);
4324      fputs (")\n", asm_out_file);
4325      return true;
4326    }
4327  return default_assemble_integer (x, size, aligned_p);
4328}
4329
4330/* Emit the function prologue.  */
4331
4332static void
4333ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4334{
4335  int mask, grsave, grsave_prev;
4336
4337  if (current_frame_info.need_regstk)
4338    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4339	     current_frame_info.n_input_regs,
4340	     current_frame_info.n_local_regs,
4341	     current_frame_info.n_output_regs,
4342	     current_frame_info.n_rotate_regs);
4343
4344  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4345    return;
4346
4347  /* Emit the .prologue directive.  */
4348
4349  mask = 0;
4350  grsave = grsave_prev = 0;
4351  if (current_frame_info.r[reg_save_b0] != 0)
4352    {
4353      mask |= 8;
4354      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4355    }
4356  if (current_frame_info.r[reg_save_ar_pfs] != 0
4357      && (grsave_prev == 0
4358	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4359    {
4360      mask |= 4;
4361      if (grsave_prev == 0)
4362	grsave = current_frame_info.r[reg_save_ar_pfs];
4363      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4364    }
4365  if (current_frame_info.r[reg_fp] != 0
4366      && (grsave_prev == 0
4367	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
4368    {
4369      mask |= 2;
4370      if (grsave_prev == 0)
4371	grsave = HARD_FRAME_POINTER_REGNUM;
4372      grsave_prev = current_frame_info.r[reg_fp];
4373    }
4374  if (current_frame_info.r[reg_save_pr] != 0
4375      && (grsave_prev == 0
4376	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4377    {
4378      mask |= 1;
4379      if (grsave_prev == 0)
4380	grsave = current_frame_info.r[reg_save_pr];
4381    }
4382
4383  if (mask && TARGET_GNU_AS)
4384    fprintf (file, "\t.prologue %d, %d\n", mask,
4385	     ia64_dbx_register_number (grsave));
4386  else
4387    fputs ("\t.prologue\n", file);
4388
4389  /* Emit a .spill directive, if necessary, to relocate the base of
4390     the register spill area.  */
4391  if (current_frame_info.spill_cfa_off != -16)
4392    fprintf (file, "\t.spill %ld\n",
4393	     (long) (current_frame_info.spill_cfa_off
4394		     + current_frame_info.spill_size));
4395}
4396
4397/* Emit the .body directive at the scheduled end of the prologue.  */
4398
4399static void
4400ia64_output_function_end_prologue (FILE *file)
4401{
4402  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4403    return;
4404
4405  fputs ("\t.body\n", file);
4406}
4407
4408/* Emit the function epilogue.  */
4409
4410static void
4411ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
4412			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
4413{
4414  int i;
4415
4416  if (current_frame_info.r[reg_fp])
4417    {
4418      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4419      reg_names[HARD_FRAME_POINTER_REGNUM]
4420	= reg_names[current_frame_info.r[reg_fp]];
4421      reg_names[current_frame_info.r[reg_fp]] = tmp;
4422      reg_emitted (reg_fp);
4423    }
4424  if (! TARGET_REG_NAMES)
4425    {
4426      for (i = 0; i < current_frame_info.n_input_regs; i++)
4427	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4428      for (i = 0; i < current_frame_info.n_local_regs; i++)
4429	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4430      for (i = 0; i < current_frame_info.n_output_regs; i++)
4431	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4432    }
4433
4434  current_frame_info.initialized = 0;
4435}
4436
4437int
4438ia64_dbx_register_number (int regno)
4439{
4440  /* In ia64_expand_prologue we quite literally renamed the frame pointer
4441     from its home at loc79 to something inside the register frame.  We
4442     must perform the same renumbering here for the debug info.  */
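  /* For illustration, with 2 input and 3 local registers the computation
     below maps IN_REG (1) to debug register 33, LOC_REG (0) to 34, and
     OUT_REG (0) to 37.  */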
4443  if (current_frame_info.r[reg_fp])
4444    {
4445      if (regno == HARD_FRAME_POINTER_REGNUM)
4446	regno = current_frame_info.r[reg_fp];
4447      else if (regno == current_frame_info.r[reg_fp])
4448	regno = HARD_FRAME_POINTER_REGNUM;
4449    }
4450
4451  if (IN_REGNO_P (regno))
4452    return 32 + regno - IN_REG (0);
4453  else if (LOC_REGNO_P (regno))
4454    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4455  else if (OUT_REGNO_P (regno))
4456    return (32 + current_frame_info.n_input_regs
4457	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
4458  else
4459    return regno;
4460}
4461
4462/* Implement TARGET_TRAMPOLINE_INIT.
4463
4464   The trampoline should set the static chain pointer to value placed
4465   into the trampoline and should branch to the specified routine.
4466   To make the normal indirect-subroutine calling convention work,
4467   the trampoline must look like a function descriptor; the first
4468   word being the target address and the second being the target's
4469   global pointer.
4470
4471   We abuse the concept of a global pointer by arranging for it
4472   to point to the data we need to load.  The complete trampoline
4473   has the following form:
4474
4475		+-------------------+ \
4476	TRAMP:	| __ia64_trampoline | |
4477		+-------------------+  > fake function descriptor
4478		| TRAMP+16          | |
4479		+-------------------+ /
4480		| target descriptor |
4481		+-------------------+
4482		| static link	    |
4483		+-------------------+
4484*/
4485
4486static void
4487ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4488{
4489  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4490  rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4491
4492  /* The Intel assembler requires that the global __ia64_trampoline symbol
4493     be declared explicitly.  */
4494  if (!TARGET_GNU_AS)
4495    {
4496      static bool declared_ia64_trampoline = false;
4497
4498      if (!declared_ia64_trampoline)
4499	{
4500	  declared_ia64_trampoline = true;
4501	  (*targetm.asm_out.globalize_label) (asm_out_file,
4502					      "__ia64_trampoline");
4503	}
4504    }
4505
4506  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4507  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4508  fnaddr = convert_memory_address (Pmode, fnaddr);
4509  static_chain = convert_memory_address (Pmode, static_chain);
4510
4511  /* Load up our iterator.  */
4512  addr_reg = copy_to_reg (addr);
4513  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4514
4515  /* The first two words are the fake descriptor:
4516     __ia64_trampoline, ADDR+16.  */
4517  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4518  if (TARGET_ABI_OPEN_VMS)
4519    {
4520      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4521	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4522	 relocation against function symbols to make it identical to the
4523	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4524	 strict ELF and dereference to get the bare code address.  */
4525      rtx reg = gen_reg_rtx (Pmode);
4526      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4527      emit_move_insn (reg, tramp);
4528      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4529      tramp = reg;
4530   }
4531  emit_move_insn (m_tramp, tramp);
4532  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4533  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4534
4535  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4536  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4537  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4538
4539  /* The third word is the target descriptor.  */
4540  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4541  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4542  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4543
4544  /* The fourth word is the static chain.  */
4545  emit_move_insn (m_tramp, static_chain);
4546}
4547
4548/* Do any needed setup for a variadic function.  CUM has not been updated
4549   for the last named argument which has type TYPE and mode MODE.
4550
4551   We generate the actual spill instructions during prologue generation.  */
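/* As a worked example (assuming the usual MAX_ARGUMENT_SLOTS of 8 and
   UNITS_PER_WORD of 8): if the named arguments occupy 3 slots, then
   n = 8 - 3 = 5, *pretend_size becomes 40 bytes, and the prologue later
   spills the 5 remaining GR argument registers into that area.  */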
4552
4553static void
4554ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4555			     tree type, int * pretend_size,
4556			     int second_time ATTRIBUTE_UNUSED)
4557{
4558  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4559
4560  /* Skip the current argument.  */
4561  ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4562
4563  if (next_cum.words < MAX_ARGUMENT_SLOTS)
4564    {
4565      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4566      *pretend_size = n * UNITS_PER_WORD;
4567      cfun->machine->n_varargs = n;
4568    }
4569}
4570
4571/* Check whether TYPE is a homogeneous floating point aggregate.  If
4572   it is, return the mode of the floating point type that appears
4573   in all leaves.  If it is not, return VOIDmode.
4574
4575   An aggregate is a homogeneous floating point aggregate if all
4576   fields/elements in it have the same floating point type (e.g.,
4577   SFmode).  128-bit quad-precision floats are excluded.
4578
4579   Variable sized aggregates should never arrive here, since we should
4580   have already decided to pass them by reference.  Top-level zero-sized
4581   aggregates are excluded because our parallels crash the middle-end.  */
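/* For example, assuming the usual layout, struct { float x, y, z; } is an
   HFA with element mode SFmode and struct { double re, im; } one with
   element mode DFmode, while struct { float x; double y; } is not an HFA,
   and neither is any aggregate containing a quad-precision (TFmode)
   field.  */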
4582
4583static machine_mode
4584hfa_element_mode (const_tree type, bool nested)
4585{
4586  machine_mode element_mode = VOIDmode;
4587  machine_mode mode;
4588  enum tree_code code = TREE_CODE (type);
4589  int know_element_mode = 0;
4590  tree t;
4591
4592  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4593    return VOIDmode;
4594
4595  switch (code)
4596    {
4597    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4598    case BOOLEAN_TYPE:	case POINTER_TYPE:
4599    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4600    case LANG_TYPE:		case FUNCTION_TYPE:
4601      return VOIDmode;
4602
4603      /* Fortran complex types are supposed to be HFAs, so we need to handle
4604	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4605	 types though.  */
4606    case COMPLEX_TYPE:
4607      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4608	  && TYPE_MODE (type) != TCmode)
4609	return GET_MODE_INNER (TYPE_MODE (type));
4610      else
4611	return VOIDmode;
4612
4613    case REAL_TYPE:
4614      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4615	 mode if this is contained within an aggregate.  */
4616      if (nested && TYPE_MODE (type) != TFmode)
4617	return TYPE_MODE (type);
4618      else
4619	return VOIDmode;
4620
4621    case ARRAY_TYPE:
4622      return hfa_element_mode (TREE_TYPE (type), 1);
4623
4624    case RECORD_TYPE:
4625    case UNION_TYPE:
4626    case QUAL_UNION_TYPE:
4627      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4628	{
4629	  if (TREE_CODE (t) != FIELD_DECL)
4630	    continue;
4631
4632	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4633	  if (know_element_mode)
4634	    {
4635	      if (mode != element_mode)
4636		return VOIDmode;
4637	    }
4638	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4639	    return VOIDmode;
4640	  else
4641	    {
4642	      know_element_mode = 1;
4643	      element_mode = mode;
4644	    }
4645	}
4646      return element_mode;
4647
4648    default:
4649      /* If we reach here, we probably have some front-end specific type
4650	 that the backend doesn't know about.  This can happen via the
4651	 aggregate_value_p call in init_function_start.  All we can do is
4652	 ignore unknown tree types.  */
4653      return VOIDmode;
4654    }
4655
4656  return VOIDmode;
4657}
4658
4659/* Return the number of words required to hold a quantity of TYPE and MODE
4660   when passed as an argument.  */
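/* For instance, assuming UNITS_PER_WORD is 8, a 20-byte BLKmode aggregate
   occupies (20 + 7) / 8 = 3 argument words, while a DImode or SFmode
   scalar occupies a single word.  */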
4661static int
4662ia64_function_arg_words (const_tree type, machine_mode mode)
4663{
4664  int words;
4665
4666  if (mode == BLKmode)
4667    words = int_size_in_bytes (type);
4668  else
4669    words = GET_MODE_SIZE (mode);
4670
4671  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4672}
4673
4674/* Return the number of registers that should be skipped so the current
4675   argument (described by TYPE and WORDS) will be properly aligned.
4676
4677   Integer and float arguments larger than 8 bytes start at the next
4678   even boundary.  Aggregates larger than 8 bytes start at the next
4679   even boundary if the aggregate has 16 byte alignment.  Note that
4680   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4681   but are still to be aligned in registers.
4682
4683   ??? The ABI does not specify how to handle aggregates with
4684   alignment from 9 to 15 bytes, or greater than 16.  We handle them
4685   all as if they had 16 byte alignment.  Such aggregates can occur
4686   only if gcc extensions are used.  */
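/* For instance, if an odd number of slots is already in use and the next
   argument is a 16-byte aligned aggregate, or a scalar wider than 8 bytes,
   one slot is skipped so that the value starts on an even slot; on VMS no
   slots are ever skipped.  */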
4687static int
4688ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4689			  const_tree type, int words)
4690{
4691  /* No registers are skipped on VMS.  */
4692  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4693    return 0;
4694
4695  if (type
4696      && TREE_CODE (type) != INTEGER_TYPE
4697      && TREE_CODE (type) != REAL_TYPE)
4698    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4699  else
4700    return words > 1;
4701}
4702
4703/* Return rtx for register where argument is passed, or zero if it is passed
4704   on the stack.  */
4705/* ??? 128-bit quad-precision floats are always passed in general
4706   registers.  */
4707
4708static rtx
4709ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
4710		     const_tree type, bool named, bool incoming)
4711{
4712  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4713
4714  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4715  int words = ia64_function_arg_words (type, mode);
4716  int offset = ia64_function_arg_offset (cum, type, words);
4717  machine_mode hfa_mode = VOIDmode;
4718
4719  /* For OpenVMS, emit the instruction that sets up the argument register
4720     here, so that it is emitted together with the other argument setup
4721     insns.  This is not the conceptually best place to do it, but it is the
4722     easiest, since we have convenient access to the cumulative args info.  */
4723
4724  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4725      && named == 1)
4726    {
4727      unsigned HOST_WIDE_INT regval = cum->words;
4728      int i;
4729
4730      for (i = 0; i < 8; i++)
4731	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4732
4733      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4734		      GEN_INT (regval));
4735    }
4736
4737  /* If all argument slots are used, then it must go on the stack.  */
4738  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4739    return 0;
4740
4741  /* On OpenVMS argument is either in Rn or Fn.  */
4742  if (TARGET_ABI_OPEN_VMS)
4743    {
4744      if (FLOAT_MODE_P (mode))
4745	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4746      else
4747	return gen_rtx_REG (mode, basereg + cum->words);
4748    }
4749
4750  /* Check for and handle homogeneous FP aggregates.  */
4751  if (type)
4752    hfa_mode = hfa_element_mode (type, 0);
4753
4754  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4755     and unprototyped hfas are passed specially.  */
4756  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4757    {
4758      rtx loc[16];
4759      int i = 0;
4760      int fp_regs = cum->fp_regs;
4761      int int_regs = cum->words + offset;
4762      int hfa_size = GET_MODE_SIZE (hfa_mode);
4763      int byte_size;
4764      int args_byte_size;
4765
4766      /* If prototyped, pass it in FR regs then GR regs.
4767	 If not prototyped, pass it in both FR and GR regs.
4768
4769	 If this is an SFmode aggregate, then it is possible to run out of
4770	 FR regs while GR regs are still left.  In that case, we pass the
4771	 remaining part in the GR regs.  */
4772
4773      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4774	 of the argument, the last FP register, or the last argument slot.  */
4775
4776      byte_size = ((mode == BLKmode)
4777		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4778      args_byte_size = int_regs * UNITS_PER_WORD;
4779      offset = 0;
4780      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4781	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4782	{
4783	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4784				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4785							      + fp_regs)),
4786				      GEN_INT (offset));
4787	  offset += hfa_size;
4788	  args_byte_size += hfa_size;
4789	  fp_regs++;
4790	}
4791
4792      /* If no prototype, then the whole thing must go in GR regs.  */
4793      if (! cum->prototype)
4794	offset = 0;
4795      /* If this is an SFmode aggregate, then we might have some left over
4796	 that needs to go in GR regs.  */
4797      else if (byte_size != offset)
4798	int_regs += offset / UNITS_PER_WORD;
4799
4800      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4801
4802      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4803	{
4804	  machine_mode gr_mode = DImode;
4805	  unsigned int gr_size;
4806
4807	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4808	     then this goes in a GR reg left adjusted/little endian, right
4809	     adjusted/big endian.  */
4810	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4811	     always right adjusted/little endian.  */
4812	  if (offset & 0x4)
4813	    gr_mode = SImode;
4814	  /* If we have an even 4 byte hunk because the aggregate is a
4815	     multiple of 4 bytes in size, then this goes in a GR reg right
4816	     adjusted/little endian.  */
4817	  else if (byte_size - offset == 4)
4818	    gr_mode = SImode;
4819
4820	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4821				      gen_rtx_REG (gr_mode, (basereg
4822							     + int_regs)),
4823				      GEN_INT (offset));
4824
4825	  gr_size = GET_MODE_SIZE (gr_mode);
4826	  offset += gr_size;
4827	  if (gr_size == UNITS_PER_WORD
4828	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4829	    int_regs++;
4830	  else if (gr_size > UNITS_PER_WORD)
4831	    int_regs += gr_size / UNITS_PER_WORD;
4832	}
4833      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4834    }
4835
4836  /* Integral values and aggregates go in general registers.  If we have run
4837     out of FR registers, then FP values must also go in general registers.
4838     This can happen when we have an SFmode HFA.  */
4839  else if (mode == TFmode || mode == TCmode
4840	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4841    {
4842      int byte_size = ((mode == BLKmode)
4843                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4844      if (BYTES_BIG_ENDIAN
4845	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4846	&& byte_size < UNITS_PER_WORD
4847	&& byte_size > 0)
4848	{
4849	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4850					  gen_rtx_REG (DImode,
4851						       (basereg + cum->words
4852							+ offset)),
4853					  const0_rtx);
4854	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4855	}
4856      else
4857	return gen_rtx_REG (mode, basereg + cum->words + offset);
4858
4859    }
4860
4861  /* If there is a prototype, then FP values go in a FR register when
4862     named, and in a GR register when unnamed.  */
4863  else if (cum->prototype)
4864    {
4865      if (named)
4866	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4867      /* In big-endian mode, an anonymous SFmode value must be represented
4868         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4869	 the value into the high half of the general register.  */
4870      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4871	return gen_rtx_PARALLEL (mode,
4872		 gen_rtvec (1,
4873                   gen_rtx_EXPR_LIST (VOIDmode,
4874		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4875				      const0_rtx)));
4876      else
4877	return gen_rtx_REG (mode, basereg + cum->words + offset);
4878    }
4879  /* If there is no prototype, then FP values go in both FR and GR
4880     registers.  */
4881  else
4882    {
4883      /* See comment above.  */
4884      machine_mode inner_mode =
4885	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4886
4887      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4888				      gen_rtx_REG (mode, (FR_ARG_FIRST
4889							  + cum->fp_regs)),
4890				      const0_rtx);
4891      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4892				      gen_rtx_REG (inner_mode,
4893						   (basereg + cum->words
4894						    + offset)),
4895				      const0_rtx);
4896
4897      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4898    }
4899}
4900
4901/* Implement TARGET_FUNCTION_ARG target hook.  */
4902
4903static rtx
4904ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4905		   const_tree type, bool named)
4906{
4907  return ia64_function_arg_1 (cum, mode, type, named, false);
4908}
4909
4910/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
4911
4912static rtx
4913ia64_function_incoming_arg (cumulative_args_t cum,
4914			    machine_mode mode,
4915			    const_tree type, bool named)
4916{
4917  return ia64_function_arg_1 (cum, mode, type, named, true);
4918}
4919
4920/* Return number of bytes, at the beginning of the argument, that must be
4921   put in registers.  0 if the argument is entirely in registers or entirely
4922   in memory.  */
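/* For instance, assuming the usual MAX_ARGUMENT_SLOTS of 8: with 6 slots
   already in use and a 3-word argument, 2 words (16 bytes) are passed in
   registers, the last word goes on the stack, and 16 is returned.  */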
4923
4924static int
4925ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
4926			tree type, bool named ATTRIBUTE_UNUSED)
4927{
4928  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4929
4930  int words = ia64_function_arg_words (type, mode);
4931  int offset = ia64_function_arg_offset (cum, type, words);
4932
4933  /* If all argument slots are used, then it must go on the stack.  */
4934  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4935    return 0;
4936
4937  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4938     it fits within the 8 argument slots, then it goes entirely in
4939     registers.  If it extends past the last argument slot, then the rest
4940     goes on the stack.  */
4941
4942  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4943    return 0;
4944
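  /* The argument straddles the last register slot: the leading part fills
     the remaining slots and the rest goes on the stack.  E.g. with
     cum->words == 7, offset == 0 and a two-word argument, one word goes in
     the last slot, so this returns UNITS_PER_WORD.  */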
4945  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4946}
4947
4948/* Return ivms_arg_type based on machine_mode.  */
4949
4950static enum ivms_arg_type
4951ia64_arg_type (machine_mode mode)
4952{
4953  switch (mode)
4954    {
4955    case SFmode:
4956      return FS;
4957    case DFmode:
4958      return FT;
4959    default:
4960      return I64;
4961    }
4962}
4963
4964/* Update CUM to point after this argument.  This is patterned after
4965   ia64_function_arg.  */
4966
4967static void
4968ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
4969			   const_tree type, bool named)
4970{
4971  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4972  int words = ia64_function_arg_words (type, mode);
4973  int offset = ia64_function_arg_offset (cum, type, words);
4974  machine_mode hfa_mode = VOIDmode;
4975
4976  /* If all arg slots are already full, then there is nothing to do.  */
4977  if (cum->words >= MAX_ARGUMENT_SLOTS)
4978    {
4979      cum->words += words + offset;
4980      return;
4981    }
4982
4983  cum->atypes[cum->words] = ia64_arg_type (mode);
4984  cum->words += words + offset;
4985
4986  /* On OpenVMS an argument is either in Rn or Fn.  */
4987  if (TARGET_ABI_OPEN_VMS)
4988    {
4989      cum->int_regs = cum->words;
4990      cum->fp_regs = cum->words;
4991      return;
4992    }
4993
4994  /* Check for and handle homogeneous FP aggregates.  */
4995  if (type)
4996    hfa_mode = hfa_element_mode (type, 0);
4997
4998  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4999     and unprototyped hfas are passed specially.  */
5000  if (hfa_mode != VOIDmode && (! cum->prototype || named))
5001    {
5002      int fp_regs = cum->fp_regs;
5003      /* This is the original value of cum->words + offset.  */
5004      int int_regs = cum->words - words;
5005      int hfa_size = GET_MODE_SIZE (hfa_mode);
5006      int byte_size;
5007      int args_byte_size;
5008
5009      /* If prototyped, pass it in FR regs then GR regs.
5010	 If not prototyped, pass it in both FR and GR regs.
5011
5012	 If this is an SFmode aggregate, then it is possible to run out of
5013	 FR regs while GR regs are still left.  In that case, we pass the
5014	 remaining part in the GR regs.  */
5015
5016      /* Fill the FP regs.  We do this always.  We stop if we reach the end
5017	 of the argument, the last FP register, or the last argument slot.  */
5018
5019      byte_size = ((mode == BLKmode)
5020		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5021      args_byte_size = int_regs * UNITS_PER_WORD;
5022      offset = 0;
5023      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5024	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5025	{
5026	  offset += hfa_size;
5027	  args_byte_size += hfa_size;
5028	  fp_regs++;
5029	}
5030
5031      cum->fp_regs = fp_regs;
5032    }
5033
5034  /* Integral and aggregates go in general registers.  So do TFmode FP values.
5035     If we have run out of FR registers, then other FP values must also go in
5036     general registers.  This can happen when we have an SFmode HFA.  */
5037  else if (mode == TFmode || mode == TCmode
5038           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5039    cum->int_regs = cum->words;
5040
5041  /* If there is a prototype, then FP values go in a FR register when
5042     named, and in a GR register when unnamed.  */
5043  else if (cum->prototype)
5044    {
5045      if (! named)
5046	cum->int_regs = cum->words;
5047      else
5048	/* ??? Complex types should not reach here.  */
5049	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5050    }
5051  /* If there is no prototype, then FP values go in both FR and GR
5052     registers.  */
5053  else
5054    {
5055      /* ??? Complex types should not reach here.  */
5056      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5057      cum->int_regs = cum->words;
5058    }
5059}
5060
5061/* Arguments with alignment larger than 8 bytes start at the next even
5062   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
5063   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5064
5065static unsigned int
5066ia64_function_arg_boundary (machine_mode mode, const_tree type)
5067{
5068  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5069    return PARM_BOUNDARY * 2;
5070
5071  if (type)
5072    {
5073      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5074        return PARM_BOUNDARY * 2;
5075      else
5076        return PARM_BOUNDARY;
5077    }
5078
5079  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5080    return PARM_BOUNDARY * 2;
5081  else
5082    return PARM_BOUNDARY;
5083}
5084
5085/* True if it is OK to do sibling call optimization for the specified
5086   call expression EXP.  DECL will be the called function, or NULL if
5087   this is an indirect call.  */
5088static bool
5089ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5090{
5091  /* We can't perform a sibcall if the current function has the syscall_linkage
5092     attribute.  */
5093  if (lookup_attribute ("syscall_linkage",
5094			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5095    return false;
5096
5097  /* We must always return with our current GP.  This means we can
5098     only sibcall to functions defined in the current module unless
5099     TARGET_CONST_GP is set to true.  */
5100  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5101}
5102
5103
5104/* Implement va_arg.  */
5105
5106static tree
5107ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5108		      gimple_seq *post_p)
5109{
5110  /* Variable sized types are passed by reference.  */
5111  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5112    {
5113      tree ptrtype = build_pointer_type (type);
5114      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5115      return build_va_arg_indirect_ref (addr);
5116    }
5117
5118  /* Aggregate arguments with alignment larger than 8 bytes start at
5119     the next even boundary.  Integer and floating point arguments
5120     do so if they are larger than 8 bytes, whether or not they are
5121     also aligned larger than 8 bytes.  */
5122  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5123      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5124    {
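      /* Round the va_list pointer up to the next 2 * UNITS_PER_WORD
	 boundary (16 bytes): add 2 * UNITS_PER_WORD - 1 and mask off the
	 low bits, i.e. valist = (valist + 15) & -16.  */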
5125      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5126      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5127		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5128      gimplify_assign (unshare_expr (valist), t, pre_p);
5129    }
5130
5131  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5132}
5133
5134/* Return true if the function return value is returned in memory.  Return
5135   false if it is in a register.  */
5136
5137static bool
5138ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5139{
5140  machine_mode mode;
5141  machine_mode hfa_mode;
5142  HOST_WIDE_INT byte_size;
5143
5144  mode = TYPE_MODE (valtype);
5145  byte_size = GET_MODE_SIZE (mode);
5146  if (mode == BLKmode)
5147    {
5148      byte_size = int_size_in_bytes (valtype);
5149      if (byte_size < 0)
5150	return true;
5151    }
5152
5153  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
5154
5155  hfa_mode = hfa_element_mode (valtype, 0);
5156  if (hfa_mode != VOIDmode)
5157    {
5158      int hfa_size = GET_MODE_SIZE (hfa_mode);
5159
5160      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5161	return true;
5162      else
5163	return false;
5164    }
5165  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5166    return true;
5167  else
5168    return false;
5169}
5170
5171/* Return rtx for register that holds the function return value.  */
5172
5173static rtx
5174ia64_function_value (const_tree valtype,
5175		     const_tree fn_decl_or_type,
5176		     bool outgoing ATTRIBUTE_UNUSED)
5177{
5178  machine_mode mode;
5179  machine_mode hfa_mode;
5180  int unsignedp;
5181  const_tree func = fn_decl_or_type;
5182
5183  if (fn_decl_or_type
5184      && !DECL_P (fn_decl_or_type))
5185    func = NULL;
5186
5187  mode = TYPE_MODE (valtype);
5188  hfa_mode = hfa_element_mode (valtype, 0);
5189
5190  if (hfa_mode != VOIDmode)
5191    {
5192      rtx loc[8];
5193      int i;
5194      int hfa_size;
5195      int byte_size;
5196      int offset;
5197
5198      hfa_size = GET_MODE_SIZE (hfa_mode);
5199      byte_size = ((mode == BLKmode)
5200		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5201      offset = 0;
5202      for (i = 0; offset < byte_size; i++)
5203	{
5204	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5205				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5206				      GEN_INT (offset));
5207	  offset += hfa_size;
5208	}
5209      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5210    }
5211  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5212    return gen_rtx_REG (mode, FR_ARG_FIRST);
5213  else
5214    {
5215      bool need_parallel = false;
5216
5217      /* In big-endian mode, we need to manage the layout of aggregates
5218	 in the registers so that we get the bits properly aligned in
5219	 the highpart of the registers.  */
5220      if (BYTES_BIG_ENDIAN
5221	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5222	need_parallel = true;
5223
5224      /* Something like struct S { long double x; char a[0]; } is not an
5225	 HFA structure, and therefore doesn't go in fp registers.  But
5226	 the middle-end will give it XFmode anyway, and XFmode values
5227	 don't normally fit in integer registers.  So we need to smuggle
5228	 the value inside a parallel.  */
5229      else if (mode == XFmode || mode == XCmode || mode == RFmode)
5230	need_parallel = true;
5231
5232      if (need_parallel)
5233	{
5234	  rtx loc[8];
5235	  int offset;
5236	  int bytesize;
5237	  int i;
5238
5239	  offset = 0;
5240	  bytesize = int_size_in_bytes (valtype);
5241	  /* An empty PARALLEL is invalid here, but the return value
5242	     doesn't matter for empty structs.  */
5243	  if (bytesize == 0)
5244	    return gen_rtx_REG (mode, GR_RET_FIRST);
5245	  for (i = 0; offset < bytesize; i++)
5246	    {
5247	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5248					  gen_rtx_REG (DImode,
5249						       GR_RET_FIRST + i),
5250					  GEN_INT (offset));
5251	      offset += UNITS_PER_WORD;
5252	    }
5253	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5254	}
5255
5256      mode = promote_function_mode (valtype, mode, &unsignedp,
5257                                    func ? TREE_TYPE (func) : NULL_TREE,
5258                                    true);
5259
5260      return gen_rtx_REG (mode, GR_RET_FIRST);
5261    }
5262}
5263
5264/* Worker function for TARGET_LIBCALL_VALUE.  */
5265
5266static rtx
5267ia64_libcall_value (machine_mode mode,
5268		    const_rtx fun ATTRIBUTE_UNUSED)
5269{
5270  return gen_rtx_REG (mode,
5271		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5272			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5273			&& (mode) != TFmode)
5274		       ? FR_RET_FIRST : GR_RET_FIRST));
5275}
5276
5277/* Worker function for FUNCTION_VALUE_REGNO_P.  */
5278
5279static bool
5280ia64_function_value_regno_p (const unsigned int regno)
5281{
5282  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5283          || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5284}
5285
5286/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5287   We need to emit DTP-relative relocations.  */
5288
5289static void
5290ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5291{
5292  gcc_assert (size == 4 || size == 8);
5293  if (size == 4)
5294    fputs ("\tdata4.ua\t@dtprel(", file);
5295  else
5296    fputs ("\tdata8.ua\t@dtprel(", file);
5297  output_addr_const (file, x);
5298  fputs (")", file);
5299}
5300
5301/* Print a memory address as an operand to reference that memory location.  */
5302
5303/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5304   also call this from ia64_print_operand for memory addresses.  */
5305
5306static void
5307ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5308			    rtx address ATTRIBUTE_UNUSED)
5309{
5310}
5311
5312/* Print an operand to an assembler instruction.
5313   C	Swap and print a comparison operator.
5314   D	Print an FP comparison operator.
5315   E    Print 32 - constant, for SImode shifts as extract.
5316   e    Print 64 - constant, for DImode rotates.
5317   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5318        a floating point register emitted normally.
5319   G	A floating point constant.
5320   I	Invert a predicate register by adding 1.
5321   J    Select the proper predicate register for a condition.
5322   j    Select the inverse predicate register for a condition.
5323   O	Append .acq for volatile load.
5324   P	Postincrement of a MEM.
5325   Q	Append .rel for volatile store.
5326   R	Print .s .d or nothing for a single, double or no truncation.
5327   S	Shift amount for shladd instruction.
5328   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5329	for Intel assembler.
5330   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5331	for Intel assembler.
5332   X	A pair of floating point registers.
5333   r	Print register name, or constant 0 as r0.  HP compatibility for
5334	Linux kernel.
5335   v    Print vector constant value as an 8-byte integer value.  */
5336
5337static void
5338ia64_print_operand (FILE * file, rtx x, int code)
5339{
5340  const char *str;
5341
5342  switch (code)
5343    {
5344    case 0:
5345      /* Handled below.  */
5346      break;
5347
5348    case 'C':
5349      {
5350	enum rtx_code c = swap_condition (GET_CODE (x));
5351	fputs (GET_RTX_NAME (c), file);
5352	return;
5353      }
5354
5355    case 'D':
5356      switch (GET_CODE (x))
5357	{
5358	case NE:
5359	  str = "neq";
5360	  break;
5361	case UNORDERED:
5362	  str = "unord";
5363	  break;
5364	case ORDERED:
5365	  str = "ord";
5366	  break;
5367	case UNLT:
5368	  str = "nge";
5369	  break;
5370	case UNLE:
5371	  str = "ngt";
5372	  break;
5373	case UNGT:
5374	  str = "nle";
5375	  break;
5376	case UNGE:
5377	  str = "nlt";
5378	  break;
5379	case UNEQ:
5380	case LTGT:
5381	  gcc_unreachable ();
5382	default:
5383	  str = GET_RTX_NAME (GET_CODE (x));
5384	  break;
5385	}
5386      fputs (str, file);
5387      return;
5388
5389    case 'E':
5390      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5391      return;
5392
5393    case 'e':
5394      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5395      return;
5396
5397    case 'F':
5398      if (x == CONST0_RTX (GET_MODE (x)))
5399	str = reg_names [FR_REG (0)];
5400      else if (x == CONST1_RTX (GET_MODE (x)))
5401	str = reg_names [FR_REG (1)];
5402      else
5403	{
5404	  gcc_assert (GET_CODE (x) == REG);
5405	  str = reg_names [REGNO (x)];
5406	}
5407      fputs (str, file);
5408      return;
5409
5410    case 'G':
5411      {
5412	long val[4];
5413	REAL_VALUE_TYPE rv;
5414	REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
5415	real_to_target (val, &rv, GET_MODE (x));
5416	if (GET_MODE (x) == SFmode)
5417	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5418	else if (GET_MODE (x) == DFmode)
5419	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5420					  & 0xffffffff,
5421					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5422					  & 0xffffffff);
5423	else
5424	  output_operand_lossage ("invalid %%G mode");
5425      }
5426      return;
5427
5428    case 'I':
5429      fputs (reg_names [REGNO (x) + 1], file);
5430      return;
5431
5432    case 'J':
5433    case 'j':
5434      {
5435	unsigned int regno = REGNO (XEXP (x, 0));
5436	if (GET_CODE (x) == EQ)
5437	  regno += 1;
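	/* For 'j', flip the low bit to select the other register of the
	   predicate pair, i.e. the inverse predicate.  */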
5438	if (code == 'j')
5439	  regno ^= 1;
5440        fputs (reg_names [regno], file);
5441      }
5442      return;
5443
5444    case 'O':
5445      if (MEM_VOLATILE_P (x))
5446	fputs(".acq", file);
5447      return;
5448
5449    case 'P':
5450      {
5451	HOST_WIDE_INT value;
5452
5453	switch (GET_CODE (XEXP (x, 0)))
5454	  {
5455	  default:
5456	    return;
5457
5458	  case POST_MODIFY:
5459	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5460	    if (GET_CODE (x) == CONST_INT)
5461	      value = INTVAL (x);
5462	    else
5463	      {
5464		gcc_assert (GET_CODE (x) == REG);
5465		fprintf (file, ", %s", reg_names[REGNO (x)]);
5466		return;
5467	      }
5468	    break;
5469
5470	  case POST_INC:
5471	    value = GET_MODE_SIZE (GET_MODE (x));
5472	    break;
5473
5474	  case POST_DEC:
5475	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5476	    break;
5477	  }
5478
5479	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5480	return;
5481      }
5482
5483    case 'Q':
5484      if (MEM_VOLATILE_P (x))
5485	fputs(".rel", file);
5486      return;
5487
5488    case 'R':
5489      if (x == CONST0_RTX (GET_MODE (x)))
5490	fputs(".s", file);
5491      else if (x == CONST1_RTX (GET_MODE (x)))
5492	fputs(".d", file);
5493      else if (x == CONST2_RTX (GET_MODE (x)))
5494	;
5495      else
5496	output_operand_lossage ("invalid %%R value");
5497      return;
5498
5499    case 'S':
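      /* The operand is a power-of-two scale factor; the shladd shift count
	 is its base-2 logarithm.  */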
5500      fprintf (file, "%d", exact_log2 (INTVAL (x)));
5501      return;
5502
5503    case 'T':
5504      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5505	{
5506	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5507	  return;
5508	}
5509      break;
5510
5511    case 'U':
5512      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5513	{
5514	  const char *prefix = "0x";
5515	  if (INTVAL (x) & 0x80000000)
5516	    {
5517	      fprintf (file, "0xffffffff");
5518	      prefix = "";
5519	    }
5520	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5521	  return;
5522	}
5523      break;
5524
5525    case 'X':
5526      {
5527	unsigned int regno = REGNO (x);
5528	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5529      }
5530      return;
5531
5532    case 'r':
5533      /* If this operand is the constant zero, write it as register zero.
5534	 Any register, zero, or CONST_INT value is OK here.  */
5535      if (GET_CODE (x) == REG)
5536	fputs (reg_names[REGNO (x)], file);
5537      else if (x == CONST0_RTX (GET_MODE (x)))
5538	fputs ("r0", file);
5539      else if (GET_CODE (x) == CONST_INT)
5540	output_addr_const (file, x);
5541      else
5542	output_operand_lossage ("invalid %%r value");
5543      return;
5544
5545    case 'v':
5546      gcc_assert (GET_CODE (x) == CONST_VECTOR);
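      /* Reinterpret the vector constant as a DImode integer and fall
	 through to the default code below, which prints it as a plain
	 constant.  */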
5547      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5548      break;
5549
5550    case '+':
5551      {
5552	const char *which;
5553
5554	/* For conditional branches, returns or calls, substitute
5555	   sptk, dptk, dpnt, or spnt for '%+'.  */
5556	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5557	if (x)
5558	  {
5559	    int pred_val = XINT (x, 0);
5560
5561	    /* Guess top and bottom 10% statically predicted.  */
5562	    if (pred_val < REG_BR_PROB_BASE / 50
5563		&& br_prob_note_reliable_p (x))
5564	      which = ".spnt";
5565	    else if (pred_val < REG_BR_PROB_BASE / 2)
5566	      which = ".dpnt";
5567	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5568		     || !br_prob_note_reliable_p (x))
5569	      which = ".dptk";
5570	    else
5571	      which = ".sptk";
5572	  }
5573	else if (CALL_P (current_output_insn))
5574	  which = ".sptk";
5575	else
5576	  which = ".dptk";
5577
5578	fputs (which, file);
5579	return;
5580      }
5581
5582    case ',':
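      /* Print the qualifying predicate of the current insn, if any, as
	 a "(pN) " prefix.  */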
5583      x = current_insn_predicate;
5584      if (x)
5585	{
5586	  unsigned int regno = REGNO (XEXP (x, 0));
5587	  if (GET_CODE (x) == EQ)
5588	    regno += 1;
5589          fprintf (file, "(%s) ", reg_names [regno]);
5590	}
5591      return;
5592
5593    default:
5594      output_operand_lossage ("ia64_print_operand: unknown code");
5595      return;
5596    }
5597
5598  switch (GET_CODE (x))
5599    {
5600      /* This happens for the spill/restore instructions.  */
5601    case POST_INC:
5602    case POST_DEC:
5603    case POST_MODIFY:
5604      x = XEXP (x, 0);
5605      /* ... fall through ...  */
5606
5607    case REG:
5608      fputs (reg_names [REGNO (x)], file);
5609      break;
5610
5611    case MEM:
5612      {
5613	rtx addr = XEXP (x, 0);
5614	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5615	  addr = XEXP (addr, 0);
5616	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5617	break;
5618      }
5619
5620    default:
5621      output_addr_const (file, x);
5622      break;
5623    }
5624
5625  return;
5626}
5627
5628/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5629
5630static bool
5631ia64_print_operand_punct_valid_p (unsigned char code)
5632{
5633  return (code == '+' || code == ',');
5634}
5635
5636/* Compute a (partial) cost for rtx X.  Return true if the complete
5637   cost has been computed, and false if subexpressions should be
5638   scanned.  In either case, *TOTAL contains the cost result.  */
5639/* ??? This is incomplete.  */
5640
5641static bool
5642ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
5643		int *total, bool speed ATTRIBUTE_UNUSED)
5644{
5645  switch (code)
5646    {
5647    case CONST_INT:
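      /* Constants that fit the immediate field of the surrounding insn are
	 free; anything else needs an extra instruction to materialize.  */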
5648      switch (outer_code)
5649        {
5650        case SET:
5651	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5652	  return true;
5653        case PLUS:
5654	  if (satisfies_constraint_I (x))
5655	    *total = 0;
5656	  else if (satisfies_constraint_J (x))
5657	    *total = 1;
5658	  else
5659	    *total = COSTS_N_INSNS (1);
5660	  return true;
5661        default:
5662	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5663	    *total = 0;
5664	  else
5665	    *total = COSTS_N_INSNS (1);
5666	  return true;
5667	}
5668
5669    case CONST_DOUBLE:
5670      *total = COSTS_N_INSNS (1);
5671      return true;
5672
5673    case CONST:
5674    case SYMBOL_REF:
5675    case LABEL_REF:
5676      *total = COSTS_N_INSNS (3);
5677      return true;
5678
5679    case FMA:
5680      *total = COSTS_N_INSNS (4);
5681      return true;
5682
5683    case MULT:
5684      /* For multiplies wider than HImode, we have to go to the FPU,
5685         which normally involves copies.  Plus there's the latency
5686         of the multiply itself, and the latency of the instructions to
5687         transfer integer regs to FP regs.  */
5688      if (FLOAT_MODE_P (GET_MODE (x)))
5689	*total = COSTS_N_INSNS (4);
5690      else if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5691        *total = COSTS_N_INSNS (10);
5692      else
5693	*total = COSTS_N_INSNS (2);
5694      return true;
5695
5696    case PLUS:
5697    case MINUS:
5698      if (FLOAT_MODE_P (GET_MODE (x)))
5699	{
5700	  *total = COSTS_N_INSNS (4);
5701	  return true;
5702	}
5703      /* FALLTHRU */
5704
5705    case ASHIFT:
5706    case ASHIFTRT:
5707    case LSHIFTRT:
5708      *total = COSTS_N_INSNS (1);
5709      return true;
5710
5711    case DIV:
5712    case UDIV:
5713    case MOD:
5714    case UMOD:
5715      /* We make divide expensive, so that divide-by-constant will be
5716         optimized to a multiply.  */
5717      *total = COSTS_N_INSNS (60);
5718      return true;
5719
5720    default:
5721      return false;
5722    }
5723}
5724
5725/* Calculate the cost of moving data from a register in class FROM to
5726   one in class TO, using MODE.  */
5727
5728static int
5729ia64_register_move_cost (machine_mode mode, reg_class_t from,
5730			 reg_class_t to)
5731{
5732  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5733  if (to == ADDL_REGS)
5734    to = GR_REGS;
5735  if (from == ADDL_REGS)
5736    from = GR_REGS;
5737
5738  /* All costs are symmetric, so reduce cases by putting the
5739     lower number class as the destination.  */
5740  if (from < to)
5741    {
5742      reg_class_t tmp = to;
5743      to = from, from = tmp;
5744    }
5745
5746  /* Moving between FR<->GR in XFmode must be more expensive than 2,
5747     so that we get secondary memory reloads.  Within FR_REGS,
5748     we have to make this at least as expensive as memory_move_cost
5749     to avoid spectacularly poor register class preferencing.  */
5750  if (mode == XFmode || mode == RFmode)
5751    {
5752      if (to != GR_REGS || from != GR_REGS)
5753        return memory_move_cost (mode, to, false);
5754      else
5755	return 3;
5756    }
5757
5758  switch (to)
5759    {
5760    case PR_REGS:
5761      /* Moving between PR registers takes two insns.  */
5762      if (from == PR_REGS)
5763	return 3;
5764      /* Moving between PR and anything but GR is impossible.  */
5765      if (from != GR_REGS)
5766	return memory_move_cost (mode, to, false);
5767      break;
5768
5769    case BR_REGS:
5770      /* Moving between BR and anything but GR is impossible.  */
5771      if (from != GR_REGS && from != GR_AND_BR_REGS)
5772	return memory_move_cost (mode, to, false);
5773      break;
5774
5775    case AR_I_REGS:
5776    case AR_M_REGS:
5777      /* Moving between AR and anything but GR is impossible.  */
5778      if (from != GR_REGS)
5779	return memory_move_cost (mode, to, false);
5780      break;
5781
5782    case GR_REGS:
5783    case FR_REGS:
5784    case FP_REGS:
5785    case GR_AND_FR_REGS:
5786    case GR_AND_BR_REGS:
5787    case ALL_REGS:
5788      break;
5789
5790    default:
5791      gcc_unreachable ();
5792    }
5793
5794  return 2;
5795}
5796
5797/* Calculate the cost of moving data of MODE between a register of class
5798   RCLASS and memory.  */
5799
5800static int
5801ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5802		       reg_class_t rclass,
5803		       bool in ATTRIBUTE_UNUSED)
5804{
5805  if (rclass == GENERAL_REGS
5806      || rclass == FR_REGS
5807      || rclass == FP_REGS
5808      || rclass == GR_AND_FR_REGS)
5809    return 4;
5810  else
5811    return 10;
5812}
5813
5814/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5815   on RCLASS to use when copying X into that class.  */
5816
5817static reg_class_t
5818ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5819{
5820  switch (rclass)
5821    {
5822    case FR_REGS:
5823    case FP_REGS:
5824      /* Don't allow volatile mem reloads into floating point registers.
5825	 This is defined to force reload to choose the r/m case instead
5826	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5827      if (MEM_P (x) && MEM_VOLATILE_P (x))
5828	return NO_REGS;
5829
5830      /* Force all unrecognized constants into the constant pool.  */
5831      if (CONSTANT_P (x))
5832	return NO_REGS;
5833      break;
5834
5835    case AR_M_REGS:
5836    case AR_I_REGS:
5837      if (!OBJECT_P (x))
5838	return NO_REGS;
5839      break;
5840
5841    default:
5842      break;
5843    }
5844
5845  return rclass;
5846}
5847
5848/* This function returns the register class required for a secondary
5849   register when copying between one of the registers in RCLASS, and X,
5850   using MODE.  A return value of NO_REGS means that no secondary register
5851   is required.  */
5852
5853enum reg_class
5854ia64_secondary_reload_class (enum reg_class rclass,
5855			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5856{
5857  int regno = -1;
5858
5859  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5860    regno = true_regnum (x);
5861
5862  switch (rclass)
5863    {
5864    case BR_REGS:
5865    case AR_M_REGS:
5866    case AR_I_REGS:
5867      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5868	 interaction.  We end up with two pseudos with overlapping lifetimes
5869	 both of which are equiv to the same constant, and both of which need
5870	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5871	 changes depending on the path length, which means the qty_first_reg
5872	 check in make_regs_eqv can give different answers at different times.
5873	 At some point I'll probably need a reload_indi pattern to handle
5874	 this.
5875
5876	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5877	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5878	 non-general registers for good measure.  */
5879      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5880	return GR_REGS;
5881
5882      /* This is needed if a pseudo used as a call_operand gets spilled to a
5883	 stack slot.  */
5884      if (GET_CODE (x) == MEM)
5885	return GR_REGS;
5886      break;
5887
5888    case FR_REGS:
5889    case FP_REGS:
5890      /* Need to go through general registers to get to other class regs.  */
5891      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5892	return GR_REGS;
5893
5894      /* This can happen when a paradoxical subreg is an operand to the
5895	 muldi3 pattern.  */
5896      /* ??? This shouldn't be necessary after instruction scheduling is
5897	 enabled, because paradoxical subregs are not accepted by
5898	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5899	 stop the paradoxical subreg stupidity in the *_operand functions
5900	 in recog.c.  */
5901      if (GET_CODE (x) == MEM
5902	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5903	      || GET_MODE (x) == QImode))
5904	return GR_REGS;
5905
5906      /* This can happen because of the ior/and/etc patterns that accept FP
5907	 registers as operands.  If the third operand is a constant, then it
5908	 needs to be reloaded into a FP register.  */
5909      if (GET_CODE (x) == CONST_INT)
5910	return GR_REGS;
5911
5912      /* This can happen because of register elimination in a muldi3 insn.
5913	 E.g. `26107 * (unsigned long)&u'.  */
5914      if (GET_CODE (x) == PLUS)
5915	return GR_REGS;
5916      break;
5917
5918    case PR_REGS:
5919      /* ??? This happens if we cse/gcse a BImode value across a call,
5920	 and the function has a nonlocal goto.  This is because global
5921	 does not allocate call crossing pseudos to hard registers when
5922	 crtl->has_nonlocal_goto is true.  This is relatively
5923	 common for C++ programs that use exceptions.  To reproduce,
5924	 return NO_REGS and compile libstdc++.  */
5925      if (GET_CODE (x) == MEM)
5926	return GR_REGS;
5927
5928      /* This can happen when we take a BImode subreg of a DImode value,
5929	 and that DImode value winds up in some non-GR register.  */
5930      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5931	return GR_REGS;
5932      break;
5933
5934    default:
5935      break;
5936    }
5937
5938  return NO_REGS;
5939}
5940
5941
5942/* Implement targetm.unspec_may_trap_p hook.  */
5943static int
5944ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5945{
5946  switch (XINT (x, 1))
5947    {
5948    case UNSPEC_LDA:
5949    case UNSPEC_LDS:
5950    case UNSPEC_LDSA:
5951    case UNSPEC_LDCCLR:
5952    case UNSPEC_CHKACLR:
5953    case UNSPEC_CHKS:
5954      /* These unspecs are just wrappers.  */
5955      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5956    }
5957
5958  return default_unspec_may_trap_p (x, flags);
5959}
5960
5961
5962/* Parse the -mfixed-range= option string.  */
5963
5964static void
5965fix_range (const char *const_str)
5966{
5967  int i, first, last;
5968  char *str, *dash, *comma;
5969
5970  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5971     REG2 are either register names or register numbers.  The effect
5972     of this option is to mark the registers in the range from REG1 to
5973     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5974     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
5975
5976  i = strlen (const_str);
5977  str = (char *) alloca (i + 1);
5978  memcpy (str, const_str, i + 1);
5979
5980  while (1)
5981    {
5982      dash = strchr (str, '-');
5983      if (!dash)
5984	{
5985	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5986	  return;
5987	}
5988      *dash = '\0';
5989
5990      comma = strchr (dash + 1, ',');
5991      if (comma)
5992	*comma = '\0';
5993
5994      first = decode_reg_name (str);
5995      if (first < 0)
5996	{
5997	  warning (0, "unknown register name: %s", str);
5998	  return;
5999	}
6000
6001      last = decode_reg_name (dash + 1);
6002      if (last < 0)
6003	{
6004	  warning (0, "unknown register name: %s", dash + 1);
6005	  return;
6006	}
6007
6008      *dash = '-';
6009
6010      if (first > last)
6011	{
6012	  warning (0, "%s-%s is an empty range", str, dash + 1);
6013	  return;
6014	}
6015
6016      for (i = first; i <= last; ++i)
6017	fixed_regs[i] = call_used_regs[i] = 1;
6018
6019      if (!comma)
6020	break;
6021
6022      *comma = ',';
6023      str = comma + 1;
6024    }
6025}
6026
6027/* Implement TARGET_OPTION_OVERRIDE.  */
6028
6029static void
6030ia64_option_override (void)
6031{
6032  unsigned int i;
6033  cl_deferred_option *opt;
6034  vec<cl_deferred_option> *v
6035    = (vec<cl_deferred_option> *) ia64_deferred_options;
6036
6037  if (v)
6038    FOR_EACH_VEC_ELT (*v, i, opt)
6039      {
6040	switch (opt->opt_index)
6041	  {
6042	  case OPT_mfixed_range_:
6043	    fix_range (opt->arg);
6044	    break;
6045
6046	  default:
6047	    gcc_unreachable ();
6048	  }
6049      }
6050
6051  if (TARGET_AUTO_PIC)
6052    target_flags |= MASK_CONST_GP;
6053
6054  /* Numerous experiments show that IRA-based loop pressure
6055     calculation works better for RTL loop invariant motion on targets
6056     with enough (>= 32) registers.  It is an expensive optimization,
6057     so it is enabled only when optimizing for peak performance.  */
6058  if (optimize >= 3)
6059    flag_ira_loop_pressure = 1;
6060
6062  ia64_section_threshold = (global_options_set.x_g_switch_value
6063			    ? g_switch_value
6064			    : IA64_DEFAULT_GVALUE);
6065
6066  init_machine_status = ia64_init_machine_status;
6067
6068  if (align_functions <= 0)
6069    align_functions = 64;
6070  if (align_loops <= 0)
6071    align_loops = 32;
6072  if (TARGET_ABI_OPEN_VMS)
6073    flag_no_common = 1;
6074
6075  ia64_override_options_after_change();
6076}
6077
6078/* Implement targetm.override_options_after_change.  */
6079
6080static void
6081ia64_override_options_after_change (void)
6082{
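  /* At -O3 and above, enable the second selective scheduling pass and
     selective-scheduling-based pipelining, unless the user set these
     flags explicitly.  */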
6083  if (optimize >= 3
6084      && !global_options_set.x_flag_selective_scheduling
6085      && !global_options_set.x_flag_selective_scheduling2)
6086    {
6087      flag_selective_scheduling2 = 1;
6088      flag_sel_sched_pipelining = 1;
6089    }
6090  if (mflag_sched_control_spec == 2)
6091    {
6092      /* Control speculation is on by default for the selective scheduler,
6093         but not for the Haifa scheduler.  */
6094      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6095    }
6096  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6097    {
6098      /* FIXME: remove this once we implement breaking auto-inc insns
6099         as a transformation.  */
6100      flag_auto_inc_dec = 0;
6101    }
6102}
6103
6104/* Initialize the record of emitted frame related registers.  */
6105
6106void ia64_init_expanders (void)
6107{
6108  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6109}
6110
6111static struct machine_function *
6112ia64_init_machine_status (void)
6113{
6114  return ggc_cleared_alloc<machine_function> ();
6115}
6116
6117static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6118static enum attr_type ia64_safe_type (rtx_insn *);
6119
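/* Wrappers around the insn attribute accessors that also cope with insns
   the ia64 machine description does not recognize (asms, debug insns).  */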
6120static enum attr_itanium_class
6121ia64_safe_itanium_class (rtx_insn *insn)
6122{
6123  if (recog_memoized (insn) >= 0)
6124    return get_attr_itanium_class (insn);
6125  else if (DEBUG_INSN_P (insn))
6126    return ITANIUM_CLASS_IGNORE;
6127  else
6128    return ITANIUM_CLASS_UNKNOWN;
6129}
6130
6131static enum attr_type
6132ia64_safe_type (rtx_insn *insn)
6133{
6134  if (recog_memoized (insn) >= 0)
6135    return get_attr_type (insn);
6136  else
6137    return TYPE_UNKNOWN;
6138}
6139
6140/* The following collection of routines emit instruction group stop bits as
6141   necessary to avoid dependencies.  */
6142
6143/* Need to track some additional registers as far as serialization is
6144   concerned so we can properly handle br.call and br.ret.  We could
6145   make these registers visible to gcc, but since these registers are
6146   never explicitly used in gcc generated code, it seems wasteful to
6147   do so (plus it would make the call and return patterns needlessly
6148   complex).  */
6149#define REG_RP		(BR_REG (0))
6150#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
6151/* This is used for volatile asms which may require a stop bit immediately
6152   before and after them.  */
6153#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
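/* The 64 NaT collection bits of ar.unat are tracked individually so that
   spills and restores touching different UNaT bits need not be separated
   by a stop bit.  */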
6154#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
6155#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
6156
6157/* For each register, we keep track of how it has been written in the
6158   current instruction group.
6159
6160   If a register is written unconditionally (no qualifying predicate),
6161   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6162
6163   If a register is written if its qualifying predicate P is true, we
6164   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
6165   may be written again by the complement of P (P^1) and when this happens,
6166   WRITE_COUNT gets set to 2.
6167
6168   The result of this is that whenever an insn attempts to write a register
6169   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6170
6171   If a predicate register is written by a floating-point insn, we set
6172   WRITTEN_BY_FP to true.
6173
6174   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6175   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
6176
6177#if GCC_VERSION >= 4000
6178#define RWS_FIELD_TYPE __extension__ unsigned short
6179#else
6180#define RWS_FIELD_TYPE unsigned int
6181#endif
6182struct reg_write_state
6183{
6184  RWS_FIELD_TYPE write_count : 2;
6185  RWS_FIELD_TYPE first_pred : 10;
6186  RWS_FIELD_TYPE written_by_fp : 1;
6187  RWS_FIELD_TYPE written_by_and : 1;
6188  RWS_FIELD_TYPE written_by_or : 1;
6189};
6190
6191/* Cumulative info for the current instruction group.  */
6192struct reg_write_state rws_sum[NUM_REGS];
6193#ifdef ENABLE_CHECKING
6194/* Bitmap whether a register has been written in the current insn.  */
6195HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6196			   / HOST_BITS_PER_WIDEST_FAST_INT];
6197
6198static inline void
6199rws_insn_set (int regno)
6200{
6201  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
6202  SET_HARD_REG_BIT (rws_insn, regno);
6203}
6204
6205static inline int
6206rws_insn_test (int regno)
6207{
6208  return TEST_HARD_REG_BIT (rws_insn, regno);
6209}
6210#else
6211/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
6212unsigned char rws_insn[2];
6213
6214static inline void
6215rws_insn_set (int regno)
6216{
6217  if (regno == REG_AR_CFM)
6218    rws_insn[0] = 1;
6219  else if (regno == REG_VOLATILE)
6220    rws_insn[1] = 1;
6221}
6222
6223static inline int
6224rws_insn_test (int regno)
6225{
6226  if (regno == REG_AR_CFM)
6227    return rws_insn[0];
6228  if (regno == REG_VOLATILE)
6229    return rws_insn[1];
6230  return 0;
6231}
6232#endif
6233
6234/* Indicates whether this is the first instruction after a stop bit,
6235   in which case we don't need another stop bit.  Without this,
6236   ia64_variable_issue will die when scheduling an alloc.  */
6237static int first_instruction;
6238
6239/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6240   RTL for one instruction.  */
6241struct reg_flags
6242{
6243  unsigned int is_write : 1;	/* Is register being written?  */
6244  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
6245  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
6246  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
6247  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
6248  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
6249};
6250
6251static void rws_update (int, struct reg_flags, int);
6252static int rws_access_regno (int, struct reg_flags, int);
6253static int rws_access_reg (rtx, struct reg_flags, int);
6254static void update_set_flags (rtx, struct reg_flags *);
6255static int set_src_needs_barrier (rtx, struct reg_flags, int);
6256static int rtx_needs_barrier (rtx, struct reg_flags, int);
6257static void init_insn_group_barriers (void);
6258static int group_barrier_needed (rtx_insn *);
6259static int safe_group_barrier_needed (rtx_insn *);
6260static int in_safe_group_barrier;
6261
6262/* Update rws_sum for REGNO, which is being written by the current instruction,
6263   with predicate PRED, and associated register flags in FLAGS.  */
6264
6265static void
6266rws_update (int regno, struct reg_flags flags, int pred)
6267{
6268  if (pred)
6269    rws_sum[regno].write_count++;
6270  else
6271    rws_sum[regno].write_count = 2;
6272  rws_sum[regno].written_by_fp |= flags.is_fp;
6273  /* ??? Not tracking and/or across differing predicates.  */
6274  rws_sum[regno].written_by_and = flags.is_and;
6275  rws_sum[regno].written_by_or = flags.is_or;
6276  rws_sum[regno].first_pred = pred;
6277}
6278
6279/* Handle an access to register REGNO of type FLAGS using predicate register
6280   PRED.  Update rws_sum array.  Return 1 if this access creates
6281   a dependency with an earlier instruction in the same group.  */
6282
6283static int
6284rws_access_regno (int regno, struct reg_flags flags, int pred)
6285{
6286  int need_barrier = 0;
6287
6288  gcc_assert (regno < NUM_REGS);
6289
6290  if (! PR_REGNO_P (regno))
6291    flags.is_and = flags.is_or = 0;
6292
6293  if (flags.is_write)
6294    {
6295      int write_count;
6296
6297      rws_insn_set (regno);
6298      write_count = rws_sum[regno].write_count;
6299
6300      switch (write_count)
6301	{
6302	case 0:
6303	  /* The register has not been written yet.  */
6304	  if (!in_safe_group_barrier)
6305	    rws_update (regno, flags, pred);
6306	  break;
6307
6308	case 1:
6309	  /* The register has been written via a predicate.  Treat
6310	     it like an unconditional write and do not try to check
6311	     for a complementary predicate register in an earlier write.  */
6312	  if (flags.is_and && rws_sum[regno].written_by_and)
6313	    ;
6314	  else if (flags.is_or && rws_sum[regno].written_by_or)
6315	    ;
6316	  else
6317	    need_barrier = 1;
6318	  if (!in_safe_group_barrier)
6319	    rws_update (regno, flags, pred);
6320	  break;
6321
6322	case 2:
6323	  /* The register has been unconditionally written already.  We
6324	     need a barrier.  */
6325	  if (flags.is_and && rws_sum[regno].written_by_and)
6326	    ;
6327	  else if (flags.is_or && rws_sum[regno].written_by_or)
6328	    ;
6329	  else
6330	    need_barrier = 1;
6331	  if (!in_safe_group_barrier)
6332	    {
6333	      rws_sum[regno].written_by_and = flags.is_and;
6334	      rws_sum[regno].written_by_or = flags.is_or;
6335	    }
6336	  break;
6337
6338	default:
6339	  gcc_unreachable ();
6340	}
6341    }
6342  else
6343    {
6344      if (flags.is_branch)
6345	{
6346	  /* Branches have several RAW exceptions that allow us to avoid
6347	     barriers.  */
6348
6349	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6350	    /* RAW dependencies on branch regs are permissible as long
6351	       as the writer is a non-branch instruction.  Since we
6352	       never generate code that uses a branch register written
6353	       by a branch instruction, handling this case is
6354	       easy.  */
6355	    return 0;
6356
6357	  if (REGNO_REG_CLASS (regno) == PR_REGS
6358	      && ! rws_sum[regno].written_by_fp)
6359	    /* The predicates of a branch are available within the
6360	       same insn group as long as the predicate was written by
6361	       something other than a floating-point instruction.  */
6362	    return 0;
6363	}
6364
6365      if (flags.is_and && rws_sum[regno].written_by_and)
6366	return 0;
6367      if (flags.is_or && rws_sum[regno].written_by_or)
6368	return 0;
6369
6370      switch (rws_sum[regno].write_count)
6371	{
6372	case 0:
6373	  /* The register has not been written yet.  */
6374	  break;
6375
6376	case 1:
6377	  /* The register has been written via a predicate, assume we
6378	     need a barrier (don't check for complementary regs).  */
6379	  need_barrier = 1;
6380	  break;
6381
6382	case 2:
6383	  /* The register has been unconditionally written already.  We
6384	     need a barrier.  */
6385	  need_barrier = 1;
6386	  break;
6387
6388	default:
6389	  gcc_unreachable ();
6390	}
6391    }
6392
6393  return need_barrier;
6394}
6395
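/* Like rws_access_regno, but take a register rtx REG; a multi-word hard
   register access is handled by checking each constituent register.  */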
6396static int
6397rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6398{
6399  int regno = REGNO (reg);
6400  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
6401
6402  if (n == 1)
6403    return rws_access_regno (regno, flags, pred);
6404  else
6405    {
6406      int need_barrier = 0;
6407      while (--n >= 0)
6408	need_barrier |= rws_access_regno (regno + n, flags, pred);
6409      return need_barrier;
6410    }
6411}
6412
6413/* Examine X, which is a SET rtx, and update the register flags stored
6414   in *PFLAGS.  */
6415
6416static void
6417update_set_flags (rtx x, struct reg_flags *pflags)
6418{
6419  rtx src = SET_SRC (x);
6420
6421  switch (GET_CODE (src))
6422    {
6423    case CALL:
6424      return;
6425
6426    case IF_THEN_ELSE:
6427      /* There are four cases here:
6428	 (1) The destination is (pc), in which case this is a branch,
6429	 nothing here applies.
6430	 (2) The destination is ar.lc, in which case this is a
6431	 doloop_end_internal,
6432	 (3) The destination is an fp register, in which case this is
6433	 an fselect instruction.
6434	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6435	 this is a check load.
6436	 In all cases, nothing we do in this function applies.  */
6437      return;
6438
6439    default:
6440      if (COMPARISON_P (src)
6441	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6442	/* Set pflags->is_fp to 1 so that we know we're dealing
6443	   with a floating point comparison when processing the
6444	   destination of the SET.  */
6445	pflags->is_fp = 1;
6446
6447      /* Discover if this is a parallel comparison.  We only handle
6448	 and.orcm and or.andcm at present, since we must retain a
6449	 strict inverse on the predicate pair.  */
6450      else if (GET_CODE (src) == AND)
6451	pflags->is_and = 1;
6452      else if (GET_CODE (src) == IOR)
6453	pflags->is_or = 1;
6454
6455      break;
6456    }
6457}
6458
6459/* Subroutine of rtx_needs_barrier; this function determines whether the
6460   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6461   are as in rtx_needs_barrier.  */
6463
6464static int
6465set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6466{
6467  int need_barrier = 0;
6468  rtx dst;
6469  rtx src = SET_SRC (x);
6470
6471  if (GET_CODE (src) == CALL)
6472    /* We don't need to worry about the result registers that
6473       get written by a subroutine call.  */
6474    return rtx_needs_barrier (src, flags, pred);
6475  else if (SET_DEST (x) == pc_rtx)
6476    {
6477      /* X is a conditional branch.  */
6478      /* ??? This seems redundant, as the caller sets this bit for
6479	 all JUMP_INSNs.  */
6480      if (!ia64_spec_check_src_p (src))
6481	flags.is_branch = 1;
6482      return rtx_needs_barrier (src, flags, pred);
6483    }
6484
6485  if (ia64_spec_check_src_p (src))
6486    /* Avoid checking one register twice (in the condition
6487       and in the 'then' section) for the ldc pattern.  */
6488    {
6489      gcc_assert (REG_P (XEXP (src, 2)));
6490      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6491
6492      /* We process MEM below.  */
6493      src = XEXP (src, 1);
6494    }
6495
6496  need_barrier |= rtx_needs_barrier (src, flags, pred);
6497
6498  dst = SET_DEST (x);
6499  if (GET_CODE (dst) == ZERO_EXTRACT)
6500    {
6501      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6502      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6503    }
6504  return need_barrier;
6505}
6506
6507/* Handle an access to rtx X of type FLAGS using predicate register
6508   PRED.  Return 1 if this access creates a dependency with an earlier
6509   instruction in the same group.  */
6510
6511static int
6512rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6513{
6514  int i, j;
6515  int is_complemented = 0;
6516  int need_barrier = 0;
6517  const char *format_ptr;
6518  struct reg_flags new_flags;
6519  rtx cond;
6520
6521  if (! x)
6522    return 0;
6523
6524  new_flags = flags;
6525
6526  switch (GET_CODE (x))
6527    {
6528    case SET:
6529      update_set_flags (x, &new_flags);
6530      need_barrier = set_src_needs_barrier (x, new_flags, pred);
6531      if (GET_CODE (SET_SRC (x)) != CALL)
6532	{
6533	  new_flags.is_write = 1;
6534	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6535	}
6536      break;
6537
6538    case CALL:
6539      new_flags.is_write = 0;
6540      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6541
6542      /* Avoid multiple register writes, in case this is a pattern with
6543	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6544      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6545	{
6546	  new_flags.is_write = 1;
6547	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6548	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6549	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6550	}
6551      break;
6552
6553    case COND_EXEC:
6554      /* X is a predicated instruction.  */
6555
6556      cond = COND_EXEC_TEST (x);
6557      gcc_assert (!pred);
6558      need_barrier = rtx_needs_barrier (cond, flags, 0);
6559
6560      if (GET_CODE (cond) == EQ)
6561	is_complemented = 1;
6562      cond = XEXP (cond, 0);
6563      gcc_assert (GET_CODE (cond) == REG
6564		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6565      pred = REGNO (cond);
6566      if (is_complemented)
6567	++pred;
6568
6569      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6570      return need_barrier;
6571
6572    case CLOBBER:
6573    case USE:
6574      /* Clobber & use are for earlier compiler phases only.  */
6575      break;
6576
6577    case ASM_OPERANDS:
6578    case ASM_INPUT:
6579      /* We always emit stop bits for traditional asms.  We emit stop bits
6580	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6581      if (GET_CODE (x) != ASM_OPERANDS
6582	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6583	{
6584	  /* Avoid writing the register multiple times if we have multiple
6585	     asm outputs.  This avoids a failure in rws_access_reg.  */
6586	  if (! rws_insn_test (REG_VOLATILE))
6587	    {
6588	      new_flags.is_write = 1;
6589	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6590	    }
6591	  return 1;
6592	}
6593
6594      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6595	 We cannot just fall through here since then we would be confused
6596	 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal
6597	 usage, does not indicate a traditional asm.  */
6598
6599      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6600	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6601	  need_barrier = 1;
6602      break;
6603
6604    case PARALLEL:
6605      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6606	{
6607	  rtx pat = XVECEXP (x, 0, i);
6608	  switch (GET_CODE (pat))
6609	    {
6610	    case SET:
6611	      update_set_flags (pat, &new_flags);
6612	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6613	      break;
6614
6615	    case USE:
6616	    case CALL:
6617	    case ASM_OPERANDS:
6618	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6619	      break;
6620
6621	    case CLOBBER:
6622	      if (REG_P (XEXP (pat, 0))
6623		  && extract_asm_operands (x) != NULL_RTX
6624		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6625		{
6626		  new_flags.is_write = 1;
6627		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6628						     new_flags, pred);
6629		  new_flags = flags;
6630		}
6631	      break;
6632
6633	    case RETURN:
6634	      break;
6635
6636	    default:
6637	      gcc_unreachable ();
6638	    }
6639	}
6640      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6641	{
6642	  rtx pat = XVECEXP (x, 0, i);
6643	  if (GET_CODE (pat) == SET)
6644	    {
6645	      if (GET_CODE (SET_SRC (pat)) != CALL)
6646		{
6647		  new_flags.is_write = 1;
6648		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6649						     pred);
6650		}
6651	    }
6652	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6653	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6654	}
6655      break;
6656
6657    case SUBREG:
6658      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6659      break;
6660    case REG:
6661      if (REGNO (x) == AR_UNAT_REGNUM)
6662	{
6663	  for (i = 0; i < 64; ++i)
6664	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6665	}
6666      else
6667	need_barrier = rws_access_reg (x, flags, pred);
6668      break;
6669
6670    case MEM:
6671      /* Find the regs used in memory address computation.  */
6672      new_flags.is_write = 0;
6673      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6674      break;
6675
6676    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6677    case SYMBOL_REF:  case LABEL_REF:     case CONST:
6678      break;
6679
6680      /* Operators with side-effects.  */
6681    case POST_INC:    case POST_DEC:
6682      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6683
6684      new_flags.is_write = 0;
6685      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6686      new_flags.is_write = 1;
6687      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6688      break;
6689
6690    case POST_MODIFY:
6691      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6692
6693      new_flags.is_write = 0;
6694      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6695      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6696      new_flags.is_write = 1;
6697      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6698      break;
6699
6700      /* Handle common unary and binary ops for efficiency.  */
6701    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6702    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6703    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6704    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6705    case NE:       case EQ:      case GE:      case GT:        case LE:
6706    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6707      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6708      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6709      break;
6710
6711    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6712    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6713    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6714    case SQRT:     case FFS:		case POPCOUNT:
6715      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6716      break;
6717
6718    case VEC_SELECT:
6719      /* VEC_SELECT's second argument is a PARALLEL with integers that
6720	 describe the elements selected.  On ia64, those integers are
6721	 always constants.  Avoid walking the PARALLEL so that we don't
6722	 get confused with "normal" parallels and then die.  */
6723      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6724      break;
6725
6726    case UNSPEC:
6727      switch (XINT (x, 1))
6728	{
6729	case UNSPEC_LTOFF_DTPMOD:
6730	case UNSPEC_LTOFF_DTPREL:
6731	case UNSPEC_DTPREL:
6732	case UNSPEC_LTOFF_TPREL:
6733	case UNSPEC_TPREL:
6734	case UNSPEC_PRED_REL_MUTEX:
6735	case UNSPEC_PIC_CALL:
6736        case UNSPEC_MF:
6737        case UNSPEC_FETCHADD_ACQ:
6738        case UNSPEC_FETCHADD_REL:
6739	case UNSPEC_BSP_VALUE:
6740	case UNSPEC_FLUSHRS:
6741	case UNSPEC_BUNDLE_SELECTOR:
6742          break;
6743
6744	case UNSPEC_GR_SPILL:
6745	case UNSPEC_GR_RESTORE:
6746	  {
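	    /* Each AR.UNAT bit tracks the NaT state of one 8-byte spill
	       slot, so the bit touched here is the doubleword offset
	       modulo 64.  */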
6747	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6748	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6749
6750	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6751	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6752	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6753					      new_flags, pred);
6754	    break;
6755	  }
6756
6757	case UNSPEC_FR_SPILL:
6758	case UNSPEC_FR_RESTORE:
6759	case UNSPEC_GETF_EXP:
6760	case UNSPEC_SETF_EXP:
6761        case UNSPEC_ADDP4:
6762	case UNSPEC_FR_SQRT_RECIP_APPROX:
6763	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6764	case UNSPEC_LDA:
6765	case UNSPEC_LDS:
6766	case UNSPEC_LDS_A:
6767	case UNSPEC_LDSA:
6768	case UNSPEC_CHKACLR:
6769        case UNSPEC_CHKS:
6770	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6771	  break;
6772
6773	case UNSPEC_FR_RECIP_APPROX:
6774	case UNSPEC_SHRP:
6775	case UNSPEC_COPYSIGN:
6776	case UNSPEC_FR_RECIP_APPROX_RES:
6777	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6778	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6779	  break;
6780
6781        case UNSPEC_CMPXCHG_ACQ:
6782        case UNSPEC_CMPXCHG_REL:
6783	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6784	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6785	  break;
6786
6787	default:
6788	  gcc_unreachable ();
6789	}
6790      break;
6791
6792    case UNSPEC_VOLATILE:
6793      switch (XINT (x, 1))
6794	{
6795	case UNSPECV_ALLOC:
6796	  /* Alloc must always be the first instruction of a group.
6797	     We force this by always returning true.  */
6798	  /* ??? We might get better scheduling if we explicitly check for
6799	     input/local/output register dependencies, and modify the
6800	     scheduler so that alloc is always reordered to the start of
6801	     the current group.  We could then eliminate all of the
6802	     first_instruction code.  */
6803	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6804
6805	  new_flags.is_write = 1;
6806	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6807	  return 1;
6808
6809	case UNSPECV_SET_BSP:
6810	case UNSPECV_PROBE_STACK_RANGE:
6811	  need_barrier = 1;
6812          break;
6813
6814	case UNSPECV_BLOCKAGE:
6815	case UNSPECV_INSN_GROUP_BARRIER:
6816	case UNSPECV_BREAK:
6817	case UNSPECV_PSAC_ALL:
6818	case UNSPECV_PSAC_NORMAL:
6819	  return 0;
6820
6821	case UNSPECV_PROBE_STACK_ADDRESS:
6822	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6823	  break;
6824
6825	default:
6826	  gcc_unreachable ();
6827	}
6828      break;
6829
6830    case RETURN:
6831      new_flags.is_write = 0;
6832      need_barrier  = rws_access_regno (REG_RP, flags, pred);
6833      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6834
6835      new_flags.is_write = 1;
6836      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6837      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6838      break;
6839
6840    default:
6841      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6842      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6843	switch (format_ptr[i])
6844	  {
6845	  case '0':	/* unused field */
6846	  case 'i':	/* integer */
6847	  case 'n':	/* note */
6848	  case 'w':	/* wide integer */
6849	  case 's':	/* pointer to string */
6850	  case 'S':	/* optional pointer to string */
6851	    break;
6852
6853	  case 'e':
6854	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6855	      need_barrier = 1;
6856	    break;
6857
6858	  case 'E':
6859	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6860	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6861		need_barrier = 1;
6862	    break;
6863
6864	  default:
6865	    gcc_unreachable ();
6866	  }
6867      break;
6868    }
6869  return need_barrier;
6870}
6871
6872/* Clear out the state for group_barrier_needed at the start of a
6873   sequence of insns.  */
6874
6875static void
6876init_insn_group_barriers (void)
6877{
6878  memset (rws_sum, 0, sizeof (rws_sum));
6879  first_instruction = 1;
6880}
6881
6882/* Given the current state, determine whether a group barrier (a stop bit) is
6883   necessary before INSN.  Return nonzero if so.  This modifies the state to
6884   include the effects of INSN as a side-effect.  */
6885
6886static int
6887group_barrier_needed (rtx_insn *insn)
6888{
6889  rtx pat;
6890  int need_barrier = 0;
6891  struct reg_flags flags;
6892
6893  memset (&flags, 0, sizeof (flags));
6894  switch (GET_CODE (insn))
6895    {
6896    case NOTE:
6897    case DEBUG_INSN:
6898      break;
6899
6900    case BARRIER:
6901      /* A barrier doesn't imply an instruction group boundary.  */
6902      break;
6903
6904    case CODE_LABEL:
6905      memset (rws_insn, 0, sizeof (rws_insn));
6906      return 1;
6907
6908    case CALL_INSN:
6909      flags.is_branch = 1;
6910      flags.is_sibcall = SIBLING_CALL_P (insn);
6911      memset (rws_insn, 0, sizeof (rws_insn));
6912
6913      /* Don't bundle a call following another call.  */
6914      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6915	{
6916	  need_barrier = 1;
6917	  break;
6918	}
6919
6920      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6921      break;
6922
6923    case JUMP_INSN:
6924      if (!ia64_spec_check_p (insn))
6925	flags.is_branch = 1;
6926
6927      /* Don't bundle a jump following a call.  */
6928      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6929	{
6930	  need_barrier = 1;
6931	  break;
6932	}
6933      /* FALLTHRU */
6934
6935    case INSN:
6936      if (GET_CODE (PATTERN (insn)) == USE
6937	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6938	/* Don't care about USE and CLOBBER "insns"---those are used to
6939	   indicate to the optimizer that it shouldn't get rid of
6940	   certain operations.  */
6941	break;
6942
6943      pat = PATTERN (insn);
6944
6945      /* Ug.  Hack hacks hacked elsewhere.  */
6946      switch (recog_memoized (insn))
6947	{
6948	  /* We play dependency tricks with the epilogue in order
6949	     to get proper schedules.  Undo this for dv analysis.  */
6950	case CODE_FOR_epilogue_deallocate_stack:
6951	case CODE_FOR_prologue_allocate_stack:
6952	  pat = XVECEXP (pat, 0, 0);
6953	  break;
6954
6955	  /* The pattern we use for br.cloop confuses the code above.
6956	     The second element of the vector is representative.  */
6957	case CODE_FOR_doloop_end_internal:
6958	  pat = XVECEXP (pat, 0, 1);
6959	  break;
6960
6961	  /* Doesn't generate code.  */
6962	case CODE_FOR_pred_rel_mutex:
6963	case CODE_FOR_prologue_use:
6964	  return 0;
6965
6966	default:
6967	  break;
6968	}
6969
6970      memset (rws_insn, 0, sizeof (rws_insn));
6971      need_barrier = rtx_needs_barrier (pat, flags, 0);
6972
6973      /* Check to see if the previous instruction was a volatile
6974	 asm.  */
6975      if (! need_barrier)
6976	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6977
6978      break;
6979
6980    default:
6981      gcc_unreachable ();
6982    }
6983
6984  if (first_instruction && important_for_bundling_p (insn))
6985    {
6986      need_barrier = 0;
6987      first_instruction = 0;
6988    }
6989
6990  return need_barrier;
6991}
6992
6993/* Like group_barrier_needed, but do not clobber the current state.  */
6994
6995static int
6996safe_group_barrier_needed (rtx_insn *insn)
6997{
6998  int saved_first_instruction;
6999  int t;
7000
7001  saved_first_instruction = first_instruction;
7002  in_safe_group_barrier = 1;
7003
7004  t = group_barrier_needed (insn);
7005
7006  first_instruction = saved_first_instruction;
7007  in_safe_group_barrier = 0;
7008
7009  return t;
7010}
7011
7012/* Scan the current function and insert stop bits as necessary to
7013   eliminate dependencies.  This function assumes that a final
7014   instruction scheduling pass has been run which has already
7015   inserted most of the necessary stop bits.  This function only
7016   inserts new ones at basic block boundaries, since these are
7017   invisible to the scheduler.  */
7018
7019static void
7020emit_insn_group_barriers (FILE *dump)
7021{
7022  rtx_insn *insn;
7023  rtx_insn *last_label = 0;
7024  int insns_since_last_label = 0;
7025
7026  init_insn_group_barriers ();
7027
7028  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7029    {
7030      if (LABEL_P (insn))
7031	{
7032	  if (insns_since_last_label)
7033	    last_label = insn;
7034	  insns_since_last_label = 0;
7035	}
7036      else if (NOTE_P (insn)
7037	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7038	{
7039	  if (insns_since_last_label)
7040	    last_label = insn;
7041	  insns_since_last_label = 0;
7042	}
7043      else if (NONJUMP_INSN_P (insn)
7044	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7045	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7046	{
7047	  init_insn_group_barriers ();
7048	  last_label = 0;
7049	}
7050      else if (NONDEBUG_INSN_P (insn))
7051	{
7052	  insns_since_last_label = 1;
7053
7054	  if (group_barrier_needed (insn))
7055	    {
7056	      if (last_label)
7057		{
7058		  if (dump)
7059		    fprintf (dump, "Emitting stop before label %d\n",
7060			     INSN_UID (last_label));
7061		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7062		  insn = last_label;
7063
7064		  init_insn_group_barriers ();
7065		  last_label = 0;
7066		}
7067	    }
7068	}
7069    }
7070}
7071
7072/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7073   This function has to emit all necessary group barriers.  */
7074
7075static void
7076emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7077{
7078  rtx_insn *insn;
7079
7080  init_insn_group_barriers ();
7081
7082  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7083    {
7084      if (BARRIER_P (insn))
7085	{
7086	  rtx_insn *last = prev_active_insn (insn);
7087
7088	  if (! last)
7089	    continue;
7090	  if (JUMP_TABLE_DATA_P (last))
7091	    last = prev_active_insn (last);
7092	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7093	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7094
7095	  init_insn_group_barriers ();
7096	}
7097      else if (NONDEBUG_INSN_P (insn))
7098	{
7099	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7100	    init_insn_group_barriers ();
7101	  else if (group_barrier_needed (insn))
7102	    {
7103	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7104	      init_insn_group_barriers ();
7105	      group_barrier_needed (insn);
7106	    }
7107	}
7108    }
7109}
7110
7111
7112
7113/* Instruction scheduling support.  */
7114
7115#define NR_BUNDLES 10
7116
7117/* A list of names of all available bundles.  */
7118
7119static const char *bundle_name [NR_BUNDLES] =
7120{
7121  ".mii",
7122  ".mmi",
7123  ".mfi",
7124  ".mmf",
7125#if NR_BUNDLES == 10
7126  ".bbb",
7127  ".mbb",
7128#endif
7129  ".mib",
7130  ".mmb",
7131  ".mfb",
7132  ".mlx"
7133};
7134
7135/* Nonzero if we should insert stop bits into the schedule.  */
7136
7137int ia64_final_schedule = 0;
7138
7139/* Codes of the corresponding queried units: */
7140
7141static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7142static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7143
7144static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7145static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7146
7147static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7148
7149/* The following variable value is an insn group barrier.  */
7150
7151static rtx_insn *dfa_stop_insn;
7152
7153/* The following variable value is the last issued insn.  */
7154
7155static rtx_insn *last_scheduled_insn;
7156
7157/* The following variable value is a pointer to a DFA state used as
7158   a temporary variable.  */
7159
7160static state_t temp_dfa_state = NULL;
7161
7162/* The following variable value is the DFA state after issuing the last
7163   insn.  */
7164
7165static state_t prev_cycle_state = NULL;
7166
7167/* The following array element values are TRUE if the corresponding
7168   insn requires a stop bit to be added before it.  */
7169
7170static char *stops_p = NULL;
7171
7172/* The following variable is used to set up the above-mentioned array.  */
7173
7174static int stop_before_p = 0;
7175
7176/* The following variable value is the allocated length of the array
7177   `stops_p'. */
7178
7179static int clocks_length;
7180
7181/* The following variable value is the number of data speculations in progress.  */
7182static int pending_data_specs = 0;
7183
7184/* Number of memory references in the current and three future processor cycles.  */
7185static char mem_ops_in_group[4];
7186
7187/* The current processor cycle number (from the scheduler's point of view).  */
7188static int current_cycle;
7189
7190static rtx ia64_single_set (rtx_insn *);
7191static void ia64_emit_insn_before (rtx, rtx);
7192
7193/* Map a bundle number to its pseudo-op.  */
7194
7195const char *
7196get_bundle_name (int b)
7197{
7198  return bundle_name[b];
7199}
7200
7201
7202/* Return the maximum number of instructions a cpu can issue.  */
7203
7204static int
7205ia64_issue_rate (void)
7206{
7207  return 6;
7208}
7209
7210/* Helper function - like single_set, but look inside COND_EXEC.  */
7211
7212static rtx
7213ia64_single_set (rtx_insn *insn)
7214{
7215  rtx x = PATTERN (insn), ret;
7216  if (GET_CODE (x) == COND_EXEC)
7217    x = COND_EXEC_CODE (x);
7218  if (GET_CODE (x) == SET)
7219    return x;
7220
7221  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
7222     Although they are not classical single sets, the second set is there just
7223     to protect the first set from being moved past FP-relative stack accesses.  */
7224  switch (recog_memoized (insn))
7225    {
7226    case CODE_FOR_prologue_allocate_stack:
7227    case CODE_FOR_prologue_allocate_stack_pr:
7228    case CODE_FOR_epilogue_deallocate_stack:
7229    case CODE_FOR_epilogue_deallocate_stack_pr:
7230      ret = XVECEXP (x, 0, 0);
7231      break;
7232
7233    default:
7234      ret = single_set_2 (insn, x);
7235      break;
7236    }
7237
7238  return ret;
7239}
7240
7241/* Adjust the cost of a scheduling dependency.
7242   Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
7243   COST is the current cost, and DW is the dependency weakness.  */
7244static int
7245ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7246		    int cost, dw_t dw)
7247{
7248  enum reg_note dep_type = (enum reg_note) dep_type1;
7249  enum attr_itanium_class dep_class;
7250  enum attr_itanium_class insn_class;
7251
7252  insn_class = ia64_safe_itanium_class (insn);
7253  dep_class = ia64_safe_itanium_class (dep_insn);
7254
7255  /* Treat true memory dependencies separately.  Ignore apparent true
7256     dependence between a store and a call (a call has a SYMBOL_REF inside a MEM).  */
7257  if (dep_type == REG_DEP_TRUE
7258      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7259      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7260    return 0;
7261
7262  if (dw == MIN_DEP_WEAK)
7263    /* Store and load are likely to alias, use higher cost to avoid stall.  */
7264    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
7265  else if (dw > MIN_DEP_WEAK)
7266    {
7267      /* Store and load are less likely to alias.  */
7268      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7269	/* Assume there will be no cache conflict for floating-point data.
7270	   For integer data, L1 conflict penalty is huge (17 cycles), so we
7271	   never assume it will not cause a conflict.  */
7272	return 0;
7273      else
7274	return cost;
7275    }
7276
7277  if (dep_type != REG_DEP_OUTPUT)
7278    return cost;
7279
7280  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7281      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7282    return 0;
7283
7284  return cost;
7285}
7286
7287/* Like emit_insn_before, but skip cycle_display notes.
7288   ??? When cycle display notes are implemented, update this.  */
7289
7290static void
7291ia64_emit_insn_before (rtx insn, rtx before)
7292{
7293  emit_insn_before (insn, before);
7294}
7295
7296/* The following function marks insns that produce addresses for load
7297   and store insns.  Such insns will be placed into M slots because that
7298   decreases latency for Itanium 1 (see function
7299   `ia64_produce_address_p' and the DFA descriptions).  */
7300
7301static void
7302ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7303{
7304  rtx_insn *insn, *next, *next_tail;
7305
7306  /* Before reload, which_alternative is not set, which means that
7307     ia64_safe_itanium_class will produce wrong results for (at least)
7308     move instructions.  */
7309  if (!reload_completed)
7310    return;
7311
7312  next_tail = NEXT_INSN (tail);
7313  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7314    if (INSN_P (insn))
7315      insn->call = 0;
7316  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7317    if (INSN_P (insn)
7318	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7319      {
7320	sd_iterator_def sd_it;
7321	dep_t dep;
7322	bool has_mem_op_consumer_p = false;
7323
7324	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7325	  {
7326	    enum attr_itanium_class c;
7327
7328	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
7329	      continue;
7330
7331	    next = DEP_CON (dep);
7332	    c = ia64_safe_itanium_class (next);
7333	    if ((c == ITANIUM_CLASS_ST
7334		 || c == ITANIUM_CLASS_STF)
7335		&& ia64_st_address_bypass_p (insn, next))
7336	      {
7337		has_mem_op_consumer_p = true;
7338		break;
7339	      }
7340	    else if ((c == ITANIUM_CLASS_LD
7341		      || c == ITANIUM_CLASS_FLD
7342		      || c == ITANIUM_CLASS_FLDP)
7343		     && ia64_ld_address_bypass_p (insn, next))
7344	      {
7345		has_mem_op_consumer_p = true;
7346		break;
7347	      }
7348	  }
7349
7350	insn->call = has_mem_op_consumer_p;
7351      }
7352}
7353
7354/* We're beginning a new block.  Initialize data structures as necessary.  */
7355
7356static void
7357ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7358		 int sched_verbose ATTRIBUTE_UNUSED,
7359		 int max_ready ATTRIBUTE_UNUSED)
7360{
7361#ifdef ENABLE_CHECKING
7362  rtx_insn *insn;
7363
7364  if (!sel_sched_p () && reload_completed)
7365    for (insn = NEXT_INSN (current_sched_info->prev_head);
7366	 insn != current_sched_info->next_tail;
7367	 insn = NEXT_INSN (insn))
7368      gcc_assert (!SCHED_GROUP_P (insn));
7369#endif
7370  last_scheduled_insn = NULL;
7371  init_insn_group_barriers ();
7372
7373  current_cycle = 0;
7374  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7375}
7376
7377/* We're beginning a scheduling pass.  Check assertion.  */
7378
7379static void
7380ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7381                        int sched_verbose ATTRIBUTE_UNUSED,
7382                        int max_ready ATTRIBUTE_UNUSED)
7383{
7384  gcc_assert (pending_data_specs == 0);
7385}
7386
7387/* Scheduling pass is now finished.  Free/reset static variable.  */
7388static void
7389ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7390			  int sched_verbose ATTRIBUTE_UNUSED)
7391{
7392  gcc_assert (pending_data_specs == 0);
7393}
7394
7395/* Return TRUE if INSN is a load (either normal or speculative, but not a
7396   speculation check), FALSE otherwise.  */
7397static bool
7398is_load_p (rtx_insn *insn)
7399{
7400  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7401
7402  return
7403   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7404    && get_attr_check_load (insn) == CHECK_LOAD_NO);
7405}
7406
7407/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
7408   array (taking into account the 3-cycle cache reference postponing for
7409   stores; see the Intel Itanium 2 Reference Manual for Software Development
7410   and Optimization, section 6.7.3.1).  */
7411static void
7412record_memory_reference (rtx_insn *insn)
7413{
7414  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7415
7416  switch (insn_class) {
7417    case ITANIUM_CLASS_FLD:
7418    case ITANIUM_CLASS_LD:
7419      mem_ops_in_group[current_cycle % 4]++;
7420      break;
7421    case ITANIUM_CLASS_STF:
7422    case ITANIUM_CLASS_ST:
7423      mem_ops_in_group[(current_cycle + 3) % 4]++;
7424      break;
7425    default:;
7426  }
7427}
7428
7429/* We are about to begin issuing insns for this clock cycle.
7430   Override the default sort algorithm to better slot instructions.  */
7431
7432static int
7433ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7434			int *pn_ready, int clock_var,
7435			int reorder_type)
7436{
7437  int n_asms;
7438  int n_ready = *pn_ready;
7439  rtx_insn **e_ready = ready + n_ready;
7440  rtx_insn **insnp;
7441
7442  if (sched_verbose)
7443    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7444
7445  if (reorder_type == 0)
7446    {
7447      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7448      n_asms = 0;
7449      for (insnp = ready; insnp < e_ready; insnp++)
7450	if (insnp < e_ready)
7451	  {
7452	    rtx_insn *insn = *insnp;
7453	    enum attr_type t = ia64_safe_type (insn);
7454	    if (t == TYPE_UNKNOWN)
7455	      {
7456		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7457		    || asm_noperands (PATTERN (insn)) >= 0)
7458		  {
7459		    rtx_insn *lowest = ready[n_asms];
7460		    ready[n_asms] = insn;
7461		    *insnp = lowest;
7462		    n_asms++;
7463		  }
7464		else
7465		  {
7466		    rtx_insn *highest = ready[n_ready - 1];
7467		    ready[n_ready - 1] = insn;
7468		    *insnp = highest;
7469		    return 1;
7470		  }
7471	      }
7472	  }
7473
7474      if (n_asms < n_ready)
7475	{
7476	  /* Some normal insns to process.  Skip the asms.  */
7477	  ready += n_asms;
7478	  n_ready -= n_asms;
7479	}
7480      else if (n_ready > 0)
7481	return 1;
7482    }
7483
7484  if (ia64_final_schedule)
7485    {
7486      int deleted = 0;
7487      int nr_need_stop = 0;
7488
7489      for (insnp = ready; insnp < e_ready; insnp++)
7490	if (safe_group_barrier_needed (*insnp))
7491	  nr_need_stop++;
7492
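      /* When reordering after an issue (type 1), if every ready insn
	 would need a stop bit, don't try to issue anything else this
	 cycle.  */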
7493      if (reorder_type == 1 && n_ready == nr_need_stop)
7494	return 0;
7495      if (reorder_type == 0)
7496	return 1;
7497      insnp = e_ready;
7498      /* Move down everything that needs a stop bit, preserving
7499	 relative order.  */
7500      while (insnp-- > ready + deleted)
7501	while (insnp >= ready + deleted)
7502	  {
7503	    rtx_insn *insn = *insnp;
7504	    if (! safe_group_barrier_needed (insn))
7505	      break;
7506	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7507	    *ready = insn;
7508	    deleted++;
7509	  }
7510      n_ready -= deleted;
7511      ready += deleted;
7512    }
7513
7514  current_cycle = clock_var;
7515  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7516    {
7517      int moved = 0;
7518
7519      insnp = e_ready;
7520      /* Move down loads/stores, preserving relative order.  */
7521      while (insnp-- > ready + moved)
7522	while (insnp >= ready + moved)
7523	  {
7524	    rtx_insn *insn = *insnp;
7525	    if (! is_load_p (insn))
7526	      break;
7527	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7528	    *ready = insn;
7529	    moved++;
7530	  }
7531      n_ready -= moved;
7532      ready += moved;
7533    }
7534
7535  return 1;
7536}
7537
7538/* We are about to begin issuing insns for this clock cycle.  Override
7539   the default sort algorithm to better slot instructions.  */
7540
7541static int
7542ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7543		    int *pn_ready, int clock_var)
7544{
7545  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7546				 pn_ready, clock_var, 0);
7547}
7548
7549/* Like ia64_sched_reorder, but called after issuing each insn.
7550   Override the default sort algorithm to better slot instructions.  */
7551
7552static int
7553ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7554		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7555		     int *pn_ready, int clock_var)
7556{
7557  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7558				 clock_var, 1);
7559}
7560
7561/* We are about to issue INSN.  Return the number of insns left on the
7562   ready queue that can be issued this cycle.  */
7563
7564static int
7565ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7566		     int sched_verbose ATTRIBUTE_UNUSED,
7567		     rtx_insn *insn,
7568		     int can_issue_more ATTRIBUTE_UNUSED)
7569{
7570  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7571    /* Modulo scheduling does not extend h_i_d when emitting
7572       new instructions.  Don't use h_i_d, if we don't have to.  */
7573    {
7574      if (DONE_SPEC (insn) & BEGIN_DATA)
7575	pending_data_specs++;
7576      if (CHECK_SPEC (insn) & BEGIN_DATA)
7577	pending_data_specs--;
7578    }
7579
7580  if (DEBUG_INSN_P (insn))
7581    return 1;
7582
7583  last_scheduled_insn = insn;
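  /* Remember the DFA state after issuing this insn so that
     ia64_dfa_new_cycle can replay it when a stop bit must be inserted.  */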
7584  memcpy (prev_cycle_state, curr_state, dfa_state_size);
7585  if (reload_completed)
7586    {
7587      int needed = group_barrier_needed (insn);
7588
7589      gcc_assert (!needed);
7590      if (CALL_P (insn))
7591	init_insn_group_barriers ();
7592      stops_p [INSN_UID (insn)] = stop_before_p;
7593      stop_before_p = 0;
7594
7595      record_memory_reference (insn);
7596    }
7597  return 1;
7598}
7599
7600/* We are choosing an insn from the ready queue.  Return zero if INSN
7601   can be chosen.  */
7602
7603static int
7604ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7605{
7606  gcc_assert (insn && INSN_P (insn));
7607
7608  /* The size of the ALAT is 32.  Since we perform conservative
7609     data speculation, we keep the ALAT half empty.  */
7610  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7611    return ready_index == 0 ? -1 : 1;
7612
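  /* Apart from the ALAT limit above, never reject the insn at the head
     of the ready list.  */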
7613  if (ready_index == 0)
7614    return 0;
7615
7616  if ((!reload_completed
7617       || !safe_group_barrier_needed (insn))
7618      && (!mflag_sched_mem_insns_hard_limit
7619	  || !is_load_p (insn)
7620	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7621    return 0;
7622
7623  return 1;
7624}
7625
7626/* The following variable value is a pseudo-insn used by the DFA insn
7627   scheduler to change the DFA state when the simulated clock is
7628   increased.  */
7629
7630static rtx_insn *dfa_pre_cycle_insn;
7631
7632/* Returns 1 when a meaningful insn was scheduled between the last group
7633   barrier and LAST.  */
7634static int
7635scheduled_good_insn (rtx_insn *last)
7636{
7637  if (last && recog_memoized (last) >= 0)
7638    return 1;
7639
7640  for ( ;
7641       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7642       && !stops_p[INSN_UID (last)];
7643       last = PREV_INSN (last))
7644    /* We could hit a NOTE_INSN_DELETED here which is actually outside
7645       the ebb we're scheduling.  */
7646    if (INSN_P (last) && recog_memoized (last) >= 0)
7647      return 1;
7648
7649  return 0;
7650}
7651
7652/* We are about to begin issuing INSN.  Return nonzero if we cannot
7653   issue it on the given cycle CLOCK, and clear *SORT_P if we should not
7654   sort the ready queue on the next clock start.  */
7655
7656static int
7657ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7658		    int clock, int *sort_p)
7659{
7660  gcc_assert (insn && INSN_P (insn));
7661
7662  if (DEBUG_INSN_P (insn))
7663    return 0;
7664
7665  /* When a group barrier is needed for insn, last_scheduled_insn
7666     should be set.  */
7667  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7668              || last_scheduled_insn);
7669
7670  if ((reload_completed
7671       && (safe_group_barrier_needed (insn)
7672	   || (mflag_sched_stop_bits_after_every_cycle
7673	       && last_clock != clock
7674	       && last_scheduled_insn
7675	       && scheduled_good_insn (last_scheduled_insn))))
7676      || (last_scheduled_insn
7677	  && (CALL_P (last_scheduled_insn)
7678	      || unknown_for_bundling_p (last_scheduled_insn))))
7679    {
7680      init_insn_group_barriers ();
7681
7682      if (verbose && dump)
7683	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7684		 last_clock == clock ? " + cycle advance" : "");
7685
7686      stop_before_p = 1;
7687      current_cycle = clock;
7688      mem_ops_in_group[current_cycle % 4] = 0;
7689
7690      if (last_clock == clock)
7691	{
7692	  state_transition (curr_state, dfa_stop_insn);
7693	  if (TARGET_EARLY_STOP_BITS)
7694	    *sort_p = (last_scheduled_insn == NULL_RTX
7695		       || ! CALL_P (last_scheduled_insn));
7696	  else
7697	    *sort_p = 0;
7698	  return 1;
7699	}
7700
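      /* The clock has advanced: reconstruct the DFA state as if a stop
	 bit had ended the previous cycle (or reset it entirely after an
	 asm).  */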
7701      if (last_scheduled_insn)
7702	{
7703	  if (unknown_for_bundling_p (last_scheduled_insn))
7704	    state_reset (curr_state);
7705	  else
7706	    {
7707	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7708	      state_transition (curr_state, dfa_stop_insn);
7709	      state_transition (curr_state, dfa_pre_cycle_insn);
7710	      state_transition (curr_state, NULL);
7711	    }
7712	}
7713    }
7714  return 0;
7715}
7716
7717/* Implement targetm.sched.h_i_d_extended hook.
7718   Extend internal data structures.  */
7719static void
7720ia64_h_i_d_extended (void)
7721{
7722  if (stops_p != NULL)
7723    {
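      /* Grow stops_p with 50% slack beyond the current maximum insn uid.  */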
7724      int new_clocks_length = get_max_uid () * 3 / 2;
7725      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7726      clocks_length = new_clocks_length;
7727    }
7728}
7729
7730
7731/* This structure describes the data used by the backend to guide scheduling.
7732   When the current scheduling point is switched, this data should be saved
7733   and restored later, if the scheduler returns to this point.  */
7734struct _ia64_sched_context
7735{
7736  state_t prev_cycle_state;
7737  rtx_insn *last_scheduled_insn;
7738  struct reg_write_state rws_sum[NUM_REGS];
7739  struct reg_write_state rws_insn[NUM_REGS];
7740  int first_instruction;
7741  int pending_data_specs;
7742  int current_cycle;
7743  char mem_ops_in_group[4];
7744};
7745typedef struct _ia64_sched_context *ia64_sched_context_t;
7746
7747/* Allocates a scheduling context.  */
7748static void *
7749ia64_alloc_sched_context (void)
7750{
7751  return xmalloc (sizeof (struct _ia64_sched_context));
7752}
7753
7754/* Initializes the _SC context with clean data, if CLEAN_P, and from
7755   the global context otherwise.  */
7756static void
7757ia64_init_sched_context (void *_sc, bool clean_p)
7758{
7759  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7760
7761  sc->prev_cycle_state = xmalloc (dfa_state_size);
7762  if (clean_p)
7763    {
7764      state_reset (sc->prev_cycle_state);
7765      sc->last_scheduled_insn = NULL;
7766      memset (sc->rws_sum, 0, sizeof (rws_sum));
7767      memset (sc->rws_insn, 0, sizeof (rws_insn));
7768      sc->first_instruction = 1;
7769      sc->pending_data_specs = 0;
7770      sc->current_cycle = 0;
7771      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7772    }
7773  else
7774    {
7775      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7776      sc->last_scheduled_insn = last_scheduled_insn;
7777      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7778      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7779      sc->first_instruction = first_instruction;
7780      sc->pending_data_specs = pending_data_specs;
7781      sc->current_cycle = current_cycle;
7782      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7783    }
7784}
7785
7786/* Sets the global scheduling context to the one pointed to by _SC.  */
7787static void
7788ia64_set_sched_context (void *_sc)
7789{
7790  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7791
7792  gcc_assert (sc != NULL);
7793
7794  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7795  last_scheduled_insn = sc->last_scheduled_insn;
7796  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7797  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7798  first_instruction = sc->first_instruction;
7799  pending_data_specs = sc->pending_data_specs;
7800  current_cycle = sc->current_cycle;
7801  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7802}
7803
7804/* Clears the data in the _SC scheduling context.  */
7805static void
7806ia64_clear_sched_context (void *_sc)
7807{
7808  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7809
7810  free (sc->prev_cycle_state);
7811  sc->prev_cycle_state = NULL;
7812}
7813
7814/* Frees the _SC scheduling context.  */
7815static void
7816ia64_free_sched_context (void *_sc)
7817{
7818  gcc_assert (_sc != NULL);
7819
7820  free (_sc);
7821}
7822
7823typedef rtx (* gen_func_t) (rtx, rtx);
7824
7825/* Return a function that will generate a load of mode MODE_NO
7826   with speculation types TS.  */
7827static gen_func_t
7828get_spec_load_gen_function (ds_t ts, int mode_no)
7829{
7830  static gen_func_t gen_ld_[] = {
7831    gen_movbi,
7832    gen_movqi_internal,
7833    gen_movhi_internal,
7834    gen_movsi_internal,
7835    gen_movdi_internal,
7836    gen_movsf_internal,
7837    gen_movdf_internal,
7838    gen_movxf_internal,
7839    gen_movti_internal,
7840    gen_zero_extendqidi2,
7841    gen_zero_extendhidi2,
7842    gen_zero_extendsidi2,
7843  };
7844
7845  static gen_func_t gen_ld_a[] = {
7846    gen_movbi_advanced,
7847    gen_movqi_advanced,
7848    gen_movhi_advanced,
7849    gen_movsi_advanced,
7850    gen_movdi_advanced,
7851    gen_movsf_advanced,
7852    gen_movdf_advanced,
7853    gen_movxf_advanced,
7854    gen_movti_advanced,
7855    gen_zero_extendqidi2_advanced,
7856    gen_zero_extendhidi2_advanced,
7857    gen_zero_extendsidi2_advanced,
7858  };
7859  static gen_func_t gen_ld_s[] = {
7860    gen_movbi_speculative,
7861    gen_movqi_speculative,
7862    gen_movhi_speculative,
7863    gen_movsi_speculative,
7864    gen_movdi_speculative,
7865    gen_movsf_speculative,
7866    gen_movdf_speculative,
7867    gen_movxf_speculative,
7868    gen_movti_speculative,
7869    gen_zero_extendqidi2_speculative,
7870    gen_zero_extendhidi2_speculative,
7871    gen_zero_extendsidi2_speculative,
7872  };
7873  static gen_func_t gen_ld_sa[] = {
7874    gen_movbi_speculative_advanced,
7875    gen_movqi_speculative_advanced,
7876    gen_movhi_speculative_advanced,
7877    gen_movsi_speculative_advanced,
7878    gen_movdi_speculative_advanced,
7879    gen_movsf_speculative_advanced,
7880    gen_movdf_speculative_advanced,
7881    gen_movxf_speculative_advanced,
7882    gen_movti_speculative_advanced,
7883    gen_zero_extendqidi2_speculative_advanced,
7884    gen_zero_extendhidi2_speculative_advanced,
7885    gen_zero_extendsidi2_speculative_advanced,
7886  };
7887  static gen_func_t gen_ld_s_a[] = {
7888    gen_movbi_speculative_a,
7889    gen_movqi_speculative_a,
7890    gen_movhi_speculative_a,
7891    gen_movsi_speculative_a,
7892    gen_movdi_speculative_a,
7893    gen_movsf_speculative_a,
7894    gen_movdf_speculative_a,
7895    gen_movxf_speculative_a,
7896    gen_movti_speculative_a,
7897    gen_zero_extendqidi2_speculative_a,
7898    gen_zero_extendhidi2_speculative_a,
7899    gen_zero_extendsidi2_speculative_a,
7900  };
7901
7902  gen_func_t *gen_ld;
7903
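  /* Choose the pattern family from the speculation kinds requested in TS:
     both data and control speculation, data only, control only (with or
     without the need for a recovery block), or a plain load.  */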
7904  if (ts & BEGIN_DATA)
7905    {
7906      if (ts & BEGIN_CONTROL)
7907	gen_ld = gen_ld_sa;
7908      else
7909	gen_ld = gen_ld_a;
7910    }
7911  else if (ts & BEGIN_CONTROL)
7912    {
7913      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7914	  || ia64_needs_block_p (ts))
7915	gen_ld = gen_ld_s;
7916      else
7917	gen_ld = gen_ld_s_a;
7918    }
7919  else if (ts == 0)
7920    gen_ld = gen_ld_;
7921  else
7922    gcc_unreachable ();
7923
7924  return gen_ld[mode_no];
7925}
7926
7927/* Constants that help map 'machine_mode' to int.  */
7928enum SPEC_MODES
7929  {
7930    SPEC_MODE_INVALID = -1,
7931    SPEC_MODE_FIRST = 0,
7932    SPEC_MODE_FOR_EXTEND_FIRST = 1,
7933    SPEC_MODE_FOR_EXTEND_LAST = 3,
7934    SPEC_MODE_LAST = 8
7935  };
7936
7937enum
7938  {
7939    /* Offset to reach ZERO_EXTEND patterns.  */
7940    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7941  };
7942
7943/* Return index of the MODE.  */
7944static int
7945ia64_mode_to_int (machine_mode mode)
7946{
7947  switch (mode)
7948    {
7949    case BImode: return 0; /* SPEC_MODE_FIRST  */
7950    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7951    case HImode: return 2;
7952    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7953    case DImode: return 4;
7954    case SFmode: return 5;
7955    case DFmode: return 6;
7956    case XFmode: return 7;
7957    case TImode:
7958      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7959	 mentioned in itanium[12].md.  Predicate fp_register_operand also
7960	 needs to be defined.  Bottom line: better disable for now.  */
7961      return SPEC_MODE_INVALID;
7962    default:     return SPEC_MODE_INVALID;
7963    }
7964}
7965
7966/* Provide information about speculation capabilities.  */
7967static void
7968ia64_set_sched_flags (spec_info_t spec_info)
7969{
7970  unsigned int *flags = &(current_sched_info->flags);
7971
7972  if (*flags & SCHED_RGN
7973      || *flags & SCHED_EBB
7974      || *flags & SEL_SCHED)
7975    {
7976      int mask = 0;
7977
7978      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7979          || (mflag_sched_ar_data_spec && reload_completed))
7980	{
7981	  mask |= BEGIN_DATA;
7982
7983	  if (!sel_sched_p ()
7984	      && ((mflag_sched_br_in_data_spec && !reload_completed)
7985		  || (mflag_sched_ar_in_data_spec && reload_completed)))
7986	    mask |= BE_IN_DATA;
7987	}
7988
7989      if (mflag_sched_control_spec
7990          && (!sel_sched_p ()
7991	      || reload_completed))
7992	{
7993	  mask |= BEGIN_CONTROL;
7994
7995	  if (!sel_sched_p () && mflag_sched_in_control_spec)
7996	    mask |= BE_IN_CONTROL;
7997	}
7998
7999      spec_info->mask = mask;
8000
8001      if (mask)
8002	{
8003	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
8004
8005	  if (mask & BE_IN_SPEC)
8006	    *flags |= NEW_BBS;
8007
8008	  spec_info->flags = 0;
8009
8010	  if ((mask & CONTROL_SPEC)
8011	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8012	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8013
8014	  if (sched_verbose >= 1)
8015	    spec_info->dump = sched_dump;
8016	  else
8017	    spec_info->dump = 0;
8018
8019	  if (mflag_sched_count_spec_in_critical_path)
8020	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8021	}
8022    }
8023  else
8024    spec_info->mask = 0;
8025}
8026
8027/* If INSN is an appropriate load, return its mode index (see SPEC_MODES).
8028   Return -1 otherwise.  */
8029static int
8030get_mode_no_for_insn (rtx_insn *insn)
8031{
8032  rtx reg, mem, mode_rtx;
8033  int mode_no;
8034  bool extend_p;
8035
8036  extract_insn_cached (insn);
8037
8038  /* We use WHICH_ALTERNATIVE only after reload.  This will
8039     guarantee that reload won't touch a speculative insn.  */
8040
8041  if (recog_data.n_operands != 2)
8042    return -1;
8043
8044  reg = recog_data.operand[0];
8045  mem = recog_data.operand[1];
8046
8047  /* We should use MEM's mode since REG's mode in the presence of
8048     ZERO_EXTEND will always be DImode.  */
8049  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8050    /* Process non-speculative ld.  */
8051    {
8052      if (!reload_completed)
8053	{
8054	  /* Do not speculate into regs like ar.lc.  */
8055	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8056	    return -1;
8057
8058	  if (!MEM_P (mem))
8059	    return -1;
8060
8061	  {
8062	    rtx mem_reg = XEXP (mem, 0);
8063
8064	    if (!REG_P (mem_reg))
8065	      return -1;
8066	  }
8067
8068	  mode_rtx = mem;
8069	}
8070      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8071	{
8072	  gcc_assert (REG_P (reg) && MEM_P (mem));
8073	  mode_rtx = mem;
8074	}
8075      else
8076	return -1;
8077    }
8078  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8079	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8080	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
8081    /* Process speculative ld or ld.c.  */
8082    {
8083      gcc_assert (REG_P (reg) && MEM_P (mem));
8084      mode_rtx = mem;
8085    }
8086  else
8087    {
8088      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8089
8090      if (attr_class == ITANIUM_CLASS_CHK_A
8091	  || attr_class == ITANIUM_CLASS_CHK_S_I
8092	  || attr_class == ITANIUM_CLASS_CHK_S_F)
8093	/* Process chk.  */
8094	mode_rtx = reg;
8095      else
8096	return -1;
8097    }
8098
8099  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8100
8101  if (mode_no == SPEC_MODE_INVALID)
8102    return -1;
8103
8104  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8105
8106  if (extend_p)
8107    {
8108      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8109	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8110	return -1;
8111
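      /* Switch to the ZERO_EXTEND variant of the load pattern.  */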
8112      mode_no += SPEC_GEN_EXTEND_OFFSET;
8113    }
8114
8115  return mode_no;
8116}
8117
8118/* If X is an unspec part of a speculative load, return its code.
8119   Return -1 otherwise.  */
8120static int
8121get_spec_unspec_code (const_rtx x)
8122{
8123  if (GET_CODE (x) != UNSPEC)
8124    return -1;
8125
8126  {
8127    int code;
8128
8129    code = XINT (x, 1);
8130
8131    switch (code)
8132      {
8133      case UNSPEC_LDA:
8134      case UNSPEC_LDS:
8135      case UNSPEC_LDS_A:
8136      case UNSPEC_LDSA:
8137	return code;
8138
8139      default:
8140	return -1;
8141      }
8142  }
8143}
8144
8145/* Implement skip_rtx_p hook.  */
8146static bool
8147ia64_skip_rtx_p (const_rtx x)
8148{
8149  return get_spec_unspec_code (x) != -1;
8150}
8151
8152/* If INSN is a speculative load, return its UNSPEC code.
8153   Return -1 otherwise.  */
8154static int
8155get_insn_spec_code (const_rtx insn)
8156{
8157  rtx pat, reg, mem;
8158
8159  pat = PATTERN (insn);
8160
8161  if (GET_CODE (pat) == COND_EXEC)
8162    pat = COND_EXEC_CODE (pat);
8163
8164  if (GET_CODE (pat) != SET)
8165    return -1;
8166
8167  reg = SET_DEST (pat);
8168  if (!REG_P (reg))
8169    return -1;
8170
8171  mem = SET_SRC (pat);
8172  if (GET_CODE (mem) == ZERO_EXTEND)
8173    mem = XEXP (mem, 0);
8174
8175  return get_spec_unspec_code (mem);
8176}
8177
8178/* If INSN is a speculative load, return a ds with the speculation types.
8179   Otherwise [if INSN is a normal instruction] return 0.  */
8180static ds_t
8181ia64_get_insn_spec_ds (rtx_insn *insn)
8182{
8183  int code = get_insn_spec_code (insn);
8184
8185  switch (code)
8186    {
8187    case UNSPEC_LDA:
8188      return BEGIN_DATA;
8189
8190    case UNSPEC_LDS:
8191    case UNSPEC_LDS_A:
8192      return BEGIN_CONTROL;
8193
8194    case UNSPEC_LDSA:
8195      return BEGIN_DATA | BEGIN_CONTROL;
8196
8197    default:
8198      return 0;
8199    }
8200}
8201
8202/* If INSN is a speculative load, return a ds with the speculation types that
8203   will be checked.
8204   Otherwise [if INSN is a normal instruction] return 0.  */
8205static ds_t
8206ia64_get_insn_checked_ds (rtx_insn *insn)
8207{
8208  int code = get_insn_spec_code (insn);
8209
8210  switch (code)
8211    {
8212    case UNSPEC_LDA:
8213      return BEGIN_DATA | BEGIN_CONTROL;
8214
8215    case UNSPEC_LDS:
8216      return BEGIN_CONTROL;
8217
8218    case UNSPEC_LDS_A:
8219    case UNSPEC_LDSA:
8220      return BEGIN_DATA | BEGIN_CONTROL;
8221
8222    default:
8223      return 0;
8224    }
8225}
8226
8227/* Return a speculative load pattern for INSN with speculation types TS
8228   and mode index MODE_NO.  The pattern is built from the operands of
8229   the previously extracted INSN, and any COND_EXEC condition of INSN
8230   is preserved.  */
8231static rtx
8232ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8233{
8234  rtx pat, new_pat;
8235  gen_func_t gen_load;
8236
8237  gen_load = get_spec_load_gen_function (ts, mode_no);
8238
8239  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8240		      copy_rtx (recog_data.operand[1]));
8241
8242  pat = PATTERN (insn);
8243  if (GET_CODE (pat) == COND_EXEC)
8244    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8245				 new_pat);
8246
8247  return new_pat;
8248}
8249
8250static bool
8251insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8252			      ds_t ds ATTRIBUTE_UNUSED)
8253{
8254  return false;
8255}
8256
8257/* Implement targetm.sched.speculate_insn hook.
8258   Check if the INSN can be TS speculative.
8259   If 'no' - return -1.
8260   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8261   If current pattern of the INSN already provides TS speculation,
8262   return 0.  */
8263static int
8264ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8265{
8266  int mode_no;
8267  int res;
8268
8269  gcc_assert (!(ts & ~SPECULATIVE));
8270
8271  if (ia64_spec_check_p (insn))
8272    return -1;
8273
8274  if ((ts & BE_IN_SPEC)
8275      && !insn_can_be_in_speculative_p (insn, ts))
8276    return -1;
8277
8278  mode_no = get_mode_no_for_insn (insn);
8279
8280  if (mode_no != SPEC_MODE_INVALID)
8281    {
8282      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8283	res = 0;
8284      else
8285	{
8286	  res = 1;
8287	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8288	}
8289    }
8290  else
8291    res = -1;
8292
8293  return res;
8294}
8295
8296/* Return a function that will generate a check for speculation TS with mode
8297   MODE_NO.
8298   If a simple check is needed, pass true for SIMPLE_CHECK_P.
8299   If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
8300static gen_func_t
8301get_spec_check_gen_function (ds_t ts, int mode_no,
8302			     bool simple_check_p, bool clearing_check_p)
8303{
8304  static gen_func_t gen_ld_c_clr[] = {
8305    gen_movbi_clr,
8306    gen_movqi_clr,
8307    gen_movhi_clr,
8308    gen_movsi_clr,
8309    gen_movdi_clr,
8310    gen_movsf_clr,
8311    gen_movdf_clr,
8312    gen_movxf_clr,
8313    gen_movti_clr,
8314    gen_zero_extendqidi2_clr,
8315    gen_zero_extendhidi2_clr,
8316    gen_zero_extendsidi2_clr,
8317  };
8318  static gen_func_t gen_ld_c_nc[] = {
8319    gen_movbi_nc,
8320    gen_movqi_nc,
8321    gen_movhi_nc,
8322    gen_movsi_nc,
8323    gen_movdi_nc,
8324    gen_movsf_nc,
8325    gen_movdf_nc,
8326    gen_movxf_nc,
8327    gen_movti_nc,
8328    gen_zero_extendqidi2_nc,
8329    gen_zero_extendhidi2_nc,
8330    gen_zero_extendsidi2_nc,
8331  };
8332  static gen_func_t gen_chk_a_clr[] = {
8333    gen_advanced_load_check_clr_bi,
8334    gen_advanced_load_check_clr_qi,
8335    gen_advanced_load_check_clr_hi,
8336    gen_advanced_load_check_clr_si,
8337    gen_advanced_load_check_clr_di,
8338    gen_advanced_load_check_clr_sf,
8339    gen_advanced_load_check_clr_df,
8340    gen_advanced_load_check_clr_xf,
8341    gen_advanced_load_check_clr_ti,
8342    gen_advanced_load_check_clr_di,
8343    gen_advanced_load_check_clr_di,
8344    gen_advanced_load_check_clr_di,
8345  };
8346  static gen_func_t gen_chk_a_nc[] = {
8347    gen_advanced_load_check_nc_bi,
8348    gen_advanced_load_check_nc_qi,
8349    gen_advanced_load_check_nc_hi,
8350    gen_advanced_load_check_nc_si,
8351    gen_advanced_load_check_nc_di,
8352    gen_advanced_load_check_nc_sf,
8353    gen_advanced_load_check_nc_df,
8354    gen_advanced_load_check_nc_xf,
8355    gen_advanced_load_check_nc_ti,
8356    gen_advanced_load_check_nc_di,
8357    gen_advanced_load_check_nc_di,
8358    gen_advanced_load_check_nc_di,
8359  };
8360  static gen_func_t gen_chk_s[] = {
8361    gen_speculation_check_bi,
8362    gen_speculation_check_qi,
8363    gen_speculation_check_hi,
8364    gen_speculation_check_si,
8365    gen_speculation_check_di,
8366    gen_speculation_check_sf,
8367    gen_speculation_check_df,
8368    gen_speculation_check_xf,
8369    gen_speculation_check_ti,
8370    gen_speculation_check_di,
8371    gen_speculation_check_di,
8372    gen_speculation_check_di,
8373  };
8374
8375  gen_func_t *gen_check;
8376
8377  if (ts & BEGIN_DATA)
8378    {
8379      /* We don't need recovery because, even if this is ld.sa, the
8380	 ALAT entry will be allocated only if the NaT bit is zero.
8381	 So it is enough to use ld.c here.  */
8382
8383      if (simple_check_p)
8384	{
8385	  gcc_assert (mflag_sched_spec_ldc);
8386
8387	  if (clearing_check_p)
8388	    gen_check = gen_ld_c_clr;
8389	  else
8390	    gen_check = gen_ld_c_nc;
8391	}
8392      else
8393	{
8394	  if (clearing_check_p)
8395	    gen_check = gen_chk_a_clr;
8396	  else
8397	    gen_check = gen_chk_a_nc;
8398	}
8399    }
8400  else if (ts & BEGIN_CONTROL)
8401    {
8402      if (simple_check_p)
8403	/* We might want to use ld.sa -> ld.c instead of
8404	   ld.s -> chk.s.  */
8405	{
8406	  gcc_assert (!ia64_needs_block_p (ts));
8407
8408	  if (clearing_check_p)
8409	    gen_check = gen_ld_c_clr;
8410	  else
8411	    gen_check = gen_ld_c_nc;
8412	}
8413      else
8414	{
8415	  gen_check = gen_chk_s;
8416	}
8417    }
8418  else
8419    gcc_unreachable ();
8420
8421  gcc_assert (mode_no >= 0);
8422  return gen_check[mode_no];
8423}
8424
8425/* Return nonzero if speculation TS needs a branchy recovery check.  */
8426static bool
8427ia64_needs_block_p (ds_t ts)
8428{
8429  if (ts & BEGIN_DATA)
8430    return !mflag_sched_spec_ldc;
8431
8432  gcc_assert ((ts & BEGIN_CONTROL) != 0);
8433
8434  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8435}
8436
8437/* Generate (or regenerate) a recovery check for INSN.  */
8438static rtx
8439ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8440{
8441  rtx op1, pat, check_pat;
8442  gen_func_t gen_check;
8443  int mode_no;
8444
8445  mode_no = get_mode_no_for_insn (insn);
8446  gcc_assert (mode_no >= 0);
8447
8448  if (label)
8449    op1 = label;
8450  else
8451    {
8452      gcc_assert (!ia64_needs_block_p (ds));
8453      op1 = copy_rtx (recog_data.operand[1]);
8454    }
8455
8456  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8457					   true);
8458
8459  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8460
8461  pat = PATTERN (insn);
8462  if (GET_CODE (pat) == COND_EXEC)
8463    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8464				   check_pat);
8465
8466  return check_pat;
8467}
8468
8469/* Return nonzero if X is a branchy recovery check.  */
8470static int
8471ia64_spec_check_p (rtx x)
8472{
8473  x = PATTERN (x);
8474  if (GET_CODE (x) == COND_EXEC)
8475    x = COND_EXEC_CODE (x);
8476  if (GET_CODE (x) == SET)
8477    return ia64_spec_check_src_p (SET_SRC (x));
8478  return 0;
8479}
8480
8481/* Return nonzero if SRC belongs to a recovery check.  */
8482static int
8483ia64_spec_check_src_p (rtx src)
8484{
8485  if (GET_CODE (src) == IF_THEN_ELSE)
8486    {
8487      rtx t;
8488
8489      t = XEXP (src, 0);
8490      if (GET_CODE (t) == NE)
8491	{
8492	  t = XEXP (t, 0);
8493
8494	  if (GET_CODE (t) == UNSPEC)
8495	    {
8496	      int code;
8497
8498	      code = XINT (t, 1);
8499
8500	      if (code == UNSPEC_LDCCLR
8501		  || code == UNSPEC_LDCNC
8502		  || code == UNSPEC_CHKACLR
8503		  || code == UNSPEC_CHKANC
8504		  || code == UNSPEC_CHKS)
8505		{
8506		  gcc_assert (code != 0);
8507		  return code;
8508		}
8509	    }
8510	}
8511    }
8512  return 0;
8513}
8514
8515
8516/* The following page contains abstract data `bundle states' which are
8517   used for bundling insns (inserting nops and template generation).  */
8518
8519/* The following describes state of insn bundling.  */
8520
8521struct bundle_state
8522{
8523  /* Unique bundle state number, used to identify the state in the
8524     debugging output  */
8525  int unique_num;
8526  rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
8527  /* number of nops before and after the insn  */
8528  short before_nops_num, after_nops_num;
8529  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
8530                   insn)  */
8531  int cost;     /* cost of the state in cycles */
8532  int accumulated_insns_num; /* number of all previous insns including
8533				nops; an L insn is counted as 2 insns */
8534  int branch_deviation; /* deviation of previous branches from 3rd slots  */
8535  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8536  struct bundle_state *next;  /* next state with the same insn_num  */
8537  struct bundle_state *originator; /* originator (previous insn state)  */
8538  /* All bundle states are in the following chain.  */
8539  struct bundle_state *allocated_states_chain;
8540  /* The DFA State after issuing the insn and the nops.  */
8541  state_t dfa_state;
8542};
8543
8544/* The following maps an insn number to the corresponding bundle state.  */
8545
8546static struct bundle_state **index_to_bundle_states;
8547
8548/* The unique number of the next bundle state.  */
8549
8550static int bundle_states_num;
8551
8552/* All allocated bundle states are in the following chain.  */
8553
8554static struct bundle_state *allocated_bundle_states_chain;
8555
8556/* All allocated but not used bundle states are in the following
8557   chain.  */
8558
8559static struct bundle_state *free_bundle_state_chain;
8560
8561
8562/* The following function returns a free bundle state.  */
8563
8564static struct bundle_state *
8565get_free_bundle_state (void)
8566{
8567  struct bundle_state *result;
8568
8569  if (free_bundle_state_chain != NULL)
8570    {
8571      result = free_bundle_state_chain;
8572      free_bundle_state_chain = result->next;
8573    }
8574  else
8575    {
8576      result = XNEW (struct bundle_state);
8577      result->dfa_state = xmalloc (dfa_state_size);
8578      result->allocated_states_chain = allocated_bundle_states_chain;
8579      allocated_bundle_states_chain = result;
8580    }
8581  result->unique_num = bundle_states_num++;
8582  return result;
8583
8584}
8585
8586/* The following function frees the given bundle state.  */
8587
8588static void
8589free_bundle_state (struct bundle_state *state)
8590{
8591  state->next = free_bundle_state_chain;
8592  free_bundle_state_chain = state;
8593}
8594
8595/* Start work with abstract data `bundle states'.  */
8596
8597static void
8598initiate_bundle_states (void)
8599{
8600  bundle_states_num = 0;
8601  free_bundle_state_chain = NULL;
8602  allocated_bundle_states_chain = NULL;
8603}
8604
8605/* Finish work with abstract data `bundle states'.  */
8606
8607static void
8608finish_bundle_states (void)
8609{
8610  struct bundle_state *curr_state, *next_state;
8611
8612  for (curr_state = allocated_bundle_states_chain;
8613       curr_state != NULL;
8614       curr_state = next_state)
8615    {
8616      next_state = curr_state->allocated_states_chain;
8617      free (curr_state->dfa_state);
8618      free (curr_state);
8619    }
8620}
8621
8622/* Hashtable helpers.  */
8623
8624struct bundle_state_hasher : typed_noop_remove <bundle_state>
8625{
8626  typedef bundle_state value_type;
8627  typedef bundle_state compare_type;
8628  static inline hashval_t hash (const value_type *);
8629  static inline bool equal (const value_type *, const compare_type *);
8630};
8631
8632/* The function returns hash of BUNDLE_STATE.  */
8633
8634inline hashval_t
8635bundle_state_hasher::hash (const value_type *state)
8636{
8637  unsigned result, i;
8638
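  /* Mix every byte of the DFA state into the hash, then fold in the
     insn number.  */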
8639  for (result = i = 0; i < dfa_state_size; i++)
8640    result += (((unsigned char *) state->dfa_state) [i]
8641	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8642  return result + state->insn_num;
8643}
8644
8645/* The function returns nonzero if the bundle state keys are equal.  */
8646
8647inline bool
8648bundle_state_hasher::equal (const value_type *state1,
8649			    const compare_type *state2)
8650{
8651  return (state1->insn_num == state2->insn_num
8652	  && memcmp (state1->dfa_state, state2->dfa_state,
8653		     dfa_state_size) == 0);
8654}
8655
8656/* Hash table of the bundle states.  The key is dfa_state and insn_num
8657   of the bundle states.  */
8658
8659static hash_table<bundle_state_hasher> *bundle_state_table;
8660
8661/* The function inserts the BUNDLE_STATE into the hash table.  The
8662   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with the given key.  */
8664
8665static int
8666insert_bundle_state (struct bundle_state *bundle_state)
8667{
8668  struct bundle_state **entry_ptr;
8669
8670  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8671  if (*entry_ptr == NULL)
8672    {
8673      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8674      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8675      *entry_ptr = bundle_state;
8676      return TRUE;
8677    }
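  /* A state with the same key already exists; keep whichever of the two
     is better under the lexicographic order: smaller cost, then fewer
     accumulated insns (i.e. fewer inserted nops), then smaller branch
     deviation, then fewer stop bits in the middle of bundles.  */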
8678  else if (bundle_state->cost < (*entry_ptr)->cost
8679	   || (bundle_state->cost == (*entry_ptr)->cost
8680	       && ((*entry_ptr)->accumulated_insns_num
8681		   > bundle_state->accumulated_insns_num
8682		   || ((*entry_ptr)->accumulated_insns_num
8683		       == bundle_state->accumulated_insns_num
8684		       && ((*entry_ptr)->branch_deviation
8685			   > bundle_state->branch_deviation
8686			   || ((*entry_ptr)->branch_deviation
8687			       == bundle_state->branch_deviation
8688			       && (*entry_ptr)->middle_bundle_stops
8689			       > bundle_state->middle_bundle_stops))))))
8690
8691    {
8692      struct bundle_state temp;
8693
8694      temp = **entry_ptr;
8695      **entry_ptr = *bundle_state;
8696      (*entry_ptr)->next = temp.next;
8697      *bundle_state = temp;
8698    }
8699  return FALSE;
8700}
8701
8702/* Start work with the hash table.  */
8703
8704static void
8705initiate_bundle_state_table (void)
8706{
8707  bundle_state_table = new hash_table<bundle_state_hasher> (50);
8708}
8709
8710/* Finish work with the hash table.  */
8711
8712static void
8713finish_bundle_state_table (void)
8714{
8715  delete bundle_state_table;
8716  bundle_state_table = NULL;
8717}
8718
8719
8720
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
8723
8724static rtx_insn *ia64_nop;
8725
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If this fails, the
   function returns FALSE and frees the current state.  */
8729
8730static int
8731try_issue_nops (struct bundle_state *curr_state, int nops_num)
8732{
8733  int i;
8734
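  /* state_transition returns a negative value when the insn can be
     issued in the current state without advancing the cycle; a
     non-negative result therefore means failure here.  */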
8735  for (i = 0; i < nops_num; i++)
8736    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8737      {
8738	free_bundle_state (curr_state);
8739	return FALSE;
8740      }
8741  return TRUE;
8742}
8743
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If this fails, the
   function returns FALSE and frees the current state.  */
8747
8748static int
8749try_issue_insn (struct bundle_state *curr_state, rtx insn)
8750{
8751  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8752    {
8753      free_bundle_state (curr_state);
8754      return FALSE;
8755    }
8756  return TRUE;
8757}
8758
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If this is successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */
8765
8766static void
8767issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8768		     rtx_insn *insn, int try_bundle_end_p,
8769		     int only_bundle_end_p)
8770{
8771  struct bundle_state *curr_state;
8772
8773  curr_state = get_free_bundle_state ();
8774  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8775  curr_state->insn = insn;
8776  curr_state->insn_num = originator->insn_num + 1;
8777  curr_state->cost = originator->cost;
8778  curr_state->originator = originator;
8779  curr_state->before_nops_num = before_nops_num;
8780  curr_state->after_nops_num = 0;
8781  curr_state->accumulated_insns_num
8782    = originator->accumulated_insns_num + before_nops_num;
8783  curr_state->branch_deviation = originator->branch_deviation;
8784  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8785  gcc_assert (insn);
8786  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8787    {
8788      gcc_assert (GET_MODE (insn) != TImode);
8789      if (!try_issue_nops (curr_state, before_nops_num))
8790	return;
8791      if (!try_issue_insn (curr_state, insn))
8792	return;
8793      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8794      if (curr_state->accumulated_insns_num % 3 != 0)
8795	curr_state->middle_bundle_stops++;
8796      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8797	  && curr_state->accumulated_insns_num % 3 != 0)
8798	{
8799	  free_bundle_state (curr_state);
8800	  return;
8801	}
8802    }
8803  else if (GET_MODE (insn) != TImode)
8804    {
8805      if (!try_issue_nops (curr_state, before_nops_num))
8806	return;
8807      if (!try_issue_insn (curr_state, insn))
8808	return;
8809      curr_state->accumulated_insns_num++;
8810      gcc_assert (!unknown_for_bundling_p (insn));
8811
8812      if (ia64_safe_type (insn) == TYPE_L)
8813	curr_state->accumulated_insns_num++;
8814    }
8815  else
8816    {
8817      /* If this is an insn that must be first in a group, then don't allow
8818	 nops to be emitted before it.  Currently, alloc is the only such
8819	 supported instruction.  */
8820      /* ??? The bundling automatons should handle this for us, but they do
8821	 not yet have support for the first_insn attribute.  */
8822      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8823	{
8824	  free_bundle_state (curr_state);
8825	  return;
8826	}
8827
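      /* INSN starts a new simulated processor cycle (it has TImode):
	 advance the DFA over the cycle boundary and account for the
	 extra cycle in the state's cost.  */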
8828      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8829      state_transition (curr_state->dfa_state, NULL);
8830      curr_state->cost++;
8831      if (!try_issue_nops (curr_state, before_nops_num))
8832	return;
8833      if (!try_issue_insn (curr_state, insn))
8834	return;
8835      curr_state->accumulated_insns_num++;
8836      if (unknown_for_bundling_p (insn))
8837	{
8838	  /* Finish bundle containing asm insn.  */
8839	  curr_state->after_nops_num
8840	    = 3 - curr_state->accumulated_insns_num % 3;
8841	  curr_state->accumulated_insns_num
8842	    += 3 - curr_state->accumulated_insns_num % 3;
8843	}
8844      else if (ia64_safe_type (insn) == TYPE_L)
8845	curr_state->accumulated_insns_num++;
8846    }
8847  if (ia64_safe_type (insn) == TYPE_B)
8848    curr_state->branch_deviation
8849      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8850  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8851    {
8852      if (!only_bundle_end_p && insert_bundle_state (curr_state))
8853	{
8854	  state_t dfa_state;
8855	  struct bundle_state *curr_state1;
8856	  struct bundle_state *allocated_states_chain;
8857
8858	  curr_state1 = get_free_bundle_state ();
8859	  dfa_state = curr_state1->dfa_state;
8860	  allocated_states_chain = curr_state1->allocated_states_chain;
8861	  *curr_state1 = *curr_state;
8862	  curr_state1->dfa_state = dfa_state;
8863	  curr_state1->allocated_states_chain = allocated_states_chain;
8864	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8865		  dfa_state_size);
8866	  curr_state = curr_state1;
8867	}
8868      if (!try_issue_nops (curr_state,
8869			   3 - curr_state->accumulated_insns_num % 3))
8870	return;
8871      curr_state->after_nops_num
8872	= 3 - curr_state->accumulated_insns_num % 3;
8873      curr_state->accumulated_insns_num
8874	+= 3 - curr_state->accumulated_insns_num % 3;
8875    }
8876  if (!insert_bundle_state (curr_state))
8877    free_bundle_state (curr_state);
8878  return;
8879}
8880
/* The following function returns the current position in the two-bundle
   issue window for the given STATE: the number of the last occupied
   slot (1-6), or 0 if no slot is occupied.  */
8883
8884static int
8885get_max_pos (state_t state)
8886{
8887  if (cpu_unit_reservation_p (state, pos_6))
8888    return 6;
8889  else if (cpu_unit_reservation_p (state, pos_5))
8890    return 5;
8891  else if (cpu_unit_reservation_p (state, pos_4))
8892    return 4;
8893  else if (cpu_unit_reservation_p (state, pos_3))
8894    return 3;
8895  else if (cpu_unit_reservation_p (state, pos_2))
8896    return 2;
8897  else if (cpu_unit_reservation_p (state, pos_1))
8898    return 1;
8899  else
8900    return 0;
8901}
8902
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with the
   two position values 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued
   within a six-cycle window after reading certain application
   registers (such as ar.bsp).  Furthermore, power considerations also
   argue against the use of F-unit instructions unless they're really
   needed.  */
8912
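/* The returned code is the template number that is eventually passed
   to gen_bundle_selector; judging from the CPU unit names tested
   below, the encoding is 0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf,
   4 = .bbb, 5 = .mbb, 6 = .mib, 7 = .mmb, 8 = .mfb and 9 = .mlx.  */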
8913static int
8914get_template (state_t state, int pos)
8915{
8916  switch (pos)
8917    {
8918    case 3:
8919      if (cpu_unit_reservation_p (state, _0mmi_))
8920	return 1;
8921      else if (cpu_unit_reservation_p (state, _0mii_))
8922	return 0;
8923      else if (cpu_unit_reservation_p (state, _0mmb_))
8924	return 7;
8925      else if (cpu_unit_reservation_p (state, _0mib_))
8926	return 6;
8927      else if (cpu_unit_reservation_p (state, _0mbb_))
8928	return 5;
8929      else if (cpu_unit_reservation_p (state, _0bbb_))
8930	return 4;
8931      else if (cpu_unit_reservation_p (state, _0mmf_))
8932	return 3;
8933      else if (cpu_unit_reservation_p (state, _0mfi_))
8934	return 2;
8935      else if (cpu_unit_reservation_p (state, _0mfb_))
8936	return 8;
8937      else if (cpu_unit_reservation_p (state, _0mlx_))
8938	return 9;
8939      else
8940	gcc_unreachable ();
8941    case 6:
8942      if (cpu_unit_reservation_p (state, _1mmi_))
8943	return 1;
8944      else if (cpu_unit_reservation_p (state, _1mii_))
8945	return 0;
8946      else if (cpu_unit_reservation_p (state, _1mmb_))
8947	return 7;
8948      else if (cpu_unit_reservation_p (state, _1mib_))
8949	return 6;
8950      else if (cpu_unit_reservation_p (state, _1mbb_))
8951	return 5;
8952      else if (cpu_unit_reservation_p (state, _1bbb_))
8953	return 4;
8954      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8955	return 3;
8956      else if (cpu_unit_reservation_p (state, _1mfi_))
8957	return 2;
8958      else if (cpu_unit_reservation_p (state, _1mfb_))
8959	return 8;
8960      else if (cpu_unit_reservation_p (state, _1mlx_))
8961	return 9;
8962      else
8963	gcc_unreachable ();
8964    default:
8965      gcc_unreachable ();
8966    }
8967}
8968
8969/* True when INSN is important for bundling.  */
8970
8971static bool
8972important_for_bundling_p (rtx_insn *insn)
8973{
8974  return (INSN_P (insn)
8975	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8976	  && GET_CODE (PATTERN (insn)) != USE
8977	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8978}
8979
/* The following function returns the first insn important for insn
   bundling starting at INSN and before TAIL, or NULL if there is none.  */
8982
8983static rtx_insn *
8984get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
8985{
8986  for (; insn && insn != tail; insn = NEXT_INSN (insn))
8987    if (important_for_bundling_p (insn))
8988      return insn;
8989  return NULL;
8990}
8991
8992/* True when INSN is unknown, but important, for bundling.  */
8993
8994static bool
8995unknown_for_bundling_p (rtx_insn *insn)
8996{
8997  return (INSN_P (insn)
8998	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
8999	  && GET_CODE (PATTERN (insn)) != USE
9000	  && GET_CODE (PATTERN (insn)) != CLOBBER);
9001}
9002
9003/* Add a bundle selector TEMPLATE0 before INSN.  */
9004
9005static void
9006ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9007{
9008  rtx b = gen_bundle_selector (GEN_INT (template0));
9009
9010  ia64_emit_insn_before (b, insn);
9011#if NR_BUNDLES == 10
9012  if ((template0 == 4 || template0 == 5)
9013      && ia64_except_unwind_info (&global_options) == UI_TARGET)
9014    {
9015      int i;
9016      rtx note = NULL_RTX;
9017
9018      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9019	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
9020	 to following nops, as br.call sets rp to the address of following
9021	 bundle and therefore an EH region end must be on a bundle
9022	 boundary.  */
9023      insn = PREV_INSN (insn);
9024      for (i = 0; i < 3; i++)
9025	{
9026	  do
9027	    insn = next_active_insn (insn);
9028	  while (NONJUMP_INSN_P (insn)
9029		 && get_attr_empty (insn) == EMPTY_YES);
9030	  if (CALL_P (insn))
9031	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9032	  else if (note)
9033	    {
9034	      int code;
9035
9036	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9037			  || code == CODE_FOR_nop_b);
9038	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9039		note = NULL_RTX;
9040	      else
9041		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9042	    }
9043	}
9044    }
9045#endif
9046}
9047
9048/* The following function does insn bundling.  Bundling means
9049   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  The nondeterminism of the automaton makes it
   possible to follow all possible insn sequences very quickly.
9054
   Unfortunately it is not possible to get information about the
   inserted nop insns and the templates used from the automaton
   states.  The automaton only says that we can issue an insn,
   possibly inserting some nops before it and using some template.
   Therefore insn bundling in this function is implemented using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (which is what the NDFA describes
   for insn scheduling) before/after each insn being bundled.  We know
   the start of a simulated processor cycle from insn scheduling (an
   insn starting a new cycle has TImode).
9065
   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision
   (about inserting nops and, implicitly, about previous decisions) is
   described by structure bundle_state (see above).  If we generate
   the same bundle state (the key is the automaton state after issuing
   the insn and the nops for it), we reuse the already generated one.
   As a consequence we reject some decisions which cannot improve the
   solution and reduce the memory needed by the algorithm.
9076
   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving backwards through the EBB, insert
   templates for the best alternative.  The templates are obtained by
   querying the automaton state for each insn in the chosen bundle
   states.
9081
9082   So the algorithm makes two (forward and backward) passes through
9083   EBB.  */
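
/* A rough sketch of the forward pass implemented below (illustrative
   pseudo-code only, not compiled):

     index_to_bundle_states[0] = { empty initial state };
     for each important insn I in the EBB (insn_num = 1, 2, ...)
       for each state S in index_to_bundle_states[insn_num - 1]
	 for nops in { 2, 1, 0 }   (2 only for F, B, L and S insns)
	   try to issue NOPS nops and then I starting from S;
	   if the DFA accepts, record the resulting state in
	   index_to_bundle_states[insn_num], keeping only the best
	   state for each (dfa_state, insn_num) key.  */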
9084
9085static void
9086bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9087{
9088  struct bundle_state *curr_state, *next_state, *best_state;
9089  rtx_insn *insn, *next_insn;
9090  int insn_num;
9091  int i, bundle_end_p, only_bundle_end_p, asm_p;
9092  int pos = 0, max_pos, template0, template1;
9093  rtx_insn *b;
9094  enum attr_type type;
9095
9096  insn_num = 0;
9097  /* Count insns in the EBB.  */
9098  for (insn = NEXT_INSN (prev_head_insn);
9099       insn && insn != tail;
9100       insn = NEXT_INSN (insn))
9101    if (INSN_P (insn))
9102      insn_num++;
9103  if (insn_num == 0)
9104    return;
9105  bundling_p = 1;
9106  dfa_clean_insn_cache ();
9107  initiate_bundle_state_table ();
9108  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9109  /* First (forward) pass -- generation of bundle states.  */
9110  curr_state = get_free_bundle_state ();
9111  curr_state->insn = NULL;
9112  curr_state->before_nops_num = 0;
9113  curr_state->after_nops_num = 0;
9114  curr_state->insn_num = 0;
9115  curr_state->cost = 0;
9116  curr_state->accumulated_insns_num = 0;
9117  curr_state->branch_deviation = 0;
9118  curr_state->middle_bundle_stops = 0;
9119  curr_state->next = NULL;
9120  curr_state->originator = NULL;
9121  state_reset (curr_state->dfa_state);
9122  index_to_bundle_states [0] = curr_state;
9123  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
9125  for (insn = NEXT_INSN (prev_head_insn);
9126       insn != tail;
9127       insn = NEXT_INSN (insn))
9128    if (INSN_P (insn)
9129	&& !important_for_bundling_p (insn)
9130	&& GET_MODE (insn) == TImode)
9131      {
9132	PUT_MODE (insn, VOIDmode);
9133	for (next_insn = NEXT_INSN (insn);
9134	     next_insn != tail;
9135	     next_insn = NEXT_INSN (next_insn))
9136	  if (important_for_bundling_p (next_insn)
9137	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9138	    {
9139	      PUT_MODE (next_insn, TImode);
9140	      break;
9141	    }
9142      }
9143  /* Forward pass: generation of bundle states.  */
9144  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9145       insn != NULL_RTX;
9146       insn = next_insn)
9147    {
9148      gcc_assert (important_for_bundling_p (insn));
9149      type = ia64_safe_type (insn);
9150      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9151      insn_num++;
9152      index_to_bundle_states [insn_num] = NULL;
9153      for (curr_state = index_to_bundle_states [insn_num - 1];
9154	   curr_state != NULL;
9155	   curr_state = next_state)
9156	{
9157	  pos = curr_state->accumulated_insns_num % 3;
9158	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
9162	  only_bundle_end_p
9163	    = (next_insn != NULL_RTX
9164	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9165	       && unknown_for_bundling_p (next_insn));
9166	  /* We may fill up the current bundle if it is the cycle end
9167	     without a group barrier.  */
9168	  bundle_end_p
9169	    = (only_bundle_end_p || next_insn == NULL_RTX
9170	       || (GET_MODE (next_insn) == TImode
9171		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9172	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9173	      || type == TYPE_S)
9174	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9175				 only_bundle_end_p);
9176	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9177			       only_bundle_end_p);
9178	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9179			       only_bundle_end_p);
9180	}
9181      gcc_assert (index_to_bundle_states [insn_num]);
9182      for (curr_state = index_to_bundle_states [insn_num];
9183	   curr_state != NULL;
9184	   curr_state = curr_state->next)
9185	if (verbose >= 2 && dump)
9186	  {
9187	    /* This structure is taken from generated code of the
9188	       pipeline hazard recognizer (see file insn-attrtab.c).
9189	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
9191	    struct DFA_chip
9192	    {
9193	      unsigned short one_automaton_state;
9194	      unsigned short oneb_automaton_state;
9195	      unsigned short two_automaton_state;
9196	      unsigned short twob_automaton_state;
9197	    };
9198
9199	    fprintf
9200	      (dump,
9201	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9202	       curr_state->unique_num,
9203	       (curr_state->originator == NULL
9204		? -1 : curr_state->originator->unique_num),
9205	       curr_state->cost,
9206	       curr_state->before_nops_num, curr_state->after_nops_num,
9207	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
9208	       curr_state->middle_bundle_stops,
9209	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9210	       INSN_UID (insn));
9211	  }
9212    }
9213
  /* We should find a solution because the second insn scheduling pass
     has found one.  */
9216  gcc_assert (index_to_bundle_states [insn_num]);
9217  /* Find a state corresponding to the best insn sequence.  */
9218  best_state = NULL;
9219  for (curr_state = index_to_bundle_states [insn_num];
9220       curr_state != NULL;
9221       curr_state = curr_state->next)
    /* We consider only the states whose last bundle is completely
       filled.  Among those we prefer, first, insn sequences with
       minimal cost, then sequences with the fewest inserted nops, and
       finally sequences with branch insns placed in 3rd slots.  */
9226    if (curr_state->accumulated_insns_num % 3 == 0
9227	&& (best_state == NULL || best_state->cost > curr_state->cost
9228	    || (best_state->cost == curr_state->cost
9229		&& (curr_state->accumulated_insns_num
9230		    < best_state->accumulated_insns_num
9231		    || (curr_state->accumulated_insns_num
9232			== best_state->accumulated_insns_num
9233			&& (curr_state->branch_deviation
9234			    < best_state->branch_deviation
9235			    || (curr_state->branch_deviation
9236				== best_state->branch_deviation
9237				&& curr_state->middle_bundle_stops
9238				< best_state->middle_bundle_stops)))))))
9239      best_state = curr_state;
9240  /* Second (backward) pass: adding nops and templates.  */
9241  gcc_assert (best_state);
9242  insn_num = best_state->before_nops_num;
9243  template0 = template1 = -1;
9244  for (curr_state = best_state;
9245       curr_state->originator != NULL;
9246       curr_state = curr_state->originator)
9247    {
9248      insn = curr_state->insn;
9249      asm_p = unknown_for_bundling_p (insn);
9250      insn_num++;
9251      if (verbose >= 2 && dump)
9252	{
9253	  struct DFA_chip
9254	  {
9255	    unsigned short one_automaton_state;
9256	    unsigned short oneb_automaton_state;
9257	    unsigned short two_automaton_state;
9258	    unsigned short twob_automaton_state;
9259	  };
9260
9261	  fprintf
9262	    (dump,
9263	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9264	     curr_state->unique_num,
9265	     (curr_state->originator == NULL
9266	      ? -1 : curr_state->originator->unique_num),
9267	     curr_state->cost,
9268	     curr_state->before_nops_num, curr_state->after_nops_num,
9269	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
9270	     curr_state->middle_bundle_stops,
9271	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9272	     INSN_UID (insn));
9273	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
9277      max_pos = get_max_pos (curr_state->dfa_state);
9278      if (max_pos == 6
9279	  /* The following (negative template number) means that the
9280	     processor did one bundle rotation.  */
9281	  || (max_pos == 3 && template0 < 0))
9282	{
9283	  /* We are at the end of the window -- find template(s) for
9284	     its bundle(s).  */
9285	  pos = max_pos;
9286	  if (max_pos == 3)
9287	    template0 = get_template (curr_state->dfa_state, 3);
9288	  else
9289	    {
9290	      template1 = get_template (curr_state->dfa_state, 3);
9291	      template0 = get_template (curr_state->dfa_state, 6);
9292	    }
9293	}
9294      if (max_pos > 3 && template1 < 0)
	/* This may happen when we have a stop inside a bundle.  */
9296	{
9297	  gcc_assert (pos <= 3);
9298	  template1 = get_template (curr_state->dfa_state, 3);
9299	  pos += 3;
9300	}
9301      if (!asm_p)
9302	/* Emit nops after the current insn.  */
9303	for (i = 0; i < curr_state->after_nops_num; i++)
9304	  {
9305	    rtx nop_pat = gen_nop ();
9306	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
9307	    pos--;
9308	    gcc_assert (pos >= 0);
9309	    if (pos % 3 == 0)
9310	      {
9311		/* We are at the start of a bundle: emit the template
9312		   (it should be defined).  */
9313		gcc_assert (template0 >= 0);
9314		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two-bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (a negative value).  */
9318		template0 = template1;
9319		template1 = -1;
9320	      }
9321	  }
      /* Move the position backwards in the window.  A group barrier has
	 no slot.  An asm insn takes a whole bundle.  */
9324      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9325	  && !unknown_for_bundling_p (insn))
9326	pos--;
      /* A long insn takes 2 slots.  */
9328      if (ia64_safe_type (insn) == TYPE_L)
9329	pos--;
9330      gcc_assert (pos >= 0);
9331      if (pos % 3 == 0
9332	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9333	  && !unknown_for_bundling_p (insn))
9334	{
9335	  /* The current insn is at the bundle start: emit the
9336	     template.  */
9337	  gcc_assert (template0 >= 0);
9338	  ia64_add_bundle_selector_before (template0, insn);
9339	  b = PREV_INSN (insn);
9340	  insn = b;
9341	  /* See comment above in analogous place for emitting nops
9342	     after the insn.  */
9343	  template0 = template1;
9344	  template1 = -1;
9345	}
      /* Emit nops before the current insn.  */
9347      for (i = 0; i < curr_state->before_nops_num; i++)
9348	{
9349	  rtx nop_pat = gen_nop ();
9350	  ia64_emit_insn_before (nop_pat, insn);
9351	  rtx_insn *nop = PREV_INSN (insn);
9352	  insn = nop;
9353	  pos--;
9354	  gcc_assert (pos >= 0);
9355	  if (pos % 3 == 0)
9356	    {
9357	      /* See comment above in analogous place for emitting nops
9358		 after the insn.  */
9359	      gcc_assert (template0 >= 0);
9360	      ia64_add_bundle_selector_before (template0, insn);
9361	      b = PREV_INSN (insn);
9362	      insn = b;
9363	      template0 = template1;
9364	      template1 = -1;
9365	    }
9366	}
9367    }
9368
9369#ifdef ENABLE_CHECKING
9370  {
9371    /* Assert right calculation of middle_bundle_stops.  */
9372    int num = best_state->middle_bundle_stops;
9373    bool start_bundle = true, end_bundle = false;
9374
9375    for (insn = NEXT_INSN (prev_head_insn);
9376	 insn && insn != tail;
9377	 insn = NEXT_INSN (insn))
9378      {
9379	if (!INSN_P (insn))
9380	  continue;
9381	if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9382	  start_bundle = true;
9383	else
9384	  {
9385	    rtx_insn *next_insn;
9386
9387	    for (next_insn = NEXT_INSN (insn);
9388		 next_insn && next_insn != tail;
9389		 next_insn = NEXT_INSN (next_insn))
9390	      if (INSN_P (next_insn)
9391		  && (ia64_safe_itanium_class (next_insn)
9392		      != ITANIUM_CLASS_IGNORE
9393		      || recog_memoized (next_insn)
9394		      == CODE_FOR_bundle_selector)
9395		  && GET_CODE (PATTERN (next_insn)) != USE
9396		  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9397		break;
9398
9399	    end_bundle = next_insn == NULL_RTX
9400	     || next_insn == tail
9401	     || (INSN_P (next_insn)
9402		 && recog_memoized (next_insn)
9403		 == CODE_FOR_bundle_selector);
9404	    if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9405		&& !start_bundle && !end_bundle
9406		&& next_insn
9407		&& !unknown_for_bundling_p (next_insn))
9408	      num--;
9409
9410	    start_bundle = false;
9411	  }
9412      }
9413
9414    gcc_assert (num == 0);
9415  }
9416#endif
9417
9418  free (index_to_bundle_states);
9419  finish_bundle_state_table ();
9420  bundling_p = 0;
9421  dfa_clean_insn_cache ();
9422}
9423
9424/* The following function is called at the end of scheduling BB or
9425   EBB.  After reload, it inserts stop bits and does insn bundling.  */
9426
9427static void
9428ia64_sched_finish (FILE *dump, int sched_verbose)
9429{
9430  if (sched_verbose)
9431    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;

  final_emit_insn_group_barriers (dump);
  bundling (dump, sched_verbose, current_sched_info->prev_head,
	    current_sched_info->next_tail);
  if (sched_verbose && dump)
    fprintf (dump, "//    finishing %d-%d\n",
	     INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
	     INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9446}
9447
/* The following function inserts stop bits in the scheduled BB or EBB.  */
9449
9450static void
9451final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9452{
9453  rtx_insn *insn;
9454  int need_barrier_p = 0;
9455  int seen_good_insn = 0;
9456
9457  init_insn_group_barriers ();
9458
9459  for (insn = NEXT_INSN (current_sched_info->prev_head);
9460       insn != current_sched_info->next_tail;
9461       insn = NEXT_INSN (insn))
9462    {
9463      if (BARRIER_P (insn))
9464	{
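	  /* A barrier follows an unconditional transfer of control:
	     make sure the last active insn before it is followed by a
	     stop bit, then restart the group-barrier tracking.  */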
9465	  rtx_insn *last = prev_active_insn (insn);
9466
9467	  if (! last)
9468	    continue;
9469	  if (JUMP_TABLE_DATA_P (last))
9470	    last = prev_active_insn (last);
9471	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9472	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9473
9474	  init_insn_group_barriers ();
9475	  seen_good_insn = 0;
9476	  need_barrier_p = 0;
9477	}
9478      else if (NONDEBUG_INSN_P (insn))
9479	{
9480	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9481	    {
9482	      init_insn_group_barriers ();
9483	      seen_good_insn = 0;
9484	      need_barrier_p = 0;
9485	    }
9486	  else if (need_barrier_p || group_barrier_needed (insn)
9487		   || (mflag_sched_stop_bits_after_every_cycle
9488		       && GET_MODE (insn) == TImode
9489		       && seen_good_insn))
9490	    {
9491	      if (TARGET_EARLY_STOP_BITS)
9492		{
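		  /* When early stop bits are allowed, try to place the
		     stop bit earlier: walk back to the insn that starts
		     the current cycle, emit the group barrier after the
		     last active insn before it, and then replay the
		     group-barrier bookkeeping for the insns up to INSN.  */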
9493		  rtx_insn *last;
9494
9495		  for (last = insn;
9496		       last != current_sched_info->prev_head;
9497		       last = PREV_INSN (last))
9498		    if (INSN_P (last) && GET_MODE (last) == TImode
9499			&& stops_p [INSN_UID (last)])
9500		      break;
9501		  if (last == current_sched_info->prev_head)
9502		    last = insn;
9503		  last = prev_active_insn (last);
9504		  if (last
9505		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9506		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9507				     last);
9508		  init_insn_group_barriers ();
9509		  for (last = NEXT_INSN (last);
9510		       last != insn;
9511		       last = NEXT_INSN (last))
9512		    if (INSN_P (last))
9513		      {
9514			group_barrier_needed (last);
9515			if (recog_memoized (last) >= 0
9516			    && important_for_bundling_p (last))
9517			  seen_good_insn = 1;
9518		      }
9519		}
9520	      else
9521		{
9522		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9523				    insn);
9524		  init_insn_group_barriers ();
9525		  seen_good_insn = 0;
9526		}
9527	      group_barrier_needed (insn);
9528	      if (recog_memoized (insn) >= 0
9529		  && important_for_bundling_p (insn))
9530		seen_good_insn = 1;
9531	    }
9532	  else if (recog_memoized (insn) >= 0
9533		   && important_for_bundling_p (insn))
9534	    seen_good_insn = 1;
9535	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9536	}
9537    }
9538}
9539
9540
9541
/* The following function returns the lookahead depth used by the
   first cycle multipass DFA insn scheduler.  */
9544
9545static int
9546ia64_first_cycle_multipass_dfa_lookahead (void)
9547{
9548  return (reload_completed ? 6 : 4);
9549}
9550
/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn' and the DFA state buffers used by the schedulers.  */
9552
9553static void
9554ia64_init_dfa_pre_cycle_insn (void)
9555{
9556  if (temp_dfa_state == NULL)
9557    {
9558      dfa_state_size = state_size ();
9559      temp_dfa_state = xmalloc (dfa_state_size);
9560      prev_cycle_state = xmalloc (dfa_state_size);
9561    }
9562  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9563  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9564  recog_memoized (dfa_pre_cycle_insn);
9565  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9566  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9567  recog_memoized (dfa_stop_insn);
9568}
9569
9570/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9571   used by the DFA insn scheduler.  */
9572
9573static rtx
9574ia64_dfa_pre_cycle_insn (void)
9575{
9576  return dfa_pre_cycle_insn;
9577}
9578
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */
9581
9582int
9583ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9584{
9585  rtx dest, reg, mem;
9586
9587  gcc_assert (producer && consumer);
9588  dest = ia64_single_set (producer);
9589  gcc_assert (dest);
9590  reg = SET_DEST (dest);
9591  gcc_assert (reg);
9592  if (GET_CODE (reg) == SUBREG)
9593    reg = SUBREG_REG (reg);
9594  gcc_assert (GET_CODE (reg) == REG);
9595
9596  dest = ia64_single_set (consumer);
9597  gcc_assert (dest);
9598  mem = SET_DEST (dest);
9599  gcc_assert (mem && GET_CODE (mem) == MEM);
9600  return reg_mentioned_p (reg, mem);
9601}
9602
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */
9605
9606int
9607ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9608{
9609  rtx dest, src, reg, mem;
9610
9611  gcc_assert (producer && consumer);
9612  dest = ia64_single_set (producer);
9613  gcc_assert (dest);
9614  reg = SET_DEST (dest);
9615  gcc_assert (reg);
9616  if (GET_CODE (reg) == SUBREG)
9617    reg = SUBREG_REG (reg);
9618  gcc_assert (GET_CODE (reg) == REG);
9619
9620  src = ia64_single_set (consumer);
9621  gcc_assert (src);
9622  mem = SET_SRC (src);
9623  gcc_assert (mem);
9624
9625  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9626    mem = XVECEXP (mem, 0, 0);
9627  else if (GET_CODE (mem) == IF_THEN_ELSE)
9628    /* ??? Is this bypass necessary for ld.c?  */
9629    {
9630      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9631      mem = XEXP (mem, 1);
9632    }
9633
9634  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9635    mem = XEXP (mem, 0);
9636
9637  if (GET_CODE (mem) == UNSPEC)
9638    {
9639      int c = XINT (mem, 1);
9640
9641      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9642		  || c == UNSPEC_LDSA);
9643      mem = XVECEXP (mem, 0, 0);
9644    }
9645
9646  /* Note that LO_SUM is used for GOT loads.  */
9647  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9648
9649  return reg_mentioned_p (reg, mem);
9650}
9651
/* The following function returns TRUE if INSN produces the address for
   a load/store insn.  We will place such insns into an M slot because
   that decreases their latency.  */
9655
9656int
9657ia64_produce_address_p (rtx insn)
9658{
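  /* The insn's `call' flag is reused here as a scratch marker for
     address producers; it is presumably set by the scheduler
     dependency-evaluation hook elsewhere in this file.  */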
9659  return insn->call;
9660}
9661
9662
9663/* Emit pseudo-ops for the assembler to describe predicate relations.
9664   At present this assumes that we only consider predicate pairs to
9665   be mutex, and that the assembler can deduce proper values from
9666   straight-line code.  */
9667
9668static void
9669emit_predicate_relation_info (void)
9670{
9671  basic_block bb;
9672
9673  FOR_EACH_BB_REVERSE_FN (bb, cfun)
9674    {
9675      int r;
9676      rtx_insn *head = BB_HEAD (bb);
9677
9678      /* We only need such notes at code labels.  */
9679      if (! LABEL_P (head))
9680	continue;
9681      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9682	head = NEXT_INSN (head);
9683
9684      /* Skip p0, which may be thought to be live due to (reg:DI p0)
9685	 grabbing the entire block of predicate registers.  */
9686      for (r = PR_REG (2); r < PR_REG (64); r += 2)
9687	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9688	  {
9689	    rtx p = gen_rtx_REG (BImode, r);
9690	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9691	    if (head == BB_END (bb))
9692	      BB_END (bb) = n;
9693	    head = n;
9694	  }
9695    }
9696
9697  /* Look for conditional calls that do not return, and protect predicate
9698     relations around them.  Otherwise the assembler will assume the call
9699     returns, and complain about uses of call-clobbered predicates after
9700     the call.  */
9701  FOR_EACH_BB_REVERSE_FN (bb, cfun)
9702    {
9703      rtx_insn *insn = BB_HEAD (bb);
9704
9705      while (1)
9706	{
9707	  if (CALL_P (insn)
9708	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9709	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9710	    {
9711	      rtx_insn *b =
9712		emit_insn_before (gen_safe_across_calls_all (), insn);
9713	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9714	      if (BB_HEAD (bb) == insn)
9715		BB_HEAD (bb) = b;
9716	      if (BB_END (bb) == insn)
9717		BB_END (bb) = a;
9718	    }
9719
9720	  if (insn == BB_END (bb))
9721	    break;
9722	  insn = NEXT_INSN (insn);
9723	}
9724    }
9725}
9726
9727/* Perform machine dependent operations on the rtl chain INSNS.  */
9728
9729static void
9730ia64_reorg (void)
9731{
9732  /* We are freeing block_for_insn in the toplev to keep compatibility
9733     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9734  compute_bb_for_insn ();
9735
9736  /* If optimizing, we'll have split before scheduling.  */
9737  if (optimize == 0)
9738    split_all_insns ();
9739
9740  if (optimize && flag_schedule_insns_after_reload
9741      && dbg_cnt (ia64_sched2))
9742    {
9743      basic_block bb;
9744      timevar_push (TV_SCHED2);
9745      ia64_final_schedule = 1;
9746
9747      /* We can't let modulo-sched prevent us from scheduling any bbs,
9748	 since we need the final schedule to produce bundle information.  */
9749      FOR_EACH_BB_FN (bb, cfun)
9750	bb->flags &= ~BB_DISABLE_SCHEDULE;
9751
9752      initiate_bundle_states ();
9753      ia64_nop = make_insn_raw (gen_nop ());
9754      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9755      recog_memoized (ia64_nop);
9756      clocks_length = get_max_uid () + 1;
9757      stops_p = XCNEWVEC (char, clocks_length);
9758
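      /* Cache the DFA CPU unit codes used by get_max_pos and
	 get_template above; the unit names differ between the
	 Itanium 2 and Itanium 1 automata.  */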
9759      if (ia64_tune == PROCESSOR_ITANIUM2)
9760	{
9761	  pos_1 = get_cpu_unit_code ("2_1");
9762	  pos_2 = get_cpu_unit_code ("2_2");
9763	  pos_3 = get_cpu_unit_code ("2_3");
9764	  pos_4 = get_cpu_unit_code ("2_4");
9765	  pos_5 = get_cpu_unit_code ("2_5");
9766	  pos_6 = get_cpu_unit_code ("2_6");
9767	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9768	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9769	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9770	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9771	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9772	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9773	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9774	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9775	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9776	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9777	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9778	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9779	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9780	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9781	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9782	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9783	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9784	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9785	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9786	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9787	}
9788      else
9789	{
9790	  pos_1 = get_cpu_unit_code ("1_1");
9791	  pos_2 = get_cpu_unit_code ("1_2");
9792	  pos_3 = get_cpu_unit_code ("1_3");
9793	  pos_4 = get_cpu_unit_code ("1_4");
9794	  pos_5 = get_cpu_unit_code ("1_5");
9795	  pos_6 = get_cpu_unit_code ("1_6");
9796	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9797	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9798	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9799	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9800	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9801	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9802	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9803	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9804	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9805	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9806	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9807	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9808	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9809	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9810	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9811	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9812	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9813	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9814	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9815	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9816	}
9817
9818      if (flag_selective_scheduling2
9819	  && !maybe_skip_selective_scheduling ())
9820        run_selective_scheduling ();
9821      else
9822	schedule_ebbs ();
9823
      /* Redo the alignment computation, as it might have gone wrong.  */
9825      compute_alignments ();
9826
9827      /* We cannot reuse this one because it has been corrupted by the
9828	 evil glat.  */
9829      finish_bundle_states ();
9830      free (stops_p);
9831      stops_p = NULL;
9832      emit_insn_group_barriers (dump_file);
9833
9834      ia64_final_schedule = 0;
9835      timevar_pop (TV_SCHED2);
9836    }
9837  else
9838    emit_all_insn_group_barriers (dump_file);
9839
9840  df_analyze ();
9841
  /* A call must not be the last instruction in a function, so that the
     return address is still within the function and unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
9845  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9846    {
9847      rtx_insn *insn;
9848      int saw_stop = 0;
9849
9850      insn = get_last_insn ();
9851      if (! INSN_P (insn))
9852        insn = prev_active_insn (insn);
9853      if (insn)
9854	{
9855	  /* Skip over insns that expand to nothing.  */
9856	  while (NONJUMP_INSN_P (insn)
9857		 && get_attr_empty (insn) == EMPTY_YES)
9858	    {
9859	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9860		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9861		saw_stop = 1;
9862	      insn = prev_active_insn (insn);
9863	    }
9864	  if (CALL_P (insn))
9865	    {
9866	      if (! saw_stop)
9867		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9868	      emit_insn (gen_break_f ());
9869	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9870	    }
9871	}
9872    }
9873
9874  emit_predicate_relation_info ();
9875
9876  if (flag_var_tracking)
9877    {
9878      timevar_push (TV_VAR_TRACKING);
9879      variable_tracking_main ();
9880      timevar_pop (TV_VAR_TRACKING);
9881    }
9882  df_finish_pass (false);
9883}
9884
9885/* Return true if REGNO is used by the epilogue.  */
9886
9887int
9888ia64_epilogue_uses (int regno)
9889{
9890  switch (regno)
9891    {
9892    case R_GR (1):
9893      /* With a call to a function in another module, we will write a new
9894	 value to "gp".  After returning from such a call, we need to make
9895	 sure the function restores the original gp-value, even if the
9896	 function itself does not use the gp anymore.  */
9897      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9898
9899    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9900    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9901      /* For functions defined with the syscall_linkage attribute, all
9902	 input registers are marked as live at all function exits.  This
9903	 prevents the register allocator from using the input registers,
9904	 which in turn makes it possible to restart a system call after
9905	 an interrupt without having to save/restore the input registers.
9906	 This also prevents kernel data from leaking to application code.  */
9907      return lookup_attribute ("syscall_linkage",
9908	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9909
9910    case R_BR (0):
9911      /* Conditional return patterns can't represent the use of `b0' as
9912         the return address, so we force the value live this way.  */
9913      return 1;
9914
9915    case AR_PFS_REGNUM:
9916      /* Likewise for ar.pfs, which is used by br.ret.  */
9917      return 1;
9918
9919    default:
9920      return 0;
9921    }
9922}
9923
9924/* Return true if REGNO is used by the frame unwinder.  */
9925
9926int
9927ia64_eh_uses (int regno)
9928{
9929  unsigned int r;
9930
9931  if (! reload_completed)
9932    return 0;
9933
9934  if (regno == 0)
9935    return 0;
9936
9937  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9938    if (regno == current_frame_info.r[r]
9939       || regno == emitted_frame_related_regs[r])
9940      return 1;
9941
9942  return 0;
9943}
9944
9945/* Return true if this goes in small data/bss.  */
9946
/* ??? We could also support our own long data here, generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */
9951
9952static bool
9953ia64_in_small_data_p (const_tree exp)
9954{
9955  if (TARGET_NO_SDATA)
9956    return false;
9957
9958  /* We want to merge strings, so we never consider them small data.  */
9959  if (TREE_CODE (exp) == STRING_CST)
9960    return false;
9961
9962  /* Functions are never small data.  */
9963  if (TREE_CODE (exp) == FUNCTION_DECL)
9964    return false;
9965
9966  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9967    {
9968      const char *section = DECL_SECTION_NAME (exp);
9969
9970      if (strcmp (section, ".sdata") == 0
9971	  || strncmp (section, ".sdata.", 7) == 0
9972	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9973	  || strcmp (section, ".sbss") == 0
9974	  || strncmp (section, ".sbss.", 6) == 0
9975	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9976	return true;
9977    }
9978  else
9979    {
9980      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9981
9982      /* If this is an incomplete type with size 0, then we can't put it
9983	 in sdata because it might be too big when completed.  */
9984      if (size > 0 && size <= ia64_section_threshold)
9985	return true;
9986    }
9987
9988  return false;
9989}
9990
9991/* Output assembly directives for prologue regions.  */
9992
9993/* The current basic block number.  */
9994
9995static bool last_block;
9996
9997/* True if we need a copy_state command at the start of the next block.  */
9998
9999static bool need_copy_state;
10000
10001#ifndef MAX_ARTIFICIAL_LABEL_BYTES
10002# define MAX_ARTIFICIAL_LABEL_BYTES 30
10003#endif
10004
10005/* The function emits unwind directives for the start of an epilogue.  */
10006
10007static void
10008process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10009		  bool unwind, bool frame ATTRIBUTE_UNUSED)
10010{
10011  /* If this isn't the last block of the function, then we need to label the
10012     current state, and copy it back in at the start of the next block.  */
10013
10014  if (!last_block)
10015    {
10016      if (unwind)
10017	fprintf (asm_out_file, "\t.label_state %d\n",
10018		 ++cfun->machine->state_num);
10019      need_copy_state = true;
10020    }
10021
10022  if (unwind)
10023    fprintf (asm_out_file, "\t.restore sp\n");
10024}
10025
10026/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
10027
10028static void
10029process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10030			bool unwind, bool frame)
10031{
10032  rtx dest = SET_DEST (pat);
10033  rtx src = SET_SRC (pat);
10034
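  /* A decrement of sp (sp = sp + negative constant) is the frame
     allocation and produces a ".fframe" directive; an increment of sp,
     or a copy of the hard frame pointer into sp, marks the start of
     the epilogue.  */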
10035  if (dest == stack_pointer_rtx)
10036    {
10037      if (GET_CODE (src) == PLUS)
10038	{
10039	  rtx op0 = XEXP (src, 0);
10040	  rtx op1 = XEXP (src, 1);
10041
10042	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10043
10044	  if (INTVAL (op1) < 0)
10045	    {
10046	      gcc_assert (!frame_pointer_needed);
10047	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
			 -INTVAL (op1));
10051	    }
10052	  else
10053	    process_epilogue (asm_out_file, insn, unwind, frame);
10054	}
10055      else
10056	{
10057	  gcc_assert (src == hard_frame_pointer_rtx);
10058	  process_epilogue (asm_out_file, insn, unwind, frame);
10059	}
10060    }
10061  else if (dest == hard_frame_pointer_rtx)
10062    {
10063      gcc_assert (src == stack_pointer_rtx);
10064      gcc_assert (frame_pointer_needed);
10065
10066      if (unwind)
10067	fprintf (asm_out_file, "\t.vframe r%d\n",
10068		 ia64_dbx_register_number (REGNO (dest)));
10069    }
10070  else
10071    gcc_unreachable ();
10072}
10073
10074/* This function processes a SET pattern for REG_CFA_REGISTER.  */
10075
10076static void
10077process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10078{
10079  rtx dest = SET_DEST (pat);
10080  rtx src = SET_SRC (pat);
10081  int dest_regno = REGNO (dest);
10082  int src_regno;
10083
10084  if (src == pc_rtx)
10085    {
10086      /* Saving return address pointer.  */
10087      if (unwind)
10088	fprintf (asm_out_file, "\t.save rp, r%d\n",
10089		 ia64_dbx_register_number (dest_regno));
10090      return;
10091    }
10092
10093  src_regno = REGNO (src);
10094
10095  switch (src_regno)
10096    {
10097    case PR_REG (0):
10098      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10099      if (unwind)
10100	fprintf (asm_out_file, "\t.save pr, r%d\n",
10101		 ia64_dbx_register_number (dest_regno));
10102      break;
10103
10104    case AR_UNAT_REGNUM:
10105      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10106      if (unwind)
10107	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10108		 ia64_dbx_register_number (dest_regno));
10109      break;
10110
10111    case AR_LC_REGNUM:
10112      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10113      if (unwind)
10114	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10115		 ia64_dbx_register_number (dest_regno));
10116      break;
10117
10118    default:
10119      /* Everything else should indicate being stored to memory.  */
10120      gcc_unreachable ();
10121    }
10122}
10123
10124/* This function processes a SET pattern for REG_CFA_OFFSET.  */
10125
10126static void
10127process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10128{
10129  rtx dest = SET_DEST (pat);
10130  rtx src = SET_SRC (pat);
10131  int src_regno = REGNO (src);
10132  const char *saveop;
10133  HOST_WIDE_INT off;
10134  rtx base;
10135
10136  gcc_assert (MEM_P (dest));
10137  if (GET_CODE (XEXP (dest, 0)) == REG)
10138    {
10139      base = XEXP (dest, 0);
10140      off = 0;
10141    }
10142  else
10143    {
10144      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10145		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10146      base = XEXP (XEXP (dest, 0), 0);
10147      off = INTVAL (XEXP (XEXP (dest, 0), 1));
10148    }
10149
10150  if (base == hard_frame_pointer_rtx)
10151    {
10152      saveop = ".savepsp";
10153      off = - off;
10154    }
10155  else
10156    {
10157      gcc_assert (base == stack_pointer_rtx);
10158      saveop = ".savesp";
10159    }
10160
  switch (src_regno)
10163    {
10164    case BR_REG (0):
10165      gcc_assert (!current_frame_info.r[reg_save_b0]);
10166      if (unwind)
10167	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10168		 saveop, off);
10169      break;
10170
10171    case PR_REG (0):
10172      gcc_assert (!current_frame_info.r[reg_save_pr]);
10173      if (unwind)
10174	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10175		 saveop, off);
10176      break;
10177
10178    case AR_LC_REGNUM:
10179      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10180      if (unwind)
10181	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10182		 saveop, off);
10183      break;
10184
10185    case AR_PFS_REGNUM:
10186      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10187      if (unwind)
10188	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10189		 saveop, off);
10190      break;
10191
10192    case AR_UNAT_REGNUM:
10193      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10194      if (unwind)
10195	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10196		 saveop, off);
10197      break;
10198
10199    case GR_REG (4):
10200    case GR_REG (5):
10201    case GR_REG (6):
10202    case GR_REG (7):
10203      if (unwind)
10204	fprintf (asm_out_file, "\t.save.g 0x%x\n",
10205		 1 << (src_regno - GR_REG (4)));
10206      break;
10207
10208    case BR_REG (1):
10209    case BR_REG (2):
10210    case BR_REG (3):
10211    case BR_REG (4):
10212    case BR_REG (5):
10213      if (unwind)
10214	fprintf (asm_out_file, "\t.save.b 0x%x\n",
10215		 1 << (src_regno - BR_REG (1)));
10216      break;
10217
10218    case FR_REG (2):
10219    case FR_REG (3):
10220    case FR_REG (4):
10221    case FR_REG (5):
10222      if (unwind)
10223	fprintf (asm_out_file, "\t.save.f 0x%x\n",
10224		 1 << (src_regno - FR_REG (2)));
10225      break;
10226
10227    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10228    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10229    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10230    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10231      if (unwind)
10232	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10233		 1 << (src_regno - FR_REG (12)));
10234      break;
10235
10236    default:
10237      /* ??? For some reason we mark other general registers, even those
10238	 we can't represent in the unwind info.  Ignore them.  */
10239      break;
10240    }
10241}
10242
10243/* This function looks at a single insn and emits any directives
10244   required to unwind this insn.  */
10245
10246static void
10247ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10248{
10249  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10250  bool frame = dwarf2out_do_frame ();
10251  rtx note, pat;
10252  bool handled_one;
10253
10254  if (!unwind && !frame)
10255    return;
10256
10257  if (NOTE_INSN_BASIC_BLOCK_P (insn))
10258    {
      last_block = (NOTE_BASIC_BLOCK (insn)->next_bb
		    == EXIT_BLOCK_PTR_FOR_FN (cfun));
10261
10262      /* Restore unwind state from immediately before the epilogue.  */
10263      if (need_copy_state)
10264	{
10265	  if (unwind)
10266	    {
10267	      fprintf (asm_out_file, "\t.body\n");
10268	      fprintf (asm_out_file, "\t.copy_state %d\n",
10269		       cfun->machine->state_num);
10270	    }
10271	  need_copy_state = false;
10272	}
10273    }
10274
10275  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10276    return;
10277
10278  /* Look for the ALLOC insn.  */
10279  if (INSN_CODE (insn) == CODE_FOR_alloc)
10280    {
10281      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10282      int dest_regno = REGNO (dest);
10283
10284      /* If this is the final destination for ar.pfs, then this must
10285	 be the alloc in the prologue.  */
10286      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10287	{
10288	  if (unwind)
10289	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10290		     ia64_dbx_register_number (dest_regno));
10291	}
10292      else
10293	{
10294	  /* This must be an alloc before a sibcall.  We must drop the
10295	     old frame info.  The easiest way to drop the old frame
10296	     info is to ensure we had a ".restore sp" directive
10297	     followed by a new prologue.  If the procedure doesn't
10298	     have a memory-stack frame, we'll issue a dummy ".restore
10299	     sp" now.  */
10300	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* If we haven't done process_epilogue () yet, do it now.  */
10302	    process_epilogue (asm_out_file, insn, unwind, frame);
10303	  if (unwind)
10304	    fprintf (asm_out_file, "\t.prologue\n");
10305	}
10306      return;
10307    }
10308
10309  handled_one = false;
10310  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10311    switch (REG_NOTE_KIND (note))
10312      {
10313      case REG_CFA_ADJUST_CFA:
10314	pat = XEXP (note, 0);
10315	if (pat == NULL)
10316	  pat = PATTERN (insn);
10317	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10318	handled_one = true;
10319	break;
10320
10321      case REG_CFA_OFFSET:
10322	pat = XEXP (note, 0);
10323	if (pat == NULL)
10324	  pat = PATTERN (insn);
10325	process_cfa_offset (asm_out_file, pat, unwind);
10326	handled_one = true;
10327	break;
10328
10329      case REG_CFA_REGISTER:
10330	pat = XEXP (note, 0);
10331	if (pat == NULL)
10332	  pat = PATTERN (insn);
10333	process_cfa_register (asm_out_file, pat, unwind);
10334	handled_one = true;
10335	break;
10336
10337      case REG_FRAME_RELATED_EXPR:
10338      case REG_CFA_DEF_CFA:
10339      case REG_CFA_EXPRESSION:
10340      case REG_CFA_RESTORE:
10341      case REG_CFA_SET_VDRAP:
10342	/* Not used in the ia64 port.  */
10343	gcc_unreachable ();
10344
10345      default:
10346	/* Not a frame-related note.  */
10347	break;
10348      }
10349
10350  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10351     explicit action to take.  No guessing required.  */
10352  gcc_assert (handled_one);
10353}
10354
10355/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10356
10357static void
10358ia64_asm_emit_except_personality (rtx personality)
10359{
10360  fputs ("\t.personality\t", asm_out_file);
10361  output_addr_const (asm_out_file, personality);
10362  fputc ('\n', asm_out_file);
10363}
10364
10365/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10366
10367static void
10368ia64_asm_init_sections (void)
10369{
10370  exception_section = get_unnamed_section (0, output_section_asm_op,
10371					   "\t.handlerdata");
10372}
10373
10374/* Implement TARGET_DEBUG_UNWIND_INFO.  */
10375
10376static enum unwind_info_type
10377ia64_debug_unwind_info (void)
10378{
10379  return UI_TARGET;
10380}
10381
10382enum ia64_builtins
10383{
10384  IA64_BUILTIN_BSP,
10385  IA64_BUILTIN_COPYSIGNQ,
10386  IA64_BUILTIN_FABSQ,
10387  IA64_BUILTIN_FLUSHRS,
10388  IA64_BUILTIN_INFQ,
10389  IA64_BUILTIN_HUGE_VALQ,
10390  IA64_BUILTIN_max
10391};
10392
10393static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10394
10395void
10396ia64_init_builtins (void)
10397{
10398  tree fpreg_type;
10399  tree float80_type;
10400  tree decl;
10401
10402  /* The __fpreg type.  */
10403  fpreg_type = make_node (REAL_TYPE);
10404  TYPE_PRECISION (fpreg_type) = 82;
10405  layout_type (fpreg_type);
10406  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10407
10408  /* The __float80 type.  */
10409  float80_type = make_node (REAL_TYPE);
10410  TYPE_PRECISION (float80_type) = 80;
10411  layout_type (float80_type);
10412  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10413
10414  /* The __float128 type.  */
10415  if (!TARGET_HPUX)
10416    {
10417      tree ftype;
10418      tree float128_type = make_node (REAL_TYPE);
10419
10420      TYPE_PRECISION (float128_type) = 128;
10421      layout_type (float128_type);
10422      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
10423
10424      /* TFmode support builtins.  */
10425      ftype = build_function_type_list (float128_type, NULL_TREE);
10426      decl = add_builtin_function ("__builtin_infq", ftype,
10427				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10428				   NULL, NULL_TREE);
10429      ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10430
10431      decl = add_builtin_function ("__builtin_huge_valq", ftype,
10432				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10433				   NULL, NULL_TREE);
10434      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10435
10436      ftype = build_function_type_list (float128_type,
10437					float128_type,
10438					NULL_TREE);
10439      decl = add_builtin_function ("__builtin_fabsq", ftype,
10440				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10441				   "__fabstf2", NULL_TREE);
10442      TREE_READONLY (decl) = 1;
10443      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10444
10445      ftype = build_function_type_list (float128_type,
10446					float128_type,
10447					float128_type,
10448					NULL_TREE);
10449      decl = add_builtin_function ("__builtin_copysignq", ftype,
10450				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10451				   "__copysigntf3", NULL_TREE);
10452      TREE_READONLY (decl) = 1;
10453      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10454    }
10455  else
10456    /* Under HPUX, this is a synonym for "long double".  */
10457    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10458					       "__float128");
10459
10460  /* Fwrite on VMS is non-standard.  */
10461#if TARGET_ABI_OPEN_VMS
10462  vms_patch_builtins ();
10463#endif
10464
10465#define def_builtin(name, type, code)					\
10466  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
10467		       NULL, NULL_TREE)
10468
10469  decl = def_builtin ("__builtin_ia64_bsp",
10470		      build_function_type_list (ptr_type_node, NULL_TREE),
10471		      IA64_BUILTIN_BSP);
10472  ia64_builtins[IA64_BUILTIN_BSP] = decl;
10473
10474  decl = def_builtin ("__builtin_ia64_flushrs",
10475		      build_function_type_list (void_type_node, NULL_TREE),
10476		      IA64_BUILTIN_FLUSHRS);
10477  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10478
10479#undef def_builtin
10480
10481  if (TARGET_HPUX)
10482    {
10483      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10484	set_user_assembler_name (decl, "_Isfinite");
10485      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10486	set_user_assembler_name (decl, "_Isfinitef");
10487      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10488	set_user_assembler_name (decl, "_Isfinitef128");
10489    }
10490}
10491
10492rtx
10493ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10494		     machine_mode mode ATTRIBUTE_UNUSED,
10495		     int ignore ATTRIBUTE_UNUSED)
10496{
10497  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10498  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10499
10500  switch (fcode)
10501    {
10502    case IA64_BUILTIN_BSP:
10503      if (! target || ! register_operand (target, DImode))
10504	target = gen_reg_rtx (DImode);
10505      emit_insn (gen_bsp_value (target));
10506#ifdef POINTERS_EXTEND_UNSIGNED
10507      target = convert_memory_address (ptr_mode, target);
10508#endif
10509      return target;
10510
10511    case IA64_BUILTIN_FLUSHRS:
10512      emit_insn (gen_flushrs ());
10513      return const0_rtx;
10514
10515    case IA64_BUILTIN_INFQ:
10516    case IA64_BUILTIN_HUGE_VALQ:
10517      {
10518        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10519	REAL_VALUE_TYPE inf;
10520	rtx tmp;
10521
10522	real_inf (&inf);
10523	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
10524
10525	tmp = validize_mem (force_const_mem (target_mode, tmp));
10526
10527	if (target == 0)
10528	  target = gen_reg_rtx (target_mode);
10529
10530	emit_move_insn (target, tmp);
10531	return target;
10532      }
10533
10534    case IA64_BUILTIN_FABSQ:
10535    case IA64_BUILTIN_COPYSIGNQ:
10536      return expand_call (exp, target, ignore);
10537
10538    default:
10539      gcc_unreachable ();
10540    }
10541
10542  return NULL_RTX;
10543}
10544
10545/* Return the ia64 builtin for CODE.  */
10546
10547static tree
10548ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10549{
10550  if (code >= IA64_BUILTIN_max)
10551    return error_mark_node;
10552
10553  return ia64_builtins[code];
10554}
10555
/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */
10558
10559enum direction
10560ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
10561{
10562   /* Exception to normal case for structures/unions/etc.  */
10563
10564   if (type && AGGREGATE_TYPE_P (type)
10565       && int_size_in_bytes (type) < UNITS_PER_WORD)
10566     return upward;
10567
10568   /* Fall back to the default.  */
10569   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10570}
10571
10572/* Emit text to declare externally defined variables and functions, because
10573   the Intel assembler does not support undefined externals.  */
10574
10575void
10576ia64_asm_output_external (FILE *file, tree decl, const char *name)
10577{
10578  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10579     set in order to avoid putting out names that are never really
10580     used. */
10581  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10582    {
10583      /* maybe_assemble_visibility will return 1 if the assembler
10584	 visibility directive is output.  */
10585      int need_visibility = ((*targetm.binds_local_p) (decl)
10586			     && maybe_assemble_visibility (decl));
10587
10588      /* GNU as does not need anything here, but the HP linker does
10589	 need something for external functions.  */
10590      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10591	  && TREE_CODE (decl) == FUNCTION_DECL)
10592	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10593      else if (need_visibility && !TARGET_GNU_AS)
10594	(*targetm.asm_out.globalize_label) (file, name);
10595    }
10596}
10597
/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   modes of word_mode and larger.  Rename the TFmode libfuncs using the
   HP-UX conventions.  __divtf3 is used for XFmode, and we need to keep
   it for backward compatibility.  */
10602
10603static void
10604ia64_init_libfuncs (void)
10605{
10606  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10607  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10608  set_optab_libfunc (smod_optab, SImode, "__modsi3");
10609  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10610
10611  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10612  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10613  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10614  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10615  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10616
10617  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10618  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10619  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10620  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10621  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10622  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10623
10624  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10625  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10626  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10627  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10628  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10629
10630  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10631  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10632  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10633  /* HP-UX 11.23 libc does not have a function for unsigned
10634     SImode-to-TFmode conversion.  */
10635  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10636}
10637
10638/* Rename all the TFmode libfuncs using the HPUX conventions.  */
10639
10640static void
10641ia64_hpux_init_libfuncs (void)
10642{
10643  ia64_init_libfuncs ();
10644
10645  /* The HP SI millicode division and mod functions expect DI arguments.
10646     By turning them off completely we avoid using both libgcc and the
10647     non-standard millicode routines and use the HP DI millicode routines
10648     instead.  */
10649
10650  set_optab_libfunc (sdiv_optab, SImode, 0);
10651  set_optab_libfunc (udiv_optab, SImode, 0);
10652  set_optab_libfunc (smod_optab, SImode, 0);
10653  set_optab_libfunc (umod_optab, SImode, 0);
10654
10655  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10656  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10657  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10658  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10659
10660  /* HP-UX libc has TF min/max/abs routines in it.  */
10661  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10662  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10663  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10664
10665  /* ia64_expand_compare uses this.  */
10666  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10667
10668  /* These should never be used.  */
10669  set_optab_libfunc (eq_optab, TFmode, 0);
10670  set_optab_libfunc (ne_optab, TFmode, 0);
10671  set_optab_libfunc (gt_optab, TFmode, 0);
10672  set_optab_libfunc (ge_optab, TFmode, 0);
10673  set_optab_libfunc (lt_optab, TFmode, 0);
10674  set_optab_libfunc (le_optab, TFmode, 0);
10675}
10676
10677/* Rename the division and modulus functions in VMS.  */
10678
10679static void
10680ia64_vms_init_libfuncs (void)
10681{
10682  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10683  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10684  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10685  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10686  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10687  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10688  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10689  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10690  abort_libfunc = init_one_libfunc ("decc$abort");
10691  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10692#ifdef MEM_LIBFUNCS_INIT
10693  MEM_LIBFUNCS_INIT;
10694#endif
10695}
10696
10697/* Rename the TFmode libfuncs available from soft-fp in glibc using
10698   the HPUX conventions.  */
10699
10700static void
10701ia64_sysv4_init_libfuncs (void)
10702{
10703  ia64_init_libfuncs ();
10704
10705  /* These functions are not part of the HPUX TFmode interface.  We
10706     use them instead of _U_Qfcmp, which doesn't work the way we
10707     expect.  */
10708  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10709  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10710  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10711  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10712  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10713  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10714
10715  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10716     glibc doesn't have them.  */
10717}
10718
10719/* Use soft-fp.  */
10720
10721static void
10722ia64_soft_fp_init_libfuncs (void)
10723{
10724}
10725
10726static bool
10727ia64_vms_valid_pointer_mode (machine_mode mode)
10728{
10729  return (mode == SImode || mode == DImode);
10730}
10731
10732/* For HPUX, it is illegal to have relocations in shared segments.  */
10733
10734static int
10735ia64_hpux_reloc_rw_mask (void)
10736{
10737  return 3;
10738}
10739
/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */
10743
10744static int
10745ia64_reloc_rw_mask (void)
10746{
10747  return flag_pic ? 3 : 2;
10748}
10749
10750/* Return the section to use for X.  The only special thing we do here
10751   is to honor small data.  */
10752
10753static section *
10754ia64_select_rtx_section (machine_mode mode, rtx x,
10755			 unsigned HOST_WIDE_INT align)
10756{
10757  if (GET_MODE_SIZE (mode) > 0
10758      && GET_MODE_SIZE (mode) <= ia64_section_threshold
10759      && !TARGET_NO_SDATA)
10760    return sdata_section;
10761  else
10762    return default_elf_select_rtx_section (mode, x, align);
10763}
10764
10765static unsigned int
10766ia64_section_type_flags (tree decl, const char *name, int reloc)
10767{
10768  unsigned int flags = 0;
10769
10770  if (strcmp (name, ".sdata") == 0
10771      || strncmp (name, ".sdata.", 7) == 0
10772      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10773      || strncmp (name, ".sdata2.", 8) == 0
10774      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10775      || strcmp (name, ".sbss") == 0
10776      || strncmp (name, ".sbss.", 6) == 0
10777      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10778    flags = SECTION_SMALL;
10779
10780  flags |= default_section_type_flags (decl, name, reloc);
10781  return flags;
10782}
10783
10784/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10785   structure type and that the address of that type should be passed
10786   in out0, rather than in r8.  */
10787
10788static bool
10789ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10790{
10791  tree ret_type = TREE_TYPE (fntype);
10792
10793  /* The Itanium C++ ABI requires that out0, rather than r8, be used
10794     as the structure return address parameter, if the return value
10795     type has a non-trivial copy constructor or destructor.  It is not
10796     clear if this same convention should be used for other
10797     programming languages.  Until G++ 3.4, we incorrectly used r8 for
10798     these return values.  */
10799  return (abi_version_at_least (2)
10800	  && ret_type
10801	  && TYPE_MODE (ret_type) == BLKmode
10802	  && TREE_ADDRESSABLE (ret_type)
10803	  && lang_GNU_CXX ());
10804}
10805
10806/* Output the assembler code for a thunk function.  THUNK_DECL is the
10807   declaration for the thunk function itself, FUNCTION is the decl for
10808   the target function.  DELTA is an immediate constant offset to be
10809   added to THIS.  If VCALL_OFFSET is nonzero, the word at
10810   *(*this + vcall_offset) should be added to THIS.  */
10811
10812static void
10813ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10814		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10815		      tree function)
10816{
10817  rtx this_rtx, funexp;
10818  rtx_insn *insn;
10819  unsigned int this_parmno;
10820  unsigned int this_regno;
10821  rtx delta_rtx;
10822
10823  reload_completed = 1;
10824  epilogue_completed = 1;
10825
10826  /* Set things up as ia64_expand_prologue might.  */
10827  last_scratch_gr_reg = 15;
10828
10829  memset (&current_frame_info, 0, sizeof (current_frame_info));
10830  current_frame_info.spill_cfa_off = -16;
10831  current_frame_info.n_input_regs = 1;
10832  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10833
10834  /* Mark the end of the (empty) prologue.  */
10835  emit_note (NOTE_INSN_PROLOGUE_END);
10836
10837  /* Figure out whether "this" will be the first parameter (the
10838     typical case) or the second parameter (as happens when the
10839     virtual function returns certain class objects).  */
10840  this_parmno
10841    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10842       ? 1 : 0);
10843  this_regno = IN_REG (this_parmno);
10844  if (!TARGET_REG_NAMES)
10845    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10846
10847  this_rtx = gen_rtx_REG (Pmode, this_regno);
10848
10849  /* Apply the constant offset, if required.  */
10850  delta_rtx = GEN_INT (delta);
10851  if (TARGET_ILP32)
10852    {
10853      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10854      REG_POINTER (tmp) = 1;
10855      if (delta && satisfies_constraint_I (delta_rtx))
10856	{
10857	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10858	  delta = 0;
10859	}
10860      else
10861	emit_insn (gen_ptr_extend (this_rtx, tmp));
10862    }
10863  if (delta)
10864    {
10865      if (!satisfies_constraint_I (delta_rtx))
10866	{
10867	  rtx tmp = gen_rtx_REG (Pmode, 2);
10868	  emit_move_insn (tmp, delta_rtx);
10869	  delta_rtx = tmp;
10870	}
10871      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10872    }
10873
10874  /* Apply the offset from the vtable, if required.  */
10875  if (vcall_offset)
10876    {
10877      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10878      rtx tmp = gen_rtx_REG (Pmode, 2);
10879
10880      if (TARGET_ILP32)
10881	{
10882	  rtx t = gen_rtx_REG (ptr_mode, 2);
10883	  REG_POINTER (t) = 1;
10884	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10885	  if (satisfies_constraint_I (vcall_offset_rtx))
10886	    {
10887	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10888	      vcall_offset = 0;
10889	    }
10890	  else
10891	    emit_insn (gen_ptr_extend (tmp, t));
10892	}
10893      else
10894	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10895
10896      if (vcall_offset)
10897	{
10898	  if (!satisfies_constraint_J (vcall_offset_rtx))
10899	    {
10900	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10901	      emit_move_insn (tmp2, vcall_offset_rtx);
10902	      vcall_offset_rtx = tmp2;
10903	    }
10904	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10905	}
10906
10907      if (TARGET_ILP32)
10908	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10909      else
10910	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10911
10912      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10913    }
10914
10915  /* Generate a tail call to the target function.  */
10916  if (! TREE_USED (function))
10917    {
10918      assemble_external (function);
10919      TREE_USED (function) = 1;
10920    }
10921  funexp = XEXP (DECL_RTL (function), 0);
10922  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10923  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10924  insn = get_last_insn ();
10925  SIBLING_CALL_P (insn) = 1;
10926
10927  /* Code generation for calls relies on splitting.  */
10928  reload_completed = 1;
10929  epilogue_completed = 1;
10930  try_split (PATTERN (insn), insn, 0);
10931
10932  emit_barrier ();
10933
10934  /* Run just enough of rest_of_compilation to get the insns emitted.
10935     There's not really enough bulk here to make other passes such as
10936     instruction scheduling worth while.  Note that use_thunk calls
10937     assemble_start_function and assemble_end_function.  */
10938
10939  emit_all_insn_group_barriers (NULL);
10940  insn = get_insns ();
10941  shorten_branches (insn);
10942  final_start_function (insn, file, 1);
10943  final (insn, file, 1);
10944  final_end_function ();
10945
10946  reload_completed = 0;
10947  epilogue_completed = 0;
10948}
10949
10950/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10951
10952static rtx
10953ia64_struct_value_rtx (tree fntype,
10954		       int incoming ATTRIBUTE_UNUSED)
10955{
  if (TARGET_ABI_OPEN_VMS
      || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10958    return NULL_RTX;
10959  return gen_rtx_REG (Pmode, GR_REG (8));
10960}
10961
10962static bool
10963ia64_scalar_mode_supported_p (machine_mode mode)
10964{
10965  switch (mode)
10966    {
10967    case QImode:
10968    case HImode:
10969    case SImode:
10970    case DImode:
10971    case TImode:
10972      return true;
10973
10974    case SFmode:
10975    case DFmode:
10976    case XFmode:
10977    case RFmode:
10978      return true;
10979
10980    case TFmode:
10981      return true;
10982
10983    default:
10984      return false;
10985    }
10986}
10987
10988static bool
10989ia64_vector_mode_supported_p (machine_mode mode)
10990{
10991  switch (mode)
10992    {
10993    case V8QImode:
10994    case V4HImode:
10995    case V2SImode:
10996      return true;
10997
10998    case V2SFmode:
10999      return true;
11000
11001    default:
11002      return false;
11003    }
11004}
11005
11006/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P.  */
11007
11008static bool
11009ia64_libgcc_floating_mode_supported_p (machine_mode mode)
11010{
11011  switch (mode)
11012    {
11013    case SFmode:
11014    case DFmode:
11015      return true;
11016
11017    case XFmode:
11018#ifdef IA64_NO_LIBGCC_XFMODE
11019      return false;
11020#else
11021      return true;
11022#endif
11023
11024    case TFmode:
11025#ifdef IA64_NO_LIBGCC_TFMODE
11026      return false;
11027#else
11028      return true;
11029#endif
11030
11031    default:
11032      return false;
11033    }
11034}
11035
11036/* Implement the FUNCTION_PROFILER macro.  */
11037
11038void
11039ia64_output_function_profiler (FILE *file, int labelno)
11040{
11041  bool indirect_call;
11042
11043  /* If the function needs a static chain and the static chain
11044     register is r15, we use an indirect call so as to bypass
11045     the PLT stub in case the executable is dynamically linked,
11046     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */
11048
11049  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11050    {
11051      gcc_assert (STATIC_CHAIN_REGNUM == 15);
11052      indirect_call = true;
11053    }
11054  else
11055    indirect_call = false;
11056
11057  if (TARGET_GNU_AS)
11058    fputs ("\t.prologue 4, r40\n", file);
11059  else
11060    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11061  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11062
11063  if (NO_PROFILE_COUNTERS)
11064    fputs ("\tmov out3 = r0\n", file);
11065  else
11066    {
11067      char buf[20];
11068      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11069
11070      if (TARGET_AUTO_PIC)
11071	fputs ("\tmovl out3 = @gprel(", file);
11072      else
11073	fputs ("\taddl out3 = @ltoff(", file);
11074      assemble_name (file, buf);
11075      if (TARGET_AUTO_PIC)
11076	fputs (")\n", file);
11077      else
11078	fputs ("), r1\n", file);
11079    }
11080
11081  if (indirect_call)
11082    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11083  fputs ("\t;;\n", file);
11084
11085  fputs ("\t.save rp, r42\n", file);
11086  fputs ("\tmov out2 = b0\n", file);
11087  if (indirect_call)
11088    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11089  fputs ("\t.body\n", file);
11090  fputs ("\tmov out1 = r1\n", file);
11091  if (indirect_call)
11092    {
11093      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11094      fputs ("\tmov b6 = r16\n", file);
11095      fputs ("\tld8 r1 = [r14]\n", file);
11096      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11097    }
11098  else
11099    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11100}
11101
11102static GTY(()) rtx mcount_func_rtx;
11103static rtx
11104gen_mcount_func_rtx (void)
11105{
11106  if (!mcount_func_rtx)
11107    mcount_func_rtx = init_one_libfunc ("_mcount");
11108  return mcount_func_rtx;
11109}
11110
11111void
11112ia64_profile_hook (int labelno)
11113{
11114  rtx label, ip;
11115
11116  if (NO_PROFILE_COUNTERS)
11117    label = const0_rtx;
11118  else
11119    {
11120      char buf[30];
11121      const char *label_name;
11122      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11123      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11124      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11125      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11126    }
11127  ip = gen_reg_rtx (Pmode);
11128  emit_insn (gen_ip_value (ip));
11129  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11130                     VOIDmode, 3,
11131		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11132		     ip, Pmode,
11133		     label, Pmode);
11134}
11135
11136/* Return the mangling of TYPE if it is an extended fundamental type.  */
11137
11138static const char *
11139ia64_mangle_type (const_tree type)
11140{
11141  type = TYPE_MAIN_VARIANT (type);
11142
11143  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11144      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11145    return NULL;
11146
11147  /* On HP-UX, "long double" is mangled as "e" so __float128 is
11148     mangled as "e".  */
11149  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11150    return "g";
11151  /* On HP-UX, "e" is not available as a mangling of __float80 so use
11152     an extended mangling.  Elsewhere, "e" is available since long
11153     double is 80 bits.  */
11154  if (TYPE_MODE (type) == XFmode)
11155    return TARGET_HPUX ? "u9__float80" : "e";
11156  if (TYPE_MODE (type) == RFmode)
11157    return "u7__fpreg";
11158  return NULL;
11159}
11160
11161/* Return the diagnostic message string if conversion from FROMTYPE to
11162   TOTYPE is not allowed, NULL otherwise.  */
11163static const char *
11164ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11165{
11166  /* Reject nontrivial conversion to or from __fpreg.  */
11167  if (TYPE_MODE (fromtype) == RFmode
11168      && TYPE_MODE (totype) != RFmode
11169      && TYPE_MODE (totype) != VOIDmode)
11170    return N_("invalid conversion from %<__fpreg%>");
11171  if (TYPE_MODE (totype) == RFmode
11172      && TYPE_MODE (fromtype) != RFmode)
11173    return N_("invalid conversion to %<__fpreg%>");
11174  return NULL;
11175}
11176
11177/* Return the diagnostic message string if the unary operation OP is
11178   not permitted on TYPE, NULL otherwise.  */
11179static const char *
11180ia64_invalid_unary_op (int op, const_tree type)
11181{
11182  /* Reject operations on __fpreg other than unary + or &.  */
11183  if (TYPE_MODE (type) == RFmode
11184      && op != CONVERT_EXPR
11185      && op != ADDR_EXPR)
11186    return N_("invalid operation on %<__fpreg%>");
11187  return NULL;
11188}
11189
11190/* Return the diagnostic message string if the binary operation OP is
11191   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
11192static const char *
11193ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11194{
11195  /* Reject operations on __fpreg.  */
11196  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11197    return N_("invalid operation on %<__fpreg%>");
11198  return NULL;
11199}
11200
/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234, put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */
11205
11206static tree
11207ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11208                                 tree name ATTRIBUTE_UNUSED,
11209                                 tree args,
11210                                 int flags ATTRIBUTE_UNUSED,
11211                                 bool *no_add_attrs)
11212{
11213  tree arg = TREE_VALUE (args);
11214
11215  if (TREE_CODE (arg) != STRING_CST)
11216    {
11217      error("version attribute is not a string");
11218      *no_add_attrs = true;
11219      return NULL_TREE;
11220    }
11221  return NULL_TREE;
11222}
11223
11224/* Target hook for c_mode_for_suffix.  */
11225
11226static machine_mode
11227ia64_c_mode_for_suffix (char suffix)
11228{
11229  if (suffix == 'q')
11230    return TFmode;
11231  if (suffix == 'w')
11232    return XFmode;
11233
11234  return VOIDmode;
11235}
11236
11237static GTY(()) rtx ia64_dconst_0_5_rtx;
11238
11239rtx
11240ia64_dconst_0_5 (void)
11241{
11242  if (! ia64_dconst_0_5_rtx)
11243    {
11244      REAL_VALUE_TYPE rv;
11245      real_from_string (&rv, "0.5");
11246      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11247    }
11248  return ia64_dconst_0_5_rtx;
11249}
11250
11251static GTY(()) rtx ia64_dconst_0_375_rtx;
11252
11253rtx
11254ia64_dconst_0_375 (void)
11255{
11256  if (! ia64_dconst_0_375_rtx)
11257    {
11258      REAL_VALUE_TYPE rv;
11259      real_from_string (&rv, "0.375");
11260      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11261    }
11262  return ia64_dconst_0_375_rtx;
11263}
11264
11265static machine_mode
11266ia64_get_reg_raw_mode (int regno)
11267{
11268  if (FR_REGNO_P (regno))
11269    return XFmode;
  return default_get_reg_raw_mode (regno);
11271}
11272
11273/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
11274   anymore.  */
11275
11276bool
11277ia64_member_type_forces_blk (const_tree, machine_mode mode)
11278{
11279  return TARGET_HPUX && mode == TFmode;
11280}
11281
11282/* Always default to .text section until HP-UX linker is fixed.  */
11283
11284ATTRIBUTE_UNUSED static section *
11285ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11286			    enum node_frequency freq ATTRIBUTE_UNUSED,
11287			    bool startup ATTRIBUTE_UNUSED,
11288			    bool exit ATTRIBUTE_UNUSED)
11289{
11290  return NULL;
11291}
11292
11293/* Construct (set target (vec_select op0 (parallel perm))) and
11294   return true if that's a valid instruction in the active ISA.  */
11295
11296static bool
11297expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11298{
11299  rtx rperm[MAX_VECT_LEN], x;
11300  unsigned i;
11301
11302  for (i = 0; i < nelt; ++i)
11303    rperm[i] = GEN_INT (perm[i]);
11304
11305  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11306  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11307  x = gen_rtx_SET (VOIDmode, target, x);
11308
11309  rtx_insn *insn = emit_insn (x);
11310  if (recog_memoized (insn) < 0)
11311    {
11312      remove_insn (insn);
11313      return false;
11314    }
11315  return true;
11316}
11317
11318/* Similar, but generate a vec_concat from op0 and op1 as well.  */
11319
11320static bool
11321expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11322			const unsigned char *perm, unsigned nelt)
11323{
11324  machine_mode v2mode;
11325  rtx x;
11326
11327  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
11328  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11329  return expand_vselect (target, x, perm, nelt);
11330}
11331
11332/* Try to expand a no-op permutation.  */
11333
11334static bool
11335expand_vec_perm_identity (struct expand_vec_perm_d *d)
11336{
11337  unsigned i, nelt = d->nelt;
11338
11339  for (i = 0; i < nelt; ++i)
11340    if (d->perm[i] != i)
11341      return false;
11342
11343  if (!d->testing_p)
11344    emit_move_insn (d->target, d->op0);
11345
11346  return true;
11347}
11348
11349/* Try to expand D via a shrp instruction.  */
11350
11351static bool
11352expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11353{
11354  unsigned i, nelt = d->nelt, shift, mask;
11355  rtx tmp, hi, lo;
11356
11357  /* ??? Don't force V2SFmode into the integer registers.  */
11358  if (d->vmode == V2SFmode)
11359    return false;
11360
11361  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11362
11363  shift = d->perm[0];
11364  if (BYTES_BIG_ENDIAN && shift > nelt)
11365    return false;
11366
11367  for (i = 1; i < nelt; ++i)
11368    if (d->perm[i] != ((shift + i) & mask))
11369      return false;
11370
11371  if (d->testing_p)
11372    return true;
11373
11374  hi = shift < nelt ? d->op1 : d->op0;
11375  lo = shift < nelt ? d->op0 : d->op1;
11376
11377  shift %= nelt;
11378
11379  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11380
11381  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
11382  gcc_assert (IN_RANGE (shift, 1, 63));
11383
11384  /* Recall that big-endian elements are numbered starting at the top of
11385     the register.  Ideally we'd have a shift-left-pair.  But since we
11386     don't, convert to a shift the other direction.  */
11387  if (BYTES_BIG_ENDIAN)
11388    shift = 64 - shift;
11389
11390  tmp = gen_reg_rtx (DImode);
11391  hi = gen_lowpart (DImode, hi);
11392  lo = gen_lowpart (DImode, lo);
11393  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11394
11395  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11396  return true;
11397}
11398
11399/* Try to instantiate D in a single instruction.  */
11400
11401static bool
11402expand_vec_perm_1 (struct expand_vec_perm_d *d)
11403{
11404  unsigned i, nelt = d->nelt;
11405  unsigned char perm2[MAX_VECT_LEN];
11406
11407  /* Try single-operand selections.  */
11408  if (d->one_operand_p)
11409    {
11410      if (expand_vec_perm_identity (d))
11411	return true;
11412      if (expand_vselect (d->target, d->op0, d->perm, nelt))
11413	return true;
11414    }
11415
11416  /* Try two operand selections.  */
11417  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11418    return true;
11419
11420  /* Recognize interleave style patterns with reversed operands.  */
11421  if (!d->one_operand_p)
11422    {
11423      for (i = 0; i < nelt; ++i)
11424	{
11425	  unsigned e = d->perm[i];
11426	  if (e >= nelt)
11427	    e -= nelt;
11428	  else
11429	    e += nelt;
11430	  perm2[i] = e;
11431	}
11432
11433      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11434	return true;
11435    }
11436
11437  if (expand_vec_perm_shrp (d))
11438    return true;
11439
11440  /* ??? Look for deposit-like permutations where most of the result
11441     comes from one vector unchanged and the rest comes from a
11442     sequential hunk of the other vector.  */
11443
11444  return false;
11445}
11446
11447/* Pattern match broadcast permutations.  */
11448
11449static bool
11450expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11451{
11452  unsigned i, elt, nelt = d->nelt;
11453  unsigned char perm2[2];
11454  rtx temp;
11455  bool ok;
11456
11457  if (!d->one_operand_p)
11458    return false;
11459
11460  elt = d->perm[0];
11461  for (i = 1; i < nelt; ++i)
11462    if (d->perm[i] != elt)
11463      return false;
11464
11465  switch (d->vmode)
11466    {
11467    case V2SImode:
11468    case V2SFmode:
11469      /* Implementable by interleave.  */
11470      perm2[0] = elt;
11471      perm2[1] = elt + 2;
11472      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11473      gcc_assert (ok);
11474      break;
11475
11476    case V8QImode:
11477      /* Implementable by extract + broadcast.  */
11478      if (BYTES_BIG_ENDIAN)
11479	elt = 7 - elt;
11480      elt *= BITS_PER_UNIT;
11481      temp = gen_reg_rtx (DImode);
11482      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11483			    GEN_INT (8), GEN_INT (elt)));
11484      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11485      break;
11486
11487    case V4HImode:
11488      /* Should have been matched directly by vec_select.  */
11489    default:
11490      gcc_unreachable ();
11491    }
11492
11493  return true;
11494}
11495
11496/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
11497   two vector permutation into a single vector permutation by using
11498   an interleave operation to merge the vectors.  */
11499
11500static bool
11501expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11502{
11503  struct expand_vec_perm_d dremap, dfinal;
11504  unsigned char remap[2 * MAX_VECT_LEN];
11505  unsigned contents, i, nelt, nelt2;
11506  unsigned h0, h1, h2, h3;
11507  rtx_insn *seq;
11508  bool ok;
11509
11510  if (d->one_operand_p)
11511    return false;
11512
11513  nelt = d->nelt;
11514  nelt2 = nelt / 2;
11515
  /* Examine where the elements come from.  */
11517  contents = 0;
11518  for (i = 0; i < nelt; ++i)
11519    contents |= 1u << d->perm[i];
11520
11521  memset (remap, 0xff, sizeof (remap));
11522  dremap = *d;
11523
11524  h0 = (1u << nelt2) - 1;
11525  h1 = h0 << nelt2;
11526  h2 = h0 << nelt;
11527  h3 = h0 << (nelt + nelt2);
11528
11529  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
11530    {
11531      for (i = 0; i < nelt; ++i)
11532	{
11533	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
11534	  remap[which] = i;
11535	  dremap.perm[i] = which;
11536	}
11537    }
11538  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
11539    {
11540      for (i = 0; i < nelt; ++i)
11541	{
11542	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11543	  remap[which] = i;
11544	  dremap.perm[i] = which;
11545	}
11546    }
11547  else if ((contents & 0x5555) == contents)	/* mix even elements */
11548    {
11549      for (i = 0; i < nelt; ++i)
11550	{
11551	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11552	  remap[which] = i;
11553	  dremap.perm[i] = which;
11554	}
11555    }
11556  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
11557    {
11558      for (i = 0; i < nelt; ++i)
11559	{
11560	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11561	  remap[which] = i;
11562	  dremap.perm[i] = which;
11563	}
11564    }
11565  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11566    {
11567      unsigned shift = ctz_hwi (contents);
11568      for (i = 0; i < nelt; ++i)
11569	{
11570	  unsigned which = (i + shift) & (2 * nelt - 1);
11571	  remap[which] = i;
11572	  dremap.perm[i] = which;
11573	}
11574    }
11575  else
11576    return false;
11577
11578  /* Use the remapping array set up above to move the elements from their
11579     swizzled locations into their final destinations.  */
11580  dfinal = *d;
11581  for (i = 0; i < nelt; ++i)
11582    {
11583      unsigned e = remap[d->perm[i]];
11584      gcc_assert (e < nelt);
11585      dfinal.perm[i] = e;
11586    }
11587  if (d->testing_p)
11588    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11589  else
11590    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11591  dfinal.op1 = dfinal.op0;
11592  dfinal.one_operand_p = true;
11593  dremap.target = dfinal.op0;
11594
11595  /* Test if the final remap can be done with a single insn.  For V4HImode
11596     this *will* succeed.  For V8QImode or V2SImode it may not.  */
11597  start_sequence ();
11598  ok = expand_vec_perm_1 (&dfinal);
11599  seq = get_insns ();
11600  end_sequence ();
11601  if (!ok)
11602    return false;
11603  if (d->testing_p)
11604    return true;
11605
11606  ok = expand_vec_perm_1 (&dremap);
11607  gcc_assert (ok);
11608
11609  emit_insn (seq);
11610  return true;
11611}
11612
11613/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
11614   constant permutation via two mux2 and a merge.  */
11615
11616static bool
11617expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11618{
11619  unsigned char perm2[4];
11620  rtx rmask[4];
11621  unsigned i;
11622  rtx t0, t1, mask, x;
11623  bool ok;
11624
11625  if (d->vmode != V4HImode || d->one_operand_p)
11626    return false;
11627  if (d->testing_p)
11628    return true;
11629
11630  for (i = 0; i < 4; ++i)
11631    {
11632      perm2[i] = d->perm[i] & 3;
11633      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11634    }
11635  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11636  mask = force_reg (V4HImode, mask);
11637
11638  t0 = gen_reg_rtx (V4HImode);
11639  t1 = gen_reg_rtx (V4HImode);
11640
11641  ok = expand_vselect (t0, d->op0, perm2, 4);
11642  gcc_assert (ok);
11643  ok = expand_vselect (t1, d->op1, perm2, 4);
11644  gcc_assert (ok);
11645
11646  x = gen_rtx_AND (V4HImode, mask, t0);
11647  emit_insn (gen_rtx_SET (VOIDmode, t0, x));
11648
11649  x = gen_rtx_NOT (V4HImode, mask);
11650  x = gen_rtx_AND (V4HImode, x, t1);
11651  emit_insn (gen_rtx_SET (VOIDmode, t1, x));
11652
11653  x = gen_rtx_IOR (V4HImode, t0, t1);
11654  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
11655
11656  return true;
11657}
11658
11659/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11660   With all of the interface bits taken care of, perform the expansion
11661   in D and return true on success.  */
11662
11663static bool
11664ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11665{
11666  if (expand_vec_perm_1 (d))
11667    return true;
11668  if (expand_vec_perm_broadcast (d))
11669    return true;
11670  if (expand_vec_perm_interleave_2 (d))
11671    return true;
11672  if (expand_vec_perm_v4hi_5 (d))
11673    return true;
11674  return false;
11675}
11676
11677bool
11678ia64_expand_vec_perm_const (rtx operands[4])
11679{
11680  struct expand_vec_perm_d d;
11681  unsigned char perm[MAX_VECT_LEN];
11682  int i, nelt, which;
11683  rtx sel;
11684
11685  d.target = operands[0];
11686  d.op0 = operands[1];
11687  d.op1 = operands[2];
11688  sel = operands[3];
11689
11690  d.vmode = GET_MODE (d.target);
11691  gcc_assert (VECTOR_MODE_P (d.vmode));
11692  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11693  d.testing_p = false;
11694
11695  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
11696  gcc_assert (XVECLEN (sel, 0) == nelt);
11697  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11698
11699  for (i = which = 0; i < nelt; ++i)
11700    {
11701      rtx e = XVECEXP (sel, 0, i);
11702      int ei = INTVAL (e) & (2 * nelt - 1);
11703
11704      which |= (ei < nelt ? 1 : 2);
11705      d.perm[i] = ei;
11706      perm[i] = ei;
11707    }
11708
11709  switch (which)
11710    {
11711    default:
      gcc_unreachable ();
11713
11714    case 3:
11715      if (!rtx_equal_p (d.op0, d.op1))
11716	{
11717	  d.one_operand_p = false;
11718	  break;
11719	}
11720
11721      /* The elements of PERM do not suggest that only the first operand
11722	 is used, but both operands are identical.  Allow easier matching
11723	 of the permutation by folding the permutation into the single
11724	 input vector.  */
11725      for (i = 0; i < nelt; ++i)
11726	if (d.perm[i] >= nelt)
11727	  d.perm[i] -= nelt;
11728      /* FALLTHRU */
11729
11730    case 1:
11731      d.op1 = d.op0;
11732      d.one_operand_p = true;
11733      break;
11734
11735    case 2:
11736      for (i = 0; i < nelt; ++i)
11737        d.perm[i] -= nelt;
11738      d.op0 = d.op1;
11739      d.one_operand_p = true;
11740      break;
11741    }
11742
11743  if (ia64_expand_vec_perm_const_1 (&d))
11744    return true;
11745
11746  /* If the mask says both arguments are needed, but they are the same,
11747     the above tried to expand with one_operand_p true.  If that didn't
11748     work, retry with one_operand_p false, as that's what we used in _ok.  */
11749  if (which == 3 && d.one_operand_p)
11750    {
11751      memcpy (d.perm, perm, sizeof (perm));
11752      d.one_operand_p = false;
11753      return ia64_expand_vec_perm_const_1 (&d);
11754    }
11755
11756  return false;
11757}
11758
11759/* Implement targetm.vectorize.vec_perm_const_ok.  */
11760
11761static bool
11762ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
11763				  const unsigned char *sel)
11764{
11765  struct expand_vec_perm_d d;
11766  unsigned int i, nelt, which;
11767  bool ret;
11768
11769  d.vmode = vmode;
11770  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11771  d.testing_p = true;
11772
  /* Copy the permutation from SEL into D and work out which input
     operands are referenced.  */
11775  memcpy (d.perm, sel, nelt);
11776  for (i = which = 0; i < nelt; ++i)
11777    {
11778      unsigned char e = d.perm[i];
11779      gcc_assert (e < 2 * nelt);
11780      which |= (e < nelt ? 1 : 2);
11781    }
11782
  /* If all elements come from the second vector, fold them onto the
     first operand.  */
11784  if (which == 2)
11785    for (i = 0; i < nelt; ++i)
11786      d.perm[i] -= nelt;
11787
  /* Note whether the permutation references one operand or two.  */
11789  d.one_operand_p = (which != 3);
11790
  /* Go through the motions and see if we can
11792     figure out how to generate the requested permutation.  */
11793  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11794  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11795  if (!d.one_operand_p)
11796    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11797
11798  start_sequence ();
11799  ret = ia64_expand_vec_perm_const_1 (&d);
11800  end_sequence ();
11801
11802  return ret;
11803}
11804
11805void
11806ia64_expand_vec_setv2sf (rtx operands[3])
11807{
11808  struct expand_vec_perm_d d;
11809  unsigned int which;
11810  bool ok;
11811
11812  d.target = operands[0];
11813  d.op0 = operands[0];
11814  d.op1 = gen_reg_rtx (V2SFmode);
11815  d.vmode = V2SFmode;
11816  d.nelt = 2;
11817  d.one_operand_p = false;
11818  d.testing_p = false;
11819
11820  which = INTVAL (operands[2]);
11821  gcc_assert (which <= 1);
11822  d.perm[0] = 1 - which;
11823  d.perm[1] = which + 2;
11824
11825  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11826
11827  ok = ia64_expand_vec_perm_const_1 (&d);
11828  gcc_assert (ok);
11829}
11830
11831void
11832ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11833{
11834  struct expand_vec_perm_d d;
11835  machine_mode vmode = GET_MODE (target);
11836  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11837  bool ok;
11838
11839  d.target = target;
11840  d.op0 = op0;
11841  d.op1 = op1;
11842  d.vmode = vmode;
11843  d.nelt = nelt;
11844  d.one_operand_p = false;
11845  d.testing_p = false;
11846
11847  for (i = 0; i < nelt; ++i)
11848    d.perm[i] = i * 2 + odd;
11849
11850  ok = ia64_expand_vec_perm_const_1 (&d);
11851  gcc_assert (ok);
11852}
11853
11854#include "gt-ia64.h"
11855