1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999-2020 Free Software Foundation, Inc.
3   Contributed by James E. Wilson <wilson@cygnus.com> and
4		  David Mosberger <davidm@hpl.hp.com>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3, or (at your option)
11any later version.
12
13GCC is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#define IN_TARGET_CODE 1
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "memmodel.h"
32#include "cfghooks.h"
33#include "df.h"
34#include "tm_p.h"
35#include "stringpool.h"
36#include "attribs.h"
37#include "optabs.h"
38#include "regs.h"
39#include "emit-rtl.h"
40#include "recog.h"
41#include "diagnostic-core.h"
42#include "alias.h"
43#include "fold-const.h"
44#include "stor-layout.h"
45#include "calls.h"
46#include "varasm.h"
47#include "output.h"
48#include "insn-attr.h"
49#include "flags.h"
50#include "explow.h"
51#include "expr.h"
52#include "cfgrtl.h"
53#include "libfuncs.h"
54#include "sched-int.h"
55#include "common/common-target.h"
56#include "langhooks.h"
57#include "gimplify.h"
58#include "intl.h"
59#include "debug.h"
60#include "dbgcnt.h"
61#include "tm-constrs.h"
62#include "sel-sched.h"
63#include "reload.h"
64#include "opts.h"
65#include "dumpfile.h"
66#include "builtins.h"
67
68/* This file should be included last.  */
69#include "target-def.h"
70
71/* This is used for communication between ASM_OUTPUT_LABEL and
72   ASM_OUTPUT_LABELREF.  */
73int ia64_asm_output_label = 0;
74
75/* Register names for ia64_expand_prologue.  */
76static const char * const ia64_reg_numbers[96] =
77{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
78  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
79  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
80  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
81  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
82  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
83  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
84  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
85  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
86  "r104","r105","r106","r107","r108","r109","r110","r111",
87  "r112","r113","r114","r115","r116","r117","r118","r119",
88  "r120","r121","r122","r123","r124","r125","r126","r127"};
89
90/* ??? These strings could be shared with REGISTER_NAMES.  */
91static const char * const ia64_input_reg_names[8] =
92{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
93
94/* ??? These strings could be shared with REGISTER_NAMES.  */
95static const char * const ia64_local_reg_names[80] =
96{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
97  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
98  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
99  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
100  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
101  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
102  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
103  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
104  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
105  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
106
107/* ??? These strings could be shared with REGISTER_NAMES.  */
108static const char * const ia64_output_reg_names[8] =
109{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
110
111/* Variables which are this size or smaller are put in the sdata/sbss
112   sections.  */
113
114unsigned int ia64_section_threshold;
115
116/* The following variable is used by the DFA insn scheduler.  The value is
117   TRUE if we do insn bundling instead of insn scheduling.  */
118int bundling_p = 0;
119
120enum ia64_frame_regs
121{
122   reg_fp,
123   reg_save_b0,
124   reg_save_pr,
125   reg_save_ar_pfs,
126   reg_save_ar_unat,
127   reg_save_ar_lc,
128   reg_save_gp,
129   number_of_ia64_frame_regs
130};
131
132/* Structure to be filled in by ia64_compute_frame_size with register
133   save masks and offsets for the current function.  */
134
135struct ia64_frame_info
136{
137  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
138				   the caller's scratch area.  */
139  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
140  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
141  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
142  HARD_REG_SET mask;		/* mask of saved registers.  */
143  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
144				   registers or long-term scratches.  */
145  int n_spilled;		/* number of spilled registers.  */
146  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
147  int n_input_regs;		/* number of input registers used.  */
148  int n_local_regs;		/* number of local registers used.  */
149  int n_output_regs;		/* number of output registers used.  */
150  int n_rotate_regs;		/* number of rotating registers used.  */
151
152  char need_regstk;		/* true if a .regstk directive needed.  */
153  char initialized;		/* true if the data is finalized.  */
154};
155
156/* Current frame information calculated by ia64_compute_frame_size.  */
157static struct ia64_frame_info current_frame_info;
158/* The actual registers that are emitted.  */
159static int emitted_frame_related_regs[number_of_ia64_frame_regs];
160
161static int ia64_first_cycle_multipass_dfa_lookahead (void);
162static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
163static void ia64_init_dfa_pre_cycle_insn (void);
164static rtx ia64_dfa_pre_cycle_insn (void);
165static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
166static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
167static void ia64_h_i_d_extended (void);
168static void * ia64_alloc_sched_context (void);
169static void ia64_init_sched_context (void *, bool);
170static void ia64_set_sched_context (void *);
171static void ia64_clear_sched_context (void *);
172static void ia64_free_sched_context (void *);
173static int ia64_mode_to_int (machine_mode);
174static void ia64_set_sched_flags (spec_info_t);
175static ds_t ia64_get_insn_spec_ds (rtx_insn *);
176static ds_t ia64_get_insn_checked_ds (rtx_insn *);
177static bool ia64_skip_rtx_p (const_rtx);
178static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
179static bool ia64_needs_block_p (ds_t);
180static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
181static int ia64_spec_check_p (rtx);
182static int ia64_spec_check_src_p (rtx);
183static rtx gen_tls_get_addr (void);
184static rtx gen_thread_pointer (void);
185static int find_gr_spill (enum ia64_frame_regs, int);
186static int next_scratch_gr_reg (void);
187static void mark_reg_gr_used_mask (rtx, void *);
188static void ia64_compute_frame_size (HOST_WIDE_INT);
189static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
190static void finish_spill_pointers (void);
191static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
192static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
193static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
194static rtx gen_movdi_x (rtx, rtx, rtx);
195static rtx gen_fr_spill_x (rtx, rtx, rtx);
196static rtx gen_fr_restore_x (rtx, rtx, rtx);
197
198static void ia64_option_override (void);
199static bool ia64_can_eliminate (const int, const int);
200static machine_mode hfa_element_mode (const_tree, bool);
201static void ia64_setup_incoming_varargs (cumulative_args_t,
202					 const function_arg_info &,
203					 int *, int);
204static int ia64_arg_partial_bytes (cumulative_args_t,
205				   const function_arg_info &);
206static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &);
207static rtx ia64_function_incoming_arg (cumulative_args_t,
208				       const function_arg_info &);
209static void ia64_function_arg_advance (cumulative_args_t,
210				       const function_arg_info &);
211static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
212static unsigned int ia64_function_arg_boundary (machine_mode,
213						const_tree);
214static bool ia64_function_ok_for_sibcall (tree, tree);
215static bool ia64_return_in_memory (const_tree, const_tree);
216static rtx ia64_function_value (const_tree, const_tree, bool);
217static rtx ia64_libcall_value (machine_mode, const_rtx);
218static bool ia64_function_value_regno_p (const unsigned int);
219static int ia64_register_move_cost (machine_mode, reg_class_t,
220                                    reg_class_t);
221static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
222				  bool);
223static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
224static int ia64_unspec_may_trap_p (const_rtx, unsigned);
225static void fix_range (const char *);
226static struct machine_function * ia64_init_machine_status (void);
227static void emit_insn_group_barriers (FILE *);
228static void emit_all_insn_group_barriers (FILE *);
229static void final_emit_insn_group_barriers (FILE *);
230static void emit_predicate_relation_info (void);
231static void ia64_reorg (void);
232static bool ia64_in_small_data_p (const_tree);
233static void process_epilogue (FILE *, rtx, bool, bool);
234
235static bool ia64_assemble_integer (rtx, unsigned int, int);
236static void ia64_output_function_prologue (FILE *);
237static void ia64_output_function_epilogue (FILE *);
238static void ia64_output_function_end_prologue (FILE *);
239
240static void ia64_print_operand (FILE *, rtx, int);
241static void ia64_print_operand_address (FILE *, machine_mode, rtx);
242static bool ia64_print_operand_punct_valid_p (unsigned char code);
243
244static int ia64_issue_rate (void);
245static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
246static void ia64_sched_init (FILE *, int, int);
247static void ia64_sched_init_global (FILE *, int, int);
248static void ia64_sched_finish_global (FILE *, int);
249static void ia64_sched_finish (FILE *, int);
250static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
251static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
252static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
253static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
254
255static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
256static void ia64_asm_emit_except_personality (rtx);
257static void ia64_asm_init_sections (void);
258
259static enum unwind_info_type ia64_debug_unwind_info (void);
260
261static struct bundle_state *get_free_bundle_state (void);
262static void free_bundle_state (struct bundle_state *);
263static void initiate_bundle_states (void);
264static void finish_bundle_states (void);
265static int insert_bundle_state (struct bundle_state *);
266static void initiate_bundle_state_table (void);
267static void finish_bundle_state_table (void);
268static int try_issue_nops (struct bundle_state *, int);
269static int try_issue_insn (struct bundle_state *, rtx);
270static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
271				 int, int);
272static int get_max_pos (state_t);
273static int get_template (state_t, int);
274
275static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
276static bool important_for_bundling_p (rtx_insn *);
277static bool unknown_for_bundling_p (rtx_insn *);
278static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
279
280static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
281				  HOST_WIDE_INT, tree);
282static void ia64_file_start (void);
283static void ia64_globalize_decl_name (FILE *, tree);
284
285static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
286static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
287static section *ia64_select_rtx_section (machine_mode, rtx,
288					 unsigned HOST_WIDE_INT);
289static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
290     ATTRIBUTE_UNUSED;
291static unsigned int ia64_section_type_flags (tree, const char *, int);
292static void ia64_init_libfuncs (void)
293     ATTRIBUTE_UNUSED;
294static void ia64_hpux_init_libfuncs (void)
295     ATTRIBUTE_UNUSED;
296static void ia64_sysv4_init_libfuncs (void)
297     ATTRIBUTE_UNUSED;
298static void ia64_vms_init_libfuncs (void)
299     ATTRIBUTE_UNUSED;
300static void ia64_soft_fp_init_libfuncs (void)
301     ATTRIBUTE_UNUSED;
302static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
303     ATTRIBUTE_UNUSED;
304static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
305     ATTRIBUTE_UNUSED;
306
307static bool ia64_attribute_takes_identifier_p (const_tree);
308static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
309static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
310static void ia64_encode_section_info (tree, rtx, int);
311static rtx ia64_struct_value_rtx (tree, int);
312static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
313static bool ia64_scalar_mode_supported_p (scalar_mode mode);
314static bool ia64_vector_mode_supported_p (machine_mode mode);
315static bool ia64_legitimate_constant_p (machine_mode, rtx);
316static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
317static bool ia64_cannot_force_const_mem (machine_mode, rtx);
318static const char *ia64_mangle_type (const_tree);
319static const char *ia64_invalid_conversion (const_tree, const_tree);
320static const char *ia64_invalid_unary_op (int, const_tree);
321static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
322static machine_mode ia64_c_mode_for_suffix (char);
323static void ia64_trampoline_init (rtx, tree, rtx);
324static void ia64_override_options_after_change (void);
325static bool ia64_member_type_forces_blk (const_tree, machine_mode);
326
327static tree ia64_fold_builtin (tree, int, tree *, bool);
328static tree ia64_builtin_decl (unsigned, bool);
329
330static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
331static fixed_size_mode ia64_get_reg_raw_mode (int regno);
332static section * ia64_hpux_function_section (tree, enum node_frequency,
333					     bool, bool);
334
335static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
336					   const vec_perm_indices &);
337
338static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
339static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
340static bool ia64_modes_tieable_p (machine_mode, machine_mode);
341static bool ia64_can_change_mode_class (machine_mode, machine_mode,
342					reg_class_t);
343
344#define MAX_VECT_LEN	8
345
346struct expand_vec_perm_d
347{
348  rtx target, op0, op1;
349  unsigned char perm[MAX_VECT_LEN];
350  machine_mode vmode;
351  unsigned char nelt;
352  bool one_operand_p;
353  bool testing_p;
354};
355
356static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
357
358
359/* Table of valid machine attributes.  */
360static const struct attribute_spec ia64_attribute_table[] =
361{
362  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
363       affects_type_identity, handler, exclude } */
364  { "syscall_linkage", 0, 0, false, true,  true,  false, NULL, NULL },
365  { "model",	       1, 1, true, false, false,  false,
366    ia64_handle_model_attribute, NULL },
367#if TARGET_ABI_OPEN_VMS
368  { "common_object",   1, 1, true, false, false, false,
369    ia64_vms_common_object_attribute, NULL },
370#endif
371  { "version_id",      1, 1, true, false, false, false,
372    ia64_handle_version_id_attribute, NULL },
373  { NULL,	       0, 0, false, false, false, false, NULL, NULL }
374};
375
376/* Initialize the GCC target structure.  */
377#undef TARGET_ATTRIBUTE_TABLE
378#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
379
380#undef TARGET_INIT_BUILTINS
381#define TARGET_INIT_BUILTINS ia64_init_builtins
382
383#undef TARGET_FOLD_BUILTIN
384#define TARGET_FOLD_BUILTIN ia64_fold_builtin
385
386#undef TARGET_EXPAND_BUILTIN
387#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
388
389#undef TARGET_BUILTIN_DECL
390#define TARGET_BUILTIN_DECL ia64_builtin_decl
391
392#undef TARGET_ASM_BYTE_OP
393#define TARGET_ASM_BYTE_OP "\tdata1\t"
394#undef TARGET_ASM_ALIGNED_HI_OP
395#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
396#undef TARGET_ASM_ALIGNED_SI_OP
397#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
398#undef TARGET_ASM_ALIGNED_DI_OP
399#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
400#undef TARGET_ASM_UNALIGNED_HI_OP
401#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
402#undef TARGET_ASM_UNALIGNED_SI_OP
403#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
404#undef TARGET_ASM_UNALIGNED_DI_OP
405#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
406#undef TARGET_ASM_INTEGER
407#define TARGET_ASM_INTEGER ia64_assemble_integer
408
409#undef TARGET_OPTION_OVERRIDE
410#define TARGET_OPTION_OVERRIDE ia64_option_override
411
412#undef TARGET_ASM_FUNCTION_PROLOGUE
413#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
414#undef TARGET_ASM_FUNCTION_END_PROLOGUE
415#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
416#undef TARGET_ASM_FUNCTION_EPILOGUE
417#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
418
419#undef TARGET_PRINT_OPERAND
420#define TARGET_PRINT_OPERAND ia64_print_operand
421#undef TARGET_PRINT_OPERAND_ADDRESS
422#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
423#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
424#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
425
426#undef TARGET_IN_SMALL_DATA_P
427#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
428
429#undef TARGET_SCHED_ADJUST_COST
430#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
431#undef TARGET_SCHED_ISSUE_RATE
432#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
433#undef TARGET_SCHED_VARIABLE_ISSUE
434#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
435#undef TARGET_SCHED_INIT
436#define TARGET_SCHED_INIT ia64_sched_init
437#undef TARGET_SCHED_FINISH
438#define TARGET_SCHED_FINISH ia64_sched_finish
439#undef TARGET_SCHED_INIT_GLOBAL
440#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
441#undef TARGET_SCHED_FINISH_GLOBAL
442#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
443#undef TARGET_SCHED_REORDER
444#define TARGET_SCHED_REORDER ia64_sched_reorder
445#undef TARGET_SCHED_REORDER2
446#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
447
448#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
449#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
450
451#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
452#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
453
454#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
455#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
456#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
457#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
458
459#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
460#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
461  ia64_first_cycle_multipass_dfa_lookahead_guard
462
463#undef TARGET_SCHED_DFA_NEW_CYCLE
464#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
465
466#undef TARGET_SCHED_H_I_D_EXTENDED
467#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
468
469#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
470#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
471
472#undef TARGET_SCHED_INIT_SCHED_CONTEXT
473#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
474
475#undef TARGET_SCHED_SET_SCHED_CONTEXT
476#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
477
478#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
479#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
480
481#undef TARGET_SCHED_FREE_SCHED_CONTEXT
482#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
483
484#undef TARGET_SCHED_SET_SCHED_FLAGS
485#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
486
487#undef TARGET_SCHED_GET_INSN_SPEC_DS
488#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
489
490#undef TARGET_SCHED_GET_INSN_CHECKED_DS
491#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
492
493#undef TARGET_SCHED_SPECULATE_INSN
494#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
495
496#undef TARGET_SCHED_NEEDS_BLOCK_P
497#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
498
499#undef TARGET_SCHED_GEN_SPEC_CHECK
500#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
501
502#undef TARGET_SCHED_SKIP_RTX_P
503#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
504
505#undef TARGET_FUNCTION_OK_FOR_SIBCALL
506#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
507#undef TARGET_ARG_PARTIAL_BYTES
508#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
509#undef TARGET_FUNCTION_ARG
510#define TARGET_FUNCTION_ARG ia64_function_arg
511#undef TARGET_FUNCTION_INCOMING_ARG
512#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
513#undef TARGET_FUNCTION_ARG_ADVANCE
514#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
515#undef TARGET_FUNCTION_ARG_PADDING
516#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
517#undef TARGET_FUNCTION_ARG_BOUNDARY
518#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
519
520#undef TARGET_ASM_OUTPUT_MI_THUNK
521#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
522#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
523#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
524
525#undef TARGET_ASM_FILE_START
526#define TARGET_ASM_FILE_START ia64_file_start
527
528#undef TARGET_ASM_GLOBALIZE_DECL_NAME
529#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
530
531#undef TARGET_REGISTER_MOVE_COST
532#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
533#undef TARGET_MEMORY_MOVE_COST
534#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
535#undef TARGET_RTX_COSTS
536#define TARGET_RTX_COSTS ia64_rtx_costs
537#undef TARGET_ADDRESS_COST
538#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
539
540#undef TARGET_UNSPEC_MAY_TRAP_P
541#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
542
543#undef TARGET_MACHINE_DEPENDENT_REORG
544#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
545
546#undef TARGET_ENCODE_SECTION_INFO
547#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
548
549#undef  TARGET_SECTION_TYPE_FLAGS
550#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
551
552#ifdef HAVE_AS_TLS
553#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
554#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
555#endif
556
557/* ??? Investigate.  */
558#if 0
559#undef TARGET_PROMOTE_PROTOTYPES
560#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
561#endif
562
563#undef TARGET_FUNCTION_VALUE
564#define TARGET_FUNCTION_VALUE ia64_function_value
565#undef TARGET_LIBCALL_VALUE
566#define TARGET_LIBCALL_VALUE ia64_libcall_value
567#undef TARGET_FUNCTION_VALUE_REGNO_P
568#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
569
570#undef TARGET_STRUCT_VALUE_RTX
571#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
572#undef TARGET_RETURN_IN_MEMORY
573#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
574#undef TARGET_SETUP_INCOMING_VARARGS
575#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
576#undef TARGET_STRICT_ARGUMENT_NAMING
577#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
578#undef TARGET_MUST_PASS_IN_STACK
579#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
580#undef TARGET_GET_RAW_RESULT_MODE
581#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
582#undef TARGET_GET_RAW_ARG_MODE
583#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
584
585#undef TARGET_MEMBER_TYPE_FORCES_BLK
586#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
587
588#undef TARGET_GIMPLIFY_VA_ARG_EXPR
589#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
590
591#undef TARGET_ASM_UNWIND_EMIT
592#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
593#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
594#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
595#undef TARGET_ASM_INIT_SECTIONS
596#define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections
597
598#undef TARGET_DEBUG_UNWIND_INFO
599#define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info
600
601#undef TARGET_SCALAR_MODE_SUPPORTED_P
602#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
603#undef TARGET_VECTOR_MODE_SUPPORTED_P
604#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
605
606#undef TARGET_LEGITIMATE_CONSTANT_P
607#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
608#undef TARGET_LEGITIMATE_ADDRESS_P
609#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
610
611#undef TARGET_LRA_P
612#define TARGET_LRA_P hook_bool_void_false
613
614#undef TARGET_CANNOT_FORCE_CONST_MEM
615#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
616
617#undef TARGET_MANGLE_TYPE
618#define TARGET_MANGLE_TYPE ia64_mangle_type
619
620#undef TARGET_INVALID_CONVERSION
621#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
622#undef TARGET_INVALID_UNARY_OP
623#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
624#undef TARGET_INVALID_BINARY_OP
625#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
626
627#undef TARGET_C_MODE_FOR_SUFFIX
628#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
629
630#undef TARGET_CAN_ELIMINATE
631#define TARGET_CAN_ELIMINATE ia64_can_eliminate
632
633#undef TARGET_TRAMPOLINE_INIT
634#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
635
636#undef TARGET_CAN_USE_DOLOOP_P
637#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
638#undef TARGET_INVALID_WITHIN_DOLOOP
639#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
640
641#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
642#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
643
644#undef TARGET_PREFERRED_RELOAD_CLASS
645#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
646
647#undef TARGET_DELAY_SCHED2
648#define TARGET_DELAY_SCHED2 true
649
650/* Variable tracking should be run after all optimizations which
651   change order of insns.  It also needs a valid CFG.  */
652#undef TARGET_DELAY_VARTRACK
653#define TARGET_DELAY_VARTRACK true
654
655#undef TARGET_VECTORIZE_VEC_PERM_CONST
656#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const
657
658#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
659#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
660
661#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
662#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0
663
664#undef TARGET_HARD_REGNO_NREGS
665#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
666#undef TARGET_HARD_REGNO_MODE_OK
667#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok
668
669#undef TARGET_MODES_TIEABLE_P
670#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p
671
672#undef TARGET_CAN_CHANGE_MODE_CLASS
673#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class
674
675#undef TARGET_CONSTANT_ALIGNMENT
676#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
677
678struct gcc_target targetm = TARGET_INITIALIZER;
679
680/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
681   identifier as an argument, so the front end shouldn't look it up.  */
682
683static bool
684ia64_attribute_takes_identifier_p (const_tree attr_id)
685{
686  if (is_attribute_p ("model", attr_id))
687    return true;
688#if TARGET_ABI_OPEN_VMS
689  if (is_attribute_p ("common_object", attr_id))
690    return true;
691#endif
692  return false;
693}
694
695typedef enum
696  {
697    ADDR_AREA_NORMAL,	/* normal address area */
698    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
699  }
700ia64_addr_area;
701
702static GTY(()) tree small_ident1;
703static GTY(()) tree small_ident2;
704
705static void
706init_idents (void)
707{
708  if (small_ident1 == 0)
709    {
710      small_ident1 = get_identifier ("small");
711      small_ident2 = get_identifier ("__small__");
712    }
713}
714
715/* Retrieve the address area that has been chosen for the given decl.  */
716
717static ia64_addr_area
718ia64_get_addr_area (tree decl)
719{
720  tree model_attr;
721
722  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
723  if (model_attr)
724    {
725      tree id;
726
727      init_idents ();
728      id = TREE_VALUE (TREE_VALUE (model_attr));
729      if (id == small_ident1 || id == small_ident2)
730	return ADDR_AREA_SMALL;
731    }
732  return ADDR_AREA_NORMAL;
733}
734
735static tree
736ia64_handle_model_attribute (tree *node, tree name, tree args,
737			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
738{
739  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
740  ia64_addr_area area;
741  tree arg, decl = *node;
742
743  init_idents ();
744  arg = TREE_VALUE (args);
745  if (arg == small_ident1 || arg == small_ident2)
746    {
747      addr_area = ADDR_AREA_SMALL;
748    }
749  else
750    {
751      warning (OPT_Wattributes, "invalid argument of %qE attribute",
752	       name);
753      *no_add_attrs = true;
754    }
755
756  switch (TREE_CODE (decl))
757    {
758    case VAR_DECL:
759      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
760	   == FUNCTION_DECL)
761	  && !TREE_STATIC (decl))
762	{
763	  error_at (DECL_SOURCE_LOCATION (decl),
764		    "an address area attribute cannot be specified for "
765		    "local variables");
766	  *no_add_attrs = true;
767	}
768      area = ia64_get_addr_area (decl);
769      if (area != ADDR_AREA_NORMAL && addr_area != area)
770	{
771	  error ("address area of %q+D conflicts with previous "
772		 "declaration", decl);
773	  *no_add_attrs = true;
774	}
775      break;
776
777    case FUNCTION_DECL:
778      error_at (DECL_SOURCE_LOCATION (decl),
779		"address area attribute cannot be specified for "
780		"functions");
781      *no_add_attrs = true;
782      break;
783
784    default:
785      warning (OPT_Wattributes, "%qE attribute ignored",
786	       name);
787      *no_add_attrs = true;
788      break;
789    }
790
791  return NULL_TREE;
792}
793
794/* Part of the low level implementation of DEC Ada pragma Common_Object which
795   enables the shared use of variables stored in overlaid linker areas
796   corresponding to the use of Fortran COMMON.  */
797
798static tree
799ia64_vms_common_object_attribute (tree *node, tree name, tree args,
800				  int flags ATTRIBUTE_UNUSED,
801				  bool *no_add_attrs)
802{
803    tree decl = *node;
804    tree id;
805
806    gcc_assert (DECL_P (decl));
807
808    DECL_COMMON (decl) = 1;
809    id = TREE_VALUE (args);
810    if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
811      {
812	error ("%qE attribute requires a string constant argument", name);
813	*no_add_attrs = true;
814	return NULL_TREE;
815      }
816    return NULL_TREE;
817}
818
819/* Part of the low level implementation of DEC Ada pragma Common_Object.  */
820
821void
822ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
823				     unsigned HOST_WIDE_INT size,
824				     unsigned int align)
825{
826  tree attr = DECL_ATTRIBUTES (decl);
827
828  if (attr)
829    attr = lookup_attribute ("common_object", attr);
830  if (attr)
831    {
832      tree id = TREE_VALUE (TREE_VALUE (attr));
833      const char *name;
834
835      if (TREE_CODE (id) == IDENTIFIER_NODE)
836        name = IDENTIFIER_POINTER (id);
837      else if (TREE_CODE (id) == STRING_CST)
838        name = TREE_STRING_POINTER (id);
839      else
840        abort ();
841
842      fprintf (file, "\t.vms_common\t\"%s\",", name);
843    }
844  else
845    fprintf (file, "%s", COMMON_ASM_OP);
846
847  /*  Code from elfos.h.  */
848  assemble_name (file, name);
849  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
850           size, align / BITS_PER_UNIT);
851
852  fputc ('\n', file);
853}
854
855static void
856ia64_encode_addr_area (tree decl, rtx symbol)
857{
858  int flags;
859
860  flags = SYMBOL_REF_FLAGS (symbol);
861  switch (ia64_get_addr_area (decl))
862    {
863    case ADDR_AREA_NORMAL: break;
864    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
865    default: gcc_unreachable ();
866    }
867  SYMBOL_REF_FLAGS (symbol) = flags;
868}
869
870static void
871ia64_encode_section_info (tree decl, rtx rtl, int first)
872{
873  default_encode_section_info (decl, rtl, first);
874
875  /* Careful not to prod global register variables.  */
876  if (TREE_CODE (decl) == VAR_DECL
877      && GET_CODE (DECL_RTL (decl)) == MEM
878      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
879      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
880    ia64_encode_addr_area (decl, XEXP (rtl, 0));
881}
882
883/* Return 1 if the operands of a move are ok.  */
884
885int
886ia64_move_ok (rtx dst, rtx src)
887{
888  /* If we're under init_recog_no_volatile, we'll not be able to use
889     memory_operand.  So check the code directly and don't worry about
890     the validity of the underlying address, which should have been
891     checked elsewhere anyway.  */
892  if (GET_CODE (dst) != MEM)
893    return 1;
894  if (GET_CODE (src) == MEM)
895    return 0;
896  if (register_operand (src, VOIDmode))
897    return 1;
898
899  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
900  if (INTEGRAL_MODE_P (GET_MODE (dst)))
901    return src == const0_rtx;
902  else
903    return satisfies_constraint_G (src);
904}
905
906/* Return 1 if the operands are ok for a floating point load pair.  */
907
908int
909ia64_load_pair_ok (rtx dst, rtx src)
910{
911  /* ??? There is a thinko in the implementation of the "x" constraint and the
912     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
913     also return false for it.  */
914  if (GET_CODE (dst) != REG
915      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
916    return 0;
917  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
918    return 0;
919  switch (GET_CODE (XEXP (src, 0)))
920    {
921    case REG:
922    case POST_INC:
923      break;
924    case POST_DEC:
925      return 0;
926    case POST_MODIFY:
927      {
928	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
929
930	if (GET_CODE (adjust) != CONST_INT
931	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
932	  return 0;
933      }
934      break;
935    default:
936      abort ();
937    }
938  return 1;
939}
940
941int
942addp4_optimize_ok (rtx op1, rtx op2)
943{
944  return (basereg_operand (op1, GET_MODE(op1)) !=
945	  basereg_operand (op2, GET_MODE(op2)));
946}
947
948/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
949   Return the length of the field, or <= 0 on failure.  */
950
951int
952ia64_depz_field_mask (rtx rop, rtx rshift)
953{
954  unsigned HOST_WIDE_INT op = INTVAL (rop);
955  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
956
957  /* Get rid of the zero bits we're shifting in.  */
958  op >>= shift;
959
960  /* We must now have a solid block of 1's at bit 0.  */
961  return exact_log2 (op + 1);
962}
963
964/* Return the TLS model to use for ADDR.  */
965
966static enum tls_model
967tls_symbolic_operand_type (rtx addr)
968{
969  enum tls_model tls_kind = TLS_MODEL_NONE;
970
971  if (GET_CODE (addr) == CONST)
972    {
973      if (GET_CODE (XEXP (addr, 0)) == PLUS
974	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
975        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
976    }
977  else if (GET_CODE (addr) == SYMBOL_REF)
978    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
979
980  return tls_kind;
981}
982
983/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
984   as a base register.  */
985
986static inline bool
987ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
988{
989  if (strict
990      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
991    return true;
992  else if (!strict
993	   && (GENERAL_REGNO_P (REGNO (reg))
994	       || !HARD_REGISTER_P (reg)))
995    return true;
996  else
997    return false;
998}
999
1000static bool
1001ia64_legitimate_address_reg (const_rtx reg, bool strict)
1002{
1003  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
1004      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
1005	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
1006    return true;
1007
1008  return false;
1009}
1010
1011static bool
1012ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
1013{
1014  if (GET_CODE (disp) == PLUS
1015      && rtx_equal_p (reg, XEXP (disp, 0))
1016      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
1017	  || (CONST_INT_P (XEXP (disp, 1))
1018	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
1019    return true;
1020
1021  return false;
1022}
1023
1024/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
1025
1026static bool
1027ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
1028			   rtx x, bool strict)
1029{
1030  if (ia64_legitimate_address_reg (x, strict))
1031    return true;
1032  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
1033	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1034	   && XEXP (x, 0) != arg_pointer_rtx)
1035    return true;
1036  else if (GET_CODE (x) == POST_MODIFY
1037	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
1038	   && XEXP (x, 0) != arg_pointer_rtx
1039	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
1040    return true;
1041  else
1042    return false;
1043}
1044
1045/* Return true if X is a constant that is valid for some immediate
1046   field in an instruction.  */
1047
1048static bool
1049ia64_legitimate_constant_p (machine_mode mode, rtx x)
1050{
1051  switch (GET_CODE (x))
1052    {
1053    case CONST_INT:
1054    case LABEL_REF:
1055      return true;
1056
1057    case CONST_DOUBLE:
1058      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
1059	return true;
1060      return satisfies_constraint_G (x);
1061
1062    case CONST:
1063    case SYMBOL_REF:
1064      /* ??? Short term workaround for PR 28490.  We must make the code here
1065	 match the code in ia64_expand_move and move_operand, even though they
1066	 are both technically wrong.  */
1067      if (tls_symbolic_operand_type (x) == 0)
1068	{
1069	  HOST_WIDE_INT addend = 0;
1070	  rtx op = x;
1071
1072	  if (GET_CODE (op) == CONST
1073	      && GET_CODE (XEXP (op, 0)) == PLUS
1074	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
1075	    {
1076	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
1077	      op = XEXP (XEXP (op, 0), 0);
1078	    }
1079
1080          if (any_offset_symbol_operand (op, mode)
1081              || function_operand (op, mode))
1082            return true;
1083	  if (aligned_offset_symbol_operand (op, mode))
1084	    return (addend & 0x3fff) == 0;
1085	  return false;
1086	}
1087      return false;
1088
1089    case CONST_VECTOR:
1090      if (mode == V2SFmode)
1091	return satisfies_constraint_Y (x);
1092
1093      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1094	      && GET_MODE_SIZE (mode) <= 8);
1095
1096    default:
1097      return false;
1098    }
1099}
1100
1101/* Don't allow TLS addresses to get spilled to memory.  */
1102
1103static bool
1104ia64_cannot_force_const_mem (machine_mode mode, rtx x)
1105{
1106  if (mode == RFmode)
1107    return true;
1108  return tls_symbolic_operand_type (x) != 0;
1109}
1110
1111/* Expand a symbolic constant load.  */
1112
1113bool
1114ia64_expand_load_address (rtx dest, rtx src)
1115{
1116  gcc_assert (GET_CODE (dest) == REG);
1117
1118  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
1119     having to pointer-extend the value afterward.  Other forms of address
1120     computation below are also more natural to compute as 64-bit quantities.
1121     If we've been given an SImode destination register, change it.  */
1122  if (GET_MODE (dest) != Pmode)
1123    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
1124			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
1125
1126  if (TARGET_NO_PIC)
1127    return false;
1128  if (small_addr_symbolic_operand (src, VOIDmode))
1129    return false;
1130
1131  if (TARGET_AUTO_PIC)
1132    emit_insn (gen_load_gprel64 (dest, src));
1133  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
1134    emit_insn (gen_load_fptr (dest, src));
1135  else if (sdata_symbolic_operand (src, VOIDmode))
1136    emit_insn (gen_load_gprel (dest, src));
1137  else if (local_symbolic_operand64 (src, VOIDmode))
1138    {
1139      /* We want to use @gprel rather than @ltoff relocations for local
1140	 symbols:
1141	  - @gprel does not require dynamic linker
1142	  - and does not use .sdata section
1143	 https://gcc.gnu.org/bugzilla/60465 */
1144      emit_insn (gen_load_gprel64 (dest, src));
1145    }
1146  else
1147    {
1148      HOST_WIDE_INT addend = 0;
1149      rtx tmp;
1150
1151      /* We did split constant offsets in ia64_expand_move, and we did try
1152	 to keep them split in move_operand, but we also allowed reload to
1153	 rematerialize arbitrary constants rather than spill the value to
1154	 the stack and reload it.  So we have to be prepared here to split
1155	 them apart again.  */
1156      if (GET_CODE (src) == CONST)
1157	{
1158	  HOST_WIDE_INT hi, lo;
1159
1160	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
1161	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
1162	  hi = hi - lo;
1163
1164	  if (lo != 0)
1165	    {
1166	      addend = lo;
1167	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
1168	    }
1169	}
1170
1171      tmp = gen_rtx_HIGH (Pmode, src);
1172      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
1173      emit_insn (gen_rtx_SET (dest, tmp));
1174
1175      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
1176      emit_insn (gen_rtx_SET (dest, tmp));
1177
1178      if (addend)
1179	{
1180	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
1181	  emit_insn (gen_rtx_SET (dest, tmp));
1182	}
1183    }
1184
1185  return true;
1186}
1187
1188static GTY(()) rtx gen_tls_tga;
1189static rtx
1190gen_tls_get_addr (void)
1191{
1192  if (!gen_tls_tga)
1193    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1194  return gen_tls_tga;
1195}
1196
1197static GTY(()) rtx thread_pointer_rtx;
1198static rtx
1199gen_thread_pointer (void)
1200{
1201  if (!thread_pointer_rtx)
1202    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1203  return thread_pointer_rtx;
1204}
1205
1206static rtx
1207ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1208			 rtx orig_op1, HOST_WIDE_INT addend)
1209{
1210  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
1211  rtx_insn *insns;
1212  rtx orig_op0 = op0;
1213  HOST_WIDE_INT addend_lo, addend_hi;
1214
1215  switch (tls_kind)
1216    {
1217    case TLS_MODEL_GLOBAL_DYNAMIC:
1218      start_sequence ();
1219
1220      tga_op1 = gen_reg_rtx (Pmode);
1221      emit_insn (gen_load_dtpmod (tga_op1, op1));
1222
1223      tga_op2 = gen_reg_rtx (Pmode);
1224      emit_insn (gen_load_dtprel (tga_op2, op1));
1225
1226      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1227					 LCT_CONST, Pmode,
1228					 tga_op1, Pmode, tga_op2, Pmode);
1229
1230      insns = get_insns ();
1231      end_sequence ();
1232
1233      if (GET_MODE (op0) != Pmode)
1234	op0 = tga_ret;
1235      emit_libcall_block (insns, op0, tga_ret, op1);
1236      break;
1237
1238    case TLS_MODEL_LOCAL_DYNAMIC:
1239      /* ??? This isn't the completely proper way to do local-dynamic
1240	 If the call to __tls_get_addr is used only by a single symbol,
1241	 then we should (somehow) move the dtprel to the second arg
1242	 to avoid the extra add.  */
1243      start_sequence ();
1244
1245      tga_op1 = gen_reg_rtx (Pmode);
1246      emit_insn (gen_load_dtpmod (tga_op1, op1));
1247
1248      tga_op2 = const0_rtx;
1249
1250      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1251					 LCT_CONST, Pmode,
1252					 tga_op1, Pmode, tga_op2, Pmode);
1253
1254      insns = get_insns ();
1255      end_sequence ();
1256
1257      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1258				UNSPEC_LD_BASE);
1259      tmp = gen_reg_rtx (Pmode);
1260      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1261
1262      if (!register_operand (op0, Pmode))
1263	op0 = gen_reg_rtx (Pmode);
1264      if (TARGET_TLS64)
1265	{
1266	  emit_insn (gen_load_dtprel (op0, op1));
1267	  emit_insn (gen_adddi3 (op0, tmp, op0));
1268	}
1269      else
1270	emit_insn (gen_add_dtprel (op0, op1, tmp));
1271      break;
1272
1273    case TLS_MODEL_INITIAL_EXEC:
1274      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1275      addend_hi = addend - addend_lo;
1276
1277      op1 = plus_constant (Pmode, op1, addend_hi);
1278      addend = addend_lo;
1279
1280      tmp = gen_reg_rtx (Pmode);
1281      emit_insn (gen_load_tprel (tmp, op1));
1282
1283      if (!register_operand (op0, Pmode))
1284	op0 = gen_reg_rtx (Pmode);
1285      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1286      break;
1287
1288    case TLS_MODEL_LOCAL_EXEC:
1289      if (!register_operand (op0, Pmode))
1290	op0 = gen_reg_rtx (Pmode);
1291
1292      op1 = orig_op1;
1293      addend = 0;
1294      if (TARGET_TLS64)
1295	{
1296	  emit_insn (gen_load_tprel (op0, op1));
1297	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1298	}
1299      else
1300	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1301      break;
1302
1303    default:
1304      gcc_unreachable ();
1305    }
1306
1307  if (addend)
1308    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1309			       orig_op0, 1, OPTAB_DIRECT);
1310  if (orig_op0 == op0)
1311    return NULL_RTX;
1312  if (GET_MODE (orig_op0) == Pmode)
1313    return op0;
1314  return gen_lowpart (GET_MODE (orig_op0), op0);
1315}
1316
1317rtx
1318ia64_expand_move (rtx op0, rtx op1)
1319{
1320  machine_mode mode = GET_MODE (op0);
1321
1322  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1323    op1 = force_reg (mode, op1);
1324
1325  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1326    {
1327      HOST_WIDE_INT addend = 0;
1328      enum tls_model tls_kind;
1329      rtx sym = op1;
1330
1331      if (GET_CODE (op1) == CONST
1332	  && GET_CODE (XEXP (op1, 0)) == PLUS
1333	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1334	{
1335	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1336	  sym = XEXP (XEXP (op1, 0), 0);
1337	}
1338
1339      tls_kind = tls_symbolic_operand_type (sym);
1340      if (tls_kind)
1341	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1342
1343      if (any_offset_symbol_operand (sym, mode))
1344	addend = 0;
1345      else if (aligned_offset_symbol_operand (sym, mode))
1346	{
1347	  HOST_WIDE_INT addend_lo, addend_hi;
1348
1349	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1350	  addend_hi = addend - addend_lo;
1351
1352	  if (addend_lo != 0)
1353	    {
1354	      op1 = plus_constant (mode, sym, addend_hi);
1355	      addend = addend_lo;
1356	    }
1357	  else
1358	    addend = 0;
1359	}
1360      else
1361	op1 = sym;
1362
1363      if (reload_completed)
1364	{
1365	  /* We really should have taken care of this offset earlier.  */
1366	  gcc_assert (addend == 0);
1367	  if (ia64_expand_load_address (op0, op1))
1368	    return NULL_RTX;
1369	}
1370
1371      if (addend)
1372	{
1373	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1374
1375	  emit_insn (gen_rtx_SET (subtarget, op1));
1376
1377	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1378				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1379	  if (op0 == op1)
1380	    return NULL_RTX;
1381	}
1382    }
1383
1384  return op1;
1385}
1386
1387/* Split a move from OP1 to OP0 conditional on COND.  */
1388
1389void
1390ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1391{
1392  rtx_insn *insn, *first = get_last_insn ();
1393
1394  emit_move_insn (op0, op1);
1395
1396  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1397    if (INSN_P (insn))
1398      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1399					  PATTERN (insn));
1400}
1401
1402/* Split a post-reload TImode or TFmode reference into two DImode
1403   components.  This is made extra difficult by the fact that we do
1404   not get any scratch registers to work with, because reload cannot
1405   be prevented from giving us a scratch that overlaps the register
1406   pair involved.  So instead, when addressing memory, we tweak the
1407   pointer register up and back down with POST_INCs.  Or up and not
1408   back down when we can get away with it.
1409
1410   REVERSED is true when the loads must be done in reversed order
1411   (high word first) for correctness.  DEAD is true when the pointer
1412   dies with the second insn we generate and therefore the second
1413   address must not carry a postmodify.
1414
1415   May return an insn which is to be emitted after the moves.  */
1416
1417static rtx
1418ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1419{
1420  rtx fixup = 0;
1421
1422  switch (GET_CODE (in))
1423    {
1424    case REG:
1425      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1426      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1427      break;
1428
1429    case CONST_INT:
1430    case CONST_DOUBLE:
1431      /* Cannot occur reversed.  */
1432      gcc_assert (!reversed);
1433
1434      if (GET_MODE (in) != TFmode)
1435	split_double (in, &out[0], &out[1]);
1436      else
1437	/* split_double does not understand how to split a TFmode
1438	   quantity into a pair of DImode constants.  */
1439	{
1440	  unsigned HOST_WIDE_INT p[2];
1441	  long l[4];  /* TFmode is 128 bits */
1442
1443	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);
1444
1445	  if (FLOAT_WORDS_BIG_ENDIAN)
1446	    {
1447	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1448	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1449	    }
1450	  else
1451	    {
1452	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1453	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1454	    }
1455	  out[0] = GEN_INT (p[0]);
1456	  out[1] = GEN_INT (p[1]);
1457	}
1458      break;
1459
1460    case MEM:
1461      {
1462	rtx base = XEXP (in, 0);
1463	rtx offset;
1464
1465	switch (GET_CODE (base))
1466	  {
1467	  case REG:
1468	    if (!reversed)
1469	      {
1470		out[0] = adjust_automodify_address
1471		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1472		out[1] = adjust_automodify_address
1473		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1474	      }
1475	    else
1476	      {
1477		/* Reversal requires a pre-increment, which can only
1478		   be done as a separate insn.  */
1479		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1480		out[0] = adjust_automodify_address
1481		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1482		out[1] = adjust_address (in, DImode, 0);
1483	      }
1484	    break;
1485
1486	  case POST_INC:
1487	    gcc_assert (!reversed && !dead);
1488
1489	    /* Just do the increment in two steps.  */
1490	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1491	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1492	    break;
1493
1494	  case POST_DEC:
1495	    gcc_assert (!reversed && !dead);
1496
1497	    /* Add 8, subtract 24.  */
1498	    base = XEXP (base, 0);
1499	    out[0] = adjust_automodify_address
1500	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1501	    out[1] = adjust_automodify_address
1502	      (in, DImode,
1503	       gen_rtx_POST_MODIFY (Pmode, base,
1504				    plus_constant (Pmode, base, -24)),
1505	       8);
1506	    break;
1507
1508	  case POST_MODIFY:
1509	    gcc_assert (!reversed && !dead);
1510
1511	    /* Extract and adjust the modification.  This case is
1512	       trickier than the others, because we might have an
1513	       index register, or we might have a combined offset that
1514	       doesn't fit a signed 9-bit displacement field.  We can
1515	       assume the incoming expression is already legitimate.  */
1516	    offset = XEXP (base, 1);
1517	    base = XEXP (base, 0);
1518
1519	    out[0] = adjust_automodify_address
1520	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1521
1522	    if (GET_CODE (XEXP (offset, 1)) == REG)
1523	      {
1524		/* Can't adjust the postmodify to match.  Emit the
1525		   original, then a separate addition insn.  */
1526		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1527		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1528	      }
1529	    else
1530	      {
1531		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1532		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1533		  {
1534		    /* Again the postmodify cannot be made to match,
1535		       but in this case it's more efficient to get rid
1536		       of the postmodify entirely and fix up with an
1537		       add insn.  */
1538		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1539		    fixup = gen_adddi3
1540		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1541		  }
1542		else
1543		  {
1544		    /* Combined offset still fits in the displacement field.
1545		       (We cannot overflow it at the high end.)  */
1546		    out[1] = adjust_automodify_address
1547		      (in, DImode, gen_rtx_POST_MODIFY
1548		       (Pmode, base, gen_rtx_PLUS
1549			(Pmode, base,
1550			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1551		       8);
1552		  }
1553	      }
1554	    break;
1555
1556	  default:
1557	    gcc_unreachable ();
1558	  }
1559	break;
1560      }
1561
1562    default:
1563      gcc_unreachable ();
1564    }
1565
1566  return fixup;
1567}
1568
1569/* Split a TImode or TFmode move instruction after reload.
1570   This is used by *movtf_internal and *movti_internal.  */
1571void
1572ia64_split_tmode_move (rtx operands[])
1573{
1574  rtx in[2], out[2], insn;
1575  rtx fixup[2];
1576  bool dead = false;
1577  bool reversed = false;
1578
1579  /* It is possible for reload to decide to overwrite a pointer with
1580     the value it points to.  In that case we have to do the loads in
1581     the appropriate order so that the pointer is not destroyed too
1582     early.  Also we must not generate a postmodify for that second
1583     load, or rws_access_regno will die.  And we must not generate a
1584     postmodify for the second load if the destination register
1585     overlaps with the base register.  */
1586  if (GET_CODE (operands[1]) == MEM
1587      && reg_overlap_mentioned_p (operands[0], operands[1]))
1588    {
1589      rtx base = XEXP (operands[1], 0);
1590      while (GET_CODE (base) != REG)
1591	base = XEXP (base, 0);
1592
1593      if (REGNO (base) == REGNO (operands[0]))
1594	reversed = true;
1595
1596      if (refers_to_regno_p (REGNO (operands[0]),
1597			     REGNO (operands[0])+2,
1598			     base, 0))
1599	dead = true;
1600    }
1601  /* Another reason to do the moves in reversed order is if the first
1602     element of the target register pair is also the second element of
1603     the source register pair.  */
1604  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1605      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1606    reversed = true;
1607
1608  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1609  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1610
1611#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1612  if (GET_CODE (EXP) == MEM						\
1613      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1614	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1615	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1616    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1617
1618  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
1619  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1620  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1621
1622  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
1623  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1624  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1625
1626  if (fixup[0])
1627    emit_insn (fixup[0]);
1628  if (fixup[1])
1629    emit_insn (fixup[1]);
1630
1631#undef MAYBE_ADD_REG_INC_NOTE
1632}
1633
1634/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1635   through memory plus an extra GR scratch register.  Except that you can
1636   either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
1637   from SECONDARY_RELOAD_CLASS, but not both.
1638
1639   We got into problems in the first place by allowing a construct like
1640   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1641   This solution attempts to prevent this situation from occurring.  When
1642   we see something like the above, we spill the inner register to memory.  */
1643
1644static rtx
1645spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
1646{
1647  if (GET_CODE (in) == SUBREG
1648      && GET_MODE (SUBREG_REG (in)) == TImode
1649      && GET_CODE (SUBREG_REG (in)) == REG)
1650    {
1651      rtx memt = assign_stack_temp (TImode, 16);
1652      emit_move_insn (memt, SUBREG_REG (in));
1653      return adjust_address (memt, mode, 0);
1654    }
1655  else if (force && GET_CODE (in) == REG)
1656    {
1657      rtx memx = assign_stack_temp (mode, 16);
1658      emit_move_insn (memx, in);
1659      return memx;
1660    }
1661  else
1662    return in;
1663}
1664
1665/* Expand the movxf or movrf pattern (MODE says which) with the given
1666   OPERANDS, returning true if the pattern should then invoke
1667   DONE.  */
1668
1669bool
1670ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
1671{
1672  rtx op0 = operands[0];
1673
1674  if (GET_CODE (op0) == SUBREG)
1675    op0 = SUBREG_REG (op0);
1676
1677  /* We must support XFmode loads into general registers for stdarg/vararg,
1678     unprototyped calls, and a rare case where a long double is passed as
1679     an argument after a float HFA fills the FP registers.  We split them into
1680     DImode loads for convenience.  We also need to support XFmode stores
1681     for the last case.  This case does not happen for stdarg/vararg routines,
1682     because we do a block store to memory of unnamed arguments.  */
1683
1684  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1685    {
1686      rtx out[2];
1687
1688      /* We're hoping to transform everything that deals with XFmode
1689	 quantities and GR registers early in the compiler.  */
1690      gcc_assert (can_create_pseudo_p ());
1691
1692      /* Struct to register can just use TImode instead.  */
1693      if ((GET_CODE (operands[1]) == SUBREG
1694	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1695	  || (GET_CODE (operands[1]) == REG
1696	      && GR_REGNO_P (REGNO (operands[1]))))
1697	{
1698	  rtx op1 = operands[1];
1699
1700	  if (GET_CODE (op1) == SUBREG)
1701	    op1 = SUBREG_REG (op1);
1702	  else
1703	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1704
1705	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1706	  return true;
1707	}
1708
1709      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1710	{
1711	  /* Don't word-swap when reading in the constant.  */
1712	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1713			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1714					   0, mode));
1715	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1716			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1717					   0, mode));
1718	  return true;
1719	}
1720
1721      /* If the quantity is in a register not known to be GR, spill it.  */
1722      if (register_operand (operands[1], mode))
1723	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1724
1725      gcc_assert (GET_CODE (operands[1]) == MEM);
1726
1727      /* Don't word-swap when reading in the value.  */
1728      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1729      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1730
1731      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1732      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1733      return true;
1734    }
1735
1736  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1737    {
1738      /* We're hoping to transform everything that deals with XFmode
1739	 quantities and GR registers early in the compiler.  */
1740      gcc_assert (can_create_pseudo_p ());
1741
1742      /* Op0 can't be a GR_REG here, as that case is handled above.
1743	 If op0 is a register, then we spill op1, so that we now have a
1744	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1745	 to force the spill.  */
1746      if (register_operand (operands[0], mode))
1747	{
1748	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1749	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1750	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1751	}
1752
1753      else
1754	{
1755	  rtx in[2];
1756
1757	  gcc_assert (GET_CODE (operands[0]) == MEM);
1758
1759	  /* Don't word-swap when writing out the value.  */
1760	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1761	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1762
1763	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1764	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1765	  return true;
1766	}
1767    }
1768
1769  if (!reload_in_progress && !reload_completed)
1770    {
1771      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1772
1773      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1774	{
1775	  rtx memt, memx, in = operands[1];
1776	  if (CONSTANT_P (in))
1777	    in = validize_mem (force_const_mem (mode, in));
1778	  if (GET_CODE (in) == MEM)
1779	    memt = adjust_address (in, TImode, 0);
1780	  else
1781	    {
1782	      memt = assign_stack_temp (TImode, 16);
1783	      memx = adjust_address (memt, mode, 0);
1784	      emit_move_insn (memx, in);
1785	    }
1786	  emit_move_insn (op0, memt);
1787	  return true;
1788	}
1789
1790      if (!ia64_move_ok (operands[0], operands[1]))
1791	operands[1] = force_reg (mode, operands[1]);
1792    }
1793
1794  return false;
1795}
1796
1797/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1798   with the expression that holds the compare result (in VOIDmode).  */
1799
1800static GTY(()) rtx cmptf_libfunc;
1801
1802void
1803ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1804{
1805  enum rtx_code code = GET_CODE (*expr);
1806  rtx cmp;
1807
1808  /* If we have a BImode input, then we already have a compare result, and
1809     do not need to emit another comparison.  */
1810  if (GET_MODE (*op0) == BImode)
1811    {
1812      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1813      cmp = *op0;
1814    }
1815  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1816     magic number as its third argument, that indicates what to do.
1817     The return value is an integer to be compared against zero.  */
1818  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1819    {
1820      enum qfcmp_magic {
1821	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
1822	QCMP_UNORD = 2,
1823	QCMP_EQ = 4,
1824	QCMP_LT = 8,
1825	QCMP_GT = 16
1826      };
1827      int magic;
1828      enum rtx_code ncode;
1829      rtx ret;
1830
1831      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1832      switch (code)
1833	{
1834	  /* 1 = equal, 0 = not equal.  Equality operators do
1835	     not raise FP_INVALID when given a NaN operand.  */
1836	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1837	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1838	  /* isunordered() from C99.  */
1839	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1840	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1841	  /* Relational operators raise FP_INVALID when given
1842	     a NaN operand.  */
1843	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1844	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1845	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1846	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1847          /* Unordered relational operators do not raise FP_INVALID
1848	     when given a NaN operand.  */
1849	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
1850	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1851	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
1852	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
1853	  /* Not supported.  */
1854	case UNEQ:
1855	case LTGT:
1856	default: gcc_unreachable ();
1857	}
1858
1859      start_sequence ();
1860
1861      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
1862				     *op0, TFmode, *op1, TFmode,
1863				     GEN_INT (magic), DImode);
1864      cmp = gen_reg_rtx (BImode);
1865      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
1866						   ret, const0_rtx)));
1867
1868      rtx_insn *insns = get_insns ();
1869      end_sequence ();
1870
1871      emit_libcall_block (insns, cmp, cmp,
1872			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1873      code = NE;
1874    }
1875  else
1876    {
1877      cmp = gen_reg_rtx (BImode);
1878      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1879      code = NE;
1880    }
1881
1882  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1883  *op0 = cmp;
1884  *op1 = const0_rtx;
1885}
1886
1887/* Generate an integral vector comparison.  Return true if the condition has
1888   been reversed, and so the sense of the comparison should be inverted.  */
1889
1890static bool
1891ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
1892			    rtx dest, rtx op0, rtx op1)
1893{
1894  bool negate = false;
1895  rtx x;
1896
1897  /* Canonicalize the comparison to EQ, GT, GTU.  */
1898  switch (code)
1899    {
1900    case EQ:
1901    case GT:
1902    case GTU:
1903      break;
1904
1905    case NE:
1906    case LE:
1907    case LEU:
1908      code = reverse_condition (code);
1909      negate = true;
1910      break;
1911
1912    case GE:
1913    case GEU:
1914      code = reverse_condition (code);
1915      negate = true;
1916      /* FALLTHRU */
1917
1918    case LT:
1919    case LTU:
1920      code = swap_condition (code);
1921      x = op0, op0 = op1, op1 = x;
1922      break;
1923
1924    default:
1925      gcc_unreachable ();
1926    }
1927
1928  /* Unsigned parallel compare is not supported by the hardware.  Play some
1929     tricks to turn this into a signed comparison against 0.  */
1930  if (code == GTU)
1931    {
1932      switch (mode)
1933	{
1934	case E_V2SImode:
1935	  {
1936	    rtx t1, t2, mask;
1937
1938	    /* Subtract (-(INT MAX) - 1) from both operands to make
1939	       them signed.  */
1940	    mask = gen_int_mode (0x80000000, SImode);
1941	    mask = gen_const_vec_duplicate (V2SImode, mask);
1942	    mask = force_reg (mode, mask);
1943	    t1 = gen_reg_rtx (mode);
1944	    emit_insn (gen_subv2si3 (t1, op0, mask));
1945	    t2 = gen_reg_rtx (mode);
1946	    emit_insn (gen_subv2si3 (t2, op1, mask));
1947	    op0 = t1;
1948	    op1 = t2;
1949	    code = GT;
1950	  }
1951	  break;
1952
1953	case E_V8QImode:
1954	case E_V4HImode:
1955	  /* Perform a parallel unsigned saturating subtraction.  */
1956	  x = gen_reg_rtx (mode);
1957	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));
1958
1959	  code = EQ;
1960	  op0 = x;
1961	  op1 = CONST0_RTX (mode);
1962	  negate = !negate;
1963	  break;
1964
1965	default:
1966	  gcc_unreachable ();
1967	}
1968    }
1969
1970  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1971  emit_insn (gen_rtx_SET (dest, x));
1972
1973  return negate;
1974}
1975
1976/* Emit an integral vector conditional move.  */
1977
1978void
1979ia64_expand_vecint_cmov (rtx operands[])
1980{
1981  machine_mode mode = GET_MODE (operands[0]);
1982  enum rtx_code code = GET_CODE (operands[3]);
1983  bool negate;
1984  rtx cmp, x, ot, of;
1985
1986  cmp = gen_reg_rtx (mode);
1987  negate = ia64_expand_vecint_compare (code, mode, cmp,
1988				       operands[4], operands[5]);
1989
1990  ot = operands[1+negate];
1991  of = operands[2-negate];
1992
1993  if (ot == CONST0_RTX (mode))
1994    {
1995      if (of == CONST0_RTX (mode))
1996	{
1997	  emit_move_insn (operands[0], ot);
1998	  return;
1999	}
2000
2001      x = gen_rtx_NOT (mode, cmp);
2002      x = gen_rtx_AND (mode, x, of);
2003      emit_insn (gen_rtx_SET (operands[0], x));
2004    }
2005  else if (of == CONST0_RTX (mode))
2006    {
2007      x = gen_rtx_AND (mode, cmp, ot);
2008      emit_insn (gen_rtx_SET (operands[0], x));
2009    }
2010  else
2011    {
2012      rtx t, f;
2013
2014      t = gen_reg_rtx (mode);
2015      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
2016      emit_insn (gen_rtx_SET (t, x));
2017
2018      f = gen_reg_rtx (mode);
2019      x = gen_rtx_NOT (mode, cmp);
2020      x = gen_rtx_AND (mode, x, operands[2-negate]);
2021      emit_insn (gen_rtx_SET (f, x));
2022
2023      x = gen_rtx_IOR (mode, t, f);
2024      emit_insn (gen_rtx_SET (operands[0], x));
2025    }
2026}
2027
2028/* Emit an integral vector min or max operation.  Return true if all done.  */
2029
2030bool
2031ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
2032			   rtx operands[])
2033{
2034  rtx xops[6];
2035
2036  /* These four combinations are supported directly.  */
2037  if (mode == V8QImode && (code == UMIN || code == UMAX))
2038    return false;
2039  if (mode == V4HImode && (code == SMIN || code == SMAX))
2040    return false;
2041
2042  /* This combination can be implemented with only saturating subtraction.  */
2043  if (mode == V4HImode && code == UMAX)
2044    {
2045      rtx x, tmp = gen_reg_rtx (mode);
2046
2047      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
2048      emit_insn (gen_rtx_SET (tmp, x));
2049
2050      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
2051      return true;
2052    }
2053
2054  /* Everything else implemented via vector comparisons.  */
2055  xops[0] = operands[0];
2056  xops[4] = xops[1] = operands[1];
2057  xops[5] = xops[2] = operands[2];
2058
2059  switch (code)
2060    {
2061    case UMIN:
2062      code = LTU;
2063      break;
2064    case UMAX:
2065      code = GTU;
2066      break;
2067    case SMIN:
2068      code = LT;
2069      break;
2070    case SMAX:
2071      code = GT;
2072      break;
2073    default:
2074      gcc_unreachable ();
2075    }
2076  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
2077
2078  ia64_expand_vecint_cmov (xops);
2079  return true;
2080}
2081
2082/* The vectors LO and HI each contain N halves of a double-wide vector.
2083   Reassemble either the first N/2 or the second N/2 elements.  */
2084
2085void
2086ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
2087{
2088  machine_mode vmode = GET_MODE (lo);
2089  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
2090  struct expand_vec_perm_d d;
2091  bool ok;
2092
2093  d.target = gen_lowpart (vmode, out);
2094  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
2095  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
2096  d.vmode = vmode;
2097  d.nelt = nelt;
2098  d.one_operand_p = false;
2099  d.testing_p = false;
2100
2101  high = (highp ? nelt / 2 : 0);
2102  for (i = 0; i < nelt / 2; ++i)
2103    {
2104      d.perm[i * 2] = i + high;
2105      d.perm[i * 2 + 1] = i + high + nelt;
2106    }
2107
2108  ok = ia64_expand_vec_perm_const_1 (&d);
2109  gcc_assert (ok);
2110}
2111
2112/* Return a vector of the sign-extension of VEC.  */
2113
2114static rtx
2115ia64_unpack_sign (rtx vec, bool unsignedp)
2116{
2117  machine_mode mode = GET_MODE (vec);
2118  rtx zero = CONST0_RTX (mode);
2119
2120  if (unsignedp)
2121    return zero;
2122  else
2123    {
2124      rtx sign = gen_reg_rtx (mode);
2125      bool neg;
2126
2127      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
2128      gcc_assert (!neg);
2129
2130      return sign;
2131    }
2132}
2133
2134/* Emit an integral vector unpack operation.  */
2135
2136void
2137ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
2138{
2139  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
2140  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
2141}
2142
2143/* Emit an integral vector widening sum operations.  */
2144
2145void
2146ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
2147{
2148  machine_mode wmode;
2149  rtx l, h, t, sign;
2150
2151  sign = ia64_unpack_sign (operands[1], unsignedp);
2152
2153  wmode = GET_MODE (operands[0]);
2154  l = gen_reg_rtx (wmode);
2155  h = gen_reg_rtx (wmode);
2156
2157  ia64_unpack_assemble (l, operands[1], sign, false);
2158  ia64_unpack_assemble (h, operands[1], sign, true);
2159
2160  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
2161  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
2162  if (t != operands[0])
2163    emit_move_insn (operands[0], t);
2164}
2165
2166/* Emit the appropriate sequence for a call.  */
2167
2168void
2169ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
2170		  int sibcall_p)
2171{
2172  rtx insn, b0;
2173
2174  addr = XEXP (addr, 0);
2175  addr = convert_memory_address (DImode, addr);
2176  b0 = gen_rtx_REG (DImode, R_BR (0));
2177
2178  /* ??? Should do this for functions known to bind local too.  */
2179  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2180    {
2181      if (sibcall_p)
2182	insn = gen_sibcall_nogp (addr);
2183      else if (! retval)
2184	insn = gen_call_nogp (addr, b0);
2185      else
2186	insn = gen_call_value_nogp (retval, addr, b0);
2187      insn = emit_call_insn (insn);
2188    }
2189  else
2190    {
2191      if (sibcall_p)
2192	insn = gen_sibcall_gp (addr);
2193      else if (! retval)
2194	insn = gen_call_gp (addr, b0);
2195      else
2196	insn = gen_call_value_gp (retval, addr, b0);
2197      insn = emit_call_insn (insn);
2198
2199      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2200    }
2201
2202  if (sibcall_p)
2203    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2204
2205  if (TARGET_ABI_OPEN_VMS)
2206    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2207	     gen_rtx_REG (DImode, GR_REG (25)));
2208}
2209
2210static void
2211reg_emitted (enum ia64_frame_regs r)
2212{
2213  if (emitted_frame_related_regs[r] == 0)
2214    emitted_frame_related_regs[r] = current_frame_info.r[r];
2215  else
2216    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2217}
2218
2219static int
2220get_reg (enum ia64_frame_regs r)
2221{
2222  reg_emitted (r);
2223  return current_frame_info.r[r];
2224}
2225
2226static bool
2227is_emitted (int regno)
2228{
2229  unsigned int r;
2230
2231  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2232    if (emitted_frame_related_regs[r] == regno)
2233      return true;
2234  return false;
2235}
2236
2237void
2238ia64_reload_gp (void)
2239{
2240  rtx tmp;
2241
2242  if (current_frame_info.r[reg_save_gp])
2243    {
2244      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2245    }
2246  else
2247    {
2248      HOST_WIDE_INT offset;
2249      rtx offset_r;
2250
2251      offset = (current_frame_info.spill_cfa_off
2252	        + current_frame_info.spill_size);
2253      if (frame_pointer_needed)
2254        {
2255          tmp = hard_frame_pointer_rtx;
2256          offset = -offset;
2257        }
2258      else
2259        {
2260          tmp = stack_pointer_rtx;
2261          offset = current_frame_info.total_size - offset;
2262        }
2263
2264      offset_r = GEN_INT (offset);
2265      if (satisfies_constraint_I (offset_r))
2266        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2267      else
2268        {
2269          emit_move_insn (pic_offset_table_rtx, offset_r);
2270          emit_insn (gen_adddi3 (pic_offset_table_rtx,
2271			         pic_offset_table_rtx, tmp));
2272        }
2273
2274      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2275    }
2276
2277  emit_move_insn (pic_offset_table_rtx, tmp);
2278}
2279
2280void
2281ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2282		 rtx scratch_b, int noreturn_p, int sibcall_p)
2283{
2284  rtx insn;
2285  bool is_desc = false;
2286
2287  /* If we find we're calling through a register, then we're actually
2288     calling through a descriptor, so load up the values.  */
2289  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2290    {
2291      rtx tmp;
2292      bool addr_dead_p;
2293
2294      /* ??? We are currently constrained to *not* use peep2, because
2295	 we can legitimately change the global lifetime of the GP
2296	 (in the form of killing where previously live).  This is
2297	 because a call through a descriptor doesn't use the previous
2298	 value of the GP, while a direct call does, and we do not
2299	 commit to either form until the split here.
2300
2301	 That said, this means that we lack precise life info for
2302	 whether ADDR is dead after this call.  This is not terribly
2303	 important, since we can fix things up essentially for free
2304	 with the POST_DEC below, but it's nice to not use it when we
2305	 can immediately tell it's not necessary.  */
2306      addr_dead_p = ((noreturn_p || sibcall_p
2307		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2308					    REGNO (addr)))
2309		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2310
2311      /* Load the code address into scratch_b.  */
2312      tmp = gen_rtx_POST_INC (Pmode, addr);
2313      tmp = gen_rtx_MEM (Pmode, tmp);
2314      emit_move_insn (scratch_r, tmp);
2315      emit_move_insn (scratch_b, scratch_r);
2316
2317      /* Load the GP address.  If ADDR is not dead here, then we must
2318	 revert the change made above via the POST_INCREMENT.  */
2319      if (!addr_dead_p)
2320	tmp = gen_rtx_POST_DEC (Pmode, addr);
2321      else
2322	tmp = addr;
2323      tmp = gen_rtx_MEM (Pmode, tmp);
2324      emit_move_insn (pic_offset_table_rtx, tmp);
2325
2326      is_desc = true;
2327      addr = scratch_b;
2328    }
2329
2330  if (sibcall_p)
2331    insn = gen_sibcall_nogp (addr);
2332  else if (retval)
2333    insn = gen_call_value_nogp (retval, addr, retaddr);
2334  else
2335    insn = gen_call_nogp (addr, retaddr);
2336  emit_call_insn (insn);
2337
2338  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2339    ia64_reload_gp ();
2340}
2341
2342/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2343
2344   This differs from the generic code in that we know about the zero-extending
2345   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2346   also know that ld.acq+cmpxchg.rel equals a full barrier.
2347
2348   The loop we want to generate looks like
2349
2350	cmp_reg = mem;
2351      label:
2352        old_reg = cmp_reg;
2353	new_reg = cmp_reg op val;
2354	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2355	if (cmp_reg != old_reg)
2356	  goto label;
2357
2358   Note that we only do the plain load from memory once.  Subsequent
2359   iterations use the value loaded by the compare-and-swap pattern.  */
2360
2361void
2362ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2363		       rtx old_dst, rtx new_dst, enum memmodel model)
2364{
2365  machine_mode mode = GET_MODE (mem);
2366  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2367  enum insn_code icode;
2368
2369  /* Special case for using fetchadd.  */
2370  if ((mode == SImode || mode == DImode)
2371      && (code == PLUS || code == MINUS)
2372      && fetchadd_operand (val, mode))
2373    {
2374      if (code == MINUS)
2375	val = GEN_INT (-INTVAL (val));
2376
2377      if (!old_dst)
2378        old_dst = gen_reg_rtx (mode);
2379
2380      switch (model)
2381	{
2382	case MEMMODEL_ACQ_REL:
2383	case MEMMODEL_SEQ_CST:
2384	case MEMMODEL_SYNC_SEQ_CST:
2385	  emit_insn (gen_memory_barrier ());
2386	  /* FALLTHRU */
2387	case MEMMODEL_RELAXED:
2388	case MEMMODEL_ACQUIRE:
2389	case MEMMODEL_SYNC_ACQUIRE:
2390	case MEMMODEL_CONSUME:
2391	  if (mode == SImode)
2392	    icode = CODE_FOR_fetchadd_acq_si;
2393	  else
2394	    icode = CODE_FOR_fetchadd_acq_di;
2395	  break;
2396	case MEMMODEL_RELEASE:
2397	case MEMMODEL_SYNC_RELEASE:
2398	  if (mode == SImode)
2399	    icode = CODE_FOR_fetchadd_rel_si;
2400	  else
2401	    icode = CODE_FOR_fetchadd_rel_di;
2402	  break;
2403
2404	default:
2405	  gcc_unreachable ();
2406	}
2407
2408      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2409
2410      if (new_dst)
2411	{
2412	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2413					 true, OPTAB_WIDEN);
2414	  if (new_reg != new_dst)
2415	    emit_move_insn (new_dst, new_reg);
2416	}
2417      return;
2418    }
2419
2420  /* Because of the volatile mem read, we get an ld.acq, which is the
2421     front half of the full barrier.  The end half is the cmpxchg.rel.
2422     For relaxed and release memory models, we don't need this.  But we
2423     also don't bother trying to prevent it either.  */
2424  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
2425	      || MEM_VOLATILE_P (mem));
2426
2427  old_reg = gen_reg_rtx (DImode);
2428  cmp_reg = gen_reg_rtx (DImode);
2429  label = gen_label_rtx ();
2430
2431  if (mode != DImode)
2432    {
2433      val = simplify_gen_subreg (DImode, val, mode, 0);
2434      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2435    }
2436  else
2437    emit_move_insn (cmp_reg, mem);
2438
2439  emit_label (label);
2440
2441  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2442  emit_move_insn (old_reg, cmp_reg);
2443  emit_move_insn (ar_ccv, cmp_reg);
2444
2445  if (old_dst)
2446    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2447
2448  new_reg = cmp_reg;
2449  if (code == NOT)
2450    {
2451      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2452				     true, OPTAB_DIRECT);
2453      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2454    }
2455  else
2456    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2457				   true, OPTAB_DIRECT);
2458
2459  if (mode != DImode)
2460    new_reg = gen_lowpart (mode, new_reg);
2461  if (new_dst)
2462    emit_move_insn (new_dst, new_reg);
2463
2464  switch (model)
2465    {
2466    case MEMMODEL_RELAXED:
2467    case MEMMODEL_ACQUIRE:
2468    case MEMMODEL_SYNC_ACQUIRE:
2469    case MEMMODEL_CONSUME:
2470      switch (mode)
2471	{
2472	case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
2473	case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
2474	case E_SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
2475	case E_DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
2476	default:
2477	  gcc_unreachable ();
2478	}
2479      break;
2480
2481    case MEMMODEL_RELEASE:
2482    case MEMMODEL_SYNC_RELEASE:
2483    case MEMMODEL_ACQ_REL:
2484    case MEMMODEL_SEQ_CST:
2485    case MEMMODEL_SYNC_SEQ_CST:
2486      switch (mode)
2487	{
2488	case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
2489	case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
2490	case E_SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
2491	case E_DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
2492	default:
2493	  gcc_unreachable ();
2494	}
2495      break;
2496
2497    default:
2498      gcc_unreachable ();
2499    }
2500
2501  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2502
2503  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2504}
2505
2506/* Begin the assembly file.  */
2507
2508static void
2509ia64_file_start (void)
2510{
2511  default_file_start ();
2512  emit_safe_across_calls ();
2513}
2514
2515void
2516emit_safe_across_calls (void)
2517{
2518  unsigned int rs, re;
2519  int out_state;
2520
2521  rs = 1;
2522  out_state = 0;
2523  while (1)
2524    {
2525      while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs)))
2526	rs++;
2527      if (rs >= 64)
2528	break;
2529      for (re = rs + 1;
2530	   re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++)
2531	continue;
2532      if (out_state == 0)
2533	{
2534	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2535	  out_state = 1;
2536	}
2537      else
2538	fputc (',', asm_out_file);
2539      if (re == rs + 1)
2540	fprintf (asm_out_file, "p%u", rs);
2541      else
2542	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2543      rs = re + 1;
2544    }
2545  if (out_state)
2546    fputc ('\n', asm_out_file);
2547}
2548
2549/* Globalize a declaration.  */
2550
2551static void
2552ia64_globalize_decl_name (FILE * stream, tree decl)
2553{
2554  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2555  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2556  if (version_attr)
2557    {
2558      tree v = TREE_VALUE (TREE_VALUE (version_attr));
2559      const char *p = TREE_STRING_POINTER (v);
2560      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2561    }
2562  targetm.asm_out.globalize_label (stream, name);
2563  if (TREE_CODE (decl) == FUNCTION_DECL)
2564    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2565}
2566
2567/* Helper function for ia64_compute_frame_size: find an appropriate general
2568   register to spill some special register to.  SPECIAL_SPILL_MASK contains
2569   bits in GR0 to GR31 that have already been allocated by this routine.
2570   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2571
2572static int
2573find_gr_spill (enum ia64_frame_regs r, int try_locals)
2574{
2575  int regno;
2576
2577  if (emitted_frame_related_regs[r] != 0)
2578    {
2579      regno = emitted_frame_related_regs[r];
2580      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2581	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2582        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2583      else if (crtl->is_leaf
2584               && regno >= GR_REG (1) && regno <= GR_REG (31))
2585        current_frame_info.gr_used_mask |= 1 << regno;
2586
2587      return regno;
2588    }
2589
2590  /* If this is a leaf function, first try an otherwise unused
2591     call-clobbered register.  */
2592  if (crtl->is_leaf)
2593    {
2594      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2595	if (! df_regs_ever_live_p (regno)
2596	    && call_used_or_fixed_reg_p (regno)
2597	    && ! fixed_regs[regno]
2598	    && ! global_regs[regno]
2599	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2600            && ! is_emitted (regno))
2601	  {
2602	    current_frame_info.gr_used_mask |= 1 << regno;
2603	    return regno;
2604	  }
2605    }
2606
2607  if (try_locals)
2608    {
2609      regno = current_frame_info.n_local_regs;
2610      /* If there is a frame pointer, then we can't use loc79, because
2611	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2612	 reg_name switching code in ia64_expand_prologue.  */
2613      while (regno < (80 - frame_pointer_needed))
2614	if (! is_emitted (LOC_REG (regno++)))
2615	  {
2616	    current_frame_info.n_local_regs = regno;
2617	    return LOC_REG (regno - 1);
2618	  }
2619    }
2620
2621  /* Failed to find a general register to spill to.  Must use stack.  */
2622  return 0;
2623}
2624
2625/* In order to make for nice schedules, we try to allocate every temporary
2626   to a different register.  We must of course stay away from call-saved,
2627   fixed, and global registers.  We must also stay away from registers
2628   allocated in current_frame_info.gr_used_mask, since those include regs
2629   used all through the prologue.
2630
2631   Any register allocated here must be used immediately.  The idea is to
2632   aid scheduling, not to solve data flow problems.  */
2633
2634static int last_scratch_gr_reg;
2635
2636static int
2637next_scratch_gr_reg (void)
2638{
2639  int i, regno;
2640
2641  for (i = 0; i < 32; ++i)
2642    {
2643      regno = (last_scratch_gr_reg + i + 1) & 31;
2644      if (call_used_or_fixed_reg_p (regno)
2645	  && ! fixed_regs[regno]
2646	  && ! global_regs[regno]
2647	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2648	{
2649	  last_scratch_gr_reg = regno;
2650	  return regno;
2651	}
2652    }
2653
2654  /* There must be _something_ available.  */
2655  gcc_unreachable ();
2656}
2657
2658/* Helper function for ia64_compute_frame_size, called through
2659   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2660
2661static void
2662mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2663{
2664  unsigned int regno = REGNO (reg);
2665  if (regno < 32)
2666    {
2667      unsigned int i, n = REG_NREGS (reg);
2668      for (i = 0; i < n; ++i)
2669	current_frame_info.gr_used_mask |= 1 << (regno + i);
2670    }
2671}
2672
2673
2674/* Returns the number of bytes offset between the frame pointer and the stack
2675   pointer for the current function.  SIZE is the number of bytes of space
2676   needed for local variables.  */
2677
2678static void
2679ia64_compute_frame_size (HOST_WIDE_INT size)
2680{
2681  HOST_WIDE_INT total_size;
2682  HOST_WIDE_INT spill_size = 0;
2683  HOST_WIDE_INT extra_spill_size = 0;
2684  HOST_WIDE_INT pretend_args_size;
2685  HARD_REG_SET mask;
2686  int n_spilled = 0;
2687  int spilled_gr_p = 0;
2688  int spilled_fr_p = 0;
2689  unsigned int regno;
2690  int min_regno;
2691  int max_regno;
2692  int i;
2693
2694  if (current_frame_info.initialized)
2695    return;
2696
2697  memset (&current_frame_info, 0, sizeof current_frame_info);
2698  CLEAR_HARD_REG_SET (mask);
2699
2700  /* Don't allocate scratches to the return register.  */
2701  diddle_return_value (mark_reg_gr_used_mask, NULL);
2702
2703  /* Don't allocate scratches to the EH scratch registers.  */
2704  if (cfun->machine->ia64_eh_epilogue_sp)
2705    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2706  if (cfun->machine->ia64_eh_epilogue_bsp)
2707    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2708
2709  /* Static stack checking uses r2 and r3.  */
2710  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
2711      || flag_stack_clash_protection)
2712    current_frame_info.gr_used_mask |= 0xc;
2713
2714  /* Find the size of the register stack frame.  We have only 80 local
2715     registers, because we reserve 8 for the inputs and 8 for the
2716     outputs.  */
2717
2718  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2719     since we'll be adjusting that down later.  */
2720  regno = LOC_REG (78) + ! frame_pointer_needed;
2721  for (; regno >= LOC_REG (0); regno--)
2722    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2723      break;
2724  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2725
2726  /* For functions marked with the syscall_linkage attribute, we must mark
2727     all eight input registers as in use, so that locals aren't visible to
2728     the caller.  */
2729
2730  if (cfun->machine->n_varargs > 0
2731      || lookup_attribute ("syscall_linkage",
2732			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2733    current_frame_info.n_input_regs = 8;
2734  else
2735    {
2736      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2737	if (df_regs_ever_live_p (regno))
2738	  break;
2739      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2740    }
2741
2742  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2743    if (df_regs_ever_live_p (regno))
2744      break;
2745  i = regno - OUT_REG (0) + 1;
2746
2747#ifndef PROFILE_HOOK
2748  /* When -p profiling, we need one output register for the mcount argument.
2749     Likewise for -a profiling for the bb_init_func argument.  For -ax
2750     profiling, we need two output registers for the two bb_init_trace_func
2751     arguments.  */
2752  if (crtl->profile)
2753    i = MAX (i, 1);
2754#endif
2755  current_frame_info.n_output_regs = i;
2756
2757  /* ??? No rotating register support yet.  */
2758  current_frame_info.n_rotate_regs = 0;
2759
2760  /* Discover which registers need spilling, and how much room that
2761     will take.  Begin with floating point and general registers,
2762     which will always wind up on the stack.  */
2763
2764  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2765    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2766      {
2767	SET_HARD_REG_BIT (mask, regno);
2768	spill_size += 16;
2769	n_spilled += 1;
2770	spilled_fr_p = 1;
2771      }
2772
2773  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2774    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2775      {
2776	SET_HARD_REG_BIT (mask, regno);
2777	spill_size += 8;
2778	n_spilled += 1;
2779	spilled_gr_p = 1;
2780      }
2781
2782  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2783    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2784      {
2785	SET_HARD_REG_BIT (mask, regno);
2786	spill_size += 8;
2787	n_spilled += 1;
2788      }
2789
2790  /* Now come all special registers that might get saved in other
2791     general registers.  */
2792
2793  if (frame_pointer_needed)
2794    {
2795      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2796      /* If we did not get a register, then we take LOC79.  This is guaranteed
2797	 to be free, even if regs_ever_live is already set, because this is
2798	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2799	 as we don't count loc79 above.  */
2800      if (current_frame_info.r[reg_fp] == 0)
2801	{
2802	  current_frame_info.r[reg_fp] = LOC_REG (79);
2803	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2804	}
2805    }
2806
2807  if (! crtl->is_leaf)
2808    {
2809      /* Emit a save of BR0 if we call other functions.  Do this even
2810	 if this function doesn't return, as EH depends on this to be
2811	 able to unwind the stack.  */
2812      SET_HARD_REG_BIT (mask, BR_REG (0));
2813
2814      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2815      if (current_frame_info.r[reg_save_b0] == 0)
2816	{
2817	  extra_spill_size += 8;
2818	  n_spilled += 1;
2819	}
2820
2821      /* Similarly for ar.pfs.  */
2822      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2823      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2824      if (current_frame_info.r[reg_save_ar_pfs] == 0)
2825	{
2826	  extra_spill_size += 8;
2827	  n_spilled += 1;
2828	}
2829
2830      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2831	 registers are clobbered, so we fall back to the stack.  */
2832      current_frame_info.r[reg_save_gp]
2833	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2834      if (current_frame_info.r[reg_save_gp] == 0)
2835	{
2836	  SET_HARD_REG_BIT (mask, GR_REG (1));
2837	  spill_size += 8;
2838	  n_spilled += 1;
2839	}
2840    }
2841  else
2842    {
2843      if (df_regs_ever_live_p (BR_REG (0))
2844	  && ! call_used_or_fixed_reg_p (BR_REG (0)))
2845	{
2846	  SET_HARD_REG_BIT (mask, BR_REG (0));
2847	  extra_spill_size += 8;
2848	  n_spilled += 1;
2849	}
2850
2851      if (df_regs_ever_live_p (AR_PFS_REGNUM))
2852	{
2853	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2854 	  current_frame_info.r[reg_save_ar_pfs]
2855            = find_gr_spill (reg_save_ar_pfs, 1);
2856	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2857	    {
2858	      extra_spill_size += 8;
2859	      n_spilled += 1;
2860	    }
2861	}
2862    }
2863
2864  /* Unwind descriptor hackery: things are most efficient if we allocate
2865     consecutive GR save registers for RP, PFS, FP in that order. However,
2866     it is absolutely critical that FP get the only hard register that's
2867     guaranteed to be free, so we allocated it first.  If all three did
2868     happen to be allocated hard regs, and are consecutive, rearrange them
2869     into the preferred order now.
2870
2871     If we have already emitted code for any of those registers,
2872     then it's already too late to change.  */
2873  min_regno = MIN (current_frame_info.r[reg_fp],
2874		   MIN (current_frame_info.r[reg_save_b0],
2875			current_frame_info.r[reg_save_ar_pfs]));
2876  max_regno = MAX (current_frame_info.r[reg_fp],
2877		   MAX (current_frame_info.r[reg_save_b0],
2878			current_frame_info.r[reg_save_ar_pfs]));
2879  if (min_regno > 0
2880      && min_regno + 2 == max_regno
2881      && (current_frame_info.r[reg_fp] == min_regno + 1
2882	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2883	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2884      && (emitted_frame_related_regs[reg_save_b0] == 0
2885	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2886      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2887	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2888      && (emitted_frame_related_regs[reg_fp] == 0
2889	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2890    {
2891      current_frame_info.r[reg_save_b0] = min_regno;
2892      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2893      current_frame_info.r[reg_fp] = min_regno + 2;
2894    }
2895
2896  /* See if we need to store the predicate register block.  */
2897  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2898    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
2899      break;
2900  if (regno <= PR_REG (63))
2901    {
2902      SET_HARD_REG_BIT (mask, PR_REG (0));
2903      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2904      if (current_frame_info.r[reg_save_pr] == 0)
2905	{
2906	  extra_spill_size += 8;
2907	  n_spilled += 1;
2908	}
2909
2910      /* ??? Mark them all as used so that register renaming and such
2911	 are free to use them.  */
2912      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2913	df_set_regs_ever_live (regno, true);
2914    }
2915
2916  /* If we're forced to use st8.spill, we're forced to save and restore
2917     ar.unat as well.  The check for existing liveness allows inline asm
2918     to touch ar.unat.  */
2919  if (spilled_gr_p || cfun->machine->n_varargs
2920      || df_regs_ever_live_p (AR_UNAT_REGNUM))
2921    {
2922      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2923      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2924      current_frame_info.r[reg_save_ar_unat]
2925        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2926      if (current_frame_info.r[reg_save_ar_unat] == 0)
2927	{
2928	  extra_spill_size += 8;
2929	  n_spilled += 1;
2930	}
2931    }
2932
2933  if (df_regs_ever_live_p (AR_LC_REGNUM))
2934    {
2935      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2936      current_frame_info.r[reg_save_ar_lc]
2937        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2938      if (current_frame_info.r[reg_save_ar_lc] == 0)
2939	{
2940	  extra_spill_size += 8;
2941	  n_spilled += 1;
2942	}
2943    }
2944
2945  /* If we have an odd number of words of pretend arguments written to
2946     the stack, then the FR save area will be unaligned.  We round the
2947     size of this area up to keep things 16 byte aligned.  */
2948  if (spilled_fr_p)
2949    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2950  else
2951    pretend_args_size = crtl->args.pretend_args_size;
2952
2953  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2954		+ crtl->outgoing_args_size);
2955  total_size = IA64_STACK_ALIGN (total_size);
2956
2957  /* We always use the 16-byte scratch area provided by the caller, but
2958     if we are a leaf function, there's no one to which we need to provide
2959     a scratch area.  However, if the function allocates dynamic stack space,
2960     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
2961     so we need to cope.  */
2962  if (crtl->is_leaf && !cfun->calls_alloca)
2963    total_size = MAX (0, total_size - 16);
2964
2965  current_frame_info.total_size = total_size;
2966  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2967  current_frame_info.spill_size = spill_size;
2968  current_frame_info.extra_spill_size = extra_spill_size;
2969  current_frame_info.mask = mask;
2970  current_frame_info.n_spilled = n_spilled;
2971  current_frame_info.initialized = reload_completed;
2972}
2973
2974/* Worker function for TARGET_CAN_ELIMINATE.  */
2975
2976bool
2977ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2978{
2979  return (to == BR_REG (0) ? crtl->is_leaf : true);
2980}
2981
2982/* Compute the initial difference between the specified pair of registers.  */
2983
2984HOST_WIDE_INT
2985ia64_initial_elimination_offset (int from, int to)
2986{
2987  HOST_WIDE_INT offset;
2988
2989  ia64_compute_frame_size (get_frame_size ());
2990  switch (from)
2991    {
2992    case FRAME_POINTER_REGNUM:
2993      switch (to)
2994	{
2995	case HARD_FRAME_POINTER_REGNUM:
2996	  offset = -current_frame_info.total_size;
2997	  if (!crtl->is_leaf || cfun->calls_alloca)
2998	    offset += 16 + crtl->outgoing_args_size;
2999	  break;
3000
3001	case STACK_POINTER_REGNUM:
3002	  offset = 0;
3003	  if (!crtl->is_leaf || cfun->calls_alloca)
3004	    offset += 16 + crtl->outgoing_args_size;
3005	  break;
3006
3007	default:
3008	  gcc_unreachable ();
3009	}
3010      break;
3011
3012    case ARG_POINTER_REGNUM:
3013      /* Arguments start above the 16 byte save area, unless stdarg
3014	 in which case we store through the 16 byte save area.  */
3015      switch (to)
3016	{
3017	case HARD_FRAME_POINTER_REGNUM:
3018	  offset = 16 - crtl->args.pretend_args_size;
3019	  break;
3020
3021	case STACK_POINTER_REGNUM:
3022	  offset = (current_frame_info.total_size
3023		    + 16 - crtl->args.pretend_args_size);
3024	  break;
3025
3026	default:
3027	  gcc_unreachable ();
3028	}
3029      break;
3030
3031    default:
3032      gcc_unreachable ();
3033    }
3034
3035  return offset;
3036}
3037
3038/* If there are more than a trivial number of register spills, we use
3039   two interleaved iterators so that we can get two memory references
3040   per insn group.
3041
3042   In order to simplify things in the prologue and epilogue expanders,
3043   we use helper functions to fix up the memory references after the
3044   fact with the appropriate offsets to a POST_MODIFY memory mode.
3045   The following data structure tracks the state of the two iterators
3046   while insns are being emitted.  */
3047
3048struct spill_fill_data
3049{
3050  rtx_insn *init_after;		/* point at which to emit initializations */
3051  rtx init_reg[2];		/* initial base register */
3052  rtx iter_reg[2];		/* the iterator registers */
3053  rtx *prev_addr[2];		/* address of last memory use */
3054  rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
3055  HOST_WIDE_INT prev_off[2];	/* last offset */
3056  int n_iter;			/* number of iterators in use */
3057  int next_iter;		/* next iterator to use */
3058  unsigned int save_gr_used_mask;
3059};
3060
3061static struct spill_fill_data spill_fill_data;
3062
3063static void
3064setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
3065{
3066  int i;
3067
3068  spill_fill_data.init_after = get_last_insn ();
3069  spill_fill_data.init_reg[0] = init_reg;
3070  spill_fill_data.init_reg[1] = init_reg;
3071  spill_fill_data.prev_addr[0] = NULL;
3072  spill_fill_data.prev_addr[1] = NULL;
3073  spill_fill_data.prev_insn[0] = NULL;
3074  spill_fill_data.prev_insn[1] = NULL;
3075  spill_fill_data.prev_off[0] = cfa_off;
3076  spill_fill_data.prev_off[1] = cfa_off;
3077  spill_fill_data.next_iter = 0;
3078  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
3079
3080  spill_fill_data.n_iter = 1 + (n_spills > 2);
3081  for (i = 0; i < spill_fill_data.n_iter; ++i)
3082    {
3083      int regno = next_scratch_gr_reg ();
3084      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
3085      current_frame_info.gr_used_mask |= 1 << regno;
3086    }
3087}
3088
3089static void
3090finish_spill_pointers (void)
3091{
3092  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
3093}
3094
3095static rtx
3096spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
3097{
3098  int iter = spill_fill_data.next_iter;
3099  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
3100  rtx disp_rtx = GEN_INT (disp);
3101  rtx mem;
3102
3103  if (spill_fill_data.prev_addr[iter])
3104    {
3105      if (satisfies_constraint_N (disp_rtx))
3106	{
3107	  *spill_fill_data.prev_addr[iter]
3108	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
3109				   gen_rtx_PLUS (DImode,
3110						 spill_fill_data.iter_reg[iter],
3111						 disp_rtx));
3112	  add_reg_note (spill_fill_data.prev_insn[iter],
3113			REG_INC, spill_fill_data.iter_reg[iter]);
3114	}
3115      else
3116	{
3117	  /* ??? Could use register post_modify for loads.  */
3118	  if (!satisfies_constraint_I (disp_rtx))
3119	    {
3120	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3121	      emit_move_insn (tmp, disp_rtx);
3122	      disp_rtx = tmp;
3123	    }
3124	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3125				 spill_fill_data.iter_reg[iter], disp_rtx));
3126	}
3127    }
3128  /* Micro-optimization: if we've created a frame pointer, it's at
3129     CFA 0, which may allow the real iterator to be initialized lower,
3130     slightly increasing parallelism.  Also, if there are few saves
3131     it may eliminate the iterator entirely.  */
3132  else if (disp == 0
3133	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
3134	   && frame_pointer_needed)
3135    {
3136      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
3137      set_mem_alias_set (mem, get_varargs_alias_set ());
3138      return mem;
3139    }
3140  else
3141    {
3142      rtx seq;
3143      rtx_insn *insn;
3144
3145      if (disp == 0)
3146	seq = gen_movdi (spill_fill_data.iter_reg[iter],
3147			 spill_fill_data.init_reg[iter]);
3148      else
3149	{
3150	  start_sequence ();
3151
3152	  if (!satisfies_constraint_I (disp_rtx))
3153	    {
3154	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
3155	      emit_move_insn (tmp, disp_rtx);
3156	      disp_rtx = tmp;
3157	    }
3158
3159	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
3160				 spill_fill_data.init_reg[iter],
3161				 disp_rtx));
3162
3163	  seq = get_insns ();
3164	  end_sequence ();
3165	}
3166
3167      /* Careful for being the first insn in a sequence.  */
3168      if (spill_fill_data.init_after)
3169	insn = emit_insn_after (seq, spill_fill_data.init_after);
3170      else
3171	{
3172	  rtx_insn *first = get_insns ();
3173	  if (first)
3174	    insn = emit_insn_before (seq, first);
3175	  else
3176	    insn = emit_insn (seq);
3177	}
3178      spill_fill_data.init_after = insn;
3179    }
3180
3181  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
3182
3183  /* ??? Not all of the spills are for varargs, but some of them are.
3184     The rest of the spills belong in an alias set of their own.  But
3185     it doesn't actually hurt to include them here.  */
3186  set_mem_alias_set (mem, get_varargs_alias_set ());
3187
3188  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
3189  spill_fill_data.prev_off[iter] = cfa_off;
3190
3191  if (++iter >= spill_fill_data.n_iter)
3192    iter = 0;
3193  spill_fill_data.next_iter = iter;
3194
3195  return mem;
3196}
3197
3198static void
3199do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
3200	  rtx frame_reg)
3201{
3202  int iter = spill_fill_data.next_iter;
3203  rtx mem;
3204  rtx_insn *insn;
3205
3206  mem = spill_restore_mem (reg, cfa_off);
3207  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
3208  spill_fill_data.prev_insn[iter] = insn;
3209
3210  if (frame_reg)
3211    {
3212      rtx base;
3213      HOST_WIDE_INT off;
3214
3215      RTX_FRAME_RELATED_P (insn) = 1;
3216
3217      /* Don't even pretend that the unwind code can intuit its way
3218	 through a pair of interleaved post_modify iterators.  Just
3219	 provide the correct answer.  */
3220
3221      if (frame_pointer_needed)
3222	{
3223	  base = hard_frame_pointer_rtx;
3224	  off = - cfa_off;
3225	}
3226      else
3227	{
3228	  base = stack_pointer_rtx;
3229	  off = current_frame_info.total_size - cfa_off;
3230	}
3231
3232      add_reg_note (insn, REG_CFA_OFFSET,
3233		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
3234					      plus_constant (Pmode,
3235							     base, off)),
3236				 frame_reg));
3237    }
3238}
3239
3240static void
3241do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3242{
3243  int iter = spill_fill_data.next_iter;
3244  rtx_insn *insn;
3245
3246  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3247				GEN_INT (cfa_off)));
3248  spill_fill_data.prev_insn[iter] = insn;
3249}
3250
3251/* Wrapper functions that discards the CONST_INT spill offset.  These
3252   exist so that we can give gr_spill/gr_fill the offset they need and
3253   use a consistent function interface.  */
3254
3255static rtx
3256gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3257{
3258  return gen_movdi (dest, src);
3259}
3260
3261static rtx
3262gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3263{
3264  return gen_fr_spill (dest, src);
3265}
3266
3267static rtx
3268gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3269{
3270  return gen_fr_restore (dest, src);
3271}
3272
3273#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
3274
3275/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
3276#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
3277
3278/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
3279   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
3280   is the size of the backing store.  ??? This clobbers r2 and r3.  */
3281
3282static void
3283ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
3284			     int bs_size)
3285{
3286  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
3287  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
3288  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
3289
3290  /* On the IA-64 there is a second stack in memory, namely the Backing Store
3291     of the Register Stack Engine.  We also need to probe it after checking
3292     that the 2 stacks don't overlap.  */
3293  emit_insn (gen_bsp_value (r3));
3294  emit_move_insn (r2, GEN_INT (-(first + size)));
3295
3296  /* Compare current value of BSP and SP registers.  */
3297  emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
3298					      r3, stack_pointer_rtx)));
3299
3300  /* Compute the address of the probe for the Backing Store (which grows
3301     towards higher addresses).  We probe only at the first offset of
3302     the next page because some OS (eg Linux/ia64) only extend the
3303     backing store when this specific address is hit (but generate a SEGV
3304     on other address).  Page size is the worst case (4KB).  The reserve
3305     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
3306     Also compute the address of the last probe for the memory stack
3307     (which grows towards lower addresses).  */
3308  emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
3309  emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3310
3311  /* Compare them and raise SEGV if the former has topped the latter.  */
3312  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3313				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3314				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
3315								 r3, r2))));
3316  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
3317						const0_rtx),
3318			  const0_rtx));
3319  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
3320				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
3321				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
3322						 GEN_INT (11))));
3323
3324  /* Probe the Backing Store if necessary.  */
3325  if (bs_size > 0)
3326    emit_stack_probe (r3);
3327
3328  /* Probe the memory stack if necessary.  */
3329  if (size == 0)
3330    ;
3331
3332  /* See if we have a constant small number of probes to generate.  If so,
3333     that's the easy case.  */
3334  else if (size <= PROBE_INTERVAL)
3335    emit_stack_probe (r2);
3336
3337  /* The run-time loop is made up of 9 insns in the generic case while this
3338     compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
3339  else if (size <= 4 * PROBE_INTERVAL)
3340    {
3341      HOST_WIDE_INT i;
3342
3343      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
3344      emit_insn (gen_rtx_SET (r2,
3345			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3346      emit_stack_probe (r2);
3347
3348      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
3349	 it exceeds SIZE.  If only two probes are needed, this will not
3350	 generate any code.  Then probe at FIRST + SIZE.  */
3351      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
3352	{
3353	  emit_insn (gen_rtx_SET (r2,
3354				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
3355	  emit_stack_probe (r2);
3356	}
3357
3358      emit_insn (gen_rtx_SET (r2,
3359			      plus_constant (Pmode, r2,
3360					     (i - PROBE_INTERVAL) - size)));
3361      emit_stack_probe (r2);
3362    }
3363
3364  /* Otherwise, do the same as above, but in a loop.  Note that we must be
3365     extra careful with variables wrapping around because we might be at
3366     the very top (or the very bottom) of the address space and we have
3367     to be able to handle this case properly; in particular, we use an
3368     equality test for the loop condition.  */
3369  else
3370    {
3371      HOST_WIDE_INT rounded_size;
3372
3373      emit_move_insn (r2, GEN_INT (-first));
3374
3375
3376      /* Step 1: round SIZE to the previous multiple of the interval.  */
3377
3378      rounded_size = size & -PROBE_INTERVAL;
3379
3380
3381      /* Step 2: compute initial and final value of the loop counter.  */
3382
3383      /* TEST_ADDR = SP + FIRST.  */
3384      emit_insn (gen_rtx_SET (r2,
3385			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
3386
3387      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
3388      if (rounded_size > (1 << 21))
3389	{
3390	  emit_move_insn (r3, GEN_INT (-rounded_size));
3391	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
3392	}
3393      else
3394        emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
3395						  GEN_INT (-rounded_size))));
3396
3397
3398      /* Step 3: the loop
3399
3400	 do
3401	   {
3402	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
3403	     probe at TEST_ADDR
3404	   }
3405	 while (TEST_ADDR != LAST_ADDR)
3406
3407	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
3408	 until it is equal to ROUNDED_SIZE.  */
3409
3410      emit_insn (gen_probe_stack_range (r2, r2, r3));
3411
3412
3413      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
3414	 that SIZE is equal to ROUNDED_SIZE.  */
3415
3416      /* TEMP = SIZE - ROUNDED_SIZE.  */
3417      if (size != rounded_size)
3418	{
3419	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
3420						     rounded_size - size)));
3421	  emit_stack_probe (r2);
3422	}
3423    }
3424
3425  /* Make sure nothing is scheduled before we are done.  */
3426  emit_insn (gen_blockage ());
3427}
3428
3429/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
3430   absolute addresses.  */
3431
3432const char *
3433output_probe_stack_range (rtx reg1, rtx reg2)
3434{
3435  static int labelno = 0;
3436  char loop_lab[32];
3437  rtx xops[3];
3438
3439  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
3440
3441  /* Loop.  */
3442  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
3443
3444  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
3445  xops[0] = reg1;
3446  xops[1] = GEN_INT (-PROBE_INTERVAL);
3447  output_asm_insn ("addl %0 = %1, %0", xops);
3448  fputs ("\t;;\n", asm_out_file);
3449
3450  /* Probe at TEST_ADDR.  */
3451  output_asm_insn ("probe.w.fault %0, 0", xops);
3452
3453  /* Test if TEST_ADDR == LAST_ADDR.  */
3454  xops[1] = reg2;
3455  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
3456  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
3457
3458  /* Branch.  */
3459  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
3460  assemble_name_raw (asm_out_file, loop_lab);
3461  fputc ('\n', asm_out_file);
3462
3463  return "";
3464}
3465
3466/* Called after register allocation to add any instructions needed for the
3467   prologue.  Using a prologue insn is favored compared to putting all of the
3468   instructions in output_function_prologue(), since it allows the scheduler
3469   to intermix instructions with the saves of the caller saved registers.  In
3470   some cases, it might be necessary to emit a barrier instruction as the last
3471   insn to prevent such scheduling.
3472
3473   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3474   so that the debug info generation code can handle them properly.
3475
3476   The register save area is laid out like so:
3477   cfa+16
3478	[ varargs spill area ]
3479	[ fr register spill area ]
3480	[ br register spill area ]
3481	[ ar register spill area ]
3482	[ pr register spill area ]
3483	[ gr register spill area ] */
3484
3485/* ??? Get inefficient code when the frame size is larger than can fit in an
3486   adds instruction.  */
3487
3488void
3489ia64_expand_prologue (void)
3490{
3491  rtx_insn *insn;
3492  rtx ar_pfs_save_reg, ar_unat_save_reg;
3493  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3494  rtx reg, alt_reg;
3495
3496  ia64_compute_frame_size (get_frame_size ());
3497  last_scratch_gr_reg = 15;
3498
3499  if (flag_stack_usage_info)
3500    current_function_static_stack_size = current_frame_info.total_size;
3501
3502  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
3503      || flag_stack_clash_protection)
3504    {
3505      HOST_WIDE_INT size = current_frame_info.total_size;
3506      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
3507					  + current_frame_info.n_local_regs);
3508
3509      if (crtl->is_leaf && !cfun->calls_alloca)
3510	{
3511	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
3512	    ia64_emit_probe_stack_range (get_stack_check_protect (),
3513					 size - get_stack_check_protect (),
3514					 bs_size);
3515	  else if (size + bs_size > get_stack_check_protect ())
3516	    ia64_emit_probe_stack_range (get_stack_check_protect (),
3517					 0, bs_size);
3518	}
3519      else if (size + bs_size > 0)
3520	ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
3521    }
3522
3523  if (dump_file)
3524    {
3525      fprintf (dump_file, "ia64 frame related registers "
3526               "recorded in current_frame_info.r[]:\n");
3527#define PRINTREG(a) if (current_frame_info.r[a]) \
3528        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3529      PRINTREG(reg_fp);
3530      PRINTREG(reg_save_b0);
3531      PRINTREG(reg_save_pr);
3532      PRINTREG(reg_save_ar_pfs);
3533      PRINTREG(reg_save_ar_unat);
3534      PRINTREG(reg_save_ar_lc);
3535      PRINTREG(reg_save_gp);
3536#undef PRINTREG
3537    }
3538
3539  /* If there is no epilogue, then we don't need some prologue insns.
3540     We need to avoid emitting the dead prologue insns, because flow
3541     will complain about them.  */
3542  if (optimize)
3543    {
3544      edge e;
3545      edge_iterator ei;
3546
3547      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
3548	if ((e->flags & EDGE_FAKE) == 0
3549	    && (e->flags & EDGE_FALLTHRU) != 0)
3550	  break;
3551      epilogue_p = (e != NULL);
3552    }
3553  else
3554    epilogue_p = 1;
3555
3556  /* Set the local, input, and output register names.  We need to do this
3557     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3558     half.  If we use in/loc/out register names, then we get assembler errors
3559     in crtn.S because there is no alloc insn or regstk directive in there.  */
3560  if (! TARGET_REG_NAMES)
3561    {
3562      int inputs = current_frame_info.n_input_regs;
3563      int locals = current_frame_info.n_local_regs;
3564      int outputs = current_frame_info.n_output_regs;
3565
3566      for (i = 0; i < inputs; i++)
3567	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3568      for (i = 0; i < locals; i++)
3569	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3570      for (i = 0; i < outputs; i++)
3571	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3572    }
3573
3574  /* Set the frame pointer register name.  The regnum is logically loc79,
3575     but of course we'll not have allocated that many locals.  Rather than
3576     worrying about renumbering the existing rtxs, we adjust the name.  */
3577  /* ??? This code means that we can never use one local register when
3578     there is a frame pointer.  loc79 gets wasted in this case, as it is
3579     renamed to a register that will never be used.  See also the try_locals
3580     code in find_gr_spill.  */
3581  if (current_frame_info.r[reg_fp])
3582    {
3583      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3584      reg_names[HARD_FRAME_POINTER_REGNUM]
3585	= reg_names[current_frame_info.r[reg_fp]];
3586      reg_names[current_frame_info.r[reg_fp]] = tmp;
3587    }
3588
3589  /* We don't need an alloc instruction if we've used no outputs or locals.  */
3590  if (current_frame_info.n_local_regs == 0
3591      && current_frame_info.n_output_regs == 0
3592      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3593      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3594    {
3595      /* If there is no alloc, but there are input registers used, then we
3596	 need a .regstk directive.  */
3597      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3598      ar_pfs_save_reg = NULL_RTX;
3599    }
3600  else
3601    {
3602      current_frame_info.need_regstk = 0;
3603
3604      if (current_frame_info.r[reg_save_ar_pfs])
3605        {
3606	  regno = current_frame_info.r[reg_save_ar_pfs];
3607	  reg_emitted (reg_save_ar_pfs);
3608	}
3609      else
3610	regno = next_scratch_gr_reg ();
3611      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3612
3613      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3614				   GEN_INT (current_frame_info.n_input_regs),
3615				   GEN_INT (current_frame_info.n_local_regs),
3616				   GEN_INT (current_frame_info.n_output_regs),
3617				   GEN_INT (current_frame_info.n_rotate_regs)));
3618      if (current_frame_info.r[reg_save_ar_pfs])
3619	{
3620	  RTX_FRAME_RELATED_P (insn) = 1;
3621	  add_reg_note (insn, REG_CFA_REGISTER,
3622			gen_rtx_SET (ar_pfs_save_reg,
3623				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
3624	}
3625    }
3626
3627  /* Set up frame pointer, stack pointer, and spill iterators.  */
3628
3629  n_varargs = cfun->machine->n_varargs;
3630  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3631			stack_pointer_rtx, 0);
3632
3633  if (frame_pointer_needed)
3634    {
3635      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3636      RTX_FRAME_RELATED_P (insn) = 1;
3637
3638      /* Force the unwind info to recognize this as defining a new CFA,
3639	 rather than some temp register setup.  */
3640      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
3641    }
3642
3643  if (current_frame_info.total_size != 0)
3644    {
3645      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3646      rtx offset;
3647
3648      if (satisfies_constraint_I (frame_size_rtx))
3649	offset = frame_size_rtx;
3650      else
3651	{
3652	  regno = next_scratch_gr_reg ();
3653	  offset = gen_rtx_REG (DImode, regno);
3654	  emit_move_insn (offset, frame_size_rtx);
3655	}
3656
3657      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3658				    stack_pointer_rtx, offset));
3659
3660      if (! frame_pointer_needed)
3661	{
3662	  RTX_FRAME_RELATED_P (insn) = 1;
3663	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
3664			gen_rtx_SET (stack_pointer_rtx,
3665				     gen_rtx_PLUS (DImode,
3666						   stack_pointer_rtx,
3667						   frame_size_rtx)));
3668	}
3669
3670      /* ??? At this point we must generate a magic insn that appears to
3671	 modify the stack pointer, the frame pointer, and all spill
3672	 iterators.  This would allow the most scheduling freedom.  For
3673	 now, just hard stop.  */
3674      emit_insn (gen_blockage ());
3675    }
3676
3677  /* Must copy out ar.unat before doing any integer spills.  */
3678  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3679    {
3680      if (current_frame_info.r[reg_save_ar_unat])
3681        {
3682	  ar_unat_save_reg
3683	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3684	  reg_emitted (reg_save_ar_unat);
3685	}
3686      else
3687	{
3688	  alt_regno = next_scratch_gr_reg ();
3689	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3690	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3691	}
3692
3693      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3694      insn = emit_move_insn (ar_unat_save_reg, reg);
3695      if (current_frame_info.r[reg_save_ar_unat])
3696	{
3697	  RTX_FRAME_RELATED_P (insn) = 1;
3698	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3699	}
3700
3701      /* Even if we're not going to generate an epilogue, we still
3702	 need to save the register so that EH works.  */
3703      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3704	emit_insn (gen_prologue_use (ar_unat_save_reg));
3705    }
3706  else
3707    ar_unat_save_reg = NULL_RTX;
3708
3709  /* Spill all varargs registers.  Do this before spilling any GR registers,
3710     since we want the UNAT bits for the GR registers to override the UNAT
3711     bits from varargs, which we don't care about.  */
3712
3713  cfa_off = -16;
3714  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3715    {
3716      reg = gen_rtx_REG (DImode, regno);
3717      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3718    }
3719
3720  /* Locate the bottom of the register save area.  */
3721  cfa_off = (current_frame_info.spill_cfa_off
3722	     + current_frame_info.spill_size
3723	     + current_frame_info.extra_spill_size);
3724
3725  /* Save the predicate register block either in a register or in memory.  */
3726  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3727    {
3728      reg = gen_rtx_REG (DImode, PR_REG (0));
3729      if (current_frame_info.r[reg_save_pr] != 0)
3730	{
3731	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3732	  reg_emitted (reg_save_pr);
3733	  insn = emit_move_insn (alt_reg, reg);
3734
3735	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3736	     64 hard registers.  */
3737	  RTX_FRAME_RELATED_P (insn) = 1;
3738	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3739
3740	  /* Even if we're not going to generate an epilogue, we still
3741	     need to save the register so that EH works.  */
3742	  if (! epilogue_p)
3743	    emit_insn (gen_prologue_use (alt_reg));
3744	}
3745      else
3746	{
3747	  alt_regno = next_scratch_gr_reg ();
3748	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3749	  insn = emit_move_insn (alt_reg, reg);
3750	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3751	  cfa_off -= 8;
3752	}
3753    }
3754
3755  /* Handle AR regs in numerical order.  All of them get special handling.  */
3756  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3757      && current_frame_info.r[reg_save_ar_unat] == 0)
3758    {
3759      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3760      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3761      cfa_off -= 8;
3762    }
3763
3764  /* The alloc insn already copied ar.pfs into a general register.  The
3765     only thing we have to do now is copy that register to a stack slot
3766     if we'd not allocated a local register for the job.  */
3767  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3768      && current_frame_info.r[reg_save_ar_pfs] == 0)
3769    {
3770      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3771      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3772      cfa_off -= 8;
3773    }
3774
3775  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3776    {
3777      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3778      if (current_frame_info.r[reg_save_ar_lc] != 0)
3779	{
3780	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3781	  reg_emitted (reg_save_ar_lc);
3782	  insn = emit_move_insn (alt_reg, reg);
3783	  RTX_FRAME_RELATED_P (insn) = 1;
3784	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
3785
3786	  /* Even if we're not going to generate an epilogue, we still
3787	     need to save the register so that EH works.  */
3788	  if (! epilogue_p)
3789	    emit_insn (gen_prologue_use (alt_reg));
3790	}
3791      else
3792	{
3793	  alt_regno = next_scratch_gr_reg ();
3794	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3795	  emit_move_insn (alt_reg, reg);
3796	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3797	  cfa_off -= 8;
3798	}
3799    }
3800
3801  /* Save the return pointer.  */
3802  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3803    {
3804      reg = gen_rtx_REG (DImode, BR_REG (0));
3805      if (current_frame_info.r[reg_save_b0] != 0)
3806	{
3807          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3808          reg_emitted (reg_save_b0);
3809	  insn = emit_move_insn (alt_reg, reg);
3810	  RTX_FRAME_RELATED_P (insn) = 1;
3811	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));
3812
3813	  /* Even if we're not going to generate an epilogue, we still
3814	     need to save the register so that EH works.  */
3815	  if (! epilogue_p)
3816	    emit_insn (gen_prologue_use (alt_reg));
3817	}
3818      else
3819	{
3820	  alt_regno = next_scratch_gr_reg ();
3821	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3822	  emit_move_insn (alt_reg, reg);
3823	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3824	  cfa_off -= 8;
3825	}
3826    }
3827
3828  if (current_frame_info.r[reg_save_gp])
3829    {
3830      reg_emitted (reg_save_gp);
3831      insn = emit_move_insn (gen_rtx_REG (DImode,
3832					  current_frame_info.r[reg_save_gp]),
3833			     pic_offset_table_rtx);
3834    }
3835
3836  /* We should now be at the base of the gr/br/fr spill area.  */
3837  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3838			  + current_frame_info.spill_size));
3839
3840  /* Spill all general registers.  */
3841  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3842    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3843      {
3844	reg = gen_rtx_REG (DImode, regno);
3845	do_spill (gen_gr_spill, reg, cfa_off, reg);
3846	cfa_off -= 8;
3847      }
3848
3849  /* Spill the rest of the BR registers.  */
3850  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3851    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3852      {
3853	alt_regno = next_scratch_gr_reg ();
3854	alt_reg = gen_rtx_REG (DImode, alt_regno);
3855	reg = gen_rtx_REG (DImode, regno);
3856	emit_move_insn (alt_reg, reg);
3857	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3858	cfa_off -= 8;
3859      }
3860
3861  /* Align the frame and spill all FR registers.  */
3862  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3863    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3864      {
3865        gcc_assert (!(cfa_off & 15));
3866	reg = gen_rtx_REG (XFmode, regno);
3867	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3868	cfa_off -= 16;
3869      }
3870
3871  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3872
3873  finish_spill_pointers ();
3874}
3875
3876/* Output the textual info surrounding the prologue.  */
3877
3878void
3879ia64_start_function (FILE *file, const char *fnname,
3880		     tree decl ATTRIBUTE_UNUSED)
3881{
3882#if TARGET_ABI_OPEN_VMS
3883  vms_start_function (fnname);
3884#endif
3885
3886  fputs ("\t.proc ", file);
3887  assemble_name (file, fnname);
3888  fputc ('\n', file);
3889  ASM_OUTPUT_LABEL (file, fnname);
3890}
3891
3892/* Called after register allocation to add any instructions needed for the
3893   epilogue.  Using an epilogue insn is favored compared to putting all of the
3894   instructions in output_function_prologue(), since it allows the scheduler
3895   to intermix instructions with the saves of the caller saved registers.  In
3896   some cases, it might be necessary to emit a barrier instruction as the last
3897   insn to prevent such scheduling.  */
3898
3899void
3900ia64_expand_epilogue (int sibcall_p)
3901{
3902  rtx_insn *insn;
3903  rtx reg, alt_reg, ar_unat_save_reg;
3904  int regno, alt_regno, cfa_off;
3905
3906  ia64_compute_frame_size (get_frame_size ());
3907
3908  /* If there is a frame pointer, then we use it instead of the stack
3909     pointer, so that the stack pointer does not need to be valid when
3910     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3911  if (frame_pointer_needed)
3912    setup_spill_pointers (current_frame_info.n_spilled,
3913			  hard_frame_pointer_rtx, 0);
3914  else
3915    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3916			  current_frame_info.total_size);
3917
3918  if (current_frame_info.total_size != 0)
3919    {
3920      /* ??? At this point we must generate a magic insn that appears to
3921         modify the spill iterators and the frame pointer.  This would
3922	 allow the most scheduling freedom.  For now, just hard stop.  */
3923      emit_insn (gen_blockage ());
3924    }
3925
3926  /* Locate the bottom of the register save area.  */
3927  cfa_off = (current_frame_info.spill_cfa_off
3928	     + current_frame_info.spill_size
3929	     + current_frame_info.extra_spill_size);
3930
3931  /* Restore the predicate registers.  */
3932  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3933    {
3934      if (current_frame_info.r[reg_save_pr] != 0)
3935        {
3936	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3937	  reg_emitted (reg_save_pr);
3938	}
3939      else
3940	{
3941	  alt_regno = next_scratch_gr_reg ();
3942	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3943	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3944	  cfa_off -= 8;
3945	}
3946      reg = gen_rtx_REG (DImode, PR_REG (0));
3947      emit_move_insn (reg, alt_reg);
3948    }
3949
3950  /* Restore the application registers.  */
3951
3952  /* Load the saved unat from the stack, but do not restore it until
3953     after the GRs have been restored.  */
3954  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3955    {
3956      if (current_frame_info.r[reg_save_ar_unat] != 0)
3957        {
3958          ar_unat_save_reg
3959	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3960	  reg_emitted (reg_save_ar_unat);
3961	}
3962      else
3963	{
3964	  alt_regno = next_scratch_gr_reg ();
3965	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3966	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3967	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3968	  cfa_off -= 8;
3969	}
3970    }
3971  else
3972    ar_unat_save_reg = NULL_RTX;
3973
3974  if (current_frame_info.r[reg_save_ar_pfs] != 0)
3975    {
3976      reg_emitted (reg_save_ar_pfs);
3977      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3978      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3979      emit_move_insn (reg, alt_reg);
3980    }
3981  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3982    {
3983      alt_regno = next_scratch_gr_reg ();
3984      alt_reg = gen_rtx_REG (DImode, alt_regno);
3985      do_restore (gen_movdi_x, alt_reg, cfa_off);
3986      cfa_off -= 8;
3987      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3988      emit_move_insn (reg, alt_reg);
3989    }
3990
3991  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3992    {
3993      if (current_frame_info.r[reg_save_ar_lc] != 0)
3994        {
3995	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3996          reg_emitted (reg_save_ar_lc);
3997	}
3998      else
3999	{
4000	  alt_regno = next_scratch_gr_reg ();
4001	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4002	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4003	  cfa_off -= 8;
4004	}
4005      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
4006      emit_move_insn (reg, alt_reg);
4007    }
4008
4009  /* Restore the return pointer.  */
4010  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4011    {
4012      if (current_frame_info.r[reg_save_b0] != 0)
4013        {
4014         alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4015         reg_emitted (reg_save_b0);
4016        }
4017      else
4018	{
4019	  alt_regno = next_scratch_gr_reg ();
4020	  alt_reg = gen_rtx_REG (DImode, alt_regno);
4021	  do_restore (gen_movdi_x, alt_reg, cfa_off);
4022	  cfa_off -= 8;
4023	}
4024      reg = gen_rtx_REG (DImode, BR_REG (0));
4025      emit_move_insn (reg, alt_reg);
4026    }
4027
4028  /* We should now be at the base of the gr/br/fr spill area.  */
4029  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
4030			  + current_frame_info.spill_size));
4031
4032  /* The GP may be stored on the stack in the prologue, but it's
4033     never restored in the epilogue.  Skip the stack slot.  */
4034  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
4035    cfa_off -= 8;
4036
4037  /* Restore all general registers.  */
4038  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
4039    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4040      {
4041	reg = gen_rtx_REG (DImode, regno);
4042	do_restore (gen_gr_restore, reg, cfa_off);
4043	cfa_off -= 8;
4044      }
4045
4046  /* Restore the branch registers.  */
4047  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
4048    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4049      {
4050	alt_regno = next_scratch_gr_reg ();
4051	alt_reg = gen_rtx_REG (DImode, alt_regno);
4052	do_restore (gen_movdi_x, alt_reg, cfa_off);
4053	cfa_off -= 8;
4054	reg = gen_rtx_REG (DImode, regno);
4055	emit_move_insn (reg, alt_reg);
4056      }
4057
4058  /* Restore floating point registers.  */
4059  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
4060    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4061      {
4062        gcc_assert (!(cfa_off & 15));
4063	reg = gen_rtx_REG (XFmode, regno);
4064	do_restore (gen_fr_restore_x, reg, cfa_off);
4065	cfa_off -= 16;
4066      }
4067
4068  /* Restore ar.unat for real.  */
4069  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
4070    {
4071      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
4072      emit_move_insn (reg, ar_unat_save_reg);
4073    }
4074
4075  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
4076
4077  finish_spill_pointers ();
4078
4079  if (current_frame_info.total_size
4080      || cfun->machine->ia64_eh_epilogue_sp
4081      || frame_pointer_needed)
4082    {
4083      /* ??? At this point we must generate a magic insn that appears to
4084         modify the spill iterators, the stack pointer, and the frame
4085	 pointer.  This would allow the most scheduling freedom.  For now,
4086	 just hard stop.  */
4087      emit_insn (gen_blockage ());
4088    }
4089
4090  if (cfun->machine->ia64_eh_epilogue_sp)
4091    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
4092  else if (frame_pointer_needed)
4093    {
4094      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
4095      RTX_FRAME_RELATED_P (insn) = 1;
4096      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
4097    }
4098  else if (current_frame_info.total_size)
4099    {
4100      rtx offset, frame_size_rtx;
4101
4102      frame_size_rtx = GEN_INT (current_frame_info.total_size);
4103      if (satisfies_constraint_I (frame_size_rtx))
4104	offset = frame_size_rtx;
4105      else
4106	{
4107	  regno = next_scratch_gr_reg ();
4108	  offset = gen_rtx_REG (DImode, regno);
4109	  emit_move_insn (offset, frame_size_rtx);
4110	}
4111
4112      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
4113				    offset));
4114
4115      RTX_FRAME_RELATED_P (insn) = 1;
4116      add_reg_note (insn, REG_CFA_ADJUST_CFA,
4117		    gen_rtx_SET (stack_pointer_rtx,
4118				 gen_rtx_PLUS (DImode,
4119					       stack_pointer_rtx,
4120					       frame_size_rtx)));
4121    }
4122
4123  if (cfun->machine->ia64_eh_epilogue_bsp)
4124    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
4125
4126  if (! sibcall_p)
4127    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
4128  else
4129    {
4130      int fp = GR_REG (2);
4131      /* We need a throw away register here, r0 and r1 are reserved,
4132	 so r2 is the first available call clobbered register.  If
4133	 there was a frame_pointer register, we may have swapped the
4134	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
4135	 sure we're using the string "r2" when emitting the register
4136	 name for the assembler.  */
4137      if (current_frame_info.r[reg_fp]
4138          && current_frame_info.r[reg_fp] == GR_REG (2))
4139	fp = HARD_FRAME_POINTER_REGNUM;
4140
4141      /* We must emit an alloc to force the input registers to become output
4142	 registers.  Otherwise, if the callee tries to pass its parameters
4143	 through to another call without an intervening alloc, then these
4144	 values get lost.  */
4145      /* ??? We don't need to preserve all input registers.  We only need to
4146	 preserve those input registers used as arguments to the sibling call.
4147	 It is unclear how to compute that number here.  */
4148      if (current_frame_info.n_input_regs != 0)
4149	{
4150	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
4151
4152	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
4153				const0_rtx, const0_rtx,
4154				n_inputs, const0_rtx));
4155	  RTX_FRAME_RELATED_P (insn) = 1;
4156
4157	  /* ??? We need to mark the alloc as frame-related so that it gets
4158	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
4159	     But there's nothing dwarf2 related to be done wrt the register
4160	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
4161	     the empty parallel means dwarf2out will not see anything.  */
4162	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
4163			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
4164	}
4165    }
4166}
4167
4168/* Return 1 if br.ret can do all the work required to return from a
4169   function.  */
4170
4171int
4172ia64_direct_return (void)
4173{
4174  if (reload_completed && ! frame_pointer_needed)
4175    {
4176      ia64_compute_frame_size (get_frame_size ());
4177
4178      return (current_frame_info.total_size == 0
4179	      && current_frame_info.n_spilled == 0
4180	      && current_frame_info.r[reg_save_b0] == 0
4181	      && current_frame_info.r[reg_save_pr] == 0
4182	      && current_frame_info.r[reg_save_ar_pfs] == 0
4183	      && current_frame_info.r[reg_save_ar_unat] == 0
4184	      && current_frame_info.r[reg_save_ar_lc] == 0);
4185    }
4186  return 0;
4187}
4188
4189/* Return the magic cookie that we use to hold the return address
4190   during early compilation.  */
4191
4192rtx
4193ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
4194{
4195  if (count != 0)
4196    return NULL;
4197  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
4198}
4199
4200/* Split this value after reload, now that we know where the return
4201   address is saved.  */
4202
4203void
4204ia64_split_return_addr_rtx (rtx dest)
4205{
4206  rtx src;
4207
4208  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
4209    {
4210      if (current_frame_info.r[reg_save_b0] != 0)
4211        {
4212	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
4213	  reg_emitted (reg_save_b0);
4214	}
4215      else
4216	{
4217	  HOST_WIDE_INT off;
4218	  unsigned int regno;
4219	  rtx off_r;
4220
4221	  /* Compute offset from CFA for BR0.  */
4222	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
4223	  off = (current_frame_info.spill_cfa_off
4224		 + current_frame_info.spill_size);
4225	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
4226	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
4227	      off -= 8;
4228
4229	  /* Convert CFA offset to a register based offset.  */
4230	  if (frame_pointer_needed)
4231	    src = hard_frame_pointer_rtx;
4232	  else
4233	    {
4234	      src = stack_pointer_rtx;
4235	      off += current_frame_info.total_size;
4236	    }
4237
4238	  /* Load address into scratch register.  */
4239	  off_r = GEN_INT (off);
4240	  if (satisfies_constraint_I (off_r))
4241	    emit_insn (gen_adddi3 (dest, src, off_r));
4242	  else
4243	    {
4244	      emit_move_insn (dest, off_r);
4245	      emit_insn (gen_adddi3 (dest, src, dest));
4246	    }
4247
4248	  src = gen_rtx_MEM (Pmode, dest);
4249	}
4250    }
4251  else
4252    src = gen_rtx_REG (DImode, BR_REG (0));
4253
4254  emit_move_insn (dest, src);
4255}
4256
4257int
4258ia64_hard_regno_rename_ok (int from, int to)
4259{
4260  /* Don't clobber any of the registers we reserved for the prologue.  */
4261  unsigned int r;
4262
4263  for (r = reg_fp; r <= reg_save_ar_lc; r++)
4264    if (to == current_frame_info.r[r]
4265        || from == current_frame_info.r[r]
4266        || to == emitted_frame_related_regs[r]
4267        || from == emitted_frame_related_regs[r])
4268      return 0;
4269
4270  /* Don't use output registers outside the register frame.  */
4271  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
4272    return 0;
4273
4274  /* Retain even/oddness on predicate register pairs.  */
4275  if (PR_REGNO_P (from) && PR_REGNO_P (to))
4276    return (from & 1) == (to & 1);
4277
4278  return 1;
4279}
4280
4281/* Implement TARGET_HARD_REGNO_NREGS.
4282
4283   ??? We say that BImode PR values require two registers.  This allows us to
4284   easily store the normal and inverted values.  We use CCImode to indicate
4285   a single predicate register.  */
4286
4287static unsigned int
4288ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
4289{
4290  if (regno == PR_REG (0) && mode == DImode)
4291    return 64;
4292  if (PR_REGNO_P (regno) && (mode) == BImode)
4293    return 2;
4294  if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
4295    return 1;
4296  if (FR_REGNO_P (regno) && mode == XFmode)
4297    return 1;
4298  if (FR_REGNO_P (regno) && mode == RFmode)
4299    return 1;
4300  if (FR_REGNO_P (regno) && mode == XCmode)
4301    return 2;
4302  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
4303}
4304
4305/* Implement TARGET_HARD_REGNO_MODE_OK.  */
4306
4307static bool
4308ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
4309{
4310  if (FR_REGNO_P (regno))
4311    return (GET_MODE_CLASS (mode) != MODE_CC
4312	    && mode != BImode
4313	    && mode != TFmode);
4314
4315  if (PR_REGNO_P (regno))
4316    return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
4317
4318  if (GR_REGNO_P (regno))
4319    return mode != XFmode && mode != XCmode && mode != RFmode;
4320
4321  if (AR_REGNO_P (regno))
4322    return mode == DImode;
4323
4324  if (BR_REGNO_P (regno))
4325    return mode == DImode;
4326
4327  return false;
4328}
4329
4330/* Implement TARGET_MODES_TIEABLE_P.
4331
4332   Don't tie integer and FP modes, as that causes us to get integer registers
4333   allocated for FP instructions.  XFmode only supported in FP registers so
4334   we can't tie it with any other modes.  */
4335
4336static bool
4337ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
4338{
4339  return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
4340	  && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
4341	      == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
4342	  && (mode1 == BImode) == (mode2 == BImode));
4343}
4344
4345/* Target hook for assembling integer objects.  Handle word-sized
4346   aligned objects and detect the cases when @fptr is needed.  */
4347
4348static bool
4349ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
4350{
4351  if (size == POINTER_SIZE / BITS_PER_UNIT
4352      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
4353      && GET_CODE (x) == SYMBOL_REF
4354      && SYMBOL_REF_FUNCTION_P (x))
4355    {
4356      static const char * const directive[2][2] = {
4357	  /* 64-bit pointer */  /* 32-bit pointer */
4358	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
4359	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
4360      };
4361      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
4362      output_addr_const (asm_out_file, x);
4363      fputs (")\n", asm_out_file);
4364      return true;
4365    }
4366  return default_assemble_integer (x, size, aligned_p);
4367}
4368
4369/* Emit the function prologue.  */
4370
4371static void
4372ia64_output_function_prologue (FILE *file)
4373{
4374  int mask, grsave, grsave_prev;
4375
4376  if (current_frame_info.need_regstk)
4377    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
4378	     current_frame_info.n_input_regs,
4379	     current_frame_info.n_local_regs,
4380	     current_frame_info.n_output_regs,
4381	     current_frame_info.n_rotate_regs);
4382
4383  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4384    return;
4385
4386  /* Emit the .prologue directive.  */
4387
4388  mask = 0;
4389  grsave = grsave_prev = 0;
4390  if (current_frame_info.r[reg_save_b0] != 0)
4391    {
4392      mask |= 8;
4393      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
4394    }
4395  if (current_frame_info.r[reg_save_ar_pfs] != 0
4396      && (grsave_prev == 0
4397	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
4398    {
4399      mask |= 4;
4400      if (grsave_prev == 0)
4401	grsave = current_frame_info.r[reg_save_ar_pfs];
4402      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
4403    }
4404  if (current_frame_info.r[reg_fp] != 0
4405      && (grsave_prev == 0
4406	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
4407    {
4408      mask |= 2;
4409      if (grsave_prev == 0)
4410	grsave = HARD_FRAME_POINTER_REGNUM;
4411      grsave_prev = current_frame_info.r[reg_fp];
4412    }
4413  if (current_frame_info.r[reg_save_pr] != 0
4414      && (grsave_prev == 0
4415	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
4416    {
4417      mask |= 1;
4418      if (grsave_prev == 0)
4419	grsave = current_frame_info.r[reg_save_pr];
4420    }
4421
4422  if (mask && TARGET_GNU_AS)
4423    fprintf (file, "\t.prologue %d, %d\n", mask,
4424	     ia64_dbx_register_number (grsave));
4425  else
4426    fputs ("\t.prologue\n", file);
4427
4428  /* Emit a .spill directive, if necessary, to relocate the base of
4429     the register spill area.  */
4430  if (current_frame_info.spill_cfa_off != -16)
4431    fprintf (file, "\t.spill %ld\n",
4432	     (long) (current_frame_info.spill_cfa_off
4433		     + current_frame_info.spill_size));
4434}
4435
4436/* Emit the .body directive at the scheduled end of the prologue.  */
4437
4438static void
4439ia64_output_function_end_prologue (FILE *file)
4440{
4441  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
4442    return;
4443
4444  fputs ("\t.body\n", file);
4445}
4446
4447/* Emit the function epilogue.  */
4448
4449static void
4450ia64_output_function_epilogue (FILE *)
4451{
4452  int i;
4453
4454  if (current_frame_info.r[reg_fp])
4455    {
4456      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
4457      reg_names[HARD_FRAME_POINTER_REGNUM]
4458	= reg_names[current_frame_info.r[reg_fp]];
4459      reg_names[current_frame_info.r[reg_fp]] = tmp;
4460      reg_emitted (reg_fp);
4461    }
4462  if (! TARGET_REG_NAMES)
4463    {
4464      for (i = 0; i < current_frame_info.n_input_regs; i++)
4465	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
4466      for (i = 0; i < current_frame_info.n_local_regs; i++)
4467	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
4468      for (i = 0; i < current_frame_info.n_output_regs; i++)
4469	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
4470    }
4471
4472  current_frame_info.initialized = 0;
4473}
4474
4475int
4476ia64_dbx_register_number (int regno)
4477{
4478  /* In ia64_expand_prologue we quite literally renamed the frame pointer
4479     from its home at loc79 to something inside the register frame.  We
4480     must perform the same renumbering here for the debug info.  */
4481  if (current_frame_info.r[reg_fp])
4482    {
4483      if (regno == HARD_FRAME_POINTER_REGNUM)
4484	regno = current_frame_info.r[reg_fp];
4485      else if (regno == current_frame_info.r[reg_fp])
4486	regno = HARD_FRAME_POINTER_REGNUM;
4487    }
4488
4489  if (IN_REGNO_P (regno))
4490    return 32 + regno - IN_REG (0);
4491  else if (LOC_REGNO_P (regno))
4492    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
4493  else if (OUT_REGNO_P (regno))
4494    return (32 + current_frame_info.n_input_regs
4495	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
4496  else
4497    return regno;
4498}
4499
4500/* Implement TARGET_TRAMPOLINE_INIT.
4501
4502   The trampoline should set the static chain pointer to value placed
4503   into the trampoline and should branch to the specified routine.
4504   To make the normal indirect-subroutine calling convention work,
4505   the trampoline must look like a function descriptor; the first
4506   word being the target address and the second being the target's
4507   global pointer.
4508
4509   We abuse the concept of a global pointer by arranging for it
4510   to point to the data we need to load.  The complete trampoline
4511   has the following form:
4512
4513		+-------------------+ \
4514	TRAMP:	| __ia64_trampoline | |
4515		+-------------------+  > fake function descriptor
4516		| TRAMP+16          | |
4517		+-------------------+ /
4518		| target descriptor |
4519		+-------------------+
4520		| static link	    |
4521		+-------------------+
4522*/
4523
4524static void
4525ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
4526{
4527  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
4528  rtx addr, addr_reg, tramp, eight = GEN_INT (8);
4529
4530  /* The Intel assembler requires that the global __ia64_trampoline symbol
4531     be declared explicitly */
4532  if (!TARGET_GNU_AS)
4533    {
4534      static bool declared_ia64_trampoline = false;
4535
4536      if (!declared_ia64_trampoline)
4537	{
4538	  declared_ia64_trampoline = true;
4539	  (*targetm.asm_out.globalize_label) (asm_out_file,
4540					      "__ia64_trampoline");
4541	}
4542    }
4543
4544  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
4545  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
4546  fnaddr = convert_memory_address (Pmode, fnaddr);
4547  static_chain = convert_memory_address (Pmode, static_chain);
4548
4549  /* Load up our iterator.  */
4550  addr_reg = copy_to_reg (addr);
4551  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4552
4553  /* The first two words are the fake descriptor:
4554     __ia64_trampoline, ADDR+16.  */
4555  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4556  if (TARGET_ABI_OPEN_VMS)
4557    {
4558      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4559	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4560	 relocation against function symbols to make it identical to the
4561	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4562	 strict ELF and dereference to get the bare code address.  */
4563      rtx reg = gen_reg_rtx (Pmode);
4564      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4565      emit_move_insn (reg, tramp);
4566      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4567      tramp = reg;
4568   }
4569  emit_move_insn (m_tramp, tramp);
4570  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4571  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4572
4573  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4574  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4575  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4576
4577  /* The third word is the target descriptor.  */
4578  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4579  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4580  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4581
4582  /* The fourth word is the static chain.  */
4583  emit_move_insn (m_tramp, static_chain);
4584}
4585
4586/* Do any needed setup for a variadic function.  CUM has not been updated
4587   for the last named argument, which is given by ARG.
4588
4589   We generate the actual spill instructions during prologue generation.  */
4590
4591static void
4592ia64_setup_incoming_varargs (cumulative_args_t cum,
4593			     const function_arg_info &arg,
4594			     int *pretend_size,
4595			     int second_time ATTRIBUTE_UNUSED)
4596{
4597  CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4598
4599  /* Skip the current argument.  */
4600  ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4601
4602  if (next_cum.words < MAX_ARGUMENT_SLOTS)
4603    {
4604      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4605      *pretend_size = n * UNITS_PER_WORD;
4606      cfun->machine->n_varargs = n;
4607    }
4608}
4609
4610/* Check whether TYPE is a homogeneous floating point aggregate.  If
4611   it is, return the mode of the floating point type that appears
4612   in all leafs.  If it is not, return VOIDmode.
4613
4614   An aggregate is a homogeneous floating point aggregate is if all
4615   fields/elements in it have the same floating point type (e.g,
4616   SFmode).  128-bit quad-precision floats are excluded.
4617
4618   Variable sized aggregates should never arrive here, since we should
4619   have already decided to pass them by reference.  Top-level zero-sized
4620   aggregates are excluded because our parallels crash the middle-end.  */
4621
4622static machine_mode
4623hfa_element_mode (const_tree type, bool nested)
4624{
4625  machine_mode element_mode = VOIDmode;
4626  machine_mode mode;
4627  enum tree_code code = TREE_CODE (type);
4628  int know_element_mode = 0;
4629  tree t;
4630
4631  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4632    return VOIDmode;
4633
4634  switch (code)
4635    {
4636    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4637    case BOOLEAN_TYPE:	case POINTER_TYPE:
4638    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4639    case LANG_TYPE:		case FUNCTION_TYPE:
4640      return VOIDmode;
4641
4642      /* Fortran complex types are supposed to be HFAs, so we need to handle
4643	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4644	 types though.  */
4645    case COMPLEX_TYPE:
4646      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4647	  && TYPE_MODE (type) != TCmode)
4648	return GET_MODE_INNER (TYPE_MODE (type));
4649      else
4650	return VOIDmode;
4651
4652    case REAL_TYPE:
4653      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4654	 mode if this is contained within an aggregate.  */
4655      if (nested && TYPE_MODE (type) != TFmode)
4656	return TYPE_MODE (type);
4657      else
4658	return VOIDmode;
4659
4660    case ARRAY_TYPE:
4661      return hfa_element_mode (TREE_TYPE (type), 1);
4662
4663    case RECORD_TYPE:
4664    case UNION_TYPE:
4665    case QUAL_UNION_TYPE:
4666      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
4667	{
4668	  if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t))
4669	    continue;
4670
4671	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4672	  if (know_element_mode)
4673	    {
4674	      if (mode != element_mode)
4675		return VOIDmode;
4676	    }
4677	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4678	    return VOIDmode;
4679	  else
4680	    {
4681	      know_element_mode = 1;
4682	      element_mode = mode;
4683	    }
4684	}
4685      return element_mode;
4686
4687    default:
4688      /* If we reach here, we probably have some front-end specific type
4689	 that the backend doesn't know about.  This can happen via the
4690	 aggregate_value_p call in init_function_start.  All we can do is
4691	 ignore unknown tree types.  */
4692      return VOIDmode;
4693    }
4694
4695  return VOIDmode;
4696}
4697
4698/* Return the number of words required to hold a quantity of TYPE and MODE
4699   when passed as an argument.  */
4700static int
4701ia64_function_arg_words (const_tree type, machine_mode mode)
4702{
4703  int words;
4704
4705  if (mode == BLKmode)
4706    words = int_size_in_bytes (type);
4707  else
4708    words = GET_MODE_SIZE (mode);
4709
4710  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4711}
4712
4713/* Return the number of registers that should be skipped so the current
4714   argument (described by TYPE and WORDS) will be properly aligned.
4715
4716   Integer and float arguments larger than 8 bytes start at the next
4717   even boundary.  Aggregates larger than 8 bytes start at the next
4718   even boundary if the aggregate has 16 byte alignment.  Note that
4719   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4720   but are still to be aligned in registers.
4721
4722   ??? The ABI does not specify how to handle aggregates with
4723   alignment from 9 to 15 bytes, or greater than 16.  We handle them
4724   all as if they had 16 byte alignment.  Such aggregates can occur
4725   only if gcc extensions are used.  */
4726static int
4727ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
4728			  const_tree type, int words)
4729{
4730  /* No registers are skipped on VMS.  */
4731  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4732    return 0;
4733
4734  if (type
4735      && TREE_CODE (type) != INTEGER_TYPE
4736      && TREE_CODE (type) != REAL_TYPE)
4737    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4738  else
4739    return words > 1;
4740}
4741
4742/* Return rtx for register where argument is passed, or zero if it is passed
4743   on the stack.  */
4744/* ??? 128-bit quad-precision floats are always passed in general
4745   registers.  */
4746
4747static rtx
4748ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
4749		     bool incoming)
4750{
4751  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4752
4753  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4754  int words = ia64_function_arg_words (arg.type, arg.mode);
4755  int offset = ia64_function_arg_offset (cum, arg.type, words);
4756  machine_mode hfa_mode = VOIDmode;
4757
4758  /* For OPEN VMS, emit the instruction setting up the argument register here,
4759     when we know this will be together with the other arguments setup related
4760     insns.  This is not the conceptually best place to do this, but this is
4761     the easiest as we have convenient access to cumulative args info.  */
4762
4763  if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ())
4764    {
4765      unsigned HOST_WIDE_INT regval = cum->words;
4766      int i;
4767
4768      for (i = 0; i < 8; i++)
4769	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4770
4771      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4772		      GEN_INT (regval));
4773    }
4774
4775  /* If all argument slots are used, then it must go on the stack.  */
4776  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4777    return 0;
4778
4779  /* On OpenVMS argument is either in Rn or Fn.  */
4780  if (TARGET_ABI_OPEN_VMS)
4781    {
4782      if (FLOAT_MODE_P (arg.mode))
4783	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words);
4784      else
4785	return gen_rtx_REG (arg.mode, basereg + cum->words);
4786    }
4787
4788  /* Check for and handle homogeneous FP aggregates.  */
4789  if (arg.type)
4790    hfa_mode = hfa_element_mode (arg.type, 0);
4791
4792  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4793     and unprototyped hfas are passed specially.  */
4794  if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
4795    {
4796      rtx loc[16];
4797      int i = 0;
4798      int fp_regs = cum->fp_regs;
4799      int int_regs = cum->words + offset;
4800      int hfa_size = GET_MODE_SIZE (hfa_mode);
4801      int byte_size;
4802      int args_byte_size;
4803
4804      /* If prototyped, pass it in FR regs then GR regs.
4805	 If not prototyped, pass it in both FR and GR regs.
4806
4807	 If this is an SFmode aggregate, then it is possible to run out of
4808	 FR regs while GR regs are still left.  In that case, we pass the
4809	 remaining part in the GR regs.  */
4810
4811      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4812	 of the argument, the last FP register, or the last argument slot.  */
4813
4814      byte_size = arg.promoted_size_in_bytes ();
4815      args_byte_size = int_regs * UNITS_PER_WORD;
4816      offset = 0;
4817      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4818	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4819	{
4820	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4821				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4822							      + fp_regs)),
4823				      GEN_INT (offset));
4824	  offset += hfa_size;
4825	  args_byte_size += hfa_size;
4826	  fp_regs++;
4827	}
4828
4829      /* If no prototype, then the whole thing must go in GR regs.  */
4830      if (! cum->prototype)
4831	offset = 0;
4832      /* If this is an SFmode aggregate, then we might have some left over
4833	 that needs to go in GR regs.  */
4834      else if (byte_size != offset)
4835	int_regs += offset / UNITS_PER_WORD;
4836
4837      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4838
4839      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4840	{
4841	  machine_mode gr_mode = DImode;
4842	  unsigned int gr_size;
4843
4844	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4845	     then this goes in a GR reg left adjusted/little endian, right
4846	     adjusted/big endian.  */
4847	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4848	     always right adjusted/little endian.  */
4849	  if (offset & 0x4)
4850	    gr_mode = SImode;
4851	  /* If we have an even 4 byte hunk because the aggregate is a
4852	     multiple of 4 bytes in size, then this goes in a GR reg right
4853	     adjusted/little endian.  */
4854	  else if (byte_size - offset == 4)
4855	    gr_mode = SImode;
4856
4857	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4858				      gen_rtx_REG (gr_mode, (basereg
4859							     + int_regs)),
4860				      GEN_INT (offset));
4861
4862	  gr_size = GET_MODE_SIZE (gr_mode);
4863	  offset += gr_size;
4864	  if (gr_size == UNITS_PER_WORD
4865	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4866	    int_regs++;
4867	  else if (gr_size > UNITS_PER_WORD)
4868	    int_regs += gr_size / UNITS_PER_WORD;
4869	}
4870      return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc));
4871    }
4872
4873  /* Integral and aggregates go in general registers.  If we have run out of
4874     FR registers, then FP values must also go in general registers.  This can
4875     happen when we have a SFmode HFA.  */
4876  else if (arg.mode == TFmode || arg.mode == TCmode
4877	   || !FLOAT_MODE_P (arg.mode)
4878	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
4879    {
4880      int byte_size = arg.promoted_size_in_bytes ();
4881      if (BYTES_BIG_ENDIAN
4882	  && (arg.mode == BLKmode || arg.aggregate_type_p ())
4883	  && byte_size < UNITS_PER_WORD
4884	  && byte_size > 0)
4885	{
4886	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4887					  gen_rtx_REG (DImode,
4888						       (basereg + cum->words
4889							+ offset)),
4890					  const0_rtx);
4891	  return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg));
4892	}
4893      else
4894	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4895
4896    }
4897
4898  /* If there is a prototype, then FP values go in a FR register when
4899     named, and in a GR register when unnamed.  */
4900  else if (cum->prototype)
4901    {
4902      if (arg.named)
4903	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs);
4904      /* In big-endian mode, an anonymous SFmode value must be represented
4905         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4906	 the value into the high half of the general register.  */
4907      else if (BYTES_BIG_ENDIAN && arg.mode == SFmode)
4908	return gen_rtx_PARALLEL (arg.mode,
4909		 gen_rtvec (1,
4910                   gen_rtx_EXPR_LIST (VOIDmode,
4911		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4912				      const0_rtx)));
4913      else
4914	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
4915    }
4916  /* If there is no prototype, then FP values go in both FR and GR
4917     registers.  */
4918  else
4919    {
4920      /* See comment above.  */
4921      machine_mode inner_mode =
4922	(BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode;
4923
4924      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4925				      gen_rtx_REG (arg.mode, (FR_ARG_FIRST
4926							  + cum->fp_regs)),
4927				      const0_rtx);
4928      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4929				      gen_rtx_REG (inner_mode,
4930						   (basereg + cum->words
4931						    + offset)),
4932				      const0_rtx);
4933
4934      return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg));
4935    }
4936}
4937
4938/* Implement TARGET_FUNCION_ARG target hook.  */
4939
4940static rtx
4941ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg)
4942{
4943  return ia64_function_arg_1 (cum, arg, false);
4944}
4945
4946/* Implement TARGET_FUNCION_INCOMING_ARG target hook.  */
4947
4948static rtx
4949ia64_function_incoming_arg (cumulative_args_t cum,
4950			    const function_arg_info &arg)
4951{
4952  return ia64_function_arg_1 (cum, arg, true);
4953}
4954
4955/* Return number of bytes, at the beginning of the argument, that must be
4956   put in registers.  0 is the argument is entirely in registers or entirely
4957   in memory.  */
4958
4959static int
4960ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
4961{
4962  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4963
4964  int words = ia64_function_arg_words (arg.type, arg.mode);
4965  int offset = ia64_function_arg_offset (cum, arg.type, words);
4966
4967  /* If all argument slots are used, then it must go on the stack.  */
4968  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4969    return 0;
4970
4971  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4972     it fits within the 8 argument slots, then it goes entirely in
4973     registers.  If it extends past the last argument slot, then the rest
4974     goes on the stack.  */
4975
4976  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4977    return 0;
4978
4979  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4980}
4981
4982/* Return ivms_arg_type based on machine_mode.  */
4983
4984static enum ivms_arg_type
4985ia64_arg_type (machine_mode mode)
4986{
4987  switch (mode)
4988    {
4989    case E_SFmode:
4990      return FS;
4991    case E_DFmode:
4992      return FT;
4993    default:
4994      return I64;
4995    }
4996}
4997
4998/* Update CUM to point after this argument.  This is patterned after
4999   ia64_function_arg.  */
5000
5001static void
5002ia64_function_arg_advance (cumulative_args_t cum_v,
5003			   const function_arg_info &arg)
5004{
5005  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5006  int words = ia64_function_arg_words (arg.type, arg.mode);
5007  int offset = ia64_function_arg_offset (cum, arg.type, words);
5008  machine_mode hfa_mode = VOIDmode;
5009
5010  /* If all arg slots are already full, then there is nothing to do.  */
5011  if (cum->words >= MAX_ARGUMENT_SLOTS)
5012    {
5013      cum->words += words + offset;
5014      return;
5015    }
5016
5017  cum->atypes[cum->words] = ia64_arg_type (arg.mode);
5018  cum->words += words + offset;
5019
5020  /* On OpenVMS argument is either in Rn or Fn.  */
5021  if (TARGET_ABI_OPEN_VMS)
5022    {
5023      cum->int_regs = cum->words;
5024      cum->fp_regs = cum->words;
5025      return;
5026    }
5027
5028  /* Check for and handle homogeneous FP aggregates.  */
5029  if (arg.type)
5030    hfa_mode = hfa_element_mode (arg.type, 0);
5031
5032  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
5033     and unprototyped hfas are passed specially.  */
5034  if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
5035    {
5036      int fp_regs = cum->fp_regs;
5037      /* This is the original value of cum->words + offset.  */
5038      int int_regs = cum->words - words;
5039      int hfa_size = GET_MODE_SIZE (hfa_mode);
5040      int byte_size;
5041      int args_byte_size;
5042
5043      /* If prototyped, pass it in FR regs then GR regs.
5044	 If not prototyped, pass it in both FR and GR regs.
5045
5046	 If this is an SFmode aggregate, then it is possible to run out of
5047	 FR regs while GR regs are still left.  In that case, we pass the
5048	 remaining part in the GR regs.  */
5049
5050      /* Fill the FP regs.  We do this always.  We stop if we reach the end
5051	 of the argument, the last FP register, or the last argument slot.  */
5052
5053      byte_size = arg.promoted_size_in_bytes ();
5054      args_byte_size = int_regs * UNITS_PER_WORD;
5055      offset = 0;
5056      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5057	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5058	{
5059	  offset += hfa_size;
5060	  args_byte_size += hfa_size;
5061	  fp_regs++;
5062	}
5063
5064      cum->fp_regs = fp_regs;
5065    }
5066
5067  /* Integral and aggregates go in general registers.  So do TFmode FP values.
5068     If we have run out of FR registers, then other FP values must also go in
5069     general registers.  This can happen when we have a SFmode HFA.  */
5070  else if (arg.mode == TFmode || arg.mode == TCmode
5071           || !FLOAT_MODE_P (arg.mode)
5072	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
5073    cum->int_regs = cum->words;
5074
5075  /* If there is a prototype, then FP values go in a FR register when
5076     named, and in a GR register when unnamed.  */
5077  else if (cum->prototype)
5078    {
5079      if (! arg.named)
5080	cum->int_regs = cum->words;
5081      else
5082	/* ??? Complex types should not reach here.  */
5083	cum->fp_regs
5084	  += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5085    }
5086  /* If there is no prototype, then FP values go in both FR and GR
5087     registers.  */
5088  else
5089    {
5090      /* ??? Complex types should not reach here.  */
5091      cum->fp_regs
5092	+= (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5093      cum->int_regs = cum->words;
5094    }
5095}
5096
5097/* Arguments with alignment larger than 8 bytes start at the next even
5098   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
5099   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
5100
5101static unsigned int
5102ia64_function_arg_boundary (machine_mode mode, const_tree type)
5103{
5104  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5105    return PARM_BOUNDARY * 2;
5106
5107  if (type)
5108    {
5109      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5110        return PARM_BOUNDARY * 2;
5111      else
5112        return PARM_BOUNDARY;
5113    }
5114
5115  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5116    return PARM_BOUNDARY * 2;
5117  else
5118    return PARM_BOUNDARY;
5119}
5120
5121/* True if it is OK to do sibling call optimization for the specified
5122   call expression EXP.  DECL will be the called function, or NULL if
5123   this is an indirect call.  */
5124static bool
5125ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5126{
5127  /* We can't perform a sibcall if the current function has the syscall_linkage
5128     attribute.  */
5129  if (lookup_attribute ("syscall_linkage",
5130			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5131    return false;
5132
5133  /* We must always return with our current GP.  This means we can
5134     only sibcall to functions defined in the current module unless
5135     TARGET_CONST_GP is set to true.  */
5136  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5137}
5138
5139
5140/* Implement va_arg.  */
5141
5142static tree
5143ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5144		      gimple_seq *post_p)
5145{
5146  /* Variable sized types are passed by reference.  */
5147  if (pass_va_arg_by_reference (type))
5148    {
5149      tree ptrtype = build_pointer_type (type);
5150      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5151      return build_va_arg_indirect_ref (addr);
5152    }
5153
5154  /* Aggregate arguments with alignment larger than 8 bytes start at
5155     the next even boundary.  Integer and floating point arguments
5156     do so if they are larger than 8 bytes, whether or not they are
5157     also aligned larger than 8 bytes.  */
5158  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5159      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5160    {
5161      tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5162      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5163		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5164      gimplify_assign (unshare_expr (valist), t, pre_p);
5165    }
5166
5167  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5168}
5169
5170/* Return 1 if function return value returned in memory.  Return 0 if it is
5171   in a register.  */
5172
5173static bool
5174ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5175{
5176  machine_mode mode;
5177  machine_mode hfa_mode;
5178  HOST_WIDE_INT byte_size;
5179
5180  mode = TYPE_MODE (valtype);
5181  byte_size = GET_MODE_SIZE (mode);
5182  if (mode == BLKmode)
5183    {
5184      byte_size = int_size_in_bytes (valtype);
5185      if (byte_size < 0)
5186	return true;
5187    }
5188
5189  /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
5190
5191  hfa_mode = hfa_element_mode (valtype, 0);
5192  if (hfa_mode != VOIDmode)
5193    {
5194      int hfa_size = GET_MODE_SIZE (hfa_mode);
5195
5196      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5197	return true;
5198      else
5199	return false;
5200    }
5201  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5202    return true;
5203  else
5204    return false;
5205}
5206
5207/* Return rtx for register that holds the function return value.  */
5208
5209static rtx
5210ia64_function_value (const_tree valtype,
5211		     const_tree fn_decl_or_type,
5212		     bool outgoing ATTRIBUTE_UNUSED)
5213{
5214  machine_mode mode;
5215  machine_mode hfa_mode;
5216  int unsignedp;
5217  const_tree func = fn_decl_or_type;
5218
5219  if (fn_decl_or_type
5220      && !DECL_P (fn_decl_or_type))
5221    func = NULL;
5222
5223  mode = TYPE_MODE (valtype);
5224  hfa_mode = hfa_element_mode (valtype, 0);
5225
5226  if (hfa_mode != VOIDmode)
5227    {
5228      rtx loc[8];
5229      int i;
5230      int hfa_size;
5231      int byte_size;
5232      int offset;
5233
5234      hfa_size = GET_MODE_SIZE (hfa_mode);
5235      byte_size = ((mode == BLKmode)
5236		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5237      offset = 0;
5238      for (i = 0; offset < byte_size; i++)
5239	{
5240	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5241				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5242				      GEN_INT (offset));
5243	  offset += hfa_size;
5244	}
5245      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5246    }
5247  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5248    return gen_rtx_REG (mode, FR_ARG_FIRST);
5249  else
5250    {
5251      bool need_parallel = false;
5252
5253      /* In big-endian mode, we need to manage the layout of aggregates
5254	 in the registers so that we get the bits properly aligned in
5255	 the highpart of the registers.  */
5256      if (BYTES_BIG_ENDIAN
5257	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5258	need_parallel = true;
5259
5260      /* Something like struct S { long double x; char a[0] } is not an
5261	 HFA structure, and therefore doesn't go in fp registers.  But
5262	 the middle-end will give it XFmode anyway, and XFmode values
5263	 don't normally fit in integer registers.  So we need to smuggle
5264	 the value inside a parallel.  */
5265      else if (mode == XFmode || mode == XCmode || mode == RFmode)
5266	need_parallel = true;
5267
5268      if (need_parallel)
5269	{
5270	  rtx loc[8];
5271	  int offset;
5272	  int bytesize;
5273	  int i;
5274
5275	  offset = 0;
5276	  bytesize = int_size_in_bytes (valtype);
5277	  /* An empty PARALLEL is invalid here, but the return value
5278	     doesn't matter for empty structs.  */
5279	  if (bytesize == 0)
5280	    return gen_rtx_REG (mode, GR_RET_FIRST);
5281	  for (i = 0; offset < bytesize; i++)
5282	    {
5283	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5284					  gen_rtx_REG (DImode,
5285						       GR_RET_FIRST + i),
5286					  GEN_INT (offset));
5287	      offset += UNITS_PER_WORD;
5288	    }
5289	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5290	}
5291
5292      mode = promote_function_mode (valtype, mode, &unsignedp,
5293                                    func ? TREE_TYPE (func) : NULL_TREE,
5294                                    true);
5295
5296      return gen_rtx_REG (mode, GR_RET_FIRST);
5297    }
5298}
5299
5300/* Worker function for TARGET_LIBCALL_VALUE.  */
5301
5302static rtx
5303ia64_libcall_value (machine_mode mode,
5304		    const_rtx fun ATTRIBUTE_UNUSED)
5305{
5306  return gen_rtx_REG (mode,
5307		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
5308			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5309			&& (mode) != TFmode)
5310		       ? FR_RET_FIRST : GR_RET_FIRST));
5311}
5312
5313/* Worker function for FUNCTION_VALUE_REGNO_P.  */
5314
5315static bool
5316ia64_function_value_regno_p (const unsigned int regno)
5317{
5318  return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
5319          || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
5320}
5321
5322/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5323   We need to emit DTP-relative relocations.  */
5324
5325static void
5326ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
5327{
5328  gcc_assert (size == 4 || size == 8);
5329  if (size == 4)
5330    fputs ("\tdata4.ua\t@dtprel(", file);
5331  else
5332    fputs ("\tdata8.ua\t@dtprel(", file);
5333  output_addr_const (file, x);
5334  fputs (")", file);
5335}
5336
5337/* Print a memory address as an operand to reference that memory location.  */
5338
5339/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
5340   also call this from ia64_print_operand for memory addresses.  */
5341
5342static void
5343ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
5344			    machine_mode /*mode*/,
5345			    rtx address ATTRIBUTE_UNUSED)
5346{
5347}
5348
5349/* Print an operand to an assembler instruction.
5350   C	Swap and print a comparison operator.
5351   D	Print an FP comparison operator.
5352   E    Print 32 - constant, for SImode shifts as extract.
5353   e    Print 64 - constant, for DImode rotates.
5354   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
5355        a floating point register emitted normally.
5356   G	A floating point constant.
5357   I	Invert a predicate register by adding 1.
5358   J    Select the proper predicate register for a condition.
5359   j    Select the inverse predicate register for a condition.
5360   O	Append .acq for volatile load.
5361   P	Postincrement of a MEM.
5362   Q	Append .rel for volatile store.
5363   R	Print .s .d or nothing for a single, double or no truncation.
5364   S	Shift amount for shladd instruction.
5365   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
5366	for Intel assembler.
5367   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
5368	for Intel assembler.
5369   X	A pair of floating point registers.
5370   r	Print register name, or constant 0 as r0.  HP compatibility for
5371	Linux kernel.
5372   v    Print vector constant value as an 8-byte integer value.  */
5373
5374static void
5375ia64_print_operand (FILE * file, rtx x, int code)
5376{
5377  const char *str;
5378
5379  switch (code)
5380    {
5381    case 0:
5382      /* Handled below.  */
5383      break;
5384
5385    case 'C':
5386      {
5387	enum rtx_code c = swap_condition (GET_CODE (x));
5388	fputs (GET_RTX_NAME (c), file);
5389	return;
5390      }
5391
5392    case 'D':
5393      switch (GET_CODE (x))
5394	{
5395	case NE:
5396	  str = "neq";
5397	  break;
5398	case UNORDERED:
5399	  str = "unord";
5400	  break;
5401	case ORDERED:
5402	  str = "ord";
5403	  break;
5404	case UNLT:
5405	  str = "nge";
5406	  break;
5407	case UNLE:
5408	  str = "ngt";
5409	  break;
5410	case UNGT:
5411	  str = "nle";
5412	  break;
5413	case UNGE:
5414	  str = "nlt";
5415	  break;
5416	case UNEQ:
5417	case LTGT:
5418	  gcc_unreachable ();
5419	default:
5420	  str = GET_RTX_NAME (GET_CODE (x));
5421	  break;
5422	}
5423      fputs (str, file);
5424      return;
5425
5426    case 'E':
5427      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
5428      return;
5429
5430    case 'e':
5431      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
5432      return;
5433
5434    case 'F':
5435      if (x == CONST0_RTX (GET_MODE (x)))
5436	str = reg_names [FR_REG (0)];
5437      else if (x == CONST1_RTX (GET_MODE (x)))
5438	str = reg_names [FR_REG (1)];
5439      else
5440	{
5441	  gcc_assert (GET_CODE (x) == REG);
5442	  str = reg_names [REGNO (x)];
5443	}
5444      fputs (str, file);
5445      return;
5446
5447    case 'G':
5448      {
5449	long val[4];
5450	real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
5451	if (GET_MODE (x) == SFmode)
5452	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
5453	else if (GET_MODE (x) == DFmode)
5454	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
5455					  & 0xffffffff,
5456					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
5457					  & 0xffffffff);
5458	else
5459	  output_operand_lossage ("invalid %%G mode");
5460      }
5461      return;
5462
5463    case 'I':
5464      fputs (reg_names [REGNO (x) + 1], file);
5465      return;
5466
5467    case 'J':
5468    case 'j':
5469      {
5470	unsigned int regno = REGNO (XEXP (x, 0));
5471	if (GET_CODE (x) == EQ)
5472	  regno += 1;
5473	if (code == 'j')
5474	  regno ^= 1;
5475        fputs (reg_names [regno], file);
5476      }
5477      return;
5478
5479    case 'O':
5480      if (MEM_VOLATILE_P (x))
5481	fputs(".acq", file);
5482      return;
5483
5484    case 'P':
5485      {
5486	HOST_WIDE_INT value;
5487
5488	switch (GET_CODE (XEXP (x, 0)))
5489	  {
5490	  default:
5491	    return;
5492
5493	  case POST_MODIFY:
5494	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
5495	    if (GET_CODE (x) == CONST_INT)
5496	      value = INTVAL (x);
5497	    else
5498	      {
5499		gcc_assert (GET_CODE (x) == REG);
5500		fprintf (file, ", %s", reg_names[REGNO (x)]);
5501		return;
5502	      }
5503	    break;
5504
5505	  case POST_INC:
5506	    value = GET_MODE_SIZE (GET_MODE (x));
5507	    break;
5508
5509	  case POST_DEC:
5510	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
5511	    break;
5512	  }
5513
5514	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
5515	return;
5516      }
5517
5518    case 'Q':
5519      if (MEM_VOLATILE_P (x))
5520	fputs(".rel", file);
5521      return;
5522
5523    case 'R':
5524      if (x == CONST0_RTX (GET_MODE (x)))
5525	fputs(".s", file);
5526      else if (x == CONST1_RTX (GET_MODE (x)))
5527	fputs(".d", file);
5528      else if (x == CONST2_RTX (GET_MODE (x)))
5529	;
5530      else
5531	output_operand_lossage ("invalid %%R value");
5532      return;
5533
5534    case 'S':
5535      fprintf (file, "%d", exact_log2 (INTVAL (x)));
5536      return;
5537
5538    case 'T':
5539      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5540	{
5541	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
5542	  return;
5543	}
5544      break;
5545
5546    case 'U':
5547      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
5548	{
5549	  const char *prefix = "0x";
5550	  if (INTVAL (x) & 0x80000000)
5551	    {
5552	      fprintf (file, "0xffffffff");
5553	      prefix = "";
5554	    }
5555	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
5556	  return;
5557	}
5558      break;
5559
5560    case 'X':
5561      {
5562	unsigned int regno = REGNO (x);
5563	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
5564      }
5565      return;
5566
5567    case 'r':
5568      /* If this operand is the constant zero, write it as register zero.
5569	 Any register, zero, or CONST_INT value is OK here.  */
5570      if (GET_CODE (x) == REG)
5571	fputs (reg_names[REGNO (x)], file);
5572      else if (x == CONST0_RTX (GET_MODE (x)))
5573	fputs ("r0", file);
5574      else if (GET_CODE (x) == CONST_INT)
5575	output_addr_const (file, x);
5576      else
5577	output_operand_lossage ("invalid %%r value");
5578      return;
5579
5580    case 'v':
5581      gcc_assert (GET_CODE (x) == CONST_VECTOR);
5582      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
5583      break;
5584
5585    case '+':
5586      {
5587	const char *which;
5588
5589	/* For conditional branches, returns or calls, substitute
5590	   sptk, dptk, dpnt, or spnt for %s.  */
5591	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
5592	if (x)
5593	  {
5594	    int pred_val = profile_probability::from_reg_br_prob_note
5595				 (XINT (x, 0)).to_reg_br_prob_base ();
5596
5597	    /* Guess top and bottom 10% statically predicted.  */
5598	    if (pred_val < REG_BR_PROB_BASE / 50
5599		&& br_prob_note_reliable_p (x))
5600	      which = ".spnt";
5601	    else if (pred_val < REG_BR_PROB_BASE / 2)
5602	      which = ".dpnt";
5603	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5604		     || !br_prob_note_reliable_p (x))
5605	      which = ".dptk";
5606	    else
5607	      which = ".sptk";
5608	  }
5609	else if (CALL_P (current_output_insn))
5610	  which = ".sptk";
5611	else
5612	  which = ".dptk";
5613
5614	fputs (which, file);
5615	return;
5616      }
5617
5618    case ',':
5619      x = current_insn_predicate;
5620      if (x)
5621	{
5622	  unsigned int regno = REGNO (XEXP (x, 0));
5623	  if (GET_CODE (x) == EQ)
5624	    regno += 1;
5625          fprintf (file, "(%s) ", reg_names [regno]);
5626	}
5627      return;
5628
5629    default:
5630      output_operand_lossage ("ia64_print_operand: unknown code");
5631      return;
5632    }
5633
5634  switch (GET_CODE (x))
5635    {
5636      /* This happens for the spill/restore instructions.  */
5637    case POST_INC:
5638    case POST_DEC:
5639    case POST_MODIFY:
5640      x = XEXP (x, 0);
5641      /* fall through */
5642
5643    case REG:
5644      fputs (reg_names [REGNO (x)], file);
5645      break;
5646
5647    case MEM:
5648      {
5649	rtx addr = XEXP (x, 0);
5650	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5651	  addr = XEXP (addr, 0);
5652	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5653	break;
5654      }
5655
5656    default:
5657      output_addr_const (file, x);
5658      break;
5659    }
5660
5661  return;
5662}
5663
5664/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
5665
5666static bool
5667ia64_print_operand_punct_valid_p (unsigned char code)
5668{
5669  return (code == '+' || code == ',');
5670}
5671
5672/* Compute a (partial) cost for rtx X.  Return true if the complete
5673   cost has been computed, and false if subexpressions should be
5674   scanned.  In either case, *TOTAL contains the cost result.  */
5675/* ??? This is incomplete.  */
5676
5677static bool
5678ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
5679		int opno ATTRIBUTE_UNUSED,
5680		int *total, bool speed ATTRIBUTE_UNUSED)
5681{
5682  int code = GET_CODE (x);
5683
5684  switch (code)
5685    {
5686    case CONST_INT:
5687      switch (outer_code)
5688        {
5689        case SET:
5690	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5691	  return true;
5692        case PLUS:
5693	  if (satisfies_constraint_I (x))
5694	    *total = 0;
5695	  else if (satisfies_constraint_J (x))
5696	    *total = 1;
5697	  else
5698	    *total = COSTS_N_INSNS (1);
5699	  return true;
5700        default:
5701	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5702	    *total = 0;
5703	  else
5704	    *total = COSTS_N_INSNS (1);
5705	  return true;
5706	}
5707
5708    case CONST_DOUBLE:
5709      *total = COSTS_N_INSNS (1);
5710      return true;
5711
5712    case CONST:
5713    case SYMBOL_REF:
5714    case LABEL_REF:
5715      *total = COSTS_N_INSNS (3);
5716      return true;
5717
5718    case FMA:
5719      *total = COSTS_N_INSNS (4);
5720      return true;
5721
5722    case MULT:
5723      /* For multiplies wider than HImode, we have to go to the FPU,
5724         which normally involves copies.  Plus there's the latency
5725         of the multiply itself, and the latency of the instructions to
5726         transfer integer regs to FP regs.  */
5727      if (FLOAT_MODE_P (mode))
5728	*total = COSTS_N_INSNS (4);
5729      else if (GET_MODE_SIZE (mode) > 2)
5730        *total = COSTS_N_INSNS (10);
5731      else
5732	*total = COSTS_N_INSNS (2);
5733      return true;
5734
5735    case PLUS:
5736    case MINUS:
5737      if (FLOAT_MODE_P (mode))
5738	{
5739	  *total = COSTS_N_INSNS (4);
5740	  return true;
5741	}
5742      /* FALLTHRU */
5743
5744    case ASHIFT:
5745    case ASHIFTRT:
5746    case LSHIFTRT:
5747      *total = COSTS_N_INSNS (1);
5748      return true;
5749
5750    case DIV:
5751    case UDIV:
5752    case MOD:
5753    case UMOD:
5754      /* We make divide expensive, so that divide-by-constant will be
5755         optimized to a multiply.  */
5756      *total = COSTS_N_INSNS (60);
5757      return true;
5758
5759    default:
5760      return false;
5761    }
5762}
5763
5764/* Calculate the cost of moving data from a register in class FROM to
5765   one in class TO, using MODE.  */
5766
5767static int
5768ia64_register_move_cost (machine_mode mode, reg_class_t from,
5769			 reg_class_t to)
5770{
5771  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5772  if (to == ADDL_REGS)
5773    to = GR_REGS;
5774  if (from == ADDL_REGS)
5775    from = GR_REGS;
5776
5777  /* All costs are symmetric, so reduce cases by putting the
5778     lower number class as the destination.  */
5779  if (from < to)
5780    {
5781      reg_class_t tmp = to;
5782      to = from, from = tmp;
5783    }
5784
5785  /* Moving from FR<->GR in XFmode must be more expensive than 2,
5786     so that we get secondary memory reloads.  Between FR_REGS,
5787     we have to make this at least as expensive as memory_move_cost
5788     to avoid spectacularly poor register class preferencing.  */
5789  if (mode == XFmode || mode == RFmode)
5790    {
5791      if (to != GR_REGS || from != GR_REGS)
5792        return memory_move_cost (mode, to, false);
5793      else
5794	return 3;
5795    }
5796
5797  switch (to)
5798    {
5799    case PR_REGS:
5800      /* Moving between PR registers takes two insns.  */
5801      if (from == PR_REGS)
5802	return 3;
5803      /* Moving between PR and anything but GR is impossible.  */
5804      if (from != GR_REGS)
5805	return memory_move_cost (mode, to, false);
5806      break;
5807
5808    case BR_REGS:
5809      /* Moving between BR and anything but GR is impossible.  */
5810      if (from != GR_REGS && from != GR_AND_BR_REGS)
5811	return memory_move_cost (mode, to, false);
5812      break;
5813
5814    case AR_I_REGS:
5815    case AR_M_REGS:
5816      /* Moving between AR and anything but GR is impossible.  */
5817      if (from != GR_REGS)
5818	return memory_move_cost (mode, to, false);
5819      break;
5820
5821    case GR_REGS:
5822    case FR_REGS:
5823    case FP_REGS:
5824    case GR_AND_FR_REGS:
5825    case GR_AND_BR_REGS:
5826    case ALL_REGS:
5827      break;
5828
5829    default:
5830      gcc_unreachable ();
5831    }
5832
5833  return 2;
5834}
5835
5836/* Calculate the cost of moving data of MODE from a register to or from
5837   memory.  */
5838
5839static int
5840ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
5841		       reg_class_t rclass,
5842		       bool in ATTRIBUTE_UNUSED)
5843{
5844  if (rclass == GENERAL_REGS
5845      || rclass == FR_REGS
5846      || rclass == FP_REGS
5847      || rclass == GR_AND_FR_REGS)
5848    return 4;
5849  else
5850    return 10;
5851}
5852
5853/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
5854   on RCLASS to use when copying X into that class.  */
5855
5856static reg_class_t
5857ia64_preferred_reload_class (rtx x, reg_class_t rclass)
5858{
5859  switch (rclass)
5860    {
5861    case FR_REGS:
5862    case FP_REGS:
5863      /* Don't allow volatile mem reloads into floating point registers.
5864	 This is defined to force reload to choose the r/m case instead
5865	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5866      if (MEM_P (x) && MEM_VOLATILE_P (x))
5867	return NO_REGS;
5868
5869      /* Force all unrecognized constants into the constant pool.  */
5870      if (CONSTANT_P (x))
5871	return NO_REGS;
5872      break;
5873
5874    case AR_M_REGS:
5875    case AR_I_REGS:
5876      if (!OBJECT_P (x))
5877	return NO_REGS;
5878      break;
5879
5880    default:
5881      break;
5882    }
5883
5884  return rclass;
5885}
5886
5887/* This function returns the register class required for a secondary
5888   register when copying between one of the registers in RCLASS, and X,
5889   using MODE.  A return value of NO_REGS means that no secondary register
5890   is required.  */
5891
5892enum reg_class
5893ia64_secondary_reload_class (enum reg_class rclass,
5894			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5895{
5896  int regno = -1;
5897
5898  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5899    regno = true_regnum (x);
5900
5901  switch (rclass)
5902    {
5903    case BR_REGS:
5904    case AR_M_REGS:
5905    case AR_I_REGS:
5906      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5907	 interaction.  We end up with two pseudos with overlapping lifetimes
5908	 both of which are equiv to the same constant, and both which need
5909	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5910	 changes depending on the path length, which means the qty_first_reg
5911	 check in make_regs_eqv can give different answers at different times.
5912	 At some point I'll probably need a reload_indi pattern to handle
5913	 this.
5914
5915	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5916	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5917	 non-general registers for good measure.  */
5918      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5919	return GR_REGS;
5920
5921      /* This is needed if a pseudo used as a call_operand gets spilled to a
5922	 stack slot.  */
5923      if (GET_CODE (x) == MEM)
5924	return GR_REGS;
5925      break;
5926
5927    case FR_REGS:
5928    case FP_REGS:
5929      /* Need to go through general registers to get to other class regs.  */
5930      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5931	return GR_REGS;
5932
5933      /* This can happen when a paradoxical subreg is an operand to the
5934	 muldi3 pattern.  */
5935      /* ??? This shouldn't be necessary after instruction scheduling is
5936	 enabled, because paradoxical subregs are not accepted by
5937	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5938	 stop the paradoxical subreg stupidity in the *_operand functions
5939	 in recog.c.  */
5940      if (GET_CODE (x) == MEM
5941	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5942	      || GET_MODE (x) == QImode))
5943	return GR_REGS;
5944
5945      /* This can happen because of the ior/and/etc patterns that accept FP
5946	 registers as operands.  If the third operand is a constant, then it
5947	 needs to be reloaded into a FP register.  */
5948      if (GET_CODE (x) == CONST_INT)
5949	return GR_REGS;
5950
5951      /* This can happen because of register elimination in a muldi3 insn.
5952	 E.g. `26107 * (unsigned long)&u'.  */
5953      if (GET_CODE (x) == PLUS)
5954	return GR_REGS;
5955      break;
5956
5957    case PR_REGS:
5958      /* ??? This happens if we cse/gcse a BImode value across a call,
5959	 and the function has a nonlocal goto.  This is because global
5960	 does not allocate call crossing pseudos to hard registers when
5961	 crtl->has_nonlocal_goto is true.  This is relatively
5962	 common for C++ programs that use exceptions.  To reproduce,
5963	 return NO_REGS and compile libstdc++.  */
5964      if (GET_CODE (x) == MEM)
5965	return GR_REGS;
5966
5967      /* This can happen when we take a BImode subreg of a DImode value,
5968	 and that DImode value winds up in some non-GR register.  */
5969      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5970	return GR_REGS;
5971      break;
5972
5973    default:
5974      break;
5975    }
5976
5977  return NO_REGS;
5978}
5979
5980
5981/* Implement targetm.unspec_may_trap_p hook.  */
5982static int
5983ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5984{
5985  switch (XINT (x, 1))
5986    {
5987    case UNSPEC_LDA:
5988    case UNSPEC_LDS:
5989    case UNSPEC_LDSA:
5990    case UNSPEC_LDCCLR:
5991    case UNSPEC_CHKACLR:
5992    case UNSPEC_CHKS:
5993      /* These unspecs are just wrappers.  */
5994      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5995    }
5996
5997  return default_unspec_may_trap_p (x, flags);
5998}
5999
6000
6001/* Parse the -mfixed-range= option string.  */
6002
6003static void
6004fix_range (const char *const_str)
6005{
6006  int i, first, last;
6007  char *str, *dash, *comma;
6008
6009  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
6010     REG2 are either register names or register numbers.  The effect
6011     of this option is to mark the registers in the range from REG1 to
6012     REG2 as ``fixed'' so they won't be used by the compiler.  This is
6013     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
6014
6015  i = strlen (const_str);
6016  str = (char *) alloca (i + 1);
6017  memcpy (str, const_str, i + 1);
6018
6019  while (1)
6020    {
6021      dash = strchr (str, '-');
6022      if (!dash)
6023	{
6024	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
6025	  return;
6026	}
6027      *dash = '\0';
6028
6029      comma = strchr (dash + 1, ',');
6030      if (comma)
6031	*comma = '\0';
6032
6033      first = decode_reg_name (str);
6034      if (first < 0)
6035	{
6036	  warning (0, "unknown register name: %s", str);
6037	  return;
6038	}
6039
6040      last = decode_reg_name (dash + 1);
6041      if (last < 0)
6042	{
6043	  warning (0, "unknown register name: %s", dash + 1);
6044	  return;
6045	}
6046
6047      *dash = '-';
6048
6049      if (first > last)
6050	{
6051	  warning (0, "%s-%s is an empty range", str, dash + 1);
6052	  return;
6053	}
6054
6055      for (i = first; i <= last; ++i)
6056	fixed_regs[i] = 1;
6057
6058      if (!comma)
6059	break;
6060
6061      *comma = ',';
6062      str = comma + 1;
6063    }
6064}
6065
6066/* Implement TARGET_OPTION_OVERRIDE.  */
6067
6068static void
6069ia64_option_override (void)
6070{
6071  unsigned int i;
6072  cl_deferred_option *opt;
6073  vec<cl_deferred_option> *v
6074    = (vec<cl_deferred_option> *) ia64_deferred_options;
6075
6076  if (v)
6077    FOR_EACH_VEC_ELT (*v, i, opt)
6078      {
6079	switch (opt->opt_index)
6080	  {
6081	  case OPT_mfixed_range_:
6082	    fix_range (opt->arg);
6083	    break;
6084
6085	  default:
6086	    gcc_unreachable ();
6087	  }
6088      }
6089
6090  if (TARGET_AUTO_PIC)
6091    target_flags |= MASK_CONST_GP;
6092
6093  /* Numerous experiment shows that IRA based loop pressure
6094     calculation works better for RTL loop invariant motion on targets
6095     with enough (>= 32) registers.  It is an expensive optimization.
6096     So it is on only for peak performance.  */
6097  if (optimize >= 3)
6098    flag_ira_loop_pressure = 1;
6099
6100
6101  ia64_section_threshold = (global_options_set.x_g_switch_value
6102			    ? g_switch_value
6103			    : IA64_DEFAULT_GVALUE);
6104
6105  init_machine_status = ia64_init_machine_status;
6106
6107  if (flag_align_functions && !str_align_functions)
6108    str_align_functions = "64";
6109  if (flag_align_loops && !str_align_loops)
6110    str_align_loops = "32";
6111  if (TARGET_ABI_OPEN_VMS)
6112    flag_no_common = 1;
6113
6114  ia64_override_options_after_change();
6115}
6116
6117/* Implement targetm.override_options_after_change.  */
6118
6119static void
6120ia64_override_options_after_change (void)
6121{
6122  if (optimize >= 3
6123      && !global_options_set.x_flag_selective_scheduling
6124      && !global_options_set.x_flag_selective_scheduling2)
6125    {
6126      flag_selective_scheduling2 = 1;
6127      flag_sel_sched_pipelining = 1;
6128    }
6129  if (mflag_sched_control_spec == 2)
6130    {
6131      /* Control speculation is on by default for the selective scheduler,
6132         but not for the Haifa scheduler.  */
6133      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
6134    }
6135  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
6136    {
6137      /* FIXME: remove this when we'd implement breaking autoinsns as
6138         a transformation.  */
6139      flag_auto_inc_dec = 0;
6140    }
6141}
6142
6143/* Initialize the record of emitted frame related registers.  */
6144
6145void ia64_init_expanders (void)
6146{
6147  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
6148}
6149
6150static struct machine_function *
6151ia64_init_machine_status (void)
6152{
6153  return ggc_cleared_alloc<machine_function> ();
6154}
6155
6156static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
6157static enum attr_type ia64_safe_type (rtx_insn *);
6158
6159static enum attr_itanium_class
6160ia64_safe_itanium_class (rtx_insn *insn)
6161{
6162  if (recog_memoized (insn) >= 0)
6163    return get_attr_itanium_class (insn);
6164  else if (DEBUG_INSN_P (insn))
6165    return ITANIUM_CLASS_IGNORE;
6166  else
6167    return ITANIUM_CLASS_UNKNOWN;
6168}
6169
6170static enum attr_type
6171ia64_safe_type (rtx_insn *insn)
6172{
6173  if (recog_memoized (insn) >= 0)
6174    return get_attr_type (insn);
6175  else
6176    return TYPE_UNKNOWN;
6177}
6178
6179/* The following collection of routines emit instruction group stop bits as
6180   necessary to avoid dependencies.  */
6181
6182/* Need to track some additional registers as far as serialization is
6183   concerned so we can properly handle br.call and br.ret.  We could
6184   make these registers visible to gcc, but since these registers are
6185   never explicitly used in gcc generated code, it seems wasteful to
6186   do so (plus it would make the call and return patterns needlessly
6187   complex).  */
6188#define REG_RP		(BR_REG (0))
6189#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
6190/* This is used for volatile asms which may require a stop bit immediately
6191   before and after them.  */
6192#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
6193#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
6194#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
6195
6196/* For each register, we keep track of how it has been written in the
6197   current instruction group.
6198
6199   If a register is written unconditionally (no qualifying predicate),
6200   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
6201
6202   If a register is written if its qualifying predicate P is true, we
6203   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
6204   may be written again by the complement of P (P^1) and when this happens,
6205   WRITE_COUNT gets set to 2.
6206
6207   The result of this is that whenever an insn attempts to write a register
6208   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
6209
6210   If a predicate register is written by a floating-point insn, we set
6211   WRITTEN_BY_FP to true.
6212
6213   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
6214   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
6215
6216#if GCC_VERSION >= 4000
6217#define RWS_FIELD_TYPE __extension__ unsigned short
6218#else
6219#define RWS_FIELD_TYPE unsigned int
6220#endif
6221struct reg_write_state
6222{
6223  RWS_FIELD_TYPE write_count : 2;
6224  RWS_FIELD_TYPE first_pred : 10;
6225  RWS_FIELD_TYPE written_by_fp : 1;
6226  RWS_FIELD_TYPE written_by_and : 1;
6227  RWS_FIELD_TYPE written_by_or : 1;
6228};
6229
6230/* Cumulative info for the current instruction group.  */
6231struct reg_write_state rws_sum[NUM_REGS];
6232#if CHECKING_P
6233/* Bitmap whether a register has been written in the current insn.  */
6234unsigned HOST_WIDEST_FAST_INT rws_insn
6235  [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
6236   / HOST_BITS_PER_WIDEST_FAST_INT];
6237
6238static inline void
6239rws_insn_set (unsigned int regno)
6240{
6241  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6242  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6243  gcc_assert (!((rws_insn[elt] >> bit) & 1));
6244  rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit;
6245}
6246
6247static inline int
6248rws_insn_test (unsigned int regno)
6249{
6250  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
6251  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
6252  return (rws_insn[elt] >> bit) & 1;
6253}
6254#else
6255/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
6256unsigned char rws_insn[2];
6257
6258static inline void
6259rws_insn_set (int regno)
6260{
6261  if (regno == REG_AR_CFM)
6262    rws_insn[0] = 1;
6263  else if (regno == REG_VOLATILE)
6264    rws_insn[1] = 1;
6265}
6266
6267static inline int
6268rws_insn_test (int regno)
6269{
6270  if (regno == REG_AR_CFM)
6271    return rws_insn[0];
6272  if (regno == REG_VOLATILE)
6273    return rws_insn[1];
6274  return 0;
6275}
6276#endif
6277
6278/* Indicates whether this is the first instruction after a stop bit,
6279   in which case we don't need another stop bit.  Without this,
6280   ia64_variable_issue will die when scheduling an alloc.  */
6281static int first_instruction;
6282
6283/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
6284   RTL for one instruction.  */
6285struct reg_flags
6286{
6287  unsigned int is_write : 1;	/* Is register being written?  */
6288  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
6289  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
6290  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
6291  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
6292  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
6293};
6294
6295static void rws_update (int, struct reg_flags, int);
6296static int rws_access_regno (int, struct reg_flags, int);
6297static int rws_access_reg (rtx, struct reg_flags, int);
6298static void update_set_flags (rtx, struct reg_flags *);
6299static int set_src_needs_barrier (rtx, struct reg_flags, int);
6300static int rtx_needs_barrier (rtx, struct reg_flags, int);
6301static void init_insn_group_barriers (void);
6302static int group_barrier_needed (rtx_insn *);
6303static int safe_group_barrier_needed (rtx_insn *);
6304static int in_safe_group_barrier;
6305
6306/* Update *RWS for REGNO, which is being written by the current instruction,
6307   with predicate PRED, and associated register flags in FLAGS.  */
6308
6309static void
6310rws_update (int regno, struct reg_flags flags, int pred)
6311{
6312  if (pred)
6313    rws_sum[regno].write_count++;
6314  else
6315    rws_sum[regno].write_count = 2;
6316  rws_sum[regno].written_by_fp |= flags.is_fp;
6317  /* ??? Not tracking and/or across differing predicates.  */
6318  rws_sum[regno].written_by_and = flags.is_and;
6319  rws_sum[regno].written_by_or = flags.is_or;
6320  rws_sum[regno].first_pred = pred;
6321}
6322
6323/* Handle an access to register REGNO of type FLAGS using predicate register
6324   PRED.  Update rws_sum array.  Return 1 if this access creates
6325   a dependency with an earlier instruction in the same group.  */
6326
6327static int
6328rws_access_regno (int regno, struct reg_flags flags, int pred)
6329{
6330  int need_barrier = 0;
6331
6332  gcc_assert (regno < NUM_REGS);
6333
6334  if (! PR_REGNO_P (regno))
6335    flags.is_and = flags.is_or = 0;
6336
6337  if (flags.is_write)
6338    {
6339      int write_count;
6340
6341      rws_insn_set (regno);
6342      write_count = rws_sum[regno].write_count;
6343
6344      switch (write_count)
6345	{
6346	case 0:
6347	  /* The register has not been written yet.  */
6348	  if (!in_safe_group_barrier)
6349	    rws_update (regno, flags, pred);
6350	  break;
6351
6352	case 1:
6353	  /* The register has been written via a predicate.  Treat
6354	     it like a unconditional write and do not try to check
6355	     for complementary pred reg in earlier write.  */
6356	  if (flags.is_and && rws_sum[regno].written_by_and)
6357	    ;
6358	  else if (flags.is_or && rws_sum[regno].written_by_or)
6359	    ;
6360	  else
6361	    need_barrier = 1;
6362	  if (!in_safe_group_barrier)
6363	    rws_update (regno, flags, pred);
6364	  break;
6365
6366	case 2:
6367	  /* The register has been unconditionally written already.  We
6368	     need a barrier.  */
6369	  if (flags.is_and && rws_sum[regno].written_by_and)
6370	    ;
6371	  else if (flags.is_or && rws_sum[regno].written_by_or)
6372	    ;
6373	  else
6374	    need_barrier = 1;
6375	  if (!in_safe_group_barrier)
6376	    {
6377	      rws_sum[regno].written_by_and = flags.is_and;
6378	      rws_sum[regno].written_by_or = flags.is_or;
6379	    }
6380	  break;
6381
6382	default:
6383	  gcc_unreachable ();
6384	}
6385    }
6386  else
6387    {
6388      if (flags.is_branch)
6389	{
6390	  /* Branches have several RAW exceptions that allow to avoid
6391	     barriers.  */
6392
6393	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
6394	    /* RAW dependencies on branch regs are permissible as long
6395	       as the writer is a non-branch instruction.  Since we
6396	       never generate code that uses a branch register written
6397	       by a branch instruction, handling this case is
6398	       easy.  */
6399	    return 0;
6400
6401	  if (REGNO_REG_CLASS (regno) == PR_REGS
6402	      && ! rws_sum[regno].written_by_fp)
6403	    /* The predicates of a branch are available within the
6404	       same insn group as long as the predicate was written by
6405	       something other than a floating-point instruction.  */
6406	    return 0;
6407	}
6408
6409      if (flags.is_and && rws_sum[regno].written_by_and)
6410	return 0;
6411      if (flags.is_or && rws_sum[regno].written_by_or)
6412	return 0;
6413
6414      switch (rws_sum[regno].write_count)
6415	{
6416	case 0:
6417	  /* The register has not been written yet.  */
6418	  break;
6419
6420	case 1:
6421	  /* The register has been written via a predicate, assume we
6422	     need a barrier (don't check for complementary regs).  */
6423	  need_barrier = 1;
6424	  break;
6425
6426	case 2:
6427	  /* The register has been unconditionally written already.  We
6428	     need a barrier.  */
6429	  need_barrier = 1;
6430	  break;
6431
6432	default:
6433	  gcc_unreachable ();
6434	}
6435    }
6436
6437  return need_barrier;
6438}
6439
6440static int
6441rws_access_reg (rtx reg, struct reg_flags flags, int pred)
6442{
6443  int regno = REGNO (reg);
6444  int n = REG_NREGS (reg);
6445
6446  if (n == 1)
6447    return rws_access_regno (regno, flags, pred);
6448  else
6449    {
6450      int need_barrier = 0;
6451      while (--n >= 0)
6452	need_barrier |= rws_access_regno (regno + n, flags, pred);
6453      return need_barrier;
6454    }
6455}
6456
6457/* Examine X, which is a SET rtx, and update the flags, the predicate, and
6458   the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
6459
6460static void
6461update_set_flags (rtx x, struct reg_flags *pflags)
6462{
6463  rtx src = SET_SRC (x);
6464
6465  switch (GET_CODE (src))
6466    {
6467    case CALL:
6468      return;
6469
6470    case IF_THEN_ELSE:
6471      /* There are four cases here:
6472	 (1) The destination is (pc), in which case this is a branch,
6473	 nothing here applies.
6474	 (2) The destination is ar.lc, in which case this is a
6475	 doloop_end_internal,
6476	 (3) The destination is an fp register, in which case this is
6477	 an fselect instruction.
6478	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
6479	 this is a check load.
6480	 In all cases, nothing we do in this function applies.  */
6481      return;
6482
6483    default:
6484      if (COMPARISON_P (src)
6485	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
6486	/* Set pflags->is_fp to 1 so that we know we're dealing
6487	   with a floating point comparison when processing the
6488	   destination of the SET.  */
6489	pflags->is_fp = 1;
6490
6491      /* Discover if this is a parallel comparison.  We only handle
6492	 and.orcm and or.andcm at present, since we must retain a
6493	 strict inverse on the predicate pair.  */
6494      else if (GET_CODE (src) == AND)
6495	pflags->is_and = 1;
6496      else if (GET_CODE (src) == IOR)
6497	pflags->is_or = 1;
6498
6499      break;
6500    }
6501}
6502
6503/* Subroutine of rtx_needs_barrier; this function determines whether the
6504   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
6505   are as in rtx_needs_barrier.  COND is an rtx that holds the condition
6506   for this insn.  */
6507
6508static int
6509set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
6510{
6511  int need_barrier = 0;
6512  rtx dst;
6513  rtx src = SET_SRC (x);
6514
6515  if (GET_CODE (src) == CALL)
6516    /* We don't need to worry about the result registers that
6517       get written by subroutine call.  */
6518    return rtx_needs_barrier (src, flags, pred);
6519  else if (SET_DEST (x) == pc_rtx)
6520    {
6521      /* X is a conditional branch.  */
6522      /* ??? This seems redundant, as the caller sets this bit for
6523	 all JUMP_INSNs.  */
6524      if (!ia64_spec_check_src_p (src))
6525	flags.is_branch = 1;
6526      return rtx_needs_barrier (src, flags, pred);
6527    }
6528
6529  if (ia64_spec_check_src_p (src))
6530    /* Avoid checking one register twice (in condition
6531       and in 'then' section) for ldc pattern.  */
6532    {
6533      gcc_assert (REG_P (XEXP (src, 2)));
6534      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
6535
6536      /* We process MEM below.  */
6537      src = XEXP (src, 1);
6538    }
6539
6540  need_barrier |= rtx_needs_barrier (src, flags, pred);
6541
6542  dst = SET_DEST (x);
6543  if (GET_CODE (dst) == ZERO_EXTRACT)
6544    {
6545      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
6546      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
6547    }
6548  return need_barrier;
6549}
6550
6551/* Handle an access to rtx X of type FLAGS using predicate register
6552   PRED.  Return 1 if this access creates a dependency with an earlier
6553   instruction in the same group.  */
6554
6555static int
6556rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
6557{
6558  int i, j;
6559  int is_complemented = 0;
6560  int need_barrier = 0;
6561  const char *format_ptr;
6562  struct reg_flags new_flags;
6563  rtx cond;
6564
6565  if (! x)
6566    return 0;
6567
6568  new_flags = flags;
6569
6570  switch (GET_CODE (x))
6571    {
6572    case SET:
6573      update_set_flags (x, &new_flags);
6574      need_barrier = set_src_needs_barrier (x, new_flags, pred);
6575      if (GET_CODE (SET_SRC (x)) != CALL)
6576	{
6577	  new_flags.is_write = 1;
6578	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
6579	}
6580      break;
6581
6582    case CALL:
6583      new_flags.is_write = 0;
6584      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6585
6586      /* Avoid multiple register writes, in case this is a pattern with
6587	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
6588      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
6589	{
6590	  new_flags.is_write = 1;
6591	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
6592	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
6593	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6594	}
6595      break;
6596
6597    case COND_EXEC:
6598      /* X is a predicated instruction.  */
6599
6600      cond = COND_EXEC_TEST (x);
6601      gcc_assert (!pred);
6602      need_barrier = rtx_needs_barrier (cond, flags, 0);
6603
6604      if (GET_CODE (cond) == EQ)
6605	is_complemented = 1;
6606      cond = XEXP (cond, 0);
6607      gcc_assert (GET_CODE (cond) == REG
6608		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
6609      pred = REGNO (cond);
6610      if (is_complemented)
6611	++pred;
6612
6613      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6614      return need_barrier;
6615
6616    case CLOBBER:
6617    case USE:
6618      /* Clobber & use are for earlier compiler-phases only.  */
6619      break;
6620
6621    case ASM_OPERANDS:
6622    case ASM_INPUT:
6623      /* We always emit stop bits for traditional asms.  We emit stop bits
6624	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6625      if (GET_CODE (x) != ASM_OPERANDS
6626	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6627	{
6628	  /* Avoid writing the register multiple times if we have multiple
6629	     asm outputs.  This avoids a failure in rws_access_reg.  */
6630	  if (! rws_insn_test (REG_VOLATILE))
6631	    {
6632	      new_flags.is_write = 1;
6633	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6634	    }
6635	  return 1;
6636	}
6637
6638      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
6639	 We cannot just fall through here since then we would be confused
6640	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
6641	 traditional asms unlike their normal usage.  */
6642
6643      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6644	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6645	  need_barrier = 1;
6646      break;
6647
6648    case PARALLEL:
6649      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6650	{
6651	  rtx pat = XVECEXP (x, 0, i);
6652	  switch (GET_CODE (pat))
6653	    {
6654	    case SET:
6655	      update_set_flags (pat, &new_flags);
6656	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6657	      break;
6658
6659	    case USE:
6660	    case CALL:
6661	    case ASM_OPERANDS:
6662	    case ASM_INPUT:
6663	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6664	      break;
6665
6666	    case CLOBBER:
6667	      if (REG_P (XEXP (pat, 0))
6668		  && extract_asm_operands (x) != NULL_RTX
6669		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6670		{
6671		  new_flags.is_write = 1;
6672		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6673						     new_flags, pred);
6674		  new_flags = flags;
6675		}
6676	      break;
6677
6678	    case RETURN:
6679	      break;
6680
6681	    default:
6682	      gcc_unreachable ();
6683	    }
6684	}
6685      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6686	{
6687	  rtx pat = XVECEXP (x, 0, i);
6688	  if (GET_CODE (pat) == SET)
6689	    {
6690	      if (GET_CODE (SET_SRC (pat)) != CALL)
6691		{
6692		  new_flags.is_write = 1;
6693		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6694						     pred);
6695		}
6696	    }
6697	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6698	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6699	}
6700      break;
6701
6702    case SUBREG:
6703      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6704      break;
6705    case REG:
6706      if (REGNO (x) == AR_UNAT_REGNUM)
6707	{
6708	  for (i = 0; i < 64; ++i)
6709	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6710	}
6711      else
6712	need_barrier = rws_access_reg (x, flags, pred);
6713      break;
6714
6715    case MEM:
6716      /* Find the regs used in memory address computation.  */
6717      new_flags.is_write = 0;
6718      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6719      break;
6720
6721    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6722    case SYMBOL_REF:  case LABEL_REF:     case CONST:
6723      break;
6724
6725      /* Operators with side-effects.  */
6726    case POST_INC:    case POST_DEC:
6727      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6728
6729      new_flags.is_write = 0;
6730      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6731      new_flags.is_write = 1;
6732      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6733      break;
6734
6735    case POST_MODIFY:
6736      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6737
6738      new_flags.is_write = 0;
6739      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6740      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6741      new_flags.is_write = 1;
6742      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6743      break;
6744
6745      /* Handle common unary and binary ops for efficiency.  */
6746    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6747    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6748    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6749    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6750    case NE:       case EQ:      case GE:      case GT:        case LE:
6751    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6752      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6753      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6754      break;
6755
6756    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6757    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6758    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6759    case SQRT:     case FFS:		case POPCOUNT:
6760      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6761      break;
6762
6763    case VEC_SELECT:
6764      /* VEC_SELECT's second argument is a PARALLEL with integers that
6765	 describe the elements selected.  On ia64, those integers are
6766	 always constants.  Avoid walking the PARALLEL so that we don't
6767	 get confused with "normal" parallels and then die.  */
6768      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6769      break;
6770
6771    case UNSPEC:
6772      switch (XINT (x, 1))
6773	{
6774	case UNSPEC_LTOFF_DTPMOD:
6775	case UNSPEC_LTOFF_DTPREL:
6776	case UNSPEC_DTPREL:
6777	case UNSPEC_LTOFF_TPREL:
6778	case UNSPEC_TPREL:
6779	case UNSPEC_PRED_REL_MUTEX:
6780	case UNSPEC_PIC_CALL:
6781        case UNSPEC_MF:
6782        case UNSPEC_FETCHADD_ACQ:
6783        case UNSPEC_FETCHADD_REL:
6784	case UNSPEC_BSP_VALUE:
6785	case UNSPEC_FLUSHRS:
6786	case UNSPEC_BUNDLE_SELECTOR:
6787          break;
6788
6789	case UNSPEC_GR_SPILL:
6790	case UNSPEC_GR_RESTORE:
6791	  {
6792	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6793	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6794
6795	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6796	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6797	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6798					      new_flags, pred);
6799	    break;
6800	  }
6801
6802	case UNSPEC_FR_SPILL:
6803	case UNSPEC_FR_RESTORE:
6804	case UNSPEC_GETF_EXP:
6805	case UNSPEC_SETF_EXP:
6806        case UNSPEC_ADDP4:
6807	case UNSPEC_FR_SQRT_RECIP_APPROX:
6808	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6809	case UNSPEC_LDA:
6810	case UNSPEC_LDS:
6811	case UNSPEC_LDS_A:
6812	case UNSPEC_LDSA:
6813	case UNSPEC_CHKACLR:
6814        case UNSPEC_CHKS:
6815	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6816	  break;
6817
6818	case UNSPEC_FR_RECIP_APPROX:
6819	case UNSPEC_SHRP:
6820	case UNSPEC_COPYSIGN:
6821	case UNSPEC_FR_RECIP_APPROX_RES:
6822	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6823	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6824	  break;
6825
6826        case UNSPEC_CMPXCHG_ACQ:
6827        case UNSPEC_CMPXCHG_REL:
6828	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6829	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6830	  break;
6831
6832	default:
6833	  gcc_unreachable ();
6834	}
6835      break;
6836
6837    case UNSPEC_VOLATILE:
6838      switch (XINT (x, 1))
6839	{
6840	case UNSPECV_ALLOC:
6841	  /* Alloc must always be the first instruction of a group.
6842	     We force this by always returning true.  */
6843	  /* ??? We might get better scheduling if we explicitly check for
6844	     input/local/output register dependencies, and modify the
6845	     scheduler so that alloc is always reordered to the start of
6846	     the current group.  We could then eliminate all of the
6847	     first_instruction code.  */
6848	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6849
6850	  new_flags.is_write = 1;
6851	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6852	  return 1;
6853
6854	case UNSPECV_SET_BSP:
6855	case UNSPECV_PROBE_STACK_RANGE:
6856	  need_barrier = 1;
6857          break;
6858
6859	case UNSPECV_BLOCKAGE:
6860	case UNSPECV_INSN_GROUP_BARRIER:
6861	case UNSPECV_BREAK:
6862	case UNSPECV_PSAC_ALL:
6863	case UNSPECV_PSAC_NORMAL:
6864	  return 0;
6865
6866	case UNSPECV_PROBE_STACK_ADDRESS:
6867	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6868	  break;
6869
6870	default:
6871	  gcc_unreachable ();
6872	}
6873      break;
6874
6875    case RETURN:
6876      new_flags.is_write = 0;
6877      need_barrier  = rws_access_regno (REG_RP, flags, pred);
6878      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6879
6880      new_flags.is_write = 1;
6881      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6882      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6883      break;
6884
6885    default:
6886      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6887      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6888	switch (format_ptr[i])
6889	  {
6890	  case '0':	/* unused field */
6891	  case 'i':	/* integer */
6892	  case 'n':	/* note */
6893	  case 'w':	/* wide integer */
6894	  case 's':	/* pointer to string */
6895	  case 'S':	/* optional pointer to string */
6896	    break;
6897
6898	  case 'e':
6899	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6900	      need_barrier = 1;
6901	    break;
6902
6903	  case 'E':
6904	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6905	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6906		need_barrier = 1;
6907	    break;
6908
6909	  default:
6910	    gcc_unreachable ();
6911	  }
6912      break;
6913    }
6914  return need_barrier;
6915}
6916
6917/* Clear out the state for group_barrier_needed at the start of a
6918   sequence of insns.  */
6919
6920static void
6921init_insn_group_barriers (void)
6922{
6923  memset (rws_sum, 0, sizeof (rws_sum));
6924  first_instruction = 1;
6925}
6926
6927/* Given the current state, determine whether a group barrier (a stop bit) is
6928   necessary before INSN.  Return nonzero if so.  This modifies the state to
6929   include the effects of INSN as a side-effect.  */
6930
6931static int
6932group_barrier_needed (rtx_insn *insn)
6933{
6934  rtx pat;
6935  int need_barrier = 0;
6936  struct reg_flags flags;
6937
6938  memset (&flags, 0, sizeof (flags));
6939  switch (GET_CODE (insn))
6940    {
6941    case NOTE:
6942    case DEBUG_INSN:
6943      break;
6944
6945    case BARRIER:
6946      /* A barrier doesn't imply an instruction group boundary.  */
6947      break;
6948
6949    case CODE_LABEL:
6950      memset (rws_insn, 0, sizeof (rws_insn));
6951      return 1;
6952
6953    case CALL_INSN:
6954      flags.is_branch = 1;
6955      flags.is_sibcall = SIBLING_CALL_P (insn);
6956      memset (rws_insn, 0, sizeof (rws_insn));
6957
6958      /* Don't bundle a call following another call.  */
6959      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6960	{
6961	  need_barrier = 1;
6962	  break;
6963	}
6964
6965      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6966      break;
6967
6968    case JUMP_INSN:
6969      if (!ia64_spec_check_p (insn))
6970	flags.is_branch = 1;
6971
6972      /* Don't bundle a jump following a call.  */
6973      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
6974	{
6975	  need_barrier = 1;
6976	  break;
6977	}
6978      /* FALLTHRU */
6979
6980    case INSN:
6981      if (GET_CODE (PATTERN (insn)) == USE
6982	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6983	/* Don't care about USE and CLOBBER "insns"---those are used to
6984	   indicate to the optimizer that it shouldn't get rid of
6985	   certain operations.  */
6986	break;
6987
6988      pat = PATTERN (insn);
6989
6990      /* Ug.  Hack hacks hacked elsewhere.  */
6991      switch (recog_memoized (insn))
6992	{
6993	  /* We play dependency tricks with the epilogue in order
6994	     to get proper schedules.  Undo this for dv analysis.  */
6995	case CODE_FOR_epilogue_deallocate_stack:
6996	case CODE_FOR_prologue_allocate_stack:
6997	  pat = XVECEXP (pat, 0, 0);
6998	  break;
6999
7000	  /* The pattern we use for br.cloop confuses the code above.
7001	     The second element of the vector is representative.  */
7002	case CODE_FOR_doloop_end_internal:
7003	  pat = XVECEXP (pat, 0, 1);
7004	  break;
7005
7006	  /* Doesn't generate code.  */
7007	case CODE_FOR_pred_rel_mutex:
7008	case CODE_FOR_prologue_use:
7009	  return 0;
7010
7011	default:
7012	  break;
7013	}
7014
7015      memset (rws_insn, 0, sizeof (rws_insn));
7016      need_barrier = rtx_needs_barrier (pat, flags, 0);
7017
7018      /* Check to see if the previous instruction was a volatile
7019	 asm.  */
7020      if (! need_barrier)
7021	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
7022
7023      break;
7024
7025    default:
7026      gcc_unreachable ();
7027    }
7028
7029  if (first_instruction && important_for_bundling_p (insn))
7030    {
7031      need_barrier = 0;
7032      first_instruction = 0;
7033    }
7034
7035  return need_barrier;
7036}
7037
7038/* Like group_barrier_needed, but do not clobber the current state.  */
7039
7040static int
7041safe_group_barrier_needed (rtx_insn *insn)
7042{
7043  int saved_first_instruction;
7044  int t;
7045
7046  saved_first_instruction = first_instruction;
7047  in_safe_group_barrier = 1;
7048
7049  t = group_barrier_needed (insn);
7050
7051  first_instruction = saved_first_instruction;
7052  in_safe_group_barrier = 0;
7053
7054  return t;
7055}
7056
7057/* Scan the current function and insert stop bits as necessary to
7058   eliminate dependencies.  This function assumes that a final
7059   instruction scheduling pass has been run which has already
7060   inserted most of the necessary stop bits.  This function only
7061   inserts new ones at basic block boundaries, since these are
7062   invisible to the scheduler.  */
7063
7064static void
7065emit_insn_group_barriers (FILE *dump)
7066{
7067  rtx_insn *insn;
7068  rtx_insn *last_label = 0;
7069  int insns_since_last_label = 0;
7070
7071  init_insn_group_barriers ();
7072
7073  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7074    {
7075      if (LABEL_P (insn))
7076	{
7077	  if (insns_since_last_label)
7078	    last_label = insn;
7079	  insns_since_last_label = 0;
7080	}
7081      else if (NOTE_P (insn)
7082	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
7083	{
7084	  if (insns_since_last_label)
7085	    last_label = insn;
7086	  insns_since_last_label = 0;
7087	}
7088      else if (NONJUMP_INSN_P (insn)
7089	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
7090	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
7091	{
7092	  init_insn_group_barriers ();
7093	  last_label = 0;
7094	}
7095      else if (NONDEBUG_INSN_P (insn))
7096	{
7097	  insns_since_last_label = 1;
7098
7099	  if (group_barrier_needed (insn))
7100	    {
7101	      if (last_label)
7102		{
7103		  if (dump)
7104		    fprintf (dump, "Emitting stop before label %d\n",
7105			     INSN_UID (last_label));
7106		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
7107		  insn = last_label;
7108
7109		  init_insn_group_barriers ();
7110		  last_label = 0;
7111		}
7112	    }
7113	}
7114    }
7115}
7116
7117/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
7118   This function has to emit all necessary group barriers.  */
7119
7120static void
7121emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
7122{
7123  rtx_insn *insn;
7124
7125  init_insn_group_barriers ();
7126
7127  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7128    {
7129      if (BARRIER_P (insn))
7130	{
7131	  rtx_insn *last = prev_active_insn (insn);
7132
7133	  if (! last)
7134	    continue;
7135	  if (JUMP_TABLE_DATA_P (last))
7136	    last = prev_active_insn (last);
7137	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
7138	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
7139
7140	  init_insn_group_barriers ();
7141	}
7142      else if (NONDEBUG_INSN_P (insn))
7143	{
7144	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
7145	    init_insn_group_barriers ();
7146	  else if (group_barrier_needed (insn))
7147	    {
7148	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
7149	      init_insn_group_barriers ();
7150	      group_barrier_needed (insn);
7151	    }
7152	}
7153    }
7154}
7155
7156
7157
7158/* Instruction scheduling support.  */
7159
7160#define NR_BUNDLES 10
7161
7162/* A list of names of all available bundles.  */
7163
7164static const char *bundle_name [NR_BUNDLES] =
7165{
7166  ".mii",
7167  ".mmi",
7168  ".mfi",
7169  ".mmf",
7170#if NR_BUNDLES == 10
7171  ".bbb",
7172  ".mbb",
7173#endif
7174  ".mib",
7175  ".mmb",
7176  ".mfb",
7177  ".mlx"
7178};
7179
7180/* Nonzero if we should insert stop bits into the schedule.  */
7181
7182int ia64_final_schedule = 0;
7183
7184/* Codes of the corresponding queried units: */
7185
7186static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
7187static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
7188
7189static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
7190static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
7191
7192static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
7193
7194/* The following variable value is an insn group barrier.  */
7195
7196static rtx_insn *dfa_stop_insn;
7197
7198/* The following variable value is the last issued insn.  */
7199
7200static rtx_insn *last_scheduled_insn;
7201
7202/* The following variable value is pointer to a DFA state used as
7203   temporary variable.  */
7204
7205static state_t temp_dfa_state = NULL;
7206
7207/* The following variable value is DFA state after issuing the last
7208   insn.  */
7209
7210static state_t prev_cycle_state = NULL;
7211
7212/* The following array element values are TRUE if the corresponding
7213   insn requires to add stop bits before it.  */
7214
7215static char *stops_p = NULL;
7216
7217/* The following variable is used to set up the mentioned above array.  */
7218
7219static int stop_before_p = 0;
7220
7221/* The following variable value is length of the arrays `clocks' and
7222   `add_cycles'. */
7223
7224static int clocks_length;
7225
7226/* The following variable value is number of data speculations in progress.  */
7227static int pending_data_specs = 0;
7228
7229/* Number of memory references on current and three future processor cycles.  */
7230static char mem_ops_in_group[4];
7231
7232/* Number of current processor cycle (from scheduler's point of view).  */
7233static int current_cycle;
7234
7235static rtx ia64_single_set (rtx_insn *);
7236static void ia64_emit_insn_before (rtx, rtx_insn *);
7237
7238/* Map a bundle number to its pseudo-op.  */
7239
7240const char *
7241get_bundle_name (int b)
7242{
7243  return bundle_name[b];
7244}
7245
7246
7247/* Return the maximum number of instructions a cpu can issue.  */
7248
7249static int
7250ia64_issue_rate (void)
7251{
7252  return 6;
7253}
7254
7255/* Helper function - like single_set, but look inside COND_EXEC.  */
7256
7257static rtx
7258ia64_single_set (rtx_insn *insn)
7259{
7260  rtx x = PATTERN (insn), ret;
7261  if (GET_CODE (x) == COND_EXEC)
7262    x = COND_EXEC_CODE (x);
7263  if (GET_CODE (x) == SET)
7264    return x;
7265
7266  /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
7267     Although they are not classical single set, the second set is there just
7268     to protect it from moving past FP-relative stack accesses.  */
7269  switch (recog_memoized (insn))
7270    {
7271    case CODE_FOR_prologue_allocate_stack:
7272    case CODE_FOR_prologue_allocate_stack_pr:
7273    case CODE_FOR_epilogue_deallocate_stack:
7274    case CODE_FOR_epilogue_deallocate_stack_pr:
7275      ret = XVECEXP (x, 0, 0);
7276      break;
7277
7278    default:
7279      ret = single_set_2 (insn, x);
7280      break;
7281    }
7282
7283  return ret;
7284}
7285
7286/* Adjust the cost of a scheduling dependency.
7287   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
7288   COST is the current cost, DW is dependency weakness.  */
7289static int
7290ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
7291		  int cost, dw_t dw)
7292{
7293  enum reg_note dep_type = (enum reg_note) dep_type1;
7294  enum attr_itanium_class dep_class;
7295  enum attr_itanium_class insn_class;
7296
7297  insn_class = ia64_safe_itanium_class (insn);
7298  dep_class = ia64_safe_itanium_class (dep_insn);
7299
7300  /* Treat true memory dependencies separately.  Ignore apparent true
7301     dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
7302  if (dep_type == REG_DEP_TRUE
7303      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
7304      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
7305    return 0;
7306
7307  if (dw == MIN_DEP_WEAK)
7308    /* Store and load are likely to alias, use higher cost to avoid stall.  */
7309    return param_sched_mem_true_dep_cost;
7310  else if (dw > MIN_DEP_WEAK)
7311    {
7312      /* Store and load are less likely to alias.  */
7313      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
7314	/* Assume there will be no cache conflict for floating-point data.
7315	   For integer data, L1 conflict penalty is huge (17 cycles), so we
7316	   never assume it will not cause a conflict.  */
7317	return 0;
7318      else
7319	return cost;
7320    }
7321
7322  if (dep_type != REG_DEP_OUTPUT)
7323    return cost;
7324
7325  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
7326      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
7327    return 0;
7328
7329  return cost;
7330}
7331
7332/* Like emit_insn_before, but skip cycle_display notes.
7333   ??? When cycle display notes are implemented, update this.  */
7334
7335static void
7336ia64_emit_insn_before (rtx insn, rtx_insn *before)
7337{
7338  emit_insn_before (insn, before);
7339}
7340
7341/* The following function marks insns who produce addresses for load
7342   and store insns.  Such insns will be placed into M slots because it
7343   decrease latency time for Itanium1 (see function
7344   `ia64_produce_address_p' and the DFA descriptions).  */
7345
7346static void
7347ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
7348{
7349  rtx_insn *insn, *next, *next_tail;
7350
7351  /* Before reload, which_alternative is not set, which means that
7352     ia64_safe_itanium_class will produce wrong results for (at least)
7353     move instructions.  */
7354  if (!reload_completed)
7355    return;
7356
7357  next_tail = NEXT_INSN (tail);
7358  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7359    if (INSN_P (insn))
7360      insn->call = 0;
7361  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
7362    if (INSN_P (insn)
7363	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
7364      {
7365	sd_iterator_def sd_it;
7366	dep_t dep;
7367	bool has_mem_op_consumer_p = false;
7368
7369	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
7370	  {
7371	    enum attr_itanium_class c;
7372
7373	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
7374	      continue;
7375
7376	    next = DEP_CON (dep);
7377	    c = ia64_safe_itanium_class (next);
7378	    if ((c == ITANIUM_CLASS_ST
7379		 || c == ITANIUM_CLASS_STF)
7380		&& ia64_st_address_bypass_p (insn, next))
7381	      {
7382		has_mem_op_consumer_p = true;
7383		break;
7384	      }
7385	    else if ((c == ITANIUM_CLASS_LD
7386		      || c == ITANIUM_CLASS_FLD
7387		      || c == ITANIUM_CLASS_FLDP)
7388		     && ia64_ld_address_bypass_p (insn, next))
7389	      {
7390		has_mem_op_consumer_p = true;
7391		break;
7392	      }
7393	  }
7394
7395	insn->call = has_mem_op_consumer_p;
7396      }
7397}
7398
7399/* We're beginning a new block.  Initialize data structures as necessary.  */
7400
7401static void
7402ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
7403		 int sched_verbose ATTRIBUTE_UNUSED,
7404		 int max_ready ATTRIBUTE_UNUSED)
7405{
7406  if (flag_checking && !sel_sched_p () && reload_completed)
7407    {
7408      for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
7409	   insn != current_sched_info->next_tail;
7410	   insn = NEXT_INSN (insn))
7411	gcc_assert (!SCHED_GROUP_P (insn));
7412    }
7413  last_scheduled_insn = NULL;
7414  init_insn_group_barriers ();
7415
7416  current_cycle = 0;
7417  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7418}
7419
7420/* We're beginning a scheduling pass.  Check assertion.  */
7421
7422static void
7423ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
7424                        int sched_verbose ATTRIBUTE_UNUSED,
7425                        int max_ready ATTRIBUTE_UNUSED)
7426{
7427  gcc_assert (pending_data_specs == 0);
7428}
7429
7430/* Scheduling pass is now finished.  Free/reset static variable.  */
7431static void
7432ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
7433			  int sched_verbose ATTRIBUTE_UNUSED)
7434{
7435  gcc_assert (pending_data_specs == 0);
7436}
7437
7438/* Return TRUE if INSN is a load (either normal or speculative, but not a
7439   speculation check), FALSE otherwise.  */
7440static bool
7441is_load_p (rtx_insn *insn)
7442{
7443  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7444
7445  return
7446   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
7447    && get_attr_check_load (insn) == CHECK_LOAD_NO);
7448}
7449
7450/* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
7451   (taking account for 3-cycle cache reference postponing for stores: Intel
7452   Itanium 2 Reference Manual for Software Development and Optimization,
7453   6.7.3.1).  */
7454static void
7455record_memory_reference (rtx_insn *insn)
7456{
7457  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
7458
7459  switch (insn_class) {
7460    case ITANIUM_CLASS_FLD:
7461    case ITANIUM_CLASS_LD:
7462      mem_ops_in_group[current_cycle % 4]++;
7463      break;
7464    case ITANIUM_CLASS_STF:
7465    case ITANIUM_CLASS_ST:
7466      mem_ops_in_group[(current_cycle + 3) % 4]++;
7467      break;
7468    default:;
7469  }
7470}
7471
7472/* We are about to being issuing insns for this clock cycle.
7473   Override the default sort algorithm to better slot instructions.  */
7474
7475static int
7476ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7477			int *pn_ready, int clock_var,
7478			int reorder_type)
7479{
7480  int n_asms;
7481  int n_ready = *pn_ready;
7482  rtx_insn **e_ready = ready + n_ready;
7483  rtx_insn **insnp;
7484
7485  if (sched_verbose)
7486    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
7487
7488  if (reorder_type == 0)
7489    {
7490      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
7491      n_asms = 0;
7492      for (insnp = ready; insnp < e_ready; insnp++)
7493	if (insnp < e_ready)
7494	  {
7495	    rtx_insn *insn = *insnp;
7496	    enum attr_type t = ia64_safe_type (insn);
7497	    if (t == TYPE_UNKNOWN)
7498	      {
7499		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7500		    || asm_noperands (PATTERN (insn)) >= 0)
7501		  {
7502		    rtx_insn *lowest = ready[n_asms];
7503		    ready[n_asms] = insn;
7504		    *insnp = lowest;
7505		    n_asms++;
7506		  }
7507		else
7508		  {
7509		    rtx_insn *highest = ready[n_ready - 1];
7510		    ready[n_ready - 1] = insn;
7511		    *insnp = highest;
7512		    return 1;
7513		  }
7514	      }
7515	  }
7516
7517      if (n_asms < n_ready)
7518	{
7519	  /* Some normal insns to process.  Skip the asms.  */
7520	  ready += n_asms;
7521	  n_ready -= n_asms;
7522	}
7523      else if (n_ready > 0)
7524	return 1;
7525    }
7526
7527  if (ia64_final_schedule)
7528    {
7529      int deleted = 0;
7530      int nr_need_stop = 0;
7531
7532      for (insnp = ready; insnp < e_ready; insnp++)
7533	if (safe_group_barrier_needed (*insnp))
7534	  nr_need_stop++;
7535
7536      if (reorder_type == 1 && n_ready == nr_need_stop)
7537	return 0;
7538      if (reorder_type == 0)
7539	return 1;
7540      insnp = e_ready;
7541      /* Move down everything that needs a stop bit, preserving
7542	 relative order.  */
7543      while (insnp-- > ready + deleted)
7544	while (insnp >= ready + deleted)
7545	  {
7546	    rtx_insn *insn = *insnp;
7547	    if (! safe_group_barrier_needed (insn))
7548	      break;
7549	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7550	    *ready = insn;
7551	    deleted++;
7552	  }
7553      n_ready -= deleted;
7554      ready += deleted;
7555    }
7556
7557  current_cycle = clock_var;
7558  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
7559    {
7560      int moved = 0;
7561
7562      insnp = e_ready;
7563      /* Move down loads/stores, preserving relative order.  */
7564      while (insnp-- > ready + moved)
7565	while (insnp >= ready + moved)
7566	  {
7567	    rtx_insn *insn = *insnp;
7568	    if (! is_load_p (insn))
7569	      break;
7570	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
7571	    *ready = insn;
7572	    moved++;
7573	  }
7574      n_ready -= moved;
7575      ready += moved;
7576    }
7577
7578  return 1;
7579}
7580
7581/* We are about to being issuing insns for this clock cycle.  Override
7582   the default sort algorithm to better slot instructions.  */
7583
7584static int
7585ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
7586		    int *pn_ready, int clock_var)
7587{
7588  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
7589				 pn_ready, clock_var, 0);
7590}
7591
7592/* Like ia64_sched_reorder, but called after issuing each insn.
7593   Override the default sort algorithm to better slot instructions.  */
7594
7595static int
7596ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
7597		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
7598		     int *pn_ready, int clock_var)
7599{
7600  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
7601				 clock_var, 1);
7602}
7603
7604/* We are about to issue INSN.  Return the number of insns left on the
7605   ready queue that can be issued this cycle.  */
7606
7607static int
7608ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
7609		     int sched_verbose ATTRIBUTE_UNUSED,
7610		     rtx_insn *insn,
7611		     int can_issue_more ATTRIBUTE_UNUSED)
7612{
7613  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
7614    /* Modulo scheduling does not extend h_i_d when emitting
7615       new instructions.  Don't use h_i_d, if we don't have to.  */
7616    {
7617      if (DONE_SPEC (insn) & BEGIN_DATA)
7618	pending_data_specs++;
7619      if (CHECK_SPEC (insn) & BEGIN_DATA)
7620	pending_data_specs--;
7621    }
7622
7623  if (DEBUG_INSN_P (insn))
7624    return 1;
7625
7626  last_scheduled_insn = insn;
7627  memcpy (prev_cycle_state, curr_state, dfa_state_size);
7628  if (reload_completed)
7629    {
7630      int needed = group_barrier_needed (insn);
7631
7632      gcc_assert (!needed);
7633      if (CALL_P (insn))
7634	init_insn_group_barriers ();
7635      stops_p [INSN_UID (insn)] = stop_before_p;
7636      stop_before_p = 0;
7637
7638      record_memory_reference (insn);
7639    }
7640  return 1;
7641}
7642
7643/* We are choosing insn from the ready queue.  Return zero if INSN
7644   can be chosen.  */
7645
7646static int
7647ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
7648{
7649  gcc_assert (insn && INSN_P (insn));
7650
7651  /* Size of ALAT is 32.  As far as we perform conservative
7652     data speculation, we keep ALAT half-empty.  */
7653  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
7654    return ready_index == 0 ? -1 : 1;
7655
7656  if (ready_index == 0)
7657    return 0;
7658
7659  if ((!reload_completed
7660       || !safe_group_barrier_needed (insn))
7661      && (!mflag_sched_mem_insns_hard_limit
7662	  || !is_load_p (insn)
7663	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
7664    return 0;
7665
7666  return 1;
7667}
7668
7669/* The following variable value is pseudo-insn used by the DFA insn
7670   scheduler to change the DFA state when the simulated clock is
7671   increased.  */
7672
7673static rtx_insn *dfa_pre_cycle_insn;
7674
7675/* Returns 1 when a meaningful insn was scheduled between the last group
7676   barrier and LAST.  */
7677static int
7678scheduled_good_insn (rtx_insn *last)
7679{
7680  if (last && recog_memoized (last) >= 0)
7681    return 1;
7682
7683  for ( ;
7684       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7685       && !stops_p[INSN_UID (last)];
7686       last = PREV_INSN (last))
7687    /* We could hit a NOTE_INSN_DELETED here which is actually outside
7688       the ebb we're scheduling.  */
7689    if (INSN_P (last) && recog_memoized (last) >= 0)
7690      return 1;
7691
7692  return 0;
7693}
7694
7695/* We are about to being issuing INSN.  Return nonzero if we cannot
7696   issue it on given cycle CLOCK and return zero if we should not sort
7697   the ready queue on the next clock start.  */
7698
7699static int
7700ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
7701		    int clock, int *sort_p)
7702{
7703  gcc_assert (insn && INSN_P (insn));
7704
7705  if (DEBUG_INSN_P (insn))
7706    return 0;
7707
7708  /* When a group barrier is needed for insn, last_scheduled_insn
7709     should be set.  */
7710  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7711              || last_scheduled_insn);
7712
7713  if ((reload_completed
7714       && (safe_group_barrier_needed (insn)
7715	   || (mflag_sched_stop_bits_after_every_cycle
7716	       && last_clock != clock
7717	       && last_scheduled_insn
7718	       && scheduled_good_insn (last_scheduled_insn))))
7719      || (last_scheduled_insn
7720	  && (CALL_P (last_scheduled_insn)
7721	      || unknown_for_bundling_p (last_scheduled_insn))))
7722    {
7723      init_insn_group_barriers ();
7724
7725      if (verbose && dump)
7726	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7727		 last_clock == clock ? " + cycle advance" : "");
7728
7729      stop_before_p = 1;
7730      current_cycle = clock;
7731      mem_ops_in_group[current_cycle % 4] = 0;
7732
7733      if (last_clock == clock)
7734	{
7735	  state_transition (curr_state, dfa_stop_insn);
7736	  if (TARGET_EARLY_STOP_BITS)
7737	    *sort_p = (last_scheduled_insn == NULL_RTX
7738		       || ! CALL_P (last_scheduled_insn));
7739	  else
7740	    *sort_p = 0;
7741	  return 1;
7742	}
7743
7744      if (last_scheduled_insn)
7745	{
7746	  if (unknown_for_bundling_p (last_scheduled_insn))
7747	    state_reset (curr_state);
7748	  else
7749	    {
7750	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7751	      state_transition (curr_state, dfa_stop_insn);
7752	      state_transition (curr_state, dfa_pre_cycle_insn);
7753	      state_transition (curr_state, NULL);
7754	    }
7755	}
7756    }
7757  return 0;
7758}
7759
7760/* Implement targetm.sched.h_i_d_extended hook.
7761   Extend internal data structures.  */
7762static void
7763ia64_h_i_d_extended (void)
7764{
7765  if (stops_p != NULL)
7766    {
7767      int new_clocks_length = get_max_uid () * 3 / 2;
7768      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7769      clocks_length = new_clocks_length;
7770    }
7771}
7772
7773
7774/* This structure describes the data used by the backend to guide scheduling.
7775   When the current scheduling point is switched, this data should be saved
7776   and restored later, if the scheduler returns to this point.  */
7777struct _ia64_sched_context
7778{
7779  state_t prev_cycle_state;
7780  rtx_insn *last_scheduled_insn;
7781  struct reg_write_state rws_sum[NUM_REGS];
7782  struct reg_write_state rws_insn[NUM_REGS];
7783  int first_instruction;
7784  int pending_data_specs;
7785  int current_cycle;
7786  char mem_ops_in_group[4];
7787};
7788typedef struct _ia64_sched_context *ia64_sched_context_t;
7789
7790/* Allocates a scheduling context.  */
7791static void *
7792ia64_alloc_sched_context (void)
7793{
7794  return xmalloc (sizeof (struct _ia64_sched_context));
7795}
7796
7797/* Initializes the _SC context with clean data, if CLEAN_P, and from
7798   the global context otherwise.  */
7799static void
7800ia64_init_sched_context (void *_sc, bool clean_p)
7801{
7802  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7803
7804  sc->prev_cycle_state = xmalloc (dfa_state_size);
7805  if (clean_p)
7806    {
7807      state_reset (sc->prev_cycle_state);
7808      sc->last_scheduled_insn = NULL;
7809      memset (sc->rws_sum, 0, sizeof (rws_sum));
7810      memset (sc->rws_insn, 0, sizeof (rws_insn));
7811      sc->first_instruction = 1;
7812      sc->pending_data_specs = 0;
7813      sc->current_cycle = 0;
7814      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7815    }
7816  else
7817    {
7818      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7819      sc->last_scheduled_insn = last_scheduled_insn;
7820      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7821      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7822      sc->first_instruction = first_instruction;
7823      sc->pending_data_specs = pending_data_specs;
7824      sc->current_cycle = current_cycle;
7825      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7826    }
7827}
7828
7829/* Sets the global scheduling context to the one pointed to by _SC.  */
7830static void
7831ia64_set_sched_context (void *_sc)
7832{
7833  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7834
7835  gcc_assert (sc != NULL);
7836
7837  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7838  last_scheduled_insn = sc->last_scheduled_insn;
7839  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7840  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7841  first_instruction = sc->first_instruction;
7842  pending_data_specs = sc->pending_data_specs;
7843  current_cycle = sc->current_cycle;
7844  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7845}
7846
7847/* Clears the data in the _SC scheduling context.  */
7848static void
7849ia64_clear_sched_context (void *_sc)
7850{
7851  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7852
7853  free (sc->prev_cycle_state);
7854  sc->prev_cycle_state = NULL;
7855}
7856
7857/* Frees the _SC scheduling context.  */
7858static void
7859ia64_free_sched_context (void *_sc)
7860{
7861  gcc_assert (_sc != NULL);
7862
7863  free (_sc);
7864}
7865
7866typedef rtx (* gen_func_t) (rtx, rtx);
7867
7868/* Return a function that will generate a load of mode MODE_NO
7869   with speculation types TS.  */
7870static gen_func_t
7871get_spec_load_gen_function (ds_t ts, int mode_no)
7872{
7873  static gen_func_t gen_ld_[] = {
7874    gen_movbi,
7875    gen_movqi_internal,
7876    gen_movhi_internal,
7877    gen_movsi_internal,
7878    gen_movdi_internal,
7879    gen_movsf_internal,
7880    gen_movdf_internal,
7881    gen_movxf_internal,
7882    gen_movti_internal,
7883    gen_zero_extendqidi2,
7884    gen_zero_extendhidi2,
7885    gen_zero_extendsidi2,
7886  };
7887
7888  static gen_func_t gen_ld_a[] = {
7889    gen_movbi_advanced,
7890    gen_movqi_advanced,
7891    gen_movhi_advanced,
7892    gen_movsi_advanced,
7893    gen_movdi_advanced,
7894    gen_movsf_advanced,
7895    gen_movdf_advanced,
7896    gen_movxf_advanced,
7897    gen_movti_advanced,
7898    gen_zero_extendqidi2_advanced,
7899    gen_zero_extendhidi2_advanced,
7900    gen_zero_extendsidi2_advanced,
7901  };
7902  static gen_func_t gen_ld_s[] = {
7903    gen_movbi_speculative,
7904    gen_movqi_speculative,
7905    gen_movhi_speculative,
7906    gen_movsi_speculative,
7907    gen_movdi_speculative,
7908    gen_movsf_speculative,
7909    gen_movdf_speculative,
7910    gen_movxf_speculative,
7911    gen_movti_speculative,
7912    gen_zero_extendqidi2_speculative,
7913    gen_zero_extendhidi2_speculative,
7914    gen_zero_extendsidi2_speculative,
7915  };
7916  static gen_func_t gen_ld_sa[] = {
7917    gen_movbi_speculative_advanced,
7918    gen_movqi_speculative_advanced,
7919    gen_movhi_speculative_advanced,
7920    gen_movsi_speculative_advanced,
7921    gen_movdi_speculative_advanced,
7922    gen_movsf_speculative_advanced,
7923    gen_movdf_speculative_advanced,
7924    gen_movxf_speculative_advanced,
7925    gen_movti_speculative_advanced,
7926    gen_zero_extendqidi2_speculative_advanced,
7927    gen_zero_extendhidi2_speculative_advanced,
7928    gen_zero_extendsidi2_speculative_advanced,
7929  };
7930  static gen_func_t gen_ld_s_a[] = {
7931    gen_movbi_speculative_a,
7932    gen_movqi_speculative_a,
7933    gen_movhi_speculative_a,
7934    gen_movsi_speculative_a,
7935    gen_movdi_speculative_a,
7936    gen_movsf_speculative_a,
7937    gen_movdf_speculative_a,
7938    gen_movxf_speculative_a,
7939    gen_movti_speculative_a,
7940    gen_zero_extendqidi2_speculative_a,
7941    gen_zero_extendhidi2_speculative_a,
7942    gen_zero_extendsidi2_speculative_a,
7943  };
7944
7945  gen_func_t *gen_ld;
7946
7947  if (ts & BEGIN_DATA)
7948    {
7949      if (ts & BEGIN_CONTROL)
7950	gen_ld = gen_ld_sa;
7951      else
7952	gen_ld = gen_ld_a;
7953    }
7954  else if (ts & BEGIN_CONTROL)
7955    {
7956      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7957	  || ia64_needs_block_p (ts))
7958	gen_ld = gen_ld_s;
7959      else
7960	gen_ld = gen_ld_s_a;
7961    }
7962  else if (ts == 0)
7963    gen_ld = gen_ld_;
7964  else
7965    gcc_unreachable ();
7966
7967  return gen_ld[mode_no];
7968}
7969
7970/* Constants that help mapping 'machine_mode' to int.  */
7971enum SPEC_MODES
7972  {
7973    SPEC_MODE_INVALID = -1,
7974    SPEC_MODE_FIRST = 0,
7975    SPEC_MODE_FOR_EXTEND_FIRST = 1,
7976    SPEC_MODE_FOR_EXTEND_LAST = 3,
7977    SPEC_MODE_LAST = 8
7978  };
7979
7980enum
7981  {
7982    /* Offset to reach ZERO_EXTEND patterns.  */
7983    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7984  };
7985
7986/* Return index of the MODE.  */
7987static int
7988ia64_mode_to_int (machine_mode mode)
7989{
7990  switch (mode)
7991    {
7992    case E_BImode: return 0; /* SPEC_MODE_FIRST  */
7993    case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7994    case E_HImode: return 2;
7995    case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7996    case E_DImode: return 4;
7997    case E_SFmode: return 5;
7998    case E_DFmode: return 6;
7999    case E_XFmode: return 7;
8000    case E_TImode:
8001      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
8002	 mentioned in itanium[12].md.  Predicate fp_register_operand also
8003	 needs to be defined.  Bottom line: better disable for now.  */
8004      return SPEC_MODE_INVALID;
8005    default:     return SPEC_MODE_INVALID;
8006    }
8007}
8008
8009/* Provide information about speculation capabilities.  */
8010static void
8011ia64_set_sched_flags (spec_info_t spec_info)
8012{
8013  unsigned int *flags = &(current_sched_info->flags);
8014
8015  if (*flags & SCHED_RGN
8016      || *flags & SCHED_EBB
8017      || *flags & SEL_SCHED)
8018    {
8019      int mask = 0;
8020
8021      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
8022          || (mflag_sched_ar_data_spec && reload_completed))
8023	{
8024	  mask |= BEGIN_DATA;
8025
8026	  if (!sel_sched_p ()
8027	      && ((mflag_sched_br_in_data_spec && !reload_completed)
8028		  || (mflag_sched_ar_in_data_spec && reload_completed)))
8029	    mask |= BE_IN_DATA;
8030	}
8031
8032      if (mflag_sched_control_spec
8033          && (!sel_sched_p ()
8034	      || reload_completed))
8035	{
8036	  mask |= BEGIN_CONTROL;
8037
8038	  if (!sel_sched_p () && mflag_sched_in_control_spec)
8039	    mask |= BE_IN_CONTROL;
8040	}
8041
8042      spec_info->mask = mask;
8043
8044      if (mask)
8045	{
8046	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
8047
8048	  if (mask & BE_IN_SPEC)
8049	    *flags |= NEW_BBS;
8050
8051	  spec_info->flags = 0;
8052
8053	  if ((mask & CONTROL_SPEC)
8054	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
8055	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
8056
8057	  if (sched_verbose >= 1)
8058	    spec_info->dump = sched_dump;
8059	  else
8060	    spec_info->dump = 0;
8061
8062	  if (mflag_sched_count_spec_in_critical_path)
8063	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
8064	}
8065    }
8066  else
8067    spec_info->mask = 0;
8068}
8069
8070/* If INSN is an appropriate load return its mode.
8071   Return -1 otherwise.  */
8072static int
8073get_mode_no_for_insn (rtx_insn *insn)
8074{
8075  rtx reg, mem, mode_rtx;
8076  int mode_no;
8077  bool extend_p;
8078
8079  extract_insn_cached (insn);
8080
8081  /* We use WHICH_ALTERNATIVE only after reload.  This will
8082     guarantee that reload won't touch a speculative insn.  */
8083
8084  if (recog_data.n_operands != 2)
8085    return -1;
8086
8087  reg = recog_data.operand[0];
8088  mem = recog_data.operand[1];
8089
8090  /* We should use MEM's mode since REG's mode in presence of
8091     ZERO_EXTEND will always be DImode.  */
8092  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
8093    /* Process non-speculative ld.  */
8094    {
8095      if (!reload_completed)
8096	{
8097	  /* Do not speculate into regs like ar.lc.  */
8098	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
8099	    return -1;
8100
8101	  if (!MEM_P (mem))
8102	    return -1;
8103
8104	  {
8105	    rtx mem_reg = XEXP (mem, 0);
8106
8107	    if (!REG_P (mem_reg))
8108	      return -1;
8109	  }
8110
8111	  mode_rtx = mem;
8112	}
8113      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
8114	{
8115	  gcc_assert (REG_P (reg) && MEM_P (mem));
8116	  mode_rtx = mem;
8117	}
8118      else
8119	return -1;
8120    }
8121  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
8122	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
8123	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
8124    /* Process speculative ld or ld.c.  */
8125    {
8126      gcc_assert (REG_P (reg) && MEM_P (mem));
8127      mode_rtx = mem;
8128    }
8129  else
8130    {
8131      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
8132
8133      if (attr_class == ITANIUM_CLASS_CHK_A
8134	  || attr_class == ITANIUM_CLASS_CHK_S_I
8135	  || attr_class == ITANIUM_CLASS_CHK_S_F)
8136	/* Process chk.  */
8137	mode_rtx = reg;
8138      else
8139	return -1;
8140    }
8141
8142  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
8143
8144  if (mode_no == SPEC_MODE_INVALID)
8145    return -1;
8146
8147  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
8148
8149  if (extend_p)
8150    {
8151      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
8152	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
8153	return -1;
8154
8155      mode_no += SPEC_GEN_EXTEND_OFFSET;
8156    }
8157
8158  return mode_no;
8159}
8160
8161/* If X is an unspec part of a speculative load, return its code.
8162   Return -1 otherwise.  */
8163static int
8164get_spec_unspec_code (const_rtx x)
8165{
8166  if (GET_CODE (x) != UNSPEC)
8167    return -1;
8168
8169  {
8170    int code;
8171
8172    code = XINT (x, 1);
8173
8174    switch (code)
8175      {
8176      case UNSPEC_LDA:
8177      case UNSPEC_LDS:
8178      case UNSPEC_LDS_A:
8179      case UNSPEC_LDSA:
8180	return code;
8181
8182      default:
8183	return -1;
8184      }
8185  }
8186}
8187
8188/* Implement skip_rtx_p hook.  */
8189static bool
8190ia64_skip_rtx_p (const_rtx x)
8191{
8192  return get_spec_unspec_code (x) != -1;
8193}
8194
8195/* If INSN is a speculative load, return its UNSPEC code.
8196   Return -1 otherwise.  */
8197static int
8198get_insn_spec_code (const_rtx insn)
8199{
8200  rtx pat, reg, mem;
8201
8202  pat = PATTERN (insn);
8203
8204  if (GET_CODE (pat) == COND_EXEC)
8205    pat = COND_EXEC_CODE (pat);
8206
8207  if (GET_CODE (pat) != SET)
8208    return -1;
8209
8210  reg = SET_DEST (pat);
8211  if (!REG_P (reg))
8212    return -1;
8213
8214  mem = SET_SRC (pat);
8215  if (GET_CODE (mem) == ZERO_EXTEND)
8216    mem = XEXP (mem, 0);
8217
8218  return get_spec_unspec_code (mem);
8219}
8220
8221/* If INSN is a speculative load, return a ds with the speculation types.
8222   Otherwise [if INSN is a normal instruction] return 0.  */
8223static ds_t
8224ia64_get_insn_spec_ds (rtx_insn *insn)
8225{
8226  int code = get_insn_spec_code (insn);
8227
8228  switch (code)
8229    {
8230    case UNSPEC_LDA:
8231      return BEGIN_DATA;
8232
8233    case UNSPEC_LDS:
8234    case UNSPEC_LDS_A:
8235      return BEGIN_CONTROL;
8236
8237    case UNSPEC_LDSA:
8238      return BEGIN_DATA | BEGIN_CONTROL;
8239
8240    default:
8241      return 0;
8242    }
8243}
8244
8245/* If INSN is a speculative load return a ds with the speculation types that
8246   will be checked.
8247   Otherwise [if INSN is a normal instruction] return 0.  */
8248static ds_t
8249ia64_get_insn_checked_ds (rtx_insn *insn)
8250{
8251  int code = get_insn_spec_code (insn);
8252
8253  switch (code)
8254    {
8255    case UNSPEC_LDA:
8256      return BEGIN_DATA | BEGIN_CONTROL;
8257
8258    case UNSPEC_LDS:
8259      return BEGIN_CONTROL;
8260
8261    case UNSPEC_LDS_A:
8262    case UNSPEC_LDSA:
8263      return BEGIN_DATA | BEGIN_CONTROL;
8264
8265    default:
8266      return 0;
8267    }
8268}
8269
8270/* If GEN_P is true, calculate the index of needed speculation check and return
8271   speculative pattern for INSN with speculative mode TS, machine mode
8272   MODE_NO and with ZERO_EXTEND (if EXTEND_P is true).
8273   If GEN_P is false, just calculate the index of needed speculation check.  */
8274static rtx
8275ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
8276{
8277  rtx pat, new_pat;
8278  gen_func_t gen_load;
8279
8280  gen_load = get_spec_load_gen_function (ts, mode_no);
8281
8282  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
8283		      copy_rtx (recog_data.operand[1]));
8284
8285  pat = PATTERN (insn);
8286  if (GET_CODE (pat) == COND_EXEC)
8287    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8288				 new_pat);
8289
8290  return new_pat;
8291}
8292
8293static bool
8294insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
8295			      ds_t ds ATTRIBUTE_UNUSED)
8296{
8297  return false;
8298}
8299
8300/* Implement targetm.sched.speculate_insn hook.
8301   Check if the INSN can be TS speculative.
8302   If 'no' - return -1.
8303   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
8304   If current pattern of the INSN already provides TS speculation,
8305   return 0.  */
8306static int
8307ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
8308{
8309  int mode_no;
8310  int res;
8311
8312  gcc_assert (!(ts & ~SPECULATIVE));
8313
8314  if (ia64_spec_check_p (insn))
8315    return -1;
8316
8317  if ((ts & BE_IN_SPEC)
8318      && !insn_can_be_in_speculative_p (insn, ts))
8319    return -1;
8320
8321  mode_no = get_mode_no_for_insn (insn);
8322
8323  if (mode_no != SPEC_MODE_INVALID)
8324    {
8325      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
8326	res = 0;
8327      else
8328	{
8329	  res = 1;
8330	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
8331	}
8332    }
8333  else
8334    res = -1;
8335
8336  return res;
8337}
8338
8339/* Return a function that will generate a check for speculation TS with mode
8340   MODE_NO.
8341   If simple check is needed, pass true for SIMPLE_CHECK_P.
8342   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
8343static gen_func_t
8344get_spec_check_gen_function (ds_t ts, int mode_no,
8345			     bool simple_check_p, bool clearing_check_p)
8346{
8347  static gen_func_t gen_ld_c_clr[] = {
8348    gen_movbi_clr,
8349    gen_movqi_clr,
8350    gen_movhi_clr,
8351    gen_movsi_clr,
8352    gen_movdi_clr,
8353    gen_movsf_clr,
8354    gen_movdf_clr,
8355    gen_movxf_clr,
8356    gen_movti_clr,
8357    gen_zero_extendqidi2_clr,
8358    gen_zero_extendhidi2_clr,
8359    gen_zero_extendsidi2_clr,
8360  };
8361  static gen_func_t gen_ld_c_nc[] = {
8362    gen_movbi_nc,
8363    gen_movqi_nc,
8364    gen_movhi_nc,
8365    gen_movsi_nc,
8366    gen_movdi_nc,
8367    gen_movsf_nc,
8368    gen_movdf_nc,
8369    gen_movxf_nc,
8370    gen_movti_nc,
8371    gen_zero_extendqidi2_nc,
8372    gen_zero_extendhidi2_nc,
8373    gen_zero_extendsidi2_nc,
8374  };
8375  static gen_func_t gen_chk_a_clr[] = {
8376    gen_advanced_load_check_clr_bi,
8377    gen_advanced_load_check_clr_qi,
8378    gen_advanced_load_check_clr_hi,
8379    gen_advanced_load_check_clr_si,
8380    gen_advanced_load_check_clr_di,
8381    gen_advanced_load_check_clr_sf,
8382    gen_advanced_load_check_clr_df,
8383    gen_advanced_load_check_clr_xf,
8384    gen_advanced_load_check_clr_ti,
8385    gen_advanced_load_check_clr_di,
8386    gen_advanced_load_check_clr_di,
8387    gen_advanced_load_check_clr_di,
8388  };
8389  static gen_func_t gen_chk_a_nc[] = {
8390    gen_advanced_load_check_nc_bi,
8391    gen_advanced_load_check_nc_qi,
8392    gen_advanced_load_check_nc_hi,
8393    gen_advanced_load_check_nc_si,
8394    gen_advanced_load_check_nc_di,
8395    gen_advanced_load_check_nc_sf,
8396    gen_advanced_load_check_nc_df,
8397    gen_advanced_load_check_nc_xf,
8398    gen_advanced_load_check_nc_ti,
8399    gen_advanced_load_check_nc_di,
8400    gen_advanced_load_check_nc_di,
8401    gen_advanced_load_check_nc_di,
8402  };
8403  static gen_func_t gen_chk_s[] = {
8404    gen_speculation_check_bi,
8405    gen_speculation_check_qi,
8406    gen_speculation_check_hi,
8407    gen_speculation_check_si,
8408    gen_speculation_check_di,
8409    gen_speculation_check_sf,
8410    gen_speculation_check_df,
8411    gen_speculation_check_xf,
8412    gen_speculation_check_ti,
8413    gen_speculation_check_di,
8414    gen_speculation_check_di,
8415    gen_speculation_check_di,
8416  };
8417
8418  gen_func_t *gen_check;
8419
8420  if (ts & BEGIN_DATA)
8421    {
8422      /* We don't need recovery because even if this is ld.sa
8423	 ALAT entry will be allocated only if NAT bit is set to zero.
8424	 So it is enough to use ld.c here.  */
8425
8426      if (simple_check_p)
8427	{
8428	  gcc_assert (mflag_sched_spec_ldc);
8429
8430	  if (clearing_check_p)
8431	    gen_check = gen_ld_c_clr;
8432	  else
8433	    gen_check = gen_ld_c_nc;
8434	}
8435      else
8436	{
8437	  if (clearing_check_p)
8438	    gen_check = gen_chk_a_clr;
8439	  else
8440	    gen_check = gen_chk_a_nc;
8441	}
8442    }
8443  else if (ts & BEGIN_CONTROL)
8444    {
8445      if (simple_check_p)
8446	/* We might want to use ld.sa -> ld.c instead of
8447	   ld.s -> chk.s.  */
8448	{
8449	  gcc_assert (!ia64_needs_block_p (ts));
8450
8451	  if (clearing_check_p)
8452	    gen_check = gen_ld_c_clr;
8453	  else
8454	    gen_check = gen_ld_c_nc;
8455	}
8456      else
8457	{
8458	  gen_check = gen_chk_s;
8459	}
8460    }
8461  else
8462    gcc_unreachable ();
8463
8464  gcc_assert (mode_no >= 0);
8465  return gen_check[mode_no];
8466}
8467
8468/* Return nonzero, if INSN needs branchy recovery check.  */
8469static bool
8470ia64_needs_block_p (ds_t ts)
8471{
8472  if (ts & BEGIN_DATA)
8473    return !mflag_sched_spec_ldc;
8474
8475  gcc_assert ((ts & BEGIN_CONTROL) != 0);
8476
8477  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
8478}
8479
8480/* Generate (or regenerate) a recovery check for INSN.  */
8481static rtx
8482ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
8483{
8484  rtx op1, pat, check_pat;
8485  gen_func_t gen_check;
8486  int mode_no;
8487
8488  mode_no = get_mode_no_for_insn (insn);
8489  gcc_assert (mode_no >= 0);
8490
8491  if (label)
8492    op1 = label;
8493  else
8494    {
8495      gcc_assert (!ia64_needs_block_p (ds));
8496      op1 = copy_rtx (recog_data.operand[1]);
8497    }
8498
8499  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
8500					   true);
8501
8502  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
8503
8504  pat = PATTERN (insn);
8505  if (GET_CODE (pat) == COND_EXEC)
8506    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
8507				   check_pat);
8508
8509  return check_pat;
8510}
8511
8512/* Return nonzero, if X is branchy recovery check.  */
8513static int
8514ia64_spec_check_p (rtx x)
8515{
8516  x = PATTERN (x);
8517  if (GET_CODE (x) == COND_EXEC)
8518    x = COND_EXEC_CODE (x);
8519  if (GET_CODE (x) == SET)
8520    return ia64_spec_check_src_p (SET_SRC (x));
8521  return 0;
8522}
8523
8524/* Return nonzero, if SRC belongs to recovery check.  */
8525static int
8526ia64_spec_check_src_p (rtx src)
8527{
8528  if (GET_CODE (src) == IF_THEN_ELSE)
8529    {
8530      rtx t;
8531
8532      t = XEXP (src, 0);
8533      if (GET_CODE (t) == NE)
8534	{
8535	  t = XEXP (t, 0);
8536
8537	  if (GET_CODE (t) == UNSPEC)
8538	    {
8539	      int code;
8540
8541	      code = XINT (t, 1);
8542
8543	      if (code == UNSPEC_LDCCLR
8544		  || code == UNSPEC_LDCNC
8545		  || code == UNSPEC_CHKACLR
8546		  || code == UNSPEC_CHKANC
8547		  || code == UNSPEC_CHKS)
8548		{
8549		  gcc_assert (code != 0);
8550		  return code;
8551		}
8552	    }
8553	}
8554    }
8555  return 0;
8556}
8557
8558
8559/* The following page contains abstract data `bundle states' which are
8560   used for bundling insns (inserting nops and template generation).  */
8561
8562/* The following describes state of insn bundling.  */
8563
8564struct bundle_state
8565{
8566  /* Unique bundle state number to identify them in the debugging
8567     output  */
8568  int unique_num;
8569  rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
8570  /* number nops before and after the insn  */
8571  short before_nops_num, after_nops_num;
8572  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
8573                   insn */
8574  int cost;     /* cost of the state in cycles */
8575  int accumulated_insns_num; /* number of all previous insns including
8576				nops.  L is considered as 2 insns */
8577  int branch_deviation; /* deviation of previous branches from 3rd slots  */
8578  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
8579  struct bundle_state *next;  /* next state with the same insn_num  */
8580  struct bundle_state *originator; /* originator (previous insn state)  */
8581  /* All bundle states are in the following chain.  */
8582  struct bundle_state *allocated_states_chain;
8583  /* The DFA State after issuing the insn and the nops.  */
8584  state_t dfa_state;
8585};
8586
8587/* The following is map insn number to the corresponding bundle state.  */
8588
8589static struct bundle_state **index_to_bundle_states;
8590
8591/* The unique number of next bundle state.  */
8592
8593static int bundle_states_num;
8594
8595/* All allocated bundle states are in the following chain.  */
8596
8597static struct bundle_state *allocated_bundle_states_chain;
8598
8599/* All allocated but not used bundle states are in the following
8600   chain.  */
8601
8602static struct bundle_state *free_bundle_state_chain;
8603
8604
8605/* The following function returns a free bundle state.  */
8606
8607static struct bundle_state *
8608get_free_bundle_state (void)
8609{
8610  struct bundle_state *result;
8611
8612  if (free_bundle_state_chain != NULL)
8613    {
8614      result = free_bundle_state_chain;
8615      free_bundle_state_chain = result->next;
8616    }
8617  else
8618    {
8619      result = XNEW (struct bundle_state);
8620      result->dfa_state = xmalloc (dfa_state_size);
8621      result->allocated_states_chain = allocated_bundle_states_chain;
8622      allocated_bundle_states_chain = result;
8623    }
8624  result->unique_num = bundle_states_num++;
8625  return result;
8626
8627}
8628
8629/* The following function frees given bundle state.  */
8630
8631static void
8632free_bundle_state (struct bundle_state *state)
8633{
8634  state->next = free_bundle_state_chain;
8635  free_bundle_state_chain = state;
8636}
8637
8638/* Start work with abstract data `bundle states'.  */
8639
8640static void
8641initiate_bundle_states (void)
8642{
8643  bundle_states_num = 0;
8644  free_bundle_state_chain = NULL;
8645  allocated_bundle_states_chain = NULL;
8646}
8647
8648/* Finish work with abstract data `bundle states'.  */
8649
8650static void
8651finish_bundle_states (void)
8652{
8653  struct bundle_state *curr_state, *next_state;
8654
8655  for (curr_state = allocated_bundle_states_chain;
8656       curr_state != NULL;
8657       curr_state = next_state)
8658    {
8659      next_state = curr_state->allocated_states_chain;
8660      free (curr_state->dfa_state);
8661      free (curr_state);
8662    }
8663}
8664
8665/* Hashtable helpers.  */
8666
8667struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
8668{
8669  static inline hashval_t hash (const bundle_state *);
8670  static inline bool equal (const bundle_state *, const bundle_state *);
8671};
8672
8673/* The function returns hash of BUNDLE_STATE.  */
8674
8675inline hashval_t
8676bundle_state_hasher::hash (const bundle_state *state)
8677{
8678  unsigned result, i;
8679
8680  for (result = i = 0; i < dfa_state_size; i++)
8681    result += (((unsigned char *) state->dfa_state) [i]
8682	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8683  return result + state->insn_num;
8684}
8685
8686/* The function returns nonzero if the bundle state keys are equal.  */
8687
8688inline bool
8689bundle_state_hasher::equal (const bundle_state *state1,
8690			    const bundle_state *state2)
8691{
8692  return (state1->insn_num == state2->insn_num
8693	  && memcmp (state1->dfa_state, state2->dfa_state,
8694		     dfa_state_size) == 0);
8695}
8696
8697/* Hash table of the bundle states.  The key is dfa_state and insn_num
8698   of the bundle states.  */
8699
8700static hash_table<bundle_state_hasher> *bundle_state_table;
8701
8702/* The function inserts the BUNDLE_STATE into the hash table.  The
8703   function returns nonzero if the bundle has been inserted into the
8704   table.  The table contains the best bundle state with given key.  */
8705
8706static int
8707insert_bundle_state (struct bundle_state *bundle_state)
8708{
8709  struct bundle_state **entry_ptr;
8710
8711  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
8712  if (*entry_ptr == NULL)
8713    {
8714      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8715      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8716      *entry_ptr = bundle_state;
8717      return TRUE;
8718    }
8719  else if (bundle_state->cost < (*entry_ptr)->cost
8720	   || (bundle_state->cost == (*entry_ptr)->cost
8721	       && ((*entry_ptr)->accumulated_insns_num
8722		   > bundle_state->accumulated_insns_num
8723		   || ((*entry_ptr)->accumulated_insns_num
8724		       == bundle_state->accumulated_insns_num
8725		       && ((*entry_ptr)->branch_deviation
8726			   > bundle_state->branch_deviation
8727			   || ((*entry_ptr)->branch_deviation
8728			       == bundle_state->branch_deviation
8729			       && (*entry_ptr)->middle_bundle_stops
8730			       > bundle_state->middle_bundle_stops))))))
8731
8732    {
8733      struct bundle_state temp;
8734
8735      temp = **entry_ptr;
8736      **entry_ptr = *bundle_state;
8737      (*entry_ptr)->next = temp.next;
8738      *bundle_state = temp;
8739    }
8740  return FALSE;
8741}
8742
8743/* Start work with the hash table.  */
8744
8745static void
8746initiate_bundle_state_table (void)
8747{
8748  bundle_state_table = new hash_table<bundle_state_hasher> (50);
8749}
8750
8751/* Finish work with the hash table.  */
8752
8753static void
8754finish_bundle_state_table (void)
8755{
8756  delete bundle_state_table;
8757  bundle_state_table = NULL;
8758}
8759
8760
8761
8762/* The following variable is a insn `nop' used to check bundle states
8763   with different number of inserted nops.  */
8764
8765static rtx_insn *ia64_nop;
8766
8767/* The following function tries to issue NOPS_NUM nops for the current
8768   state without advancing processor cycle.  If it failed, the
8769   function returns FALSE and frees the current state.  */
8770
8771static int
8772try_issue_nops (struct bundle_state *curr_state, int nops_num)
8773{
8774  int i;
8775
8776  for (i = 0; i < nops_num; i++)
8777    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8778      {
8779	free_bundle_state (curr_state);
8780	return FALSE;
8781      }
8782  return TRUE;
8783}
8784
8785/* The following function tries to issue INSN for the current
8786   state without advancing processor cycle.  If it failed, the
8787   function returns FALSE and frees the current state.  */
8788
8789static int
8790try_issue_insn (struct bundle_state *curr_state, rtx insn)
8791{
8792  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8793    {
8794      free_bundle_state (curr_state);
8795      return FALSE;
8796    }
8797  return TRUE;
8798}
8799
8800/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
8801   starting with ORIGINATOR without advancing processor cycle.  If
8802   TRY_BUNDLE_END_P is TRUE, the function also/only (if
8803   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
8804   If it was successful, the function creates new bundle state and
8805   insert into the hash table and into `index_to_bundle_states'.  */
8806
8807static void
8808issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8809		     rtx_insn *insn, int try_bundle_end_p,
8810		     int only_bundle_end_p)
8811{
8812  struct bundle_state *curr_state;
8813
8814  curr_state = get_free_bundle_state ();
8815  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8816  curr_state->insn = insn;
8817  curr_state->insn_num = originator->insn_num + 1;
8818  curr_state->cost = originator->cost;
8819  curr_state->originator = originator;
8820  curr_state->before_nops_num = before_nops_num;
8821  curr_state->after_nops_num = 0;
8822  curr_state->accumulated_insns_num
8823    = originator->accumulated_insns_num + before_nops_num;
8824  curr_state->branch_deviation = originator->branch_deviation;
8825  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8826  gcc_assert (insn);
8827  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8828    {
8829      gcc_assert (GET_MODE (insn) != TImode);
8830      if (!try_issue_nops (curr_state, before_nops_num))
8831	return;
8832      if (!try_issue_insn (curr_state, insn))
8833	return;
8834      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8835      if (curr_state->accumulated_insns_num % 3 != 0)
8836	curr_state->middle_bundle_stops++;
8837      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8838	  && curr_state->accumulated_insns_num % 3 != 0)
8839	{
8840	  free_bundle_state (curr_state);
8841	  return;
8842	}
8843    }
8844  else if (GET_MODE (insn) != TImode)
8845    {
8846      if (!try_issue_nops (curr_state, before_nops_num))
8847	return;
8848      if (!try_issue_insn (curr_state, insn))
8849	return;
8850      curr_state->accumulated_insns_num++;
8851      gcc_assert (!unknown_for_bundling_p (insn));
8852
8853      if (ia64_safe_type (insn) == TYPE_L)
8854	curr_state->accumulated_insns_num++;
8855    }
8856  else
8857    {
8858      /* If this is an insn that must be first in a group, then don't allow
8859	 nops to be emitted before it.  Currently, alloc is the only such
8860	 supported instruction.  */
8861      /* ??? The bundling automatons should handle this for us, but they do
8862	 not yet have support for the first_insn attribute.  */
8863      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8864	{
8865	  free_bundle_state (curr_state);
8866	  return;
8867	}
8868
8869      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8870      state_transition (curr_state->dfa_state, NULL);
8871      curr_state->cost++;
8872      if (!try_issue_nops (curr_state, before_nops_num))
8873	return;
8874      if (!try_issue_insn (curr_state, insn))
8875	return;
8876      curr_state->accumulated_insns_num++;
8877      if (unknown_for_bundling_p (insn))
8878	{
8879	  /* Finish bundle containing asm insn.  */
8880	  curr_state->after_nops_num
8881	    = 3 - curr_state->accumulated_insns_num % 3;
8882	  curr_state->accumulated_insns_num
8883	    += 3 - curr_state->accumulated_insns_num % 3;
8884	}
8885      else if (ia64_safe_type (insn) == TYPE_L)
8886	curr_state->accumulated_insns_num++;
8887    }
8888  if (ia64_safe_type (insn) == TYPE_B)
8889    curr_state->branch_deviation
8890      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8891  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8892    {
8893      if (!only_bundle_end_p && insert_bundle_state (curr_state))
8894	{
8895	  state_t dfa_state;
8896	  struct bundle_state *curr_state1;
8897	  struct bundle_state *allocated_states_chain;
8898
8899	  curr_state1 = get_free_bundle_state ();
8900	  dfa_state = curr_state1->dfa_state;
8901	  allocated_states_chain = curr_state1->allocated_states_chain;
8902	  *curr_state1 = *curr_state;
8903	  curr_state1->dfa_state = dfa_state;
8904	  curr_state1->allocated_states_chain = allocated_states_chain;
8905	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8906		  dfa_state_size);
8907	  curr_state = curr_state1;
8908	}
8909      if (!try_issue_nops (curr_state,
8910			   3 - curr_state->accumulated_insns_num % 3))
8911	return;
8912      curr_state->after_nops_num
8913	= 3 - curr_state->accumulated_insns_num % 3;
8914      curr_state->accumulated_insns_num
8915	+= 3 - curr_state->accumulated_insns_num % 3;
8916    }
8917  if (!insert_bundle_state (curr_state))
8918    free_bundle_state (curr_state);
8919  return;
8920}
8921
8922/* The following function returns position in the two window bundle
8923   for given STATE.  */
8924
8925static int
8926get_max_pos (state_t state)
8927{
8928  if (cpu_unit_reservation_p (state, pos_6))
8929    return 6;
8930  else if (cpu_unit_reservation_p (state, pos_5))
8931    return 5;
8932  else if (cpu_unit_reservation_p (state, pos_4))
8933    return 4;
8934  else if (cpu_unit_reservation_p (state, pos_3))
8935    return 3;
8936  else if (cpu_unit_reservation_p (state, pos_2))
8937    return 2;
8938  else if (cpu_unit_reservation_p (state, pos_1))
8939    return 1;
8940  else
8941    return 0;
8942}
8943
8944/* The function returns code of a possible template for given position
8945   and state.  The function should be called only with 2 values of
8946   position equal to 3 or 6.  We avoid generating F NOPs by putting
8947   templates containing F insns at the end of the template search
8948   because undocumented anomaly in McKinley derived cores which can
8949   cause stalls if an F-unit insn (including a NOP) is issued within a
8950   six-cycle window after reading certain application registers (such
8951   as ar.bsp).  Furthermore, power-considerations also argue against
8952   the use of F-unit instructions unless they're really needed.  */
8953
8954static int
8955get_template (state_t state, int pos)
8956{
8957  switch (pos)
8958    {
8959    case 3:
8960      if (cpu_unit_reservation_p (state, _0mmi_))
8961	return 1;
8962      else if (cpu_unit_reservation_p (state, _0mii_))
8963	return 0;
8964      else if (cpu_unit_reservation_p (state, _0mmb_))
8965	return 7;
8966      else if (cpu_unit_reservation_p (state, _0mib_))
8967	return 6;
8968      else if (cpu_unit_reservation_p (state, _0mbb_))
8969	return 5;
8970      else if (cpu_unit_reservation_p (state, _0bbb_))
8971	return 4;
8972      else if (cpu_unit_reservation_p (state, _0mmf_))
8973	return 3;
8974      else if (cpu_unit_reservation_p (state, _0mfi_))
8975	return 2;
8976      else if (cpu_unit_reservation_p (state, _0mfb_))
8977	return 8;
8978      else if (cpu_unit_reservation_p (state, _0mlx_))
8979	return 9;
8980      else
8981	gcc_unreachable ();
8982    case 6:
8983      if (cpu_unit_reservation_p (state, _1mmi_))
8984	return 1;
8985      else if (cpu_unit_reservation_p (state, _1mii_))
8986	return 0;
8987      else if (cpu_unit_reservation_p (state, _1mmb_))
8988	return 7;
8989      else if (cpu_unit_reservation_p (state, _1mib_))
8990	return 6;
8991      else if (cpu_unit_reservation_p (state, _1mbb_))
8992	return 5;
8993      else if (cpu_unit_reservation_p (state, _1bbb_))
8994	return 4;
8995      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8996	return 3;
8997      else if (cpu_unit_reservation_p (state, _1mfi_))
8998	return 2;
8999      else if (cpu_unit_reservation_p (state, _1mfb_))
9000	return 8;
9001      else if (cpu_unit_reservation_p (state, _1mlx_))
9002	return 9;
9003      else
9004	gcc_unreachable ();
9005    default:
9006      gcc_unreachable ();
9007    }
9008}
9009
9010/* True when INSN is important for bundling.  */
9011
9012static bool
9013important_for_bundling_p (rtx_insn *insn)
9014{
9015  return (INSN_P (insn)
9016	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
9017	  && GET_CODE (PATTERN (insn)) != USE
9018	  && GET_CODE (PATTERN (insn)) != CLOBBER);
9019}
9020
9021/* The following function returns an insn important for insn bundling
9022   followed by INSN and before TAIL.  */
9023
9024static rtx_insn *
9025get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
9026{
9027  for (; insn && insn != tail; insn = NEXT_INSN (insn))
9028    if (important_for_bundling_p (insn))
9029      return insn;
9030  return NULL;
9031}
9032
9033/* True when INSN is unknown, but important, for bundling.  */
9034
9035static bool
9036unknown_for_bundling_p (rtx_insn *insn)
9037{
9038  return (INSN_P (insn)
9039	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
9040	  && GET_CODE (PATTERN (insn)) != USE
9041	  && GET_CODE (PATTERN (insn)) != CLOBBER);
9042}
9043
9044/* Add a bundle selector TEMPLATE0 before INSN.  */
9045
9046static void
9047ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
9048{
9049  rtx b = gen_bundle_selector (GEN_INT (template0));
9050
9051  ia64_emit_insn_before (b, insn);
9052#if NR_BUNDLES == 10
9053  if ((template0 == 4 || template0 == 5)
9054      && ia64_except_unwind_info (&global_options) == UI_TARGET)
9055    {
9056      int i;
9057      rtx note = NULL_RTX;
9058
9059      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
9060	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
9061	 to following nops, as br.call sets rp to the address of following
9062	 bundle and therefore an EH region end must be on a bundle
9063	 boundary.  */
9064      insn = PREV_INSN (insn);
9065      for (i = 0; i < 3; i++)
9066	{
9067	  do
9068	    insn = next_active_insn (insn);
9069	  while (NONJUMP_INSN_P (insn)
9070		 && get_attr_empty (insn) == EMPTY_YES);
9071	  if (CALL_P (insn))
9072	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
9073	  else if (note)
9074	    {
9075	      int code;
9076
9077	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
9078			  || code == CODE_FOR_nop_b);
9079	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
9080		note = NULL_RTX;
9081	      else
9082		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
9083	    }
9084	}
9085    }
9086#endif
9087}
9088
9089/* The following function does insn bundling.  Bundling means
9090   inserting templates and nop insns to fit insn groups into permitted
9091   templates.  Instruction scheduling uses NDFA (non-deterministic
9092   finite automata) encoding informations about the templates and the
9093   inserted nops.  Nondeterminism of the automata permits follows
9094   all possible insn sequences very fast.
9095
9096   Unfortunately it is not possible to get information about inserting
9097   nop insns and used templates from the automata states.  The
9098   automata only says that we can issue an insn possibly inserting
9099   some nops before it and using some template.  Therefore insn
9100   bundling in this function is implemented by using DFA
9101   (deterministic finite automata).  We follow all possible insn
9102   sequences by inserting 0-2 nops (that is what the NDFA describe for
9103   insn scheduling) before/after each insn being bundled.  We know the
9104   start of simulated processor cycle from insn scheduling (insn
9105   starting a new cycle has TImode).
9106
9107   Simple implementation of insn bundling would create enormous
9108   number of possible insn sequences satisfying information about new
9109   cycle ticks taken from the insn scheduling.  To make the algorithm
9110   practical we use dynamic programming.  Each decision (about
9111   inserting nops and implicitly about previous decisions) is described
9112   by structure bundle_state (see above).  If we generate the same
9113   bundle state (key is automaton state after issuing the insns and
9114   nops for it), we reuse already generated one.  As consequence we
9115   reject some decisions which cannot improve the solution and
9116   reduce memory for the algorithm.
9117
9118   When we reach the end of EBB (extended basic block), we choose the
9119   best sequence and then, moving back in EBB, insert templates for
9120   the best alternative.  The templates are taken from querying
9121   automaton state for each insn in chosen bundle states.
9122
9123   So the algorithm makes two (forward and backward) passes through
9124   EBB.  */
9125
9126static void
9127bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
9128{
9129  struct bundle_state *curr_state, *next_state, *best_state;
9130  rtx_insn *insn, *next_insn;
9131  int insn_num;
9132  int i, bundle_end_p, only_bundle_end_p, asm_p;
9133  int pos = 0, max_pos, template0, template1;
9134  rtx_insn *b;
9135  enum attr_type type;
9136
9137  insn_num = 0;
9138  /* Count insns in the EBB.  */
9139  for (insn = NEXT_INSN (prev_head_insn);
9140       insn && insn != tail;
9141       insn = NEXT_INSN (insn))
9142    if (INSN_P (insn))
9143      insn_num++;
9144  if (insn_num == 0)
9145    return;
9146  bundling_p = 1;
9147  dfa_clean_insn_cache ();
9148  initiate_bundle_state_table ();
9149  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
9150  /* First (forward) pass -- generation of bundle states.  */
9151  curr_state = get_free_bundle_state ();
9152  curr_state->insn = NULL;
9153  curr_state->before_nops_num = 0;
9154  curr_state->after_nops_num = 0;
9155  curr_state->insn_num = 0;
9156  curr_state->cost = 0;
9157  curr_state->accumulated_insns_num = 0;
9158  curr_state->branch_deviation = 0;
9159  curr_state->middle_bundle_stops = 0;
9160  curr_state->next = NULL;
9161  curr_state->originator = NULL;
9162  state_reset (curr_state->dfa_state);
9163  index_to_bundle_states [0] = curr_state;
9164  insn_num = 0;
9165  /* Shift cycle mark if it is put on insn which could be ignored.  */
9166  for (insn = NEXT_INSN (prev_head_insn);
9167       insn != tail;
9168       insn = NEXT_INSN (insn))
9169    if (INSN_P (insn)
9170	&& !important_for_bundling_p (insn)
9171	&& GET_MODE (insn) == TImode)
9172      {
9173	PUT_MODE (insn, VOIDmode);
9174	for (next_insn = NEXT_INSN (insn);
9175	     next_insn != tail;
9176	     next_insn = NEXT_INSN (next_insn))
9177	  if (important_for_bundling_p (next_insn)
9178	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
9179	    {
9180	      PUT_MODE (next_insn, TImode);
9181	      break;
9182	    }
9183      }
9184  /* Forward pass: generation of bundle states.  */
9185  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
9186       insn != NULL_RTX;
9187       insn = next_insn)
9188    {
9189      gcc_assert (important_for_bundling_p (insn));
9190      type = ia64_safe_type (insn);
9191      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
9192      insn_num++;
9193      index_to_bundle_states [insn_num] = NULL;
9194      for (curr_state = index_to_bundle_states [insn_num - 1];
9195	   curr_state != NULL;
9196	   curr_state = next_state)
9197	{
9198	  pos = curr_state->accumulated_insns_num % 3;
9199	  next_state = curr_state->next;
9200	  /* We must fill up the current bundle in order to start a
9201	     subsequent asm insn in a new bundle.  Asm insn is always
9202	     placed in a separate bundle.  */
9203	  only_bundle_end_p
9204	    = (next_insn != NULL_RTX
9205	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
9206	       && unknown_for_bundling_p (next_insn));
9207	  /* We may fill up the current bundle if it is the cycle end
9208	     without a group barrier.  */
9209	  bundle_end_p
9210	    = (only_bundle_end_p || next_insn == NULL_RTX
9211	       || (GET_MODE (next_insn) == TImode
9212		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
9213	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
9214	      || type == TYPE_S)
9215	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
9216				 only_bundle_end_p);
9217	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
9218			       only_bundle_end_p);
9219	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
9220			       only_bundle_end_p);
9221	}
9222      gcc_assert (index_to_bundle_states [insn_num]);
9223      for (curr_state = index_to_bundle_states [insn_num];
9224	   curr_state != NULL;
9225	   curr_state = curr_state->next)
9226	if (verbose >= 2 && dump)
9227	  {
9228	    /* This structure is taken from generated code of the
9229	       pipeline hazard recognizer (see file insn-attrtab.c).
9230	       Please don't forget to change the structure if a new
9231	       automaton is added to .md file.  */
9232	    struct DFA_chip
9233	    {
9234	      unsigned short one_automaton_state;
9235	      unsigned short oneb_automaton_state;
9236	      unsigned short two_automaton_state;
9237	      unsigned short twob_automaton_state;
9238	    };
9239
9240	    fprintf
9241	      (dump,
9242	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
9243	       curr_state->unique_num,
9244	       (curr_state->originator == NULL
9245		? -1 : curr_state->originator->unique_num),
9246	       curr_state->cost,
9247	       curr_state->before_nops_num, curr_state->after_nops_num,
9248	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
9249	       curr_state->middle_bundle_stops,
9250	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9251	       INSN_UID (insn));
9252	  }
9253    }
9254
9255  /* We should find a solution because the 2nd insn scheduling has
9256     found one.  */
9257  gcc_assert (index_to_bundle_states [insn_num]);
9258  /* Find a state corresponding to the best insn sequence.  */
9259  best_state = NULL;
9260  for (curr_state = index_to_bundle_states [insn_num];
9261       curr_state != NULL;
9262       curr_state = curr_state->next)
9263    /* We are just looking at the states with fully filled up last
9264       bundle.  The first we prefer insn sequences with minimal cost
9265       then with minimal inserted nops and finally with branch insns
9266       placed in the 3rd slots.  */
9267    if (curr_state->accumulated_insns_num % 3 == 0
9268	&& (best_state == NULL || best_state->cost > curr_state->cost
9269	    || (best_state->cost == curr_state->cost
9270		&& (curr_state->accumulated_insns_num
9271		    < best_state->accumulated_insns_num
9272		    || (curr_state->accumulated_insns_num
9273			== best_state->accumulated_insns_num
9274			&& (curr_state->branch_deviation
9275			    < best_state->branch_deviation
9276			    || (curr_state->branch_deviation
9277				== best_state->branch_deviation
9278				&& curr_state->middle_bundle_stops
9279				< best_state->middle_bundle_stops)))))))
9280      best_state = curr_state;
9281  /* Second (backward) pass: adding nops and templates.  */
9282  gcc_assert (best_state);
9283  insn_num = best_state->before_nops_num;
9284  template0 = template1 = -1;
9285  for (curr_state = best_state;
9286       curr_state->originator != NULL;
9287       curr_state = curr_state->originator)
9288    {
9289      insn = curr_state->insn;
9290      asm_p = unknown_for_bundling_p (insn);
9291      insn_num++;
9292      if (verbose >= 2 && dump)
9293	{
9294	  struct DFA_chip
9295	  {
9296	    unsigned short one_automaton_state;
9297	    unsigned short oneb_automaton_state;
9298	    unsigned short two_automaton_state;
9299	    unsigned short twob_automaton_state;
9300	  };
9301
9302	  fprintf
9303	    (dump,
9304	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
9305	     curr_state->unique_num,
9306	     (curr_state->originator == NULL
9307	      ? -1 : curr_state->originator->unique_num),
9308	     curr_state->cost,
9309	     curr_state->before_nops_num, curr_state->after_nops_num,
9310	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
9311	     curr_state->middle_bundle_stops,
9312	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
9313	     INSN_UID (insn));
9314	}
9315      /* Find the position in the current bundle window.  The window can
9316	 contain at most two bundles.  Two bundle window means that
9317	 the processor will make two bundle rotation.  */
9318      max_pos = get_max_pos (curr_state->dfa_state);
9319      if (max_pos == 6
9320	  /* The following (negative template number) means that the
9321	     processor did one bundle rotation.  */
9322	  || (max_pos == 3 && template0 < 0))
9323	{
9324	  /* We are at the end of the window -- find template(s) for
9325	     its bundle(s).  */
9326	  pos = max_pos;
9327	  if (max_pos == 3)
9328	    template0 = get_template (curr_state->dfa_state, 3);
9329	  else
9330	    {
9331	      template1 = get_template (curr_state->dfa_state, 3);
9332	      template0 = get_template (curr_state->dfa_state, 6);
9333	    }
9334	}
9335      if (max_pos > 3 && template1 < 0)
9336	/* It may happen when we have the stop inside a bundle.  */
9337	{
9338	  gcc_assert (pos <= 3);
9339	  template1 = get_template (curr_state->dfa_state, 3);
9340	  pos += 3;
9341	}
9342      if (!asm_p)
9343	/* Emit nops after the current insn.  */
9344	for (i = 0; i < curr_state->after_nops_num; i++)
9345	  {
9346	    rtx nop_pat = gen_nop ();
9347	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
9348	    pos--;
9349	    gcc_assert (pos >= 0);
9350	    if (pos % 3 == 0)
9351	      {
9352		/* We are at the start of a bundle: emit the template
9353		   (it should be defined).  */
9354		gcc_assert (template0 >= 0);
9355		ia64_add_bundle_selector_before (template0, nop);
9356		/* If we have two bundle window, we make one bundle
9357		   rotation.  Otherwise template0 will be undefined
9358		   (negative value).  */
9359		template0 = template1;
9360		template1 = -1;
9361	      }
9362	  }
9363      /* Move the position backward in the window.  Group barrier has
9364	 no slot.  Asm insn takes all bundle.  */
9365      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9366	  && !unknown_for_bundling_p (insn))
9367	pos--;
9368      /* Long insn takes 2 slots.  */
9369      if (ia64_safe_type (insn) == TYPE_L)
9370	pos--;
9371      gcc_assert (pos >= 0);
9372      if (pos % 3 == 0
9373	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
9374	  && !unknown_for_bundling_p (insn))
9375	{
9376	  /* The current insn is at the bundle start: emit the
9377	     template.  */
9378	  gcc_assert (template0 >= 0);
9379	  ia64_add_bundle_selector_before (template0, insn);
9380	  b = PREV_INSN (insn);
9381	  insn = b;
9382	  /* See comment above in analogous place for emitting nops
9383	     after the insn.  */
9384	  template0 = template1;
9385	  template1 = -1;
9386	}
9387      /* Emit nops after the current insn.  */
9388      for (i = 0; i < curr_state->before_nops_num; i++)
9389	{
9390	  rtx nop_pat = gen_nop ();
9391	  ia64_emit_insn_before (nop_pat, insn);
9392	  rtx_insn *nop = PREV_INSN (insn);
9393	  insn = nop;
9394	  pos--;
9395	  gcc_assert (pos >= 0);
9396	  if (pos % 3 == 0)
9397	    {
9398	      /* See comment above in analogous place for emitting nops
9399		 after the insn.  */
9400	      gcc_assert (template0 >= 0);
9401	      ia64_add_bundle_selector_before (template0, insn);
9402	      b = PREV_INSN (insn);
9403	      insn = b;
9404	      template0 = template1;
9405	      template1 = -1;
9406	    }
9407	}
9408    }
9409
9410  if (flag_checking)
9411    {
9412      /* Assert right calculation of middle_bundle_stops.  */
9413      int num = best_state->middle_bundle_stops;
9414      bool start_bundle = true, end_bundle = false;
9415
9416      for (insn = NEXT_INSN (prev_head_insn);
9417	   insn && insn != tail;
9418	   insn = NEXT_INSN (insn))
9419	{
9420	  if (!INSN_P (insn))
9421	    continue;
9422	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
9423	    start_bundle = true;
9424	  else
9425	    {
9426	      rtx_insn *next_insn;
9427
9428	      for (next_insn = NEXT_INSN (insn);
9429		   next_insn && next_insn != tail;
9430		   next_insn = NEXT_INSN (next_insn))
9431		if (INSN_P (next_insn)
9432		    && (ia64_safe_itanium_class (next_insn)
9433			!= ITANIUM_CLASS_IGNORE
9434			|| recog_memoized (next_insn)
9435			== CODE_FOR_bundle_selector)
9436		    && GET_CODE (PATTERN (next_insn)) != USE
9437		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
9438		  break;
9439
9440	      end_bundle = next_insn == NULL_RTX
9441		|| next_insn == tail
9442		|| (INSN_P (next_insn)
9443		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
9444	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
9445		  && !start_bundle && !end_bundle
9446		  && next_insn
9447		  && !unknown_for_bundling_p (next_insn))
9448		num--;
9449
9450	      start_bundle = false;
9451	    }
9452	}
9453
9454      gcc_assert (num == 0);
9455    }
9456
9457  free (index_to_bundle_states);
9458  finish_bundle_state_table ();
9459  bundling_p = 0;
9460  dfa_clean_insn_cache ();
9461}
9462
9463/* The following function is called at the end of scheduling BB or
9464   EBB.  After reload, it inserts stop bits and does insn bundling.  */
9465
9466static void
9467ia64_sched_finish (FILE *dump, int sched_verbose)
9468{
9469  if (sched_verbose)
9470    fprintf (dump, "// Finishing schedule.\n");
9471  if (!reload_completed)
9472    return;
9473  if (reload_completed)
9474    {
9475      final_emit_insn_group_barriers (dump);
9476      bundling (dump, sched_verbose, current_sched_info->prev_head,
9477		current_sched_info->next_tail);
9478      if (sched_verbose && dump)
9479	fprintf (dump, "//    finishing %d-%d\n",
9480		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
9481		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
9482
9483      return;
9484    }
9485}
9486
9487/* The following function inserts stop bits in scheduled BB or EBB.  */
9488
9489static void
9490final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
9491{
9492  rtx_insn *insn;
9493  int need_barrier_p = 0;
9494  int seen_good_insn = 0;
9495
9496  init_insn_group_barriers ();
9497
9498  for (insn = NEXT_INSN (current_sched_info->prev_head);
9499       insn != current_sched_info->next_tail;
9500       insn = NEXT_INSN (insn))
9501    {
9502      if (BARRIER_P (insn))
9503	{
9504	  rtx_insn *last = prev_active_insn (insn);
9505
9506	  if (! last)
9507	    continue;
9508	  if (JUMP_TABLE_DATA_P (last))
9509	    last = prev_active_insn (last);
9510	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
9511	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
9512
9513	  init_insn_group_barriers ();
9514	  seen_good_insn = 0;
9515	  need_barrier_p = 0;
9516	}
9517      else if (NONDEBUG_INSN_P (insn))
9518	{
9519	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
9520	    {
9521	      init_insn_group_barriers ();
9522	      seen_good_insn = 0;
9523	      need_barrier_p = 0;
9524	    }
9525	  else if (need_barrier_p || group_barrier_needed (insn)
9526		   || (mflag_sched_stop_bits_after_every_cycle
9527		       && GET_MODE (insn) == TImode
9528		       && seen_good_insn))
9529	    {
9530	      if (TARGET_EARLY_STOP_BITS)
9531		{
9532		  rtx_insn *last;
9533
9534		  for (last = insn;
9535		       last != current_sched_info->prev_head;
9536		       last = PREV_INSN (last))
9537		    if (INSN_P (last) && GET_MODE (last) == TImode
9538			&& stops_p [INSN_UID (last)])
9539		      break;
9540		  if (last == current_sched_info->prev_head)
9541		    last = insn;
9542		  last = prev_active_insn (last);
9543		  if (last
9544		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
9545		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
9546				     last);
9547		  init_insn_group_barriers ();
9548		  for (last = NEXT_INSN (last);
9549		       last != insn;
9550		       last = NEXT_INSN (last))
9551		    if (INSN_P (last))
9552		      {
9553			group_barrier_needed (last);
9554			if (recog_memoized (last) >= 0
9555			    && important_for_bundling_p (last))
9556			  seen_good_insn = 1;
9557		      }
9558		}
9559	      else
9560		{
9561		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
9562				    insn);
9563		  init_insn_group_barriers ();
9564		  seen_good_insn = 0;
9565		}
9566	      group_barrier_needed (insn);
9567	      if (recog_memoized (insn) >= 0
9568		  && important_for_bundling_p (insn))
9569		seen_good_insn = 1;
9570	    }
9571	  else if (recog_memoized (insn) >= 0
9572		   && important_for_bundling_p (insn))
9573	    seen_good_insn = 1;
9574	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
9575	}
9576    }
9577}
9578
9579
9580
9581/* If the following function returns TRUE, we will use the DFA
9582   insn scheduler.  */
9583
9584static int
9585ia64_first_cycle_multipass_dfa_lookahead (void)
9586{
9587  return (reload_completed ? 6 : 4);
9588}
9589
9590/* The following function initiates variable `dfa_pre_cycle_insn'.  */
9591
9592static void
9593ia64_init_dfa_pre_cycle_insn (void)
9594{
9595  if (temp_dfa_state == NULL)
9596    {
9597      dfa_state_size = state_size ();
9598      temp_dfa_state = xmalloc (dfa_state_size);
9599      prev_cycle_state = xmalloc (dfa_state_size);
9600    }
9601  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9602  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9603  recog_memoized (dfa_pre_cycle_insn);
9604  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9605  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9606  recog_memoized (dfa_stop_insn);
9607}
9608
9609/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9610   used by the DFA insn scheduler.  */
9611
9612static rtx
9613ia64_dfa_pre_cycle_insn (void)
9614{
9615  return dfa_pre_cycle_insn;
9616}
9617
9618/* The following function returns TRUE if PRODUCER (of type ilog or
9619   ld) produces address for CONSUMER (of type st or stf). */
9620
9621int
9622ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9623{
9624  rtx dest, reg, mem;
9625
9626  gcc_assert (producer && consumer);
9627  dest = ia64_single_set (producer);
9628  gcc_assert (dest);
9629  reg = SET_DEST (dest);
9630  gcc_assert (reg);
9631  if (GET_CODE (reg) == SUBREG)
9632    reg = SUBREG_REG (reg);
9633  gcc_assert (GET_CODE (reg) == REG);
9634
9635  dest = ia64_single_set (consumer);
9636  gcc_assert (dest);
9637  mem = SET_DEST (dest);
9638  gcc_assert (mem && GET_CODE (mem) == MEM);
9639  return reg_mentioned_p (reg, mem);
9640}
9641
9642/* The following function returns TRUE if PRODUCER (of type ilog or
9643   ld) produces address for CONSUMER (of type ld or fld). */
9644
9645int
9646ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
9647{
9648  rtx dest, src, reg, mem;
9649
9650  gcc_assert (producer && consumer);
9651  dest = ia64_single_set (producer);
9652  gcc_assert (dest);
9653  reg = SET_DEST (dest);
9654  gcc_assert (reg);
9655  if (GET_CODE (reg) == SUBREG)
9656    reg = SUBREG_REG (reg);
9657  gcc_assert (GET_CODE (reg) == REG);
9658
9659  src = ia64_single_set (consumer);
9660  gcc_assert (src);
9661  mem = SET_SRC (src);
9662  gcc_assert (mem);
9663
9664  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9665    mem = XVECEXP (mem, 0, 0);
9666  else if (GET_CODE (mem) == IF_THEN_ELSE)
9667    /* ??? Is this bypass necessary for ld.c?  */
9668    {
9669      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9670      mem = XEXP (mem, 1);
9671    }
9672
9673  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9674    mem = XEXP (mem, 0);
9675
9676  if (GET_CODE (mem) == UNSPEC)
9677    {
9678      int c = XINT (mem, 1);
9679
9680      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9681		  || c == UNSPEC_LDSA);
9682      mem = XVECEXP (mem, 0, 0);
9683    }
9684
9685  /* Note that LO_SUM is used for GOT loads.  */
9686  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9687
9688  return reg_mentioned_p (reg, mem);
9689}
9690
9691/* The following function returns TRUE if INSN produces address for a
9692   load/store insn.  We will place such insns into M slot because it
9693   decreases its latency time.  */
9694
9695int
9696ia64_produce_address_p (rtx insn)
9697{
9698  return insn->call;
9699}
9700
9701
9702/* Emit pseudo-ops for the assembler to describe predicate relations.
9703   At present this assumes that we only consider predicate pairs to
9704   be mutex, and that the assembler can deduce proper values from
9705   straight-line code.  */
9706
9707static void
9708emit_predicate_relation_info (void)
9709{
9710  basic_block bb;
9711
9712  FOR_EACH_BB_REVERSE_FN (bb, cfun)
9713    {
9714      int r;
9715      rtx_insn *head = BB_HEAD (bb);
9716
9717      /* We only need such notes at code labels.  */
9718      if (! LABEL_P (head))
9719	continue;
9720      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9721	head = NEXT_INSN (head);
9722
9723      /* Skip p0, which may be thought to be live due to (reg:DI p0)
9724	 grabbing the entire block of predicate registers.  */
9725      for (r = PR_REG (2); r < PR_REG (64); r += 2)
9726	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9727	  {
9728	    rtx p = gen_rtx_REG (BImode, r);
9729	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
9730	    if (head == BB_END (bb))
9731	      BB_END (bb) = n;
9732	    head = n;
9733	  }
9734    }
9735
9736  /* Look for conditional calls that do not return, and protect predicate
9737     relations around them.  Otherwise the assembler will assume the call
9738     returns, and complain about uses of call-clobbered predicates after
9739     the call.  */
9740  FOR_EACH_BB_REVERSE_FN (bb, cfun)
9741    {
9742      rtx_insn *insn = BB_HEAD (bb);
9743
9744      while (1)
9745	{
9746	  if (CALL_P (insn)
9747	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9748	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9749	    {
9750	      rtx_insn *b =
9751		emit_insn_before (gen_safe_across_calls_all (), insn);
9752	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9753	      if (BB_HEAD (bb) == insn)
9754		BB_HEAD (bb) = b;
9755	      if (BB_END (bb) == insn)
9756		BB_END (bb) = a;
9757	    }
9758
9759	  if (insn == BB_END (bb))
9760	    break;
9761	  insn = NEXT_INSN (insn);
9762	}
9763    }
9764}
9765
9766/* Perform machine dependent operations on the rtl chain INSNS.  */
9767
9768static void
9769ia64_reorg (void)
9770{
9771  /* We are freeing block_for_insn in the toplev to keep compatibility
9772     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9773  compute_bb_for_insn ();
9774
9775  /* If optimizing, we'll have split before scheduling.  */
9776  if (optimize == 0)
9777    split_all_insns ();
9778
9779  if (optimize && flag_schedule_insns_after_reload
9780      && dbg_cnt (ia64_sched2))
9781    {
9782      basic_block bb;
9783      timevar_push (TV_SCHED2);
9784      ia64_final_schedule = 1;
9785
9786      /* We can't let modulo-sched prevent us from scheduling any bbs,
9787	 since we need the final schedule to produce bundle information.  */
9788      FOR_EACH_BB_FN (bb, cfun)
9789	bb->flags &= ~BB_DISABLE_SCHEDULE;
9790
9791      initiate_bundle_states ();
9792      ia64_nop = make_insn_raw (gen_nop ());
9793      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
9794      recog_memoized (ia64_nop);
9795      clocks_length = get_max_uid () + 1;
9796      stops_p = XCNEWVEC (char, clocks_length);
9797
9798      if (ia64_tune == PROCESSOR_ITANIUM2)
9799	{
9800	  pos_1 = get_cpu_unit_code ("2_1");
9801	  pos_2 = get_cpu_unit_code ("2_2");
9802	  pos_3 = get_cpu_unit_code ("2_3");
9803	  pos_4 = get_cpu_unit_code ("2_4");
9804	  pos_5 = get_cpu_unit_code ("2_5");
9805	  pos_6 = get_cpu_unit_code ("2_6");
9806	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9807	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9808	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9809	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9810	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9811	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9812	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9813	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9814	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9815	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9816	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9817	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9818	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9819	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9820	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9821	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9822	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9823	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9824	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9825	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9826	}
9827      else
9828	{
9829	  pos_1 = get_cpu_unit_code ("1_1");
9830	  pos_2 = get_cpu_unit_code ("1_2");
9831	  pos_3 = get_cpu_unit_code ("1_3");
9832	  pos_4 = get_cpu_unit_code ("1_4");
9833	  pos_5 = get_cpu_unit_code ("1_5");
9834	  pos_6 = get_cpu_unit_code ("1_6");
9835	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9836	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9837	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9838	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9839	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9840	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9841	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9842	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9843	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9844	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9845	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9846	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9847	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9848	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9849	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9850	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9851	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9852	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9853	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9854	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9855	}
9856
9857      if (flag_selective_scheduling2
9858	  && !maybe_skip_selective_scheduling ())
9859        run_selective_scheduling ();
9860      else
9861	schedule_ebbs ();
9862
9863      /* Redo alignment computation, as it might gone wrong.  */
9864      compute_alignments ();
9865
9866      /* We cannot reuse this one because it has been corrupted by the
9867	 evil glat.  */
9868      finish_bundle_states ();
9869      free (stops_p);
9870      stops_p = NULL;
9871      emit_insn_group_barriers (dump_file);
9872
9873      ia64_final_schedule = 0;
9874      timevar_pop (TV_SCHED2);
9875    }
9876  else
9877    emit_all_insn_group_barriers (dump_file);
9878
9879  df_analyze ();
9880
9881  /* A call must not be the last instruction in a function, so that the
9882     return address is still within the function, so that unwinding works
9883     properly.  Note that IA-64 differs from dwarf2 on this point.  */
9884  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
9885    {
9886      rtx_insn *insn;
9887      int saw_stop = 0;
9888
9889      insn = get_last_insn ();
9890      if (! INSN_P (insn))
9891        insn = prev_active_insn (insn);
9892      if (insn)
9893	{
9894	  /* Skip over insns that expand to nothing.  */
9895	  while (NONJUMP_INSN_P (insn)
9896		 && get_attr_empty (insn) == EMPTY_YES)
9897	    {
9898	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9899		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9900		saw_stop = 1;
9901	      insn = prev_active_insn (insn);
9902	    }
9903	  if (CALL_P (insn))
9904	    {
9905	      if (! saw_stop)
9906		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9907	      emit_insn (gen_break_f ());
9908	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9909	    }
9910	}
9911    }
9912
9913  emit_predicate_relation_info ();
9914
9915  if (flag_var_tracking)
9916    {
9917      timevar_push (TV_VAR_TRACKING);
9918      variable_tracking_main ();
9919      timevar_pop (TV_VAR_TRACKING);
9920    }
9921  df_finish_pass (false);
9922}
9923
9924/* Return true if REGNO is used by the epilogue.  */
9925
9926int
9927ia64_epilogue_uses (int regno)
9928{
9929  switch (regno)
9930    {
9931    case R_GR (1):
9932      /* With a call to a function in another module, we will write a new
9933	 value to "gp".  After returning from such a call, we need to make
9934	 sure the function restores the original gp-value, even if the
9935	 function itself does not use the gp anymore.  */
9936      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9937
9938    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9939    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9940      /* For functions defined with the syscall_linkage attribute, all
9941	 input registers are marked as live at all function exits.  This
9942	 prevents the register allocator from using the input registers,
9943	 which in turn makes it possible to restart a system call after
9944	 an interrupt without having to save/restore the input registers.
9945	 This also prevents kernel data from leaking to application code.  */
9946      return lookup_attribute ("syscall_linkage",
9947	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9948
9949    case R_BR (0):
9950      /* Conditional return patterns can't represent the use of `b0' as
9951         the return address, so we force the value live this way.  */
9952      return 1;
9953
9954    case AR_PFS_REGNUM:
9955      /* Likewise for ar.pfs, which is used by br.ret.  */
9956      return 1;
9957
9958    default:
9959      return 0;
9960    }
9961}
9962
9963/* Return true if REGNO is used by the frame unwinder.  */
9964
9965int
9966ia64_eh_uses (int regno)
9967{
9968  unsigned int r;
9969
9970  if (! reload_completed)
9971    return 0;
9972
9973  if (regno == 0)
9974    return 0;
9975
9976  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9977    if (regno == current_frame_info.r[r]
9978       || regno == emitted_frame_related_regs[r])
9979      return 1;
9980
9981  return 0;
9982}
9983
9984/* Return true if this goes in small data/bss.  */
9985
9986/* ??? We could also support own long data here.  Generating movl/add/ld8
9987   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
9988   code faster because there is one less load.  This also includes incomplete
9989   types which can't go in sdata/sbss.  */
9990
9991static bool
9992ia64_in_small_data_p (const_tree exp)
9993{
9994  if (TARGET_NO_SDATA)
9995    return false;
9996
9997  /* We want to merge strings, so we never consider them small data.  */
9998  if (TREE_CODE (exp) == STRING_CST)
9999    return false;
10000
10001  /* Functions are never small data.  */
10002  if (TREE_CODE (exp) == FUNCTION_DECL)
10003    return false;
10004
10005  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
10006    {
10007      const char *section = DECL_SECTION_NAME (exp);
10008
10009      if (strcmp (section, ".sdata") == 0
10010	  || strncmp (section, ".sdata.", 7) == 0
10011	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
10012	  || strcmp (section, ".sbss") == 0
10013	  || strncmp (section, ".sbss.", 6) == 0
10014	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
10015	return true;
10016    }
10017  else
10018    {
10019      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
10020
10021      /* If this is an incomplete type with size 0, then we can't put it
10022	 in sdata because it might be too big when completed.  */
10023      if (size > 0 && size <= ia64_section_threshold)
10024	return true;
10025    }
10026
10027  return false;
10028}
10029
10030/* Output assembly directives for prologue regions.  */
10031
10032/* The current basic block number.  */
10033
10034static bool last_block;
10035
10036/* True if we need a copy_state command at the start of the next block.  */
10037
10038static bool need_copy_state;
10039
10040#ifndef MAX_ARTIFICIAL_LABEL_BYTES
10041# define MAX_ARTIFICIAL_LABEL_BYTES 30
10042#endif
10043
10044/* The function emits unwind directives for the start of an epilogue.  */
10045
10046static void
10047process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
10048		  bool unwind, bool frame ATTRIBUTE_UNUSED)
10049{
10050  /* If this isn't the last block of the function, then we need to label the
10051     current state, and copy it back in at the start of the next block.  */
10052
10053  if (!last_block)
10054    {
10055      if (unwind)
10056	fprintf (asm_out_file, "\t.label_state %d\n",
10057		 ++cfun->machine->state_num);
10058      need_copy_state = true;
10059    }
10060
10061  if (unwind)
10062    fprintf (asm_out_file, "\t.restore sp\n");
10063}
10064
10065/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
10066
10067static void
10068process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn,
10069			bool unwind, bool frame)
10070{
10071  rtx dest = SET_DEST (pat);
10072  rtx src = SET_SRC (pat);
10073
10074  if (dest == stack_pointer_rtx)
10075    {
10076      if (GET_CODE (src) == PLUS)
10077	{
10078	  rtx op0 = XEXP (src, 0);
10079	  rtx op1 = XEXP (src, 1);
10080
10081	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
10082
10083	  if (INTVAL (op1) < 0)
10084	    {
10085	      gcc_assert (!frame_pointer_needed);
10086	      if (unwind)
10087		fprintf (asm_out_file,
10088			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
10089			 -INTVAL (op1));
10090	    }
10091	  else
10092	    process_epilogue (asm_out_file, insn, unwind, frame);
10093	}
10094      else
10095	{
10096	  gcc_assert (src == hard_frame_pointer_rtx);
10097	  process_epilogue (asm_out_file, insn, unwind, frame);
10098	}
10099    }
10100  else if (dest == hard_frame_pointer_rtx)
10101    {
10102      gcc_assert (src == stack_pointer_rtx);
10103      gcc_assert (frame_pointer_needed);
10104
10105      if (unwind)
10106	fprintf (asm_out_file, "\t.vframe r%d\n",
10107		 ia64_dbx_register_number (REGNO (dest)));
10108    }
10109  else
10110    gcc_unreachable ();
10111}
10112
10113/* This function processes a SET pattern for REG_CFA_REGISTER.  */
10114
10115static void
10116process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind)
10117{
10118  rtx dest = SET_DEST (pat);
10119  rtx src = SET_SRC (pat);
10120  int dest_regno = REGNO (dest);
10121  int src_regno;
10122
10123  if (src == pc_rtx)
10124    {
10125      /* Saving return address pointer.  */
10126      if (unwind)
10127	fprintf (asm_out_file, "\t.save rp, r%d\n",
10128		 ia64_dbx_register_number (dest_regno));
10129      return;
10130    }
10131
10132  src_regno = REGNO (src);
10133
10134  switch (src_regno)
10135    {
10136    case PR_REG (0):
10137      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
10138      if (unwind)
10139	fprintf (asm_out_file, "\t.save pr, r%d\n",
10140		 ia64_dbx_register_number (dest_regno));
10141      break;
10142
10143    case AR_UNAT_REGNUM:
10144      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
10145      if (unwind)
10146	fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
10147		 ia64_dbx_register_number (dest_regno));
10148      break;
10149
10150    case AR_LC_REGNUM:
10151      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
10152      if (unwind)
10153	fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
10154		 ia64_dbx_register_number (dest_regno));
10155      break;
10156
10157    default:
10158      /* Everything else should indicate being stored to memory.  */
10159      gcc_unreachable ();
10160    }
10161}
10162
10163/* This function processes a SET pattern for REG_CFA_OFFSET.  */
10164
10165static void
10166process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind)
10167{
10168  rtx dest = SET_DEST (pat);
10169  rtx src = SET_SRC (pat);
10170  int src_regno = REGNO (src);
10171  const char *saveop;
10172  HOST_WIDE_INT off;
10173  rtx base;
10174
10175  gcc_assert (MEM_P (dest));
10176  if (GET_CODE (XEXP (dest, 0)) == REG)
10177    {
10178      base = XEXP (dest, 0);
10179      off = 0;
10180    }
10181  else
10182    {
10183      gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
10184		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
10185      base = XEXP (XEXP (dest, 0), 0);
10186      off = INTVAL (XEXP (XEXP (dest, 0), 1));
10187    }
10188
10189  if (base == hard_frame_pointer_rtx)
10190    {
10191      saveop = ".savepsp";
10192      off = - off;
10193    }
10194  else
10195    {
10196      gcc_assert (base == stack_pointer_rtx);
10197      saveop = ".savesp";
10198    }
10199
10200  src_regno = REGNO (src);
10201  switch (src_regno)
10202    {
10203    case BR_REG (0):
10204      gcc_assert (!current_frame_info.r[reg_save_b0]);
10205      if (unwind)
10206	fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
10207		 saveop, off);
10208      break;
10209
10210    case PR_REG (0):
10211      gcc_assert (!current_frame_info.r[reg_save_pr]);
10212      if (unwind)
10213	fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
10214		 saveop, off);
10215      break;
10216
10217    case AR_LC_REGNUM:
10218      gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
10219      if (unwind)
10220	fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
10221		 saveop, off);
10222      break;
10223
10224    case AR_PFS_REGNUM:
10225      gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
10226      if (unwind)
10227	fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
10228		 saveop, off);
10229      break;
10230
10231    case AR_UNAT_REGNUM:
10232      gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
10233      if (unwind)
10234	fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
10235		 saveop, off);
10236      break;
10237
10238    case GR_REG (4):
10239    case GR_REG (5):
10240    case GR_REG (6):
10241    case GR_REG (7):
10242      if (unwind)
10243	fprintf (asm_out_file, "\t.save.g 0x%x\n",
10244		 1 << (src_regno - GR_REG (4)));
10245      break;
10246
10247    case BR_REG (1):
10248    case BR_REG (2):
10249    case BR_REG (3):
10250    case BR_REG (4):
10251    case BR_REG (5):
10252      if (unwind)
10253	fprintf (asm_out_file, "\t.save.b 0x%x\n",
10254		 1 << (src_regno - BR_REG (1)));
10255      break;
10256
10257    case FR_REG (2):
10258    case FR_REG (3):
10259    case FR_REG (4):
10260    case FR_REG (5):
10261      if (unwind)
10262	fprintf (asm_out_file, "\t.save.f 0x%x\n",
10263		 1 << (src_regno - FR_REG (2)));
10264      break;
10265
10266    case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
10267    case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
10268    case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
10269    case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
10270      if (unwind)
10271	fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
10272		 1 << (src_regno - FR_REG (12)));
10273      break;
10274
10275    default:
10276      /* ??? For some reason we mark other general registers, even those
10277	 we can't represent in the unwind info.  Ignore them.  */
10278      break;
10279    }
10280}
10281
10282/* This function looks at a single insn and emits any directives
10283   required to unwind this insn.  */
10284
10285static void
10286ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn)
10287{
10288  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
10289  bool frame = dwarf2out_do_frame ();
10290  rtx note, pat;
10291  bool handled_one;
10292
10293  if (!unwind && !frame)
10294    return;
10295
10296  if (NOTE_INSN_BASIC_BLOCK_P (insn))
10297    {
10298      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
10299     == EXIT_BLOCK_PTR_FOR_FN (cfun);
10300
10301      /* Restore unwind state from immediately before the epilogue.  */
10302      if (need_copy_state)
10303	{
10304	  if (unwind)
10305	    {
10306	      fprintf (asm_out_file, "\t.body\n");
10307	      fprintf (asm_out_file, "\t.copy_state %d\n",
10308		       cfun->machine->state_num);
10309	    }
10310	  need_copy_state = false;
10311	}
10312    }
10313
10314  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10315    return;
10316
10317  /* Look for the ALLOC insn.  */
10318  if (INSN_CODE (insn) == CODE_FOR_alloc)
10319    {
10320      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
10321      int dest_regno = REGNO (dest);
10322
10323      /* If this is the final destination for ar.pfs, then this must
10324	 be the alloc in the prologue.  */
10325      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
10326	{
10327	  if (unwind)
10328	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
10329		     ia64_dbx_register_number (dest_regno));
10330	}
10331      else
10332	{
10333	  /* This must be an alloc before a sibcall.  We must drop the
10334	     old frame info.  The easiest way to drop the old frame
10335	     info is to ensure we had a ".restore sp" directive
10336	     followed by a new prologue.  If the procedure doesn't
10337	     have a memory-stack frame, we'll issue a dummy ".restore
10338	     sp" now.  */
10339	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
10340	    /* if haven't done process_epilogue() yet, do it now */
10341	    process_epilogue (asm_out_file, insn, unwind, frame);
10342	  if (unwind)
10343	    fprintf (asm_out_file, "\t.prologue\n");
10344	}
10345      return;
10346    }
10347
10348  handled_one = false;
10349  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
10350    switch (REG_NOTE_KIND (note))
10351      {
10352      case REG_CFA_ADJUST_CFA:
10353	pat = XEXP (note, 0);
10354	if (pat == NULL)
10355	  pat = PATTERN (insn);
10356	process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame);
10357	handled_one = true;
10358	break;
10359
10360      case REG_CFA_OFFSET:
10361	pat = XEXP (note, 0);
10362	if (pat == NULL)
10363	  pat = PATTERN (insn);
10364	process_cfa_offset (asm_out_file, pat, unwind);
10365	handled_one = true;
10366	break;
10367
10368      case REG_CFA_REGISTER:
10369	pat = XEXP (note, 0);
10370	if (pat == NULL)
10371	  pat = PATTERN (insn);
10372	process_cfa_register (asm_out_file, pat, unwind);
10373	handled_one = true;
10374	break;
10375
10376      case REG_FRAME_RELATED_EXPR:
10377      case REG_CFA_DEF_CFA:
10378      case REG_CFA_EXPRESSION:
10379      case REG_CFA_RESTORE:
10380      case REG_CFA_SET_VDRAP:
10381	/* Not used in the ia64 port.  */
10382	gcc_unreachable ();
10383
10384      default:
10385	/* Not a frame-related note.  */
10386	break;
10387      }
10388
10389  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
10390     explicit action to take.  No guessing required.  */
10391  gcc_assert (handled_one);
10392}
10393
10394/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
10395
10396static void
10397ia64_asm_emit_except_personality (rtx personality)
10398{
10399  fputs ("\t.personality\t", asm_out_file);
10400  output_addr_const (asm_out_file, personality);
10401  fputc ('\n', asm_out_file);
10402}
10403
10404/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
10405
10406static void
10407ia64_asm_init_sections (void)
10408{
10409  exception_section = get_unnamed_section (0, output_section_asm_op,
10410					   "\t.handlerdata");
10411}
10412
10413/* Implement TARGET_DEBUG_UNWIND_INFO.  */
10414
10415static enum unwind_info_type
10416ia64_debug_unwind_info (void)
10417{
10418  return UI_TARGET;
10419}
10420
10421enum ia64_builtins
10422{
10423  IA64_BUILTIN_BSP,
10424  IA64_BUILTIN_COPYSIGNQ,
10425  IA64_BUILTIN_FABSQ,
10426  IA64_BUILTIN_FLUSHRS,
10427  IA64_BUILTIN_INFQ,
10428  IA64_BUILTIN_HUGE_VALQ,
10429  IA64_BUILTIN_NANQ,
10430  IA64_BUILTIN_NANSQ,
10431  IA64_BUILTIN_max
10432};
10433
10434static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
10435
10436void
10437ia64_init_builtins (void)
10438{
10439  tree fpreg_type;
10440  tree float80_type;
10441  tree decl;
10442
10443  /* The __fpreg type.  */
10444  fpreg_type = make_node (REAL_TYPE);
10445  TYPE_PRECISION (fpreg_type) = 82;
10446  layout_type (fpreg_type);
10447  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
10448
10449  /* The __float80 type.  */
10450  if (float64x_type_node != NULL_TREE
10451      && TYPE_MODE (float64x_type_node) == XFmode)
10452    float80_type = float64x_type_node;
10453  else
10454    {
10455      float80_type = make_node (REAL_TYPE);
10456      TYPE_PRECISION (float80_type) = 80;
10457      layout_type (float80_type);
10458    }
10459  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
10460
10461  /* The __float128 type.  */
10462  if (!TARGET_HPUX)
10463    {
10464      tree ftype;
10465      tree const_string_type
10466	= build_pointer_type (build_qualified_type
10467			      (char_type_node, TYPE_QUAL_CONST));
10468
10469      (*lang_hooks.types.register_builtin_type) (float128_type_node,
10470						 "__float128");
10471
10472      /* TFmode support builtins.  */
10473      ftype = build_function_type_list (float128_type_node, NULL_TREE);
10474      decl = add_builtin_function ("__builtin_infq", ftype,
10475				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
10476				   NULL, NULL_TREE);
10477      ia64_builtins[IA64_BUILTIN_INFQ] = decl;
10478
10479      decl = add_builtin_function ("__builtin_huge_valq", ftype,
10480				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
10481				   NULL, NULL_TREE);
10482      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;
10483
10484      ftype = build_function_type_list (float128_type_node,
10485					const_string_type,
10486					NULL_TREE);
10487      decl = add_builtin_function ("__builtin_nanq", ftype,
10488				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
10489				   "nanq", NULL_TREE);
10490      TREE_READONLY (decl) = 1;
10491      ia64_builtins[IA64_BUILTIN_NANQ] = decl;
10492
10493      decl = add_builtin_function ("__builtin_nansq", ftype,
10494				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
10495				   "nansq", NULL_TREE);
10496      TREE_READONLY (decl) = 1;
10497      ia64_builtins[IA64_BUILTIN_NANSQ] = decl;
10498
10499      ftype = build_function_type_list (float128_type_node,
10500					float128_type_node,
10501					NULL_TREE);
10502      decl = add_builtin_function ("__builtin_fabsq", ftype,
10503				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
10504				   "__fabstf2", NULL_TREE);
10505      TREE_READONLY (decl) = 1;
10506      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;
10507
10508      ftype = build_function_type_list (float128_type_node,
10509					float128_type_node,
10510					float128_type_node,
10511					NULL_TREE);
10512      decl = add_builtin_function ("__builtin_copysignq", ftype,
10513				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
10514				   "__copysigntf3", NULL_TREE);
10515      TREE_READONLY (decl) = 1;
10516      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
10517    }
10518  else
10519    /* Under HPUX, this is a synonym for "long double".  */
10520    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
10521					       "__float128");
10522
10523  /* Fwrite on VMS is non-standard.  */
10524#if TARGET_ABI_OPEN_VMS
10525  vms_patch_builtins ();
10526#endif
10527
10528#define def_builtin(name, type, code)					\
10529  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
10530		       NULL, NULL_TREE)
10531
10532  decl = def_builtin ("__builtin_ia64_bsp",
10533		      build_function_type_list (ptr_type_node, NULL_TREE),
10534		      IA64_BUILTIN_BSP);
10535  ia64_builtins[IA64_BUILTIN_BSP] = decl;
10536
10537  decl = def_builtin ("__builtin_ia64_flushrs",
10538		      build_function_type_list (void_type_node, NULL_TREE),
10539		      IA64_BUILTIN_FLUSHRS);
10540  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;
10541
10542#undef def_builtin
10543
10544  if (TARGET_HPUX)
10545    {
10546      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
10547	set_user_assembler_name (decl, "_Isfinite");
10548      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
10549	set_user_assembler_name (decl, "_Isfinitef");
10550      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
10551	set_user_assembler_name (decl, "_Isfinitef128");
10552    }
10553}
10554
10555static tree
10556ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
10557		   tree *args, bool ignore ATTRIBUTE_UNUSED)
10558{
10559  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
10560    {
10561      enum ia64_builtins fn_code
10562	= (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl);
10563      switch (fn_code)
10564	{
10565	case IA64_BUILTIN_NANQ:
10566	case IA64_BUILTIN_NANSQ:
10567	  {
10568	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
10569	    const char *str = c_getstr (*args);
10570	    int quiet = fn_code == IA64_BUILTIN_NANQ;
10571	    REAL_VALUE_TYPE real;
10572
10573	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
10574	      return build_real (type, real);
10575	    return NULL_TREE;
10576	  }
10577
10578	default:
10579	  break;
10580	}
10581    }
10582
10583#ifdef SUBTARGET_FOLD_BUILTIN
10584  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
10585#endif
10586
10587  return NULL_TREE;
10588}
10589
10590rtx
10591ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10592		     machine_mode mode ATTRIBUTE_UNUSED,
10593		     int ignore ATTRIBUTE_UNUSED)
10594{
10595  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10596  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
10597
10598  switch (fcode)
10599    {
10600    case IA64_BUILTIN_BSP:
10601      if (! target || ! register_operand (target, DImode))
10602	target = gen_reg_rtx (DImode);
10603      emit_insn (gen_bsp_value (target));
10604#ifdef POINTERS_EXTEND_UNSIGNED
10605      target = convert_memory_address (ptr_mode, target);
10606#endif
10607      return target;
10608
10609    case IA64_BUILTIN_FLUSHRS:
10610      emit_insn (gen_flushrs ());
10611      return const0_rtx;
10612
10613    case IA64_BUILTIN_INFQ:
10614    case IA64_BUILTIN_HUGE_VALQ:
10615      {
10616        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
10617	REAL_VALUE_TYPE inf;
10618	rtx tmp;
10619
10620	real_inf (&inf);
10621	tmp = const_double_from_real_value (inf, target_mode);
10622
10623	tmp = validize_mem (force_const_mem (target_mode, tmp));
10624
10625	if (target == 0)
10626	  target = gen_reg_rtx (target_mode);
10627
10628	emit_move_insn (target, tmp);
10629	return target;
10630      }
10631
10632    case IA64_BUILTIN_NANQ:
10633    case IA64_BUILTIN_NANSQ:
10634    case IA64_BUILTIN_FABSQ:
10635    case IA64_BUILTIN_COPYSIGNQ:
10636      return expand_call (exp, target, ignore);
10637
10638    default:
10639      gcc_unreachable ();
10640    }
10641
10642  return NULL_RTX;
10643}
10644
10645/* Return the ia64 builtin for CODE.  */
10646
10647static tree
10648ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10649{
10650  if (code >= IA64_BUILTIN_max)
10651    return error_mark_node;
10652
10653  return ia64_builtins[code];
10654}
10655
10656/* Implement TARGET_FUNCTION_ARG_PADDING.
10657
10658   For the HP-UX IA64 aggregate parameters are passed stored in the
10659   most significant bits of the stack slot.  */
10660
10661static pad_direction
10662ia64_function_arg_padding (machine_mode mode, const_tree type)
10663{
10664  /* Exception to normal case for structures/unions/etc.  */
10665  if (TARGET_HPUX
10666      && type
10667      && AGGREGATE_TYPE_P (type)
10668      && int_size_in_bytes (type) < UNITS_PER_WORD)
10669    return PAD_UPWARD;
10670
10671  /* Fall back to the default.  */
10672  return default_function_arg_padding (mode, type);
10673}
10674
10675/* Emit text to declare externally defined variables and functions, because
10676   the Intel assembler does not support undefined externals.  */
10677
10678void
10679ia64_asm_output_external (FILE *file, tree decl, const char *name)
10680{
10681  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10682     set in order to avoid putting out names that are never really
10683     used. */
10684  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10685    {
10686      /* maybe_assemble_visibility will return 1 if the assembler
10687	 visibility directive is output.  */
10688      int need_visibility = ((*targetm.binds_local_p) (decl)
10689			     && maybe_assemble_visibility (decl));
10690
10691      /* GNU as does not need anything here, but the HP linker does
10692	 need something for external functions.  */
10693      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10694	  && TREE_CODE (decl) == FUNCTION_DECL)
10695	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10696      else if (need_visibility && !TARGET_GNU_AS)
10697	(*targetm.asm_out.globalize_label) (file, name);
10698    }
10699}
10700
10701/* Set SImode div/mod functions, init_integral_libfuncs only initializes
10702   modes of word_mode and larger.  Rename the TFmode libfuncs using the
10703   HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
10704   backward compatibility. */
10705
10706static void
10707ia64_init_libfuncs (void)
10708{
10709  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10710  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10711  set_optab_libfunc (smod_optab, SImode, "__modsi3");
10712  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10713
10714  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10715  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10716  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10717  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10718  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10719
10720  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10721  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10722  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10723  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10724  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10725  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10726
10727  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10728  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10729  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10730  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10731  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10732
10733  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10734  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10735  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10736  /* HP-UX 11.23 libc does not have a function for unsigned
10737     SImode-to-TFmode conversion.  */
10738  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10739}
10740
10741/* Rename all the TFmode libfuncs using the HPUX conventions.  */
10742
10743static void
10744ia64_hpux_init_libfuncs (void)
10745{
10746  ia64_init_libfuncs ();
10747
10748  /* The HP SI millicode division and mod functions expect DI arguments.
10749     By turning them off completely we avoid using both libgcc and the
10750     non-standard millicode routines and use the HP DI millicode routines
10751     instead.  */
10752
10753  set_optab_libfunc (sdiv_optab, SImode, 0);
10754  set_optab_libfunc (udiv_optab, SImode, 0);
10755  set_optab_libfunc (smod_optab, SImode, 0);
10756  set_optab_libfunc (umod_optab, SImode, 0);
10757
10758  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10759  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10760  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10761  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10762
10763  /* HP-UX libc has TF min/max/abs routines in it.  */
10764  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10765  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10766  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10767
10768  /* ia64_expand_compare uses this.  */
10769  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10770
10771  /* These should never be used.  */
10772  set_optab_libfunc (eq_optab, TFmode, 0);
10773  set_optab_libfunc (ne_optab, TFmode, 0);
10774  set_optab_libfunc (gt_optab, TFmode, 0);
10775  set_optab_libfunc (ge_optab, TFmode, 0);
10776  set_optab_libfunc (lt_optab, TFmode, 0);
10777  set_optab_libfunc (le_optab, TFmode, 0);
10778}
10779
10780/* Rename the division and modulus functions in VMS.  */
10781
10782static void
10783ia64_vms_init_libfuncs (void)
10784{
10785  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10786  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10787  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10788  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10789  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10790  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10791  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10792  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10793#ifdef MEM_LIBFUNCS_INIT
10794  MEM_LIBFUNCS_INIT;
10795#endif
10796}
10797
10798/* Rename the TFmode libfuncs available from soft-fp in glibc using
10799   the HPUX conventions.  */
10800
10801static void
10802ia64_sysv4_init_libfuncs (void)
10803{
10804  ia64_init_libfuncs ();
10805
10806  /* These functions are not part of the HPUX TFmode interface.  We
10807     use them instead of _U_Qfcmp, which doesn't work the way we
10808     expect.  */
10809  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10810  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10811  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10812  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10813  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10814  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10815
10816  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10817     glibc doesn't have them.  */
10818}
10819
10820/* Use soft-fp.  */
10821
10822static void
10823ia64_soft_fp_init_libfuncs (void)
10824{
10825}
10826
10827static bool
10828ia64_vms_valid_pointer_mode (scalar_int_mode mode)
10829{
10830  return (mode == SImode || mode == DImode);
10831}
10832
10833/* For HPUX, it is illegal to have relocations in shared segments.  */
10834
10835static int
10836ia64_hpux_reloc_rw_mask (void)
10837{
10838  return 3;
10839}
10840
10841/* For others, relax this so that relocations to local data goes in
10842   read-only segments, but we still cannot allow global relocations
10843   in read-only segments.  */
10844
10845static int
10846ia64_reloc_rw_mask (void)
10847{
10848  return flag_pic ? 3 : 2;
10849}
10850
10851/* Return the section to use for X.  The only special thing we do here
10852   is to honor small data.  */
10853
10854static section *
10855ia64_select_rtx_section (machine_mode mode, rtx x,
10856			 unsigned HOST_WIDE_INT align)
10857{
10858  if (GET_MODE_SIZE (mode) > 0
10859      && GET_MODE_SIZE (mode) <= ia64_section_threshold
10860      && !TARGET_NO_SDATA)
10861    return sdata_section;
10862  else
10863    return default_elf_select_rtx_section (mode, x, align);
10864}
10865
10866static unsigned int
10867ia64_section_type_flags (tree decl, const char *name, int reloc)
10868{
10869  unsigned int flags = 0;
10870
10871  if (strcmp (name, ".sdata") == 0
10872      || strncmp (name, ".sdata.", 7) == 0
10873      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10874      || strncmp (name, ".sdata2.", 8) == 0
10875      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10876      || strcmp (name, ".sbss") == 0
10877      || strncmp (name, ".sbss.", 6) == 0
10878      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10879    flags = SECTION_SMALL;
10880
10881  flags |= default_section_type_flags (decl, name, reloc);
10882  return flags;
10883}
10884
10885/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10886   structure type and that the address of that type should be passed
10887   in out0, rather than in r8.  */
10888
10889static bool
10890ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10891{
10892  tree ret_type = TREE_TYPE (fntype);
10893
10894  /* The Itanium C++ ABI requires that out0, rather than r8, be used
10895     as the structure return address parameter, if the return value
10896     type has a non-trivial copy constructor or destructor.  It is not
10897     clear if this same convention should be used for other
10898     programming languages.  Until G++ 3.4, we incorrectly used r8 for
10899     these return values.  */
10900  return (abi_version_at_least (2)
10901	  && ret_type
10902	  && TYPE_MODE (ret_type) == BLKmode
10903	  && TREE_ADDRESSABLE (ret_type)
10904	  && lang_GNU_CXX ());
10905}
10906
10907/* Output the assembler code for a thunk function.  THUNK_DECL is the
10908   declaration for the thunk function itself, FUNCTION is the decl for
10909   the target function.  DELTA is an immediate constant offset to be
10910   added to THIS.  If VCALL_OFFSET is nonzero, the word at
10911   *(*this + vcall_offset) should be added to THIS.  */
10912
10913static void
10914ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10915		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10916		      tree function)
10917{
10918  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
10919  rtx this_rtx, funexp;
10920  rtx_insn *insn;
10921  unsigned int this_parmno;
10922  unsigned int this_regno;
10923  rtx delta_rtx;
10924
10925  reload_completed = 1;
10926  epilogue_completed = 1;
10927
10928  /* Set things up as ia64_expand_prologue might.  */
10929  last_scratch_gr_reg = 15;
10930
10931  memset (&current_frame_info, 0, sizeof (current_frame_info));
10932  current_frame_info.spill_cfa_off = -16;
10933  current_frame_info.n_input_regs = 1;
10934  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10935
10936  /* Mark the end of the (empty) prologue.  */
10937  emit_note (NOTE_INSN_PROLOGUE_END);
10938
10939  /* Figure out whether "this" will be the first parameter (the
10940     typical case) or the second parameter (as happens when the
10941     virtual function returns certain class objects).  */
10942  this_parmno
10943    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10944       ? 1 : 0);
10945  this_regno = IN_REG (this_parmno);
10946  if (!TARGET_REG_NAMES)
10947    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10948
10949  this_rtx = gen_rtx_REG (Pmode, this_regno);
10950
10951  /* Apply the constant offset, if required.  */
10952  delta_rtx = GEN_INT (delta);
10953  if (TARGET_ILP32)
10954    {
10955      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10956      REG_POINTER (tmp) = 1;
10957      if (delta && satisfies_constraint_I (delta_rtx))
10958	{
10959	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10960	  delta = 0;
10961	}
10962      else
10963	emit_insn (gen_ptr_extend (this_rtx, tmp));
10964    }
10965  if (delta)
10966    {
10967      if (!satisfies_constraint_I (delta_rtx))
10968	{
10969	  rtx tmp = gen_rtx_REG (Pmode, 2);
10970	  emit_move_insn (tmp, delta_rtx);
10971	  delta_rtx = tmp;
10972	}
10973      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10974    }
10975
10976  /* Apply the offset from the vtable, if required.  */
10977  if (vcall_offset)
10978    {
10979      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10980      rtx tmp = gen_rtx_REG (Pmode, 2);
10981
10982      if (TARGET_ILP32)
10983	{
10984	  rtx t = gen_rtx_REG (ptr_mode, 2);
10985	  REG_POINTER (t) = 1;
10986	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10987	  if (satisfies_constraint_I (vcall_offset_rtx))
10988	    {
10989	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10990	      vcall_offset = 0;
10991	    }
10992	  else
10993	    emit_insn (gen_ptr_extend (tmp, t));
10994	}
10995      else
10996	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10997
10998      if (vcall_offset)
10999	{
11000	  if (!satisfies_constraint_J (vcall_offset_rtx))
11001	    {
11002	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
11003	      emit_move_insn (tmp2, vcall_offset_rtx);
11004	      vcall_offset_rtx = tmp2;
11005	    }
11006	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
11007	}
11008
11009      if (TARGET_ILP32)
11010	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
11011      else
11012	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
11013
11014      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
11015    }
11016
11017  /* Generate a tail call to the target function.  */
11018  if (! TREE_USED (function))
11019    {
11020      assemble_external (function);
11021      TREE_USED (function) = 1;
11022    }
11023  funexp = XEXP (DECL_RTL (function), 0);
11024  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
11025  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
11026  insn = get_last_insn ();
11027  SIBLING_CALL_P (insn) = 1;
11028
11029  /* Code generation for calls relies on splitting.  */
11030  reload_completed = 1;
11031  epilogue_completed = 1;
11032  try_split (PATTERN (insn), insn, 0);
11033
11034  emit_barrier ();
11035
11036  /* Run just enough of rest_of_compilation to get the insns emitted.
11037     There's not really enough bulk here to make other passes such as
11038     instruction scheduling worth while.  */
11039
11040  emit_all_insn_group_barriers (NULL);
11041  insn = get_insns ();
11042  shorten_branches (insn);
11043  assemble_start_function (thunk, fnname);
11044  final_start_function (insn, file, 1);
11045  final (insn, file, 1);
11046  final_end_function ();
11047  assemble_end_function (thunk, fnname);
11048
11049  reload_completed = 0;
11050  epilogue_completed = 0;
11051}
11052
11053/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
11054
11055static rtx
11056ia64_struct_value_rtx (tree fntype,
11057		       int incoming ATTRIBUTE_UNUSED)
11058{
11059  if (TARGET_ABI_OPEN_VMS ||
11060      (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
11061    return NULL_RTX;
11062  return gen_rtx_REG (Pmode, GR_REG (8));
11063}
11064
11065static bool
11066ia64_scalar_mode_supported_p (scalar_mode mode)
11067{
11068  switch (mode)
11069    {
11070    case E_QImode:
11071    case E_HImode:
11072    case E_SImode:
11073    case E_DImode:
11074    case E_TImode:
11075      return true;
11076
11077    case E_SFmode:
11078    case E_DFmode:
11079    case E_XFmode:
11080    case E_RFmode:
11081      return true;
11082
11083    case E_TFmode:
11084      return true;
11085
11086    default:
11087      return false;
11088    }
11089}
11090
11091static bool
11092ia64_vector_mode_supported_p (machine_mode mode)
11093{
11094  switch (mode)
11095    {
11096    case E_V8QImode:
11097    case E_V4HImode:
11098    case E_V2SImode:
11099      return true;
11100
11101    case E_V2SFmode:
11102      return true;
11103
11104    default:
11105      return false;
11106    }
11107}
11108
11109/* Implement the FUNCTION_PROFILER macro.  */
11110
11111void
11112ia64_output_function_profiler (FILE *file, int labelno)
11113{
11114  bool indirect_call;
11115
11116  /* If the function needs a static chain and the static chain
11117     register is r15, we use an indirect call so as to bypass
11118     the PLT stub in case the executable is dynamically linked,
11119     because the stub clobbers r15 as per 5.3.6 of the psABI.
11120     We don't need to do that in non canonical PIC mode.  */
11121
11122  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
11123    {
11124      gcc_assert (STATIC_CHAIN_REGNUM == 15);
11125      indirect_call = true;
11126    }
11127  else
11128    indirect_call = false;
11129
11130  if (TARGET_GNU_AS)
11131    fputs ("\t.prologue 4, r40\n", file);
11132  else
11133    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
11134  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
11135
11136  if (NO_PROFILE_COUNTERS)
11137    fputs ("\tmov out3 = r0\n", file);
11138  else
11139    {
11140      char buf[20];
11141      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11142
11143      if (TARGET_AUTO_PIC)
11144	fputs ("\tmovl out3 = @gprel(", file);
11145      else
11146	fputs ("\taddl out3 = @ltoff(", file);
11147      assemble_name (file, buf);
11148      if (TARGET_AUTO_PIC)
11149	fputs (")\n", file);
11150      else
11151	fputs ("), r1\n", file);
11152    }
11153
11154  if (indirect_call)
11155    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
11156  fputs ("\t;;\n", file);
11157
11158  fputs ("\t.save rp, r42\n", file);
11159  fputs ("\tmov out2 = b0\n", file);
11160  if (indirect_call)
11161    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
11162  fputs ("\t.body\n", file);
11163  fputs ("\tmov out1 = r1\n", file);
11164  if (indirect_call)
11165    {
11166      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
11167      fputs ("\tmov b6 = r16\n", file);
11168      fputs ("\tld8 r1 = [r14]\n", file);
11169      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
11170    }
11171  else
11172    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
11173}
11174
11175static GTY(()) rtx mcount_func_rtx;
11176static rtx
11177gen_mcount_func_rtx (void)
11178{
11179  if (!mcount_func_rtx)
11180    mcount_func_rtx = init_one_libfunc ("_mcount");
11181  return mcount_func_rtx;
11182}
11183
11184void
11185ia64_profile_hook (int labelno)
11186{
11187  rtx label, ip;
11188
11189  if (NO_PROFILE_COUNTERS)
11190    label = const0_rtx;
11191  else
11192    {
11193      char buf[30];
11194      const char *label_name;
11195      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
11196      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
11197      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
11198      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
11199    }
11200  ip = gen_reg_rtx (Pmode);
11201  emit_insn (gen_ip_value (ip));
11202  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
11203                     VOIDmode,
11204		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
11205		     ip, Pmode,
11206		     label, Pmode);
11207}
11208
11209/* Return the mangling of TYPE if it is an extended fundamental type.  */
11210
11211static const char *
11212ia64_mangle_type (const_tree type)
11213{
11214  type = TYPE_MAIN_VARIANT (type);
11215
11216  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
11217      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
11218    return NULL;
11219
11220  /* On HP-UX, "long double" is mangled as "e" so __float128 is
11221     mangled as "e".  */
11222  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
11223    return "g";
11224  /* On HP-UX, "e" is not available as a mangling of __float80 so use
11225     an extended mangling.  Elsewhere, "e" is available since long
11226     double is 80 bits.  */
11227  if (TYPE_MODE (type) == XFmode)
11228    return TARGET_HPUX ? "u9__float80" : "e";
11229  if (TYPE_MODE (type) == RFmode)
11230    return "u7__fpreg";
11231  return NULL;
11232}
11233
11234/* Return the diagnostic message string if conversion from FROMTYPE to
11235   TOTYPE is not allowed, NULL otherwise.  */
11236static const char *
11237ia64_invalid_conversion (const_tree fromtype, const_tree totype)
11238{
11239  /* Reject nontrivial conversion to or from __fpreg.  */
11240  if (TYPE_MODE (fromtype) == RFmode
11241      && TYPE_MODE (totype) != RFmode
11242      && TYPE_MODE (totype) != VOIDmode)
11243    return N_("invalid conversion from %<__fpreg%>");
11244  if (TYPE_MODE (totype) == RFmode
11245      && TYPE_MODE (fromtype) != RFmode)
11246    return N_("invalid conversion to %<__fpreg%>");
11247  return NULL;
11248}
11249
11250/* Return the diagnostic message string if the unary operation OP is
11251   not permitted on TYPE, NULL otherwise.  */
11252static const char *
11253ia64_invalid_unary_op (int op, const_tree type)
11254{
11255  /* Reject operations on __fpreg other than unary + or &.  */
11256  if (TYPE_MODE (type) == RFmode
11257      && op != CONVERT_EXPR
11258      && op != ADDR_EXPR)
11259    return N_("invalid operation on %<__fpreg%>");
11260  return NULL;
11261}
11262
11263/* Return the diagnostic message string if the binary operation OP is
11264   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
11265static const char *
11266ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
11267{
11268  /* Reject operations on __fpreg.  */
11269  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
11270    return N_("invalid operation on %<__fpreg%>");
11271  return NULL;
11272}
11273
11274/* HP-UX version_id attribute.
11275   For object foo, if the version_id is set to 1234 put out an alias
11276   of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
11277   other than an alias statement because it is an illegal symbol name.  */
11278
11279static tree
11280ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
11281                                 tree name ATTRIBUTE_UNUSED,
11282                                 tree args,
11283                                 int flags ATTRIBUTE_UNUSED,
11284                                 bool *no_add_attrs)
11285{
11286  tree arg = TREE_VALUE (args);
11287
11288  if (TREE_CODE (arg) != STRING_CST)
11289    {
11290      error("version attribute is not a string");
11291      *no_add_attrs = true;
11292      return NULL_TREE;
11293    }
11294  return NULL_TREE;
11295}
11296
11297/* Target hook for c_mode_for_suffix.  */
11298
11299static machine_mode
11300ia64_c_mode_for_suffix (char suffix)
11301{
11302  if (suffix == 'q')
11303    return TFmode;
11304  if (suffix == 'w')
11305    return XFmode;
11306
11307  return VOIDmode;
11308}
11309
11310static GTY(()) rtx ia64_dconst_0_5_rtx;
11311
11312rtx
11313ia64_dconst_0_5 (void)
11314{
11315  if (! ia64_dconst_0_5_rtx)
11316    {
11317      REAL_VALUE_TYPE rv;
11318      real_from_string (&rv, "0.5");
11319      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
11320    }
11321  return ia64_dconst_0_5_rtx;
11322}
11323
11324static GTY(()) rtx ia64_dconst_0_375_rtx;
11325
11326rtx
11327ia64_dconst_0_375 (void)
11328{
11329  if (! ia64_dconst_0_375_rtx)
11330    {
11331      REAL_VALUE_TYPE rv;
11332      real_from_string (&rv, "0.375");
11333      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
11334    }
11335  return ia64_dconst_0_375_rtx;
11336}
11337
11338static fixed_size_mode
11339ia64_get_reg_raw_mode (int regno)
11340{
11341  if (FR_REGNO_P (regno))
11342    return XFmode;
11343  return default_get_reg_raw_mode(regno);
11344}
11345
11346/* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
11347   anymore.  */
11348
11349bool
11350ia64_member_type_forces_blk (const_tree, machine_mode mode)
11351{
11352  return TARGET_HPUX && mode == TFmode;
11353}
11354
11355/* Always default to .text section until HP-UX linker is fixed.  */
11356
11357ATTRIBUTE_UNUSED static section *
11358ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
11359			    enum node_frequency freq ATTRIBUTE_UNUSED,
11360			    bool startup ATTRIBUTE_UNUSED,
11361			    bool exit ATTRIBUTE_UNUSED)
11362{
11363  return NULL;
11364}
11365
11366/* Construct (set target (vec_select op0 (parallel perm))) and
11367   return true if that's a valid instruction in the active ISA.  */
11368
11369static bool
11370expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
11371{
11372  rtx rperm[MAX_VECT_LEN], x;
11373  unsigned i;
11374
11375  for (i = 0; i < nelt; ++i)
11376    rperm[i] = GEN_INT (perm[i]);
11377
11378  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
11379  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
11380  x = gen_rtx_SET (target, x);
11381
11382  rtx_insn *insn = emit_insn (x);
11383  if (recog_memoized (insn) < 0)
11384    {
11385      remove_insn (insn);
11386      return false;
11387    }
11388  return true;
11389}
11390
11391/* Similar, but generate a vec_concat from op0 and op1 as well.  */
11392
11393static bool
11394expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
11395			const unsigned char *perm, unsigned nelt)
11396{
11397  machine_mode v2mode;
11398  rtx x;
11399
11400  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
11401    return false;
11402  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
11403  return expand_vselect (target, x, perm, nelt);
11404}
11405
11406/* Try to expand a no-op permutation.  */
11407
11408static bool
11409expand_vec_perm_identity (struct expand_vec_perm_d *d)
11410{
11411  unsigned i, nelt = d->nelt;
11412
11413  for (i = 0; i < nelt; ++i)
11414    if (d->perm[i] != i)
11415      return false;
11416
11417  if (!d->testing_p)
11418    emit_move_insn (d->target, d->op0);
11419
11420  return true;
11421}
11422
11423/* Try to expand D via a shrp instruction.  */
11424
11425static bool
11426expand_vec_perm_shrp (struct expand_vec_perm_d *d)
11427{
11428  unsigned i, nelt = d->nelt, shift, mask;
11429  rtx tmp, hi, lo;
11430
11431  /* ??? Don't force V2SFmode into the integer registers.  */
11432  if (d->vmode == V2SFmode)
11433    return false;
11434
11435  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
11436
11437  shift = d->perm[0];
11438  if (BYTES_BIG_ENDIAN && shift > nelt)
11439    return false;
11440
11441  for (i = 1; i < nelt; ++i)
11442    if (d->perm[i] != ((shift + i) & mask))
11443      return false;
11444
11445  if (d->testing_p)
11446    return true;
11447
11448  hi = shift < nelt ? d->op1 : d->op0;
11449  lo = shift < nelt ? d->op0 : d->op1;
11450
11451  shift %= nelt;
11452
11453  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
11454
11455  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
11456  gcc_assert (IN_RANGE (shift, 1, 63));
11457
11458  /* Recall that big-endian elements are numbered starting at the top of
11459     the register.  Ideally we'd have a shift-left-pair.  But since we
11460     don't, convert to a shift the other direction.  */
11461  if (BYTES_BIG_ENDIAN)
11462    shift = 64 - shift;
11463
11464  tmp = gen_reg_rtx (DImode);
11465  hi = gen_lowpart (DImode, hi);
11466  lo = gen_lowpart (DImode, lo);
11467  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));
11468
11469  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
11470  return true;
11471}
11472
11473/* Try to instantiate D in a single instruction.  */
11474
11475static bool
11476expand_vec_perm_1 (struct expand_vec_perm_d *d)
11477{
11478  unsigned i, nelt = d->nelt;
11479  unsigned char perm2[MAX_VECT_LEN];
11480
11481  /* Try single-operand selections.  */
11482  if (d->one_operand_p)
11483    {
11484      if (expand_vec_perm_identity (d))
11485	return true;
11486      if (expand_vselect (d->target, d->op0, d->perm, nelt))
11487	return true;
11488    }
11489
11490  /* Try two operand selections.  */
11491  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
11492    return true;
11493
11494  /* Recognize interleave style patterns with reversed operands.  */
11495  if (!d->one_operand_p)
11496    {
11497      for (i = 0; i < nelt; ++i)
11498	{
11499	  unsigned e = d->perm[i];
11500	  if (e >= nelt)
11501	    e -= nelt;
11502	  else
11503	    e += nelt;
11504	  perm2[i] = e;
11505	}
11506
11507      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
11508	return true;
11509    }
11510
11511  if (expand_vec_perm_shrp (d))
11512    return true;
11513
11514  /* ??? Look for deposit-like permutations where most of the result
11515     comes from one vector unchanged and the rest comes from a
11516     sequential hunk of the other vector.  */
11517
11518  return false;
11519}
11520
11521/* Pattern match broadcast permutations.  */
11522
11523static bool
11524expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
11525{
11526  unsigned i, elt, nelt = d->nelt;
11527  unsigned char perm2[2];
11528  rtx temp;
11529  bool ok;
11530
11531  if (!d->one_operand_p)
11532    return false;
11533
11534  elt = d->perm[0];
11535  for (i = 1; i < nelt; ++i)
11536    if (d->perm[i] != elt)
11537      return false;
11538
11539  switch (d->vmode)
11540    {
11541    case E_V2SImode:
11542    case E_V2SFmode:
11543      /* Implementable by interleave.  */
11544      perm2[0] = elt;
11545      perm2[1] = elt + 2;
11546      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
11547      gcc_assert (ok);
11548      break;
11549
11550    case E_V8QImode:
11551      /* Implementable by extract + broadcast.  */
11552      if (BYTES_BIG_ENDIAN)
11553	elt = 7 - elt;
11554      elt *= BITS_PER_UNIT;
11555      temp = gen_reg_rtx (DImode);
11556      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
11557			    GEN_INT (8), GEN_INT (elt)));
11558      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
11559      break;
11560
11561    case E_V4HImode:
11562      /* Should have been matched directly by vec_select.  */
11563    default:
11564      gcc_unreachable ();
11565    }
11566
11567  return true;
11568}
11569
11570/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
11571   two vector permutation into a single vector permutation by using
11572   an interleave operation to merge the vectors.  */
11573
11574static bool
11575expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
11576{
11577  struct expand_vec_perm_d dremap, dfinal;
11578  unsigned char remap[2 * MAX_VECT_LEN];
11579  unsigned contents, i, nelt, nelt2;
11580  unsigned h0, h1, h2, h3;
11581  rtx_insn *seq;
11582  bool ok;
11583
11584  if (d->one_operand_p)
11585    return false;
11586
11587  nelt = d->nelt;
11588  nelt2 = nelt / 2;
11589
11590  /* Examine from whence the elements come.  */
11591  contents = 0;
11592  for (i = 0; i < nelt; ++i)
11593    contents |= 1u << d->perm[i];
11594
11595  memset (remap, 0xff, sizeof (remap));
11596  dremap = *d;
11597
11598  h0 = (1u << nelt2) - 1;
11599  h1 = h0 << nelt2;
11600  h2 = h0 << nelt;
11601  h3 = h0 << (nelt + nelt2);
11602
11603  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
11604    {
11605      for (i = 0; i < nelt; ++i)
11606	{
11607	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
11608	  remap[which] = i;
11609	  dremap.perm[i] = which;
11610	}
11611    }
11612  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
11613    {
11614      for (i = 0; i < nelt; ++i)
11615	{
11616	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
11617	  remap[which] = i;
11618	  dremap.perm[i] = which;
11619	}
11620    }
11621  else if ((contents & 0x5555) == contents)	/* mix even elements */
11622    {
11623      for (i = 0; i < nelt; ++i)
11624	{
11625	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
11626	  remap[which] = i;
11627	  dremap.perm[i] = which;
11628	}
11629    }
11630  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
11631    {
11632      for (i = 0; i < nelt; ++i)
11633	{
11634	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
11635	  remap[which] = i;
11636	  dremap.perm[i] = which;
11637	}
11638    }
11639  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
11640    {
11641      unsigned shift = ctz_hwi (contents);
11642      for (i = 0; i < nelt; ++i)
11643	{
11644	  unsigned which = (i + shift) & (2 * nelt - 1);
11645	  remap[which] = i;
11646	  dremap.perm[i] = which;
11647	}
11648    }
11649  else
11650    return false;
11651
11652  /* Use the remapping array set up above to move the elements from their
11653     swizzled locations into their final destinations.  */
11654  dfinal = *d;
11655  for (i = 0; i < nelt; ++i)
11656    {
11657      unsigned e = remap[d->perm[i]];
11658      gcc_assert (e < nelt);
11659      dfinal.perm[i] = e;
11660    }
11661  if (d->testing_p)
11662    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
11663  else
11664    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
11665  dfinal.op1 = dfinal.op0;
11666  dfinal.one_operand_p = true;
11667  dremap.target = dfinal.op0;
11668
11669  /* Test if the final remap can be done with a single insn.  For V4HImode
11670     this *will* succeed.  For V8QImode or V2SImode it may not.  */
11671  start_sequence ();
11672  ok = expand_vec_perm_1 (&dfinal);
11673  seq = get_insns ();
11674  end_sequence ();
11675  if (!ok)
11676    return false;
11677  if (d->testing_p)
11678    return true;
11679
11680  ok = expand_vec_perm_1 (&dremap);
11681  gcc_assert (ok);
11682
11683  emit_insn (seq);
11684  return true;
11685}
11686
11687/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
11688   constant permutation via two mux2 and a merge.  */
11689
11690static bool
11691expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
11692{
11693  unsigned char perm2[4];
11694  rtx rmask[4];
11695  unsigned i;
11696  rtx t0, t1, mask, x;
11697  bool ok;
11698
11699  if (d->vmode != V4HImode || d->one_operand_p)
11700    return false;
11701  if (d->testing_p)
11702    return true;
11703
11704  for (i = 0; i < 4; ++i)
11705    {
11706      perm2[i] = d->perm[i] & 3;
11707      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
11708    }
11709  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
11710  mask = force_reg (V4HImode, mask);
11711
11712  t0 = gen_reg_rtx (V4HImode);
11713  t1 = gen_reg_rtx (V4HImode);
11714
11715  ok = expand_vselect (t0, d->op0, perm2, 4);
11716  gcc_assert (ok);
11717  ok = expand_vselect (t1, d->op1, perm2, 4);
11718  gcc_assert (ok);
11719
11720  x = gen_rtx_AND (V4HImode, mask, t0);
11721  emit_insn (gen_rtx_SET (t0, x));
11722
11723  x = gen_rtx_NOT (V4HImode, mask);
11724  x = gen_rtx_AND (V4HImode, x, t1);
11725  emit_insn (gen_rtx_SET (t1, x));
11726
11727  x = gen_rtx_IOR (V4HImode, t0, t1);
11728  emit_insn (gen_rtx_SET (d->target, x));
11729
11730  return true;
11731}
11732
11733/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
11734   With all of the interface bits taken care of, perform the expansion
11735   in D and return true on success.  */
11736
11737static bool
11738ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
11739{
11740  if (expand_vec_perm_1 (d))
11741    return true;
11742  if (expand_vec_perm_broadcast (d))
11743    return true;
11744  if (expand_vec_perm_interleave_2 (d))
11745    return true;
11746  if (expand_vec_perm_v4hi_5 (d))
11747    return true;
11748  return false;
11749}
11750
11751/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  */
11752
11753static bool
11754ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
11755			       rtx op1, const vec_perm_indices &sel)
11756{
11757  struct expand_vec_perm_d d;
11758  unsigned char perm[MAX_VECT_LEN];
11759  unsigned int i, nelt, which;
11760
11761  d.target = target;
11762  d.op0 = op0;
11763  d.op1 = op1;
11764
11765  d.vmode = vmode;
11766  gcc_assert (VECTOR_MODE_P (d.vmode));
11767  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
11768  d.testing_p = !target;
11769
11770  gcc_assert (sel.length () == nelt);
11771  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
11772
11773  for (i = which = 0; i < nelt; ++i)
11774    {
11775      unsigned int ei = sel[i] & (2 * nelt - 1);
11776
11777      which |= (ei < nelt ? 1 : 2);
11778      d.perm[i] = ei;
11779      perm[i] = ei;
11780    }
11781
11782  switch (which)
11783    {
11784    default:
11785      gcc_unreachable();
11786
11787    case 3:
11788      if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
11789	{
11790	  d.one_operand_p = false;
11791	  break;
11792	}
11793
11794      /* The elements of PERM do not suggest that only the first operand
11795	 is used, but both operands are identical.  Allow easier matching
11796	 of the permutation by folding the permutation into the single
11797	 input vector.  */
11798      for (i = 0; i < nelt; ++i)
11799	if (d.perm[i] >= nelt)
11800	  d.perm[i] -= nelt;
11801      /* FALLTHRU */
11802
11803    case 1:
11804      d.op1 = d.op0;
11805      d.one_operand_p = true;
11806      break;
11807
11808    case 2:
11809      for (i = 0; i < nelt; ++i)
11810        d.perm[i] -= nelt;
11811      d.op0 = d.op1;
11812      d.one_operand_p = true;
11813      break;
11814    }
11815
11816  if (d.testing_p)
11817    {
11818      /* We have to go through the motions and see if we can
11819	 figure out how to generate the requested permutation.  */
11820      d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
11821      d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
11822      if (!d.one_operand_p)
11823	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
11824
11825      start_sequence ();
11826      bool ret = ia64_expand_vec_perm_const_1 (&d);
11827      end_sequence ();
11828
11829      return ret;
11830    }
11831
11832  if (ia64_expand_vec_perm_const_1 (&d))
11833    return true;
11834
11835  /* If the mask says both arguments are needed, but they are the same,
11836     the above tried to expand with one_operand_p true.  If that didn't
11837     work, retry with one_operand_p false, as that's what we used in _ok.  */
11838  if (which == 3 && d.one_operand_p)
11839    {
11840      memcpy (d.perm, perm, sizeof (perm));
11841      d.one_operand_p = false;
11842      return ia64_expand_vec_perm_const_1 (&d);
11843    }
11844
11845  return false;
11846}
11847
11848void
11849ia64_expand_vec_setv2sf (rtx operands[3])
11850{
11851  struct expand_vec_perm_d d;
11852  unsigned int which;
11853  bool ok;
11854
11855  d.target = operands[0];
11856  d.op0 = operands[0];
11857  d.op1 = gen_reg_rtx (V2SFmode);
11858  d.vmode = V2SFmode;
11859  d.nelt = 2;
11860  d.one_operand_p = false;
11861  d.testing_p = false;
11862
11863  which = INTVAL (operands[2]);
11864  gcc_assert (which <= 1);
11865  d.perm[0] = 1 - which;
11866  d.perm[1] = which + 2;
11867
11868  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
11869
11870  ok = ia64_expand_vec_perm_const_1 (&d);
11871  gcc_assert (ok);
11872}
11873
11874void
11875ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
11876{
11877  struct expand_vec_perm_d d;
11878  machine_mode vmode = GET_MODE (target);
11879  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
11880  bool ok;
11881
11882  d.target = target;
11883  d.op0 = op0;
11884  d.op1 = op1;
11885  d.vmode = vmode;
11886  d.nelt = nelt;
11887  d.one_operand_p = false;
11888  d.testing_p = false;
11889
11890  for (i = 0; i < nelt; ++i)
11891    d.perm[i] = i * 2 + odd;
11892
11893  ok = ia64_expand_vec_perm_const_1 (&d);
11894  gcc_assert (ok);
11895}
11896
11897/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
11898
11899   In BR regs, we can't change the DImode at all.
11900   In FP regs, we can't change FP values to integer values and vice versa,
11901   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */
11902
11903static bool
11904ia64_can_change_mode_class (machine_mode from, machine_mode to,
11905			    reg_class_t rclass)
11906{
11907  if (reg_classes_intersect_p (rclass, BR_REGS))
11908    return from == to;
11909  if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
11910    return !reg_classes_intersect_p (rclass, FR_REGS);
11911  return true;
11912}
11913
11914#include "gt-ia64.h"
11915