1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
3   2009, 2010
4   Free Software Foundation, Inc.
5   Contributed by James E. Wilson <wilson@cygnus.com> and
6		  David Mosberger <davidm@hpl.hp.com>.
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3, or (at your option)
13any later version.
14
15GCC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3.  If not see
22<http://www.gnu.org/licenses/>.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "libfuncs.h"
46#include "toplev.h"
47#include "sched-int.h"
48#include "timevar.h"
49#include "target.h"
50#include "target-def.h"
51#include "tm_p.h"
52#include "hashtab.h"
53#include "langhooks.h"
54#include "cfglayout.h"
55#include "gimple.h"
56#include "intl.h"
57#include "df.h"
58#include "debug.h"
59#include "params.h"
60#include "dbgcnt.h"
61#include "tm-constrs.h"
62#include "sel-sched.h"
63
64/* This is used for communication between ASM_OUTPUT_LABEL and
65   ASM_OUTPUT_LABELREF.  */
66int ia64_asm_output_label = 0;
67
68/* Register names for ia64_expand_prologue.  */
69static const char * const ia64_reg_numbers[96] =
70{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79  "r104","r105","r106","r107","r108","r109","r110","r111",
80  "r112","r113","r114","r115","r116","r117","r118","r119",
81  "r120","r121","r122","r123","r124","r125","r126","r127"};
82
83/* ??? These strings could be shared with REGISTER_NAMES.  */
84static const char * const ia64_input_reg_names[8] =
85{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
86
87/* ??? These strings could be shared with REGISTER_NAMES.  */
88static const char * const ia64_local_reg_names[80] =
89{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
99
100/* ??? These strings could be shared with REGISTER_NAMES.  */
101static const char * const ia64_output_reg_names[8] =
102{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
103
/* Which CPU we are scheduling for.  */
105enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
106
107/* Determines whether we run our final scheduling pass or not.  We always
108   avoid the normal second scheduling pass.  */
109static int ia64_flag_schedule_insns2;
110
111/* Determines whether we run variable tracking in machine dependent
112   reorganization.  */
113static int ia64_flag_var_tracking;
114
115/* Variables which are this size or smaller are put in the sdata/sbss
116   sections.  */
117
118unsigned int ia64_section_threshold;
119
120/* The following variable is used by the DFA insn scheduler.  The value is
121   TRUE if we do insn bundling instead of insn scheduling.  */
122int bundling_p = 0;
123
124enum ia64_frame_regs
125{
126   reg_fp,
127   reg_save_b0,
128   reg_save_pr,
129   reg_save_ar_pfs,
130   reg_save_ar_unat,
131   reg_save_ar_lc,
132   reg_save_gp,
133   number_of_ia64_frame_regs
134};
135
136/* Structure to be filled in by ia64_compute_frame_size with register
137   save masks and offsets for the current function.  */
138
139struct ia64_frame_info
140{
141  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
142				   the caller's scratch area.  */
143  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
144  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
145  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
146  HARD_REG_SET mask;		/* mask of saved registers.  */
147  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
148				   registers or long-term scratches.  */
149  int n_spilled;		/* number of spilled registers.  */
150  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
151  int n_input_regs;		/* number of input registers used.  */
152  int n_local_regs;		/* number of local registers used.  */
153  int n_output_regs;		/* number of output registers used.  */
154  int n_rotate_regs;		/* number of rotating registers used.  */
155
156  char need_regstk;		/* true if a .regstk directive needed.  */
157  char initialized;		/* true if the data is finalized.  */
158};
159
160/* Current frame information calculated by ia64_compute_frame_size.  */
161static struct ia64_frame_info current_frame_info;
162/* The actual registers that are emitted.  */
163static int emitted_frame_related_regs[number_of_ia64_frame_regs];
164
165static int ia64_first_cycle_multipass_dfa_lookahead (void);
166static void ia64_dependencies_evaluation_hook (rtx, rtx);
167static void ia64_init_dfa_pre_cycle_insn (void);
168static rtx ia64_dfa_pre_cycle_insn (void);
169static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
170static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
171static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
172static void ia64_h_i_d_extended (void);
173static void * ia64_alloc_sched_context (void);
174static void ia64_init_sched_context (void *, bool);
175static void ia64_set_sched_context (void *);
176static void ia64_clear_sched_context (void *);
177static void ia64_free_sched_context (void *);
178static int ia64_mode_to_int (enum machine_mode);
179static void ia64_set_sched_flags (spec_info_t);
180static ds_t ia64_get_insn_spec_ds (rtx);
181static ds_t ia64_get_insn_checked_ds (rtx);
182static bool ia64_skip_rtx_p (const_rtx);
183static int ia64_speculate_insn (rtx, ds_t, rtx *);
184static bool ia64_needs_block_p (int);
185static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
186static int ia64_spec_check_p (rtx);
187static int ia64_spec_check_src_p (rtx);
188static rtx gen_tls_get_addr (void);
189static rtx gen_thread_pointer (void);
190static int find_gr_spill (enum ia64_frame_regs, int);
191static int next_scratch_gr_reg (void);
192static void mark_reg_gr_used_mask (rtx, void *);
193static void ia64_compute_frame_size (HOST_WIDE_INT);
194static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
195static void finish_spill_pointers (void);
196static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
197static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
198static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
199static rtx gen_movdi_x (rtx, rtx, rtx);
200static rtx gen_fr_spill_x (rtx, rtx, rtx);
201static rtx gen_fr_restore_x (rtx, rtx, rtx);
202
203static bool ia64_can_eliminate (const int, const int);
204static enum machine_mode hfa_element_mode (const_tree, bool);
205static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
206					 tree, int *, int);
207static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
208				   tree, bool);
209static bool ia64_function_ok_for_sibcall (tree, tree);
210static bool ia64_return_in_memory (const_tree, const_tree);
211static bool ia64_rtx_costs (rtx, int, int, int *, bool);
212static int ia64_unspec_may_trap_p (const_rtx, unsigned);
213static void fix_range (const char *);
214static bool ia64_handle_option (size_t, const char *, int);
215static struct machine_function * ia64_init_machine_status (void);
216static void emit_insn_group_barriers (FILE *);
217static void emit_all_insn_group_barriers (FILE *);
218static void final_emit_insn_group_barriers (FILE *);
219static void emit_predicate_relation_info (void);
220static void ia64_reorg (void);
221static bool ia64_in_small_data_p (const_tree);
222static void process_epilogue (FILE *, rtx, bool, bool);
223static int process_set (FILE *, rtx, rtx, bool, bool);
224
225static bool ia64_assemble_integer (rtx, unsigned int, int);
226static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
227static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
228static void ia64_output_function_end_prologue (FILE *);
229
230static int ia64_issue_rate (void);
231static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
232static void ia64_sched_init (FILE *, int, int);
233static void ia64_sched_init_global (FILE *, int, int);
234static void ia64_sched_finish_global (FILE *, int);
235static void ia64_sched_finish (FILE *, int);
236static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
237static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
238static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
239static int ia64_variable_issue (FILE *, int, rtx, int);
240
241static struct bundle_state *get_free_bundle_state (void);
242static void free_bundle_state (struct bundle_state *);
243static void initiate_bundle_states (void);
244static void finish_bundle_states (void);
245static unsigned bundle_state_hash (const void *);
246static int bundle_state_eq_p (const void *, const void *);
247static int insert_bundle_state (struct bundle_state *);
248static void initiate_bundle_state_table (void);
249static void finish_bundle_state_table (void);
250static int try_issue_nops (struct bundle_state *, int);
251static int try_issue_insn (struct bundle_state *, rtx);
252static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
253static int get_max_pos (state_t);
254static int get_template (state_t, int);
255
256static rtx get_next_important_insn (rtx, rtx);
257static bool important_for_bundling_p (rtx);
258static void bundling (FILE *, int, rtx, rtx);
259
260static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
261				  HOST_WIDE_INT, tree);
262static void ia64_file_start (void);
263static void ia64_globalize_decl_name (FILE *, tree);
264
265static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
266static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
267static section *ia64_select_rtx_section (enum machine_mode, rtx,
268					 unsigned HOST_WIDE_INT);
269static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
270     ATTRIBUTE_UNUSED;
271static unsigned int ia64_section_type_flags (tree, const char *, int);
272static void ia64_init_libfuncs (void)
273     ATTRIBUTE_UNUSED;
274static void ia64_hpux_init_libfuncs (void)
275     ATTRIBUTE_UNUSED;
276static void ia64_sysv4_init_libfuncs (void)
277     ATTRIBUTE_UNUSED;
278static void ia64_vms_init_libfuncs (void)
279     ATTRIBUTE_UNUSED;
280static void ia64_soft_fp_init_libfuncs (void)
281     ATTRIBUTE_UNUSED;
282static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
283     ATTRIBUTE_UNUSED;
284static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
285     ATTRIBUTE_UNUSED;
286
287static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
288static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
289static void ia64_encode_section_info (tree, rtx, int);
290static rtx ia64_struct_value_rtx (tree, int);
291static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
292static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
293static bool ia64_vector_mode_supported_p (enum machine_mode mode);
294static bool ia64_cannot_force_const_mem (rtx);
295static const char *ia64_mangle_type (const_tree);
296static const char *ia64_invalid_conversion (const_tree, const_tree);
297static const char *ia64_invalid_unary_op (int, const_tree);
298static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
299static enum machine_mode ia64_c_mode_for_suffix (char);
300static enum machine_mode ia64_promote_function_mode (const_tree,
301						     enum machine_mode,
302						     int *,
303						     const_tree,
304						     int);
305static void ia64_trampoline_init (rtx, tree, rtx);
306static void ia64_override_options_after_change (void);
307
308/* Table of valid machine attributes.  */
309static const struct attribute_spec ia64_attribute_table[] =
310{
311  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
312  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
313  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
314#if TARGET_ABI_OPEN_VMS
315  { "common_object",   1, 1, true, false, false, ia64_vms_common_object_attribute},
316#endif
317  { "version_id",      1, 1, true, false, false,
318    ia64_handle_version_id_attribute },
319  { NULL,	       0, 0, false, false, false, NULL }
320};
321
322/* Initialize the GCC target structure.  */
323#undef TARGET_ATTRIBUTE_TABLE
324#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
325
326#undef TARGET_INIT_BUILTINS
327#define TARGET_INIT_BUILTINS ia64_init_builtins
328
329#undef TARGET_EXPAND_BUILTIN
330#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
331
332#undef TARGET_ASM_BYTE_OP
333#define TARGET_ASM_BYTE_OP "\tdata1\t"
334#undef TARGET_ASM_ALIGNED_HI_OP
335#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
336#undef TARGET_ASM_ALIGNED_SI_OP
337#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
338#undef TARGET_ASM_ALIGNED_DI_OP
339#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
340#undef TARGET_ASM_UNALIGNED_HI_OP
341#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
342#undef TARGET_ASM_UNALIGNED_SI_OP
343#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
344#undef TARGET_ASM_UNALIGNED_DI_OP
345#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
346#undef TARGET_ASM_INTEGER
347#define TARGET_ASM_INTEGER ia64_assemble_integer
348
349#undef TARGET_ASM_FUNCTION_PROLOGUE
350#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
351#undef TARGET_ASM_FUNCTION_END_PROLOGUE
352#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
353#undef TARGET_ASM_FUNCTION_EPILOGUE
354#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
355
356#undef TARGET_IN_SMALL_DATA_P
357#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
358
359#undef TARGET_SCHED_ADJUST_COST_2
360#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
361#undef TARGET_SCHED_ISSUE_RATE
362#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
363#undef TARGET_SCHED_VARIABLE_ISSUE
364#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
365#undef TARGET_SCHED_INIT
366#define TARGET_SCHED_INIT ia64_sched_init
367#undef TARGET_SCHED_FINISH
368#define TARGET_SCHED_FINISH ia64_sched_finish
369#undef TARGET_SCHED_INIT_GLOBAL
370#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
371#undef TARGET_SCHED_FINISH_GLOBAL
372#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
373#undef TARGET_SCHED_REORDER
374#define TARGET_SCHED_REORDER ia64_sched_reorder
375#undef TARGET_SCHED_REORDER2
376#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
377
378#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
379#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
380
381#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
382#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
383
384#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
385#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
386#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
387#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
388
389#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
390#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
391  ia64_first_cycle_multipass_dfa_lookahead_guard
392
393#undef TARGET_SCHED_DFA_NEW_CYCLE
394#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
395
396#undef TARGET_SCHED_H_I_D_EXTENDED
397#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
398
399#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
400#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
401
402#undef TARGET_SCHED_INIT_SCHED_CONTEXT
403#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
404
405#undef TARGET_SCHED_SET_SCHED_CONTEXT
406#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
407
408#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
409#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
410
411#undef TARGET_SCHED_FREE_SCHED_CONTEXT
412#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
413
414#undef TARGET_SCHED_SET_SCHED_FLAGS
415#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
416
417#undef TARGET_SCHED_GET_INSN_SPEC_DS
418#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
419
420#undef TARGET_SCHED_GET_INSN_CHECKED_DS
421#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
422
423#undef TARGET_SCHED_SPECULATE_INSN
424#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
425
426#undef TARGET_SCHED_NEEDS_BLOCK_P
427#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
428
429#undef TARGET_SCHED_GEN_SPEC_CHECK
430#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
431
432#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
433#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
434  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
435
436#undef TARGET_SCHED_SKIP_RTX_P
437#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
438
439#undef TARGET_FUNCTION_OK_FOR_SIBCALL
440#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
441#undef TARGET_ARG_PARTIAL_BYTES
442#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
443
444#undef TARGET_ASM_OUTPUT_MI_THUNK
445#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
446#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
447#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
448
449#undef TARGET_ASM_FILE_START
450#define TARGET_ASM_FILE_START ia64_file_start
451
452#undef TARGET_ASM_GLOBALIZE_DECL_NAME
453#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
454
455#undef TARGET_RTX_COSTS
456#define TARGET_RTX_COSTS ia64_rtx_costs
457#undef TARGET_ADDRESS_COST
458#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
459
460#undef TARGET_UNSPEC_MAY_TRAP_P
461#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
462
463#undef TARGET_MACHINE_DEPENDENT_REORG
464#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
465
466#undef TARGET_ENCODE_SECTION_INFO
467#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
468
469#undef  TARGET_SECTION_TYPE_FLAGS
470#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
471
472#ifdef HAVE_AS_TLS
473#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
474#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
475#endif
476
477#undef TARGET_PROMOTE_FUNCTION_MODE
478#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode
479
480/* ??? Investigate.  */
481#if 0
482#undef TARGET_PROMOTE_PROTOTYPES
483#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
484#endif
485
486#undef TARGET_STRUCT_VALUE_RTX
487#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
488#undef TARGET_RETURN_IN_MEMORY
489#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
490#undef TARGET_SETUP_INCOMING_VARARGS
491#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
492#undef TARGET_STRICT_ARGUMENT_NAMING
493#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
494#undef TARGET_MUST_PASS_IN_STACK
495#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
496
497#undef TARGET_GIMPLIFY_VA_ARG_EXPR
498#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
499
500#undef TARGET_UNWIND_EMIT
501#define TARGET_UNWIND_EMIT process_for_unwind_directive
502
503#undef TARGET_SCALAR_MODE_SUPPORTED_P
504#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
505#undef TARGET_VECTOR_MODE_SUPPORTED_P
506#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
507
508/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
509   in an order different from the specified program order.  */
510#undef TARGET_RELAXED_ORDERING
511#define TARGET_RELAXED_ORDERING true
512
513#undef TARGET_DEFAULT_TARGET_FLAGS
514#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
515#undef TARGET_HANDLE_OPTION
516#define TARGET_HANDLE_OPTION ia64_handle_option
517
518#undef TARGET_CANNOT_FORCE_CONST_MEM
519#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
520
521#undef TARGET_MANGLE_TYPE
522#define TARGET_MANGLE_TYPE ia64_mangle_type
523
524#undef TARGET_INVALID_CONVERSION
525#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
526#undef TARGET_INVALID_UNARY_OP
527#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
528#undef TARGET_INVALID_BINARY_OP
529#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
530
531#undef TARGET_C_MODE_FOR_SUFFIX
532#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
533
534#undef TARGET_CAN_ELIMINATE
535#define TARGET_CAN_ELIMINATE ia64_can_eliminate
536
537#undef TARGET_TRAMPOLINE_INIT
538#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
539
540#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
541#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
542
543struct gcc_target targetm = TARGET_INITIALIZER;
544
545typedef enum
546  {
547    ADDR_AREA_NORMAL,	/* normal address area */
548    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
549  }
550ia64_addr_area;
551
552static GTY(()) tree small_ident1;
553static GTY(()) tree small_ident2;
554
555static void
556init_idents (void)
557{
558  if (small_ident1 == 0)
559    {
560      small_ident1 = get_identifier ("small");
561      small_ident2 = get_identifier ("__small__");
562    }
563}
564
565/* Retrieve the address area that has been chosen for the given decl.  */
566
567static ia64_addr_area
568ia64_get_addr_area (tree decl)
569{
570  tree model_attr;
571
572  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
573  if (model_attr)
574    {
575      tree id;
576
577      init_idents ();
578      id = TREE_VALUE (TREE_VALUE (model_attr));
579      if (id == small_ident1 || id == small_ident2)
580	return ADDR_AREA_SMALL;
581    }
582  return ADDR_AREA_NORMAL;
583}
584
585static tree
586ia64_handle_model_attribute (tree *node, tree name, tree args,
587			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
588{
589  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
590  ia64_addr_area area;
591  tree arg, decl = *node;
592
593  init_idents ();
594  arg = TREE_VALUE (args);
595  if (arg == small_ident1 || arg == small_ident2)
596    {
597      addr_area = ADDR_AREA_SMALL;
598    }
599  else
600    {
601      warning (OPT_Wattributes, "invalid argument of %qE attribute",
602	       name);
603      *no_add_attrs = true;
604    }
605
606  switch (TREE_CODE (decl))
607    {
608    case VAR_DECL:
609      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
610	   == FUNCTION_DECL)
611	  && !TREE_STATIC (decl))
612	{
613	  error_at (DECL_SOURCE_LOCATION (decl),
614		    "an address area attribute cannot be specified for "
615		    "local variables");
616	  *no_add_attrs = true;
617	}
618      area = ia64_get_addr_area (decl);
619      if (area != ADDR_AREA_NORMAL && addr_area != area)
620	{
621	  error ("address area of %q+D conflicts with previous "
622		 "declaration", decl);
623	  *no_add_attrs = true;
624	}
625      break;
626
627    case FUNCTION_DECL:
628      error_at (DECL_SOURCE_LOCATION (decl),
629		"address area attribute cannot be specified for "
630		"functions");
631      *no_add_attrs = true;
632      break;
633
634    default:
635      warning (OPT_Wattributes, "%qE attribute ignored",
636	       name);
637      *no_add_attrs = true;
638      break;
639    }
640
641  return NULL_TREE;
642}
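
/* Illustrative use of the "model" attribute handled above (an example
   added for clarity, not part of the original source).  Marking an
   object "small" (or "__small__") places it in the small address area
   described above, i.e. addressable by "addl" (-2MB < addr < 2MB).  */
#if 0
static int counter __attribute__ ((model ("small")));
#endif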
643
644/* The section must have global and overlaid attributes.  */
645#define SECTION_VMS_OVERLAY SECTION_MACH_DEP
646
647/* Part of the low level implementation of DEC Ada pragma Common_Object which
648   enables the shared use of variables stored in overlaid linker areas
649   corresponding to the use of Fortran COMMON.  */
650
651static tree
652ia64_vms_common_object_attribute (tree *node, tree name, tree args,
653				  int flags ATTRIBUTE_UNUSED,
654				  bool *no_add_attrs)
655{
  tree decl = *node;
  tree id, val;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}
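
/* Illustrative sketch (added; not from the original source, and the
   C-level spelling is an assumption, since the real interface is the
   DEC Ada pragma Common_Object):

     int shared_var __attribute__ ((common_object ("PSECT_NAME")));

   The handler above marks the variable as common and records the
   argument as its section name.  */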
677
678/* Part of the low level implementation of DEC Ada pragma Common_Object.  */
679
680void
681ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
682				     unsigned HOST_WIDE_INT size,
683				     unsigned int align)
684{
685  tree attr = DECL_ATTRIBUTES (decl);
686
  /* Since the common_object attribute sets DECL_SECTION_NAME, check
     DECL_SECTION_NAME before looking up the attribute.  */
689  if (DECL_SECTION_NAME (decl) && attr)
690    attr = lookup_attribute ("common_object", attr);
691  else
692    attr = NULL_TREE;
693
694  if (!attr)
695    {
696      /*  Code from elfos.h.  */
697      fprintf (file, "%s", COMMON_ASM_OP);
698      assemble_name (file, name);
699      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
700	       size, align / BITS_PER_UNIT);
701    }
702  else
703    {
704      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
705      ASM_OUTPUT_LABEL (file, name);
706      ASM_OUTPUT_SKIP (file, size ? size : 1);
707    }
708}
709
710/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */
711
712void
713ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
714				tree decl)
715{
716  if (!(flags & SECTION_VMS_OVERLAY))
717    {
718      default_elf_asm_named_section (name, flags, decl);
719      return;
720    }
721  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
722    abort ();
723
724  if (flags & SECTION_DECLARED)
725    {
726      fprintf (asm_out_file, "\t.section\t%s\n", name);
727      return;
728    }
729
730  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
731}
732
733static void
734ia64_encode_addr_area (tree decl, rtx symbol)
735{
736  int flags;
737
738  flags = SYMBOL_REF_FLAGS (symbol);
739  switch (ia64_get_addr_area (decl))
740    {
741    case ADDR_AREA_NORMAL: break;
742    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
743    default: gcc_unreachable ();
744    }
745  SYMBOL_REF_FLAGS (symbol) = flags;
746}
747
748static void
749ia64_encode_section_info (tree decl, rtx rtl, int first)
750{
751  default_encode_section_info (decl, rtl, first);
752
753  /* Careful not to prod global register variables.  */
754  if (TREE_CODE (decl) == VAR_DECL
755      && GET_CODE (DECL_RTL (decl)) == MEM
756      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
757      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
758    ia64_encode_addr_area (decl, XEXP (rtl, 0));
759}
760
761/* Return 1 if the operands of a move are ok.  */
762
763int
764ia64_move_ok (rtx dst, rtx src)
765{
766  /* If we're under init_recog_no_volatile, we'll not be able to use
767     memory_operand.  So check the code directly and don't worry about
768     the validity of the underlying address, which should have been
769     checked elsewhere anyway.  */
770  if (GET_CODE (dst) != MEM)
771    return 1;
772  if (GET_CODE (src) == MEM)
773    return 0;
774  if (register_operand (src, VOIDmode))
775    return 1;
776
  /* Otherwise, SRC must be a constant: either 0, 0.0, or 1.0.  */
778  if (INTEGRAL_MODE_P (GET_MODE (dst)))
779    return src == const0_rtx;
780  else
781    return satisfies_constraint_G (src);
782}
783
784/* Return 1 if the operands are ok for a floating point load pair.  */
785
786int
787ia64_load_pair_ok (rtx dst, rtx src)
788{
789  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
790    return 0;
791  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
792    return 0;
793  switch (GET_CODE (XEXP (src, 0)))
794    {
795    case REG:
796    case POST_INC:
797      break;
798    case POST_DEC:
799      return 0;
800    case POST_MODIFY:
801      {
802	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
803
804	if (GET_CODE (adjust) != CONST_INT
805	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
806	  return 0;
807      }
808      break;
809    default:
810      abort ();
811    }
812  return 1;
813}
814
815int
816addp4_optimize_ok (rtx op1, rtx op2)
817{
  return (basereg_operand (op1, GET_MODE (op1))
	  != basereg_operand (op2, GET_MODE (op2)));
820}
821
822/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
823   Return the length of the field, or <= 0 on failure.  */
824
825int
826ia64_depz_field_mask (rtx rop, rtx rshift)
827{
828  unsigned HOST_WIDE_INT op = INTVAL (rop);
829  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
830
831  /* Get rid of the zero bits we're shifting in.  */
832  op >>= shift;
833
834  /* We must now have a solid block of 1's at bit 0.  */
835  return exact_log2 (op + 1);
836}
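
/* Worked example for ia64_depz_field_mask (added for clarity): with
   ROP = 0x7f8 and RSHIFT = 3, OP >> SHIFT is 0xff and
   exact_log2 (0xff + 1) returns 8, i.e. a contiguous 8-bit field
   starting at bit 3.  For a mask that is not a solid block of ones
   after the shift, e.g. 0x5 with a shift of 0, exact_log2 (0x6)
   returns -1 and the match fails.  */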
837
838/* Return the TLS model to use for ADDR.  */
839
840static enum tls_model
841tls_symbolic_operand_type (rtx addr)
842{
843  enum tls_model tls_kind = TLS_MODEL_NONE;
844
845  if (GET_CODE (addr) == CONST)
846    {
847      if (GET_CODE (XEXP (addr, 0)) == PLUS
848	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
849        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
850    }
851  else if (GET_CODE (addr) == SYMBOL_REF)
852    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
853
854  return tls_kind;
855}
856
857/* Return true if X is a constant that is valid for some immediate
858   field in an instruction.  */
859
860bool
861ia64_legitimate_constant_p (rtx x)
862{
863  switch (GET_CODE (x))
864    {
865    case CONST_INT:
866    case LABEL_REF:
867      return true;
868
869    case CONST_DOUBLE:
870      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
871	  || GET_MODE (x) == DFmode)
872	return true;
873      return satisfies_constraint_G (x);
874
875    case CONST:
876    case SYMBOL_REF:
877      /* ??? Short term workaround for PR 28490.  We must make the code here
878	 match the code in ia64_expand_move and move_operand, even though they
879	 are both technically wrong.  */
880      if (tls_symbolic_operand_type (x) == 0)
881	{
882	  HOST_WIDE_INT addend = 0;
883	  rtx op = x;
884
885	  if (GET_CODE (op) == CONST
886	      && GET_CODE (XEXP (op, 0)) == PLUS
887	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
888	    {
889	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
890	      op = XEXP (XEXP (op, 0), 0);
891	    }
892
893          if (any_offset_symbol_operand (op, GET_MODE (op))
894              || function_operand (op, GET_MODE (op)))
895            return true;
896	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
897	    return (addend & 0x3fff) == 0;
898	  return false;
899	}
900      return false;
901
902    case CONST_VECTOR:
903      {
904	enum machine_mode mode = GET_MODE (x);
905
906	if (mode == V2SFmode)
907	  return satisfies_constraint_Y (x);
908
909	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
910		&& GET_MODE_SIZE (mode) <= 8);
911      }
912
913    default:
914      return false;
915    }
916}
917
918/* Don't allow TLS addresses to get spilled to memory.  */
919
920static bool
921ia64_cannot_force_const_mem (rtx x)
922{
923  if (GET_MODE (x) == RFmode)
924    return true;
925  return tls_symbolic_operand_type (x) != 0;
926}
927
928/* Expand a symbolic constant load.  */
929
930bool
931ia64_expand_load_address (rtx dest, rtx src)
932{
933  gcc_assert (GET_CODE (dest) == REG);
934
  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
936     having to pointer-extend the value afterward.  Other forms of address
937     computation below are also more natural to compute as 64-bit quantities.
938     If we've been given an SImode destination register, change it.  */
939  if (GET_MODE (dest) != Pmode)
940    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
941			       byte_lowpart_offset (Pmode, GET_MODE (dest)));
942
943  if (TARGET_NO_PIC)
944    return false;
945  if (small_addr_symbolic_operand (src, VOIDmode))
946    return false;
947
948  if (TARGET_AUTO_PIC)
949    emit_insn (gen_load_gprel64 (dest, src));
950  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
951    emit_insn (gen_load_fptr (dest, src));
952  else if (sdata_symbolic_operand (src, VOIDmode))
953    emit_insn (gen_load_gprel (dest, src));
954  else
955    {
956      HOST_WIDE_INT addend = 0;
957      rtx tmp;
958
959      /* We did split constant offsets in ia64_expand_move, and we did try
960	 to keep them split in move_operand, but we also allowed reload to
961	 rematerialize arbitrary constants rather than spill the value to
962	 the stack and reload it.  So we have to be prepared here to split
963	 them apart again.  */
964      if (GET_CODE (src) == CONST)
965	{
966	  HOST_WIDE_INT hi, lo;
967
968	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
969	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
970	  hi = hi - lo;
971
972	  if (lo != 0)
973	    {
974	      addend = lo;
975	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
976	    }
977	}
978
979      tmp = gen_rtx_HIGH (Pmode, src);
980      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
981      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
982
983      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
984      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
985
986      if (addend)
987	{
988	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
989	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
990	}
991    }
992
993  return true;
994}
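
/* Worked example of the 14-bit offset split used above (and again in
   ia64_expand_move and ia64_expand_tls_address below), added for
   clarity: for an addend of 0x3000,

     lo = ((0x3000 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1000
     hi = 0x3000 - lo = 0x4000

   so HI is a multiple of 0x4000 (16KB aligned) and LO lies in the
   signed 14-bit range [-0x2000, 0x1fff] that fits an immediate add.  */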
995
996static GTY(()) rtx gen_tls_tga;
997static rtx
998gen_tls_get_addr (void)
999{
1000  if (!gen_tls_tga)
1001    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
1002  return gen_tls_tga;
1003}
1004
1005static GTY(()) rtx thread_pointer_rtx;
1006static rtx
1007gen_thread_pointer (void)
1008{
1009  if (!thread_pointer_rtx)
1010    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
1011  return thread_pointer_rtx;
1012}
1013
1014static rtx
1015ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
1016			 rtx orig_op1, HOST_WIDE_INT addend)
1017{
1018  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
1019  rtx orig_op0 = op0;
1020  HOST_WIDE_INT addend_lo, addend_hi;
1021
1022  switch (tls_kind)
1023    {
1024    case TLS_MODEL_GLOBAL_DYNAMIC:
1025      start_sequence ();
1026
1027      tga_op1 = gen_reg_rtx (Pmode);
1028      emit_insn (gen_load_dtpmod (tga_op1, op1));
1029
1030      tga_op2 = gen_reg_rtx (Pmode);
1031      emit_insn (gen_load_dtprel (tga_op2, op1));
1032
1033      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1034					 LCT_CONST, Pmode, 2, tga_op1,
1035					 Pmode, tga_op2, Pmode);
1036
1037      insns = get_insns ();
1038      end_sequence ();
1039
1040      if (GET_MODE (op0) != Pmode)
1041	op0 = tga_ret;
1042      emit_libcall_block (insns, op0, tga_ret, op1);
1043      break;
1044
1045    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
1050      start_sequence ();
1051
1052      tga_op1 = gen_reg_rtx (Pmode);
1053      emit_insn (gen_load_dtpmod (tga_op1, op1));
1054
1055      tga_op2 = const0_rtx;
1056
1057      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
1058					 LCT_CONST, Pmode, 2, tga_op1,
1059					 Pmode, tga_op2, Pmode);
1060
1061      insns = get_insns ();
1062      end_sequence ();
1063
1064      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1065				UNSPEC_LD_BASE);
1066      tmp = gen_reg_rtx (Pmode);
1067      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1068
1069      if (!register_operand (op0, Pmode))
1070	op0 = gen_reg_rtx (Pmode);
1071      if (TARGET_TLS64)
1072	{
1073	  emit_insn (gen_load_dtprel (op0, op1));
1074	  emit_insn (gen_adddi3 (op0, tmp, op0));
1075	}
1076      else
1077	emit_insn (gen_add_dtprel (op0, op1, tmp));
1078      break;
1079
1080    case TLS_MODEL_INITIAL_EXEC:
1081      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1082      addend_hi = addend - addend_lo;
1083
1084      op1 = plus_constant (op1, addend_hi);
1085      addend = addend_lo;
1086
1087      tmp = gen_reg_rtx (Pmode);
1088      emit_insn (gen_load_tprel (tmp, op1));
1089
1090      if (!register_operand (op0, Pmode))
1091	op0 = gen_reg_rtx (Pmode);
1092      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1093      break;
1094
1095    case TLS_MODEL_LOCAL_EXEC:
1096      if (!register_operand (op0, Pmode))
1097	op0 = gen_reg_rtx (Pmode);
1098
1099      op1 = orig_op1;
1100      addend = 0;
1101      if (TARGET_TLS64)
1102	{
1103	  emit_insn (gen_load_tprel (op0, op1));
1104	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1105	}
1106      else
1107	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1108      break;
1109
1110    default:
1111      gcc_unreachable ();
1112    }
1113
1114  if (addend)
1115    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1116			       orig_op0, 1, OPTAB_DIRECT);
1117  if (orig_op0 == op0)
1118    return NULL_RTX;
1119  if (GET_MODE (orig_op0) == Pmode)
1120    return op0;
1121  return gen_lowpart (GET_MODE (orig_op0), op0);
1122}
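
/* Illustrative sketch (added for clarity; the assembly spelling is an
   assumption based on the usual ia64 TLS conventions, not taken from
   this file): for TLS_MODEL_LOCAL_EXEC without TARGET_TLS64, the
   add_tprel pattern amounts to something like

     addl r8 = @tprel(x), r13	// r13 is the thread pointer

   while the TARGET_TLS64 path materializes the full 64-bit @tprel
   displacement first and then adds it to the thread pointer.  */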
1123
1124rtx
1125ia64_expand_move (rtx op0, rtx op1)
1126{
1127  enum machine_mode mode = GET_MODE (op0);
1128
1129  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1130    op1 = force_reg (mode, op1);
1131
1132  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1133    {
1134      HOST_WIDE_INT addend = 0;
1135      enum tls_model tls_kind;
1136      rtx sym = op1;
1137
1138      if (GET_CODE (op1) == CONST
1139	  && GET_CODE (XEXP (op1, 0)) == PLUS
1140	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1141	{
1142	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1143	  sym = XEXP (XEXP (op1, 0), 0);
1144	}
1145
1146      tls_kind = tls_symbolic_operand_type (sym);
1147      if (tls_kind)
1148	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1149
1150      if (any_offset_symbol_operand (sym, mode))
1151	addend = 0;
1152      else if (aligned_offset_symbol_operand (sym, mode))
1153	{
1154	  HOST_WIDE_INT addend_lo, addend_hi;
1155
1156	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1157	  addend_hi = addend - addend_lo;
1158
1159	  if (addend_lo != 0)
1160	    {
1161	      op1 = plus_constant (sym, addend_hi);
1162	      addend = addend_lo;
1163	    }
1164	  else
1165	    addend = 0;
1166	}
1167      else
1168	op1 = sym;
1169
1170      if (reload_completed)
1171	{
1172	  /* We really should have taken care of this offset earlier.  */
1173	  gcc_assert (addend == 0);
1174	  if (ia64_expand_load_address (op0, op1))
1175	    return NULL_RTX;
1176	}
1177
1178      if (addend)
1179	{
1180	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
1181
1182	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1183
1184	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1185				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1186	  if (op0 == op1)
1187	    return NULL_RTX;
1188	}
1189    }
1190
1191  return op1;
1192}
1193
1194/* Split a move from OP1 to OP0 conditional on COND.  */
1195
1196void
1197ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1198{
1199  rtx insn, first = get_last_insn ();
1200
1201  emit_move_insn (op0, op1);
1202
1203  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1204    if (INSN_P (insn))
1205      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1206					  PATTERN (insn));
1207}
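
/* Illustrative example (added for clarity): if the move above expands
   to (set (reg r14) (reg r15)) and COND is (ne (reg:BI p6) (const_int 0)),
   the loop rewrites the pattern into

     (cond_exec (ne (reg:BI p6) (const_int 0))
		(set (reg r14) (reg r15)))

   which is emitted as a "(p6)" predicated move.  */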
1208
1209/* Split a post-reload TImode or TFmode reference into two DImode
1210   components.  This is made extra difficult by the fact that we do
1211   not get any scratch registers to work with, because reload cannot
1212   be prevented from giving us a scratch that overlaps the register
1213   pair involved.  So instead, when addressing memory, we tweak the
1214   pointer register up and back down with POST_INCs.  Or up and not
1215   back down when we can get away with it.
1216
1217   REVERSED is true when the loads must be done in reversed order
1218   (high word first) for correctness.  DEAD is true when the pointer
1219   dies with the second insn we generate and therefore the second
1220   address must not carry a postmodify.
1221
1222   May return an insn which is to be emitted after the moves.  */
1223
1224static rtx
1225ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1226{
1227  rtx fixup = 0;
1228
1229  switch (GET_CODE (in))
1230    {
1231    case REG:
1232      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1233      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1234      break;
1235
1236    case CONST_INT:
1237    case CONST_DOUBLE:
1238      /* Cannot occur reversed.  */
1239      gcc_assert (!reversed);
1240
1241      if (GET_MODE (in) != TFmode)
1242	split_double (in, &out[0], &out[1]);
1243      else
1244	/* split_double does not understand how to split a TFmode
1245	   quantity into a pair of DImode constants.  */
1246	{
1247	  REAL_VALUE_TYPE r;
1248	  unsigned HOST_WIDE_INT p[2];
1249	  long l[4];  /* TFmode is 128 bits */
1250
1251	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1252	  real_to_target (l, &r, TFmode);
1253
1254	  if (FLOAT_WORDS_BIG_ENDIAN)
1255	    {
1256	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1257	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1258	    }
1259	  else
1260	    {
1261	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1262	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1263	    }
1264	  out[0] = GEN_INT (p[0]);
1265	  out[1] = GEN_INT (p[1]);
1266	}
1267      break;
1268
1269    case MEM:
1270      {
1271	rtx base = XEXP (in, 0);
1272	rtx offset;
1273
1274	switch (GET_CODE (base))
1275	  {
1276	  case REG:
1277	    if (!reversed)
1278	      {
1279		out[0] = adjust_automodify_address
1280		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1281		out[1] = adjust_automodify_address
1282		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1283	      }
1284	    else
1285	      {
1286		/* Reversal requires a pre-increment, which can only
1287		   be done as a separate insn.  */
1288		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1289		out[0] = adjust_automodify_address
1290		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1291		out[1] = adjust_address (in, DImode, 0);
1292	      }
1293	    break;
1294
1295	  case POST_INC:
1296	    gcc_assert (!reversed && !dead);
1297
1298	    /* Just do the increment in two steps.  */
1299	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1300	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1301	    break;
1302
1303	  case POST_DEC:
1304	    gcc_assert (!reversed && !dead);
1305
1306	    /* Add 8, subtract 24.  */
1307	    base = XEXP (base, 0);
1308	    out[0] = adjust_automodify_address
1309	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1310	    out[1] = adjust_automodify_address
1311	      (in, DImode,
1312	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1313	       8);
1314	    break;
1315
1316	  case POST_MODIFY:
1317	    gcc_assert (!reversed && !dead);
1318
1319	    /* Extract and adjust the modification.  This case is
1320	       trickier than the others, because we might have an
1321	       index register, or we might have a combined offset that
1322	       doesn't fit a signed 9-bit displacement field.  We can
1323	       assume the incoming expression is already legitimate.  */
1324	    offset = XEXP (base, 1);
1325	    base = XEXP (base, 0);
1326
1327	    out[0] = adjust_automodify_address
1328	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1329
1330	    if (GET_CODE (XEXP (offset, 1)) == REG)
1331	      {
1332		/* Can't adjust the postmodify to match.  Emit the
1333		   original, then a separate addition insn.  */
1334		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1335		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1336	      }
1337	    else
1338	      {
1339		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1340		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1341		  {
1342		    /* Again the postmodify cannot be made to match,
1343		       but in this case it's more efficient to get rid
1344		       of the postmodify entirely and fix up with an
1345		       add insn.  */
1346		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1347		    fixup = gen_adddi3
1348		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1349		  }
1350		else
1351		  {
1352		    /* Combined offset still fits in the displacement field.
1353		       (We cannot overflow it at the high end.)  */
1354		    out[1] = adjust_automodify_address
1355		      (in, DImode, gen_rtx_POST_MODIFY
1356		       (Pmode, base, gen_rtx_PLUS
1357			(Pmode, base,
1358			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1359		       8);
1360		  }
1361	      }
1362	    break;
1363
1364	  default:
1365	    gcc_unreachable ();
1366	  }
1367	break;
1368      }
1369
1370    default:
1371      gcc_unreachable ();
1372    }
1373
1374  return fixup;
1375}
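
/* Illustrative example (added for clarity): splitting a TImode load
   from (mem:TI (reg r2)) with REVERSED and DEAD both false yields

     out[0] = (mem:DI (post_inc (reg r2)))	offset 0, r2 += 8
     out[1] = (mem:DI (post_dec (reg r2)))	offset 8, r2 -= 8

   so the base register is stepped up and back down without needing a
   scratch register, as described in the comment before
   ia64_split_tmode.  */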
1376
1377/* Split a TImode or TFmode move instruction after reload.
1378   This is used by *movtf_internal and *movti_internal.  */
1379void
1380ia64_split_tmode_move (rtx operands[])
1381{
1382  rtx in[2], out[2], insn;
1383  rtx fixup[2];
1384  bool dead = false;
1385  bool reversed = false;
1386
1387  /* It is possible for reload to decide to overwrite a pointer with
1388     the value it points to.  In that case we have to do the loads in
1389     the appropriate order so that the pointer is not destroyed too
1390     early.  Also we must not generate a postmodify for that second
1391     load, or rws_access_regno will die.  */
1392  if (GET_CODE (operands[1]) == MEM
1393      && reg_overlap_mentioned_p (operands[0], operands[1]))
1394    {
1395      rtx base = XEXP (operands[1], 0);
1396      while (GET_CODE (base) != REG)
1397	base = XEXP (base, 0);
1398
1399      if (REGNO (base) == REGNO (operands[0]))
1400	reversed = true;
1401      dead = true;
1402    }
1403  /* Another reason to do the moves in reversed order is if the first
1404     element of the target register pair is also the second element of
1405     the source register pair.  */
1406  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1407      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1408    reversed = true;
1409
1410  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1411  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1412
1413#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1414  if (GET_CODE (EXP) == MEM						\
1415      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1416	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1417	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1418    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))
1419
1420  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1421  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1422  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1423
1424  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1425  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1426  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1427
1428  if (fixup[0])
1429    emit_insn (fixup[0]);
1430  if (fixup[1])
1431    emit_insn (fixup[1]);
1432
1433#undef MAYBE_ADD_REG_INC_NOTE
1434}
1435
1436/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1437   through memory plus an extra GR scratch register.  Except that you can
1438   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1439   SECONDARY_RELOAD_CLASS, but not both.
1440
1441   We got into problems in the first place by allowing a construct like
1442   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1443   This solution attempts to prevent this situation from occurring.  When
1444   we see something like the above, we spill the inner register to memory.  */
1445
1446static rtx
1447spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1448{
1449  if (GET_CODE (in) == SUBREG
1450      && GET_MODE (SUBREG_REG (in)) == TImode
1451      && GET_CODE (SUBREG_REG (in)) == REG)
1452    {
1453      rtx memt = assign_stack_temp (TImode, 16, 0);
1454      emit_move_insn (memt, SUBREG_REG (in));
1455      return adjust_address (memt, mode, 0);
1456    }
1457  else if (force && GET_CODE (in) == REG)
1458    {
1459      rtx memx = assign_stack_temp (mode, 16, 0);
1460      emit_move_insn (memx, in);
1461      return memx;
1462    }
1463  else
1464    return in;
1465}
1466
1467/* Expand the movxf or movrf pattern (MODE says which) with the given
1468   OPERANDS, returning true if the pattern should then invoke
1469   DONE.  */
1470
1471bool
1472ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1473{
1474  rtx op0 = operands[0];
1475
1476  if (GET_CODE (op0) == SUBREG)
1477    op0 = SUBREG_REG (op0);
1478
1479  /* We must support XFmode loads into general registers for stdarg/vararg,
1480     unprototyped calls, and a rare case where a long double is passed as
1481     an argument after a float HFA fills the FP registers.  We split them into
1482     DImode loads for convenience.  We also need to support XFmode stores
1483     for the last case.  This case does not happen for stdarg/vararg routines,
1484     because we do a block store to memory of unnamed arguments.  */
1485
1486  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1487    {
1488      rtx out[2];
1489
1490      /* We're hoping to transform everything that deals with XFmode
1491	 quantities and GR registers early in the compiler.  */
1492      gcc_assert (can_create_pseudo_p ());
1493
1494      /* Struct to register can just use TImode instead.  */
1495      if ((GET_CODE (operands[1]) == SUBREG
1496	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1497	  || (GET_CODE (operands[1]) == REG
1498	      && GR_REGNO_P (REGNO (operands[1]))))
1499	{
1500	  rtx op1 = operands[1];
1501
1502	  if (GET_CODE (op1) == SUBREG)
1503	    op1 = SUBREG_REG (op1);
1504	  else
1505	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1506
1507	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1508	  return true;
1509	}
1510
1511      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1512	{
1513	  /* Don't word-swap when reading in the constant.  */
1514	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1515			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1516					   0, mode));
1517	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1518			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1519					   0, mode));
1520	  return true;
1521	}
1522
1523      /* If the quantity is in a register not known to be GR, spill it.  */
1524      if (register_operand (operands[1], mode))
1525	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1526
1527      gcc_assert (GET_CODE (operands[1]) == MEM);
1528
1529      /* Don't word-swap when reading in the value.  */
1530      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1531      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1532
1533      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1534      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1535      return true;
1536    }
1537
1538  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1539    {
1540      /* We're hoping to transform everything that deals with XFmode
1541	 quantities and GR registers early in the compiler.  */
1542      gcc_assert (can_create_pseudo_p ());
1543
1544      /* Op0 can't be a GR_REG here, as that case is handled above.
1545	 If op0 is a register, then we spill op1, so that we now have a
1546	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1547	 to force the spill.  */
1548      if (register_operand (operands[0], mode))
1549	{
1550	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1551	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1552	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1553	}
1554
1555      else
1556	{
1557	  rtx in[2];
1558
1559	  gcc_assert (GET_CODE (operands[0]) == MEM);
1560
1561	  /* Don't word-swap when writing out the value.  */
1562	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1563	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1564
1565	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1566	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1567	  return true;
1568	}
1569    }
1570
1571  if (!reload_in_progress && !reload_completed)
1572    {
1573      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1574
1575      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1576	{
1577	  rtx memt, memx, in = operands[1];
1578	  if (CONSTANT_P (in))
1579	    in = validize_mem (force_const_mem (mode, in));
1580	  if (GET_CODE (in) == MEM)
1581	    memt = adjust_address (in, TImode, 0);
1582	  else
1583	    {
1584	      memt = assign_stack_temp (TImode, 16, 0);
1585	      memx = adjust_address (memt, mode, 0);
1586	      emit_move_insn (memx, in);
1587	    }
1588	  emit_move_insn (op0, memt);
1589	  return true;
1590	}
1591
1592      if (!ia64_move_ok (operands[0], operands[1]))
1593	operands[1] = force_reg (mode, operands[1]);
1594    }
1595
1596  return false;
1597}
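
/* Illustrative note (added for clarity): an XFmode value living in a
   pair of general registers, e.g. r32/r33 for an unprototyped or
   varargs argument, is not moved as a single XFmode access; the code
   above instead emits two DImode moves (offsets 0 and 8) or one
   TImode register-to-register move.  */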
1598
1599/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
1600   with the expression that holds the compare result (in VOIDmode).  */
1601
1602static GTY(()) rtx cmptf_libfunc;
1603
1604void
1605ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
1606{
1607  enum rtx_code code = GET_CODE (*expr);
1608  rtx cmp;
1609
1610  /* If we have a BImode input, then we already have a compare result, and
1611     do not need to emit another comparison.  */
1612  if (GET_MODE (*op0) == BImode)
1613    {
1614      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
1615      cmp = *op0;
1616    }
  /* HP-UX TFmode comparisons require a library call to _U_Qfcmp, which takes
     a magic number as its third argument indicating what to do.  The return
     value is an integer to be compared against zero.  */
1620  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
1621    {
1622      enum qfcmp_magic {
1623	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1624	QCMP_UNORD = 2,
1625	QCMP_EQ = 4,
1626	QCMP_LT = 8,
1627	QCMP_GT = 16
1628      };
1629      int magic;
1630      enum rtx_code ncode;
1631      rtx ret, insns;
1632
1633      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
1634      switch (code)
1635	{
1636	  /* 1 = equal, 0 = not equal.  Equality operators do
1637	     not raise FP_INVALID when given an SNaN operand.  */
1638	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1639	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1640	  /* isunordered() from C99.  */
1641	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1642	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1643	  /* Relational operators raise FP_INVALID when given
1644	     an SNaN operand.  */
1645	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1646	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1647	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1648	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1649	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1650	     Expanders for buneq etc. would have to be added to ia64.md
1651	     for this to be useful.  */
1652	default: gcc_unreachable ();
1653	}
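      /* Example: for LE the magic value is QCMP_LT|QCMP_EQ|QCMP_INV (== 13),
	 so the emitted call is roughly ret = _U_Qfcmp (*op0, *op1, 13) and
	 the result is then tested with ret != 0.  (Illustrative only; the
	 actual callee is whatever cmptf_libfunc was initialized to.)  */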
1654
1655      start_sequence ();
1656
1657      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1658				     *op0, TFmode, *op1, TFmode,
1659				     GEN_INT (magic), DImode);
1660      cmp = gen_reg_rtx (BImode);
1661      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1662			      gen_rtx_fmt_ee (ncode, BImode,
1663					      ret, const0_rtx)));
1664
1665      insns = get_insns ();
1666      end_sequence ();
1667
1668      emit_libcall_block (insns, cmp, cmp,
1669			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
1670      code = NE;
1671    }
1672  else
1673    {
1674      cmp = gen_reg_rtx (BImode);
1675      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1676			      gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
1677      code = NE;
1678    }
1679
1680  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
1681  *op0 = cmp;
1682  *op1 = const0_rtx;
1683}
1684
1685/* Generate an integral vector comparison.  Return true if the condition has
1686   been reversed, and so the sense of the comparison should be inverted.  */
1687
1688static bool
1689ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1690			    rtx dest, rtx op0, rtx op1)
1691{
1692  bool negate = false;
1693  rtx x;
1694
1695  /* Canonicalize the comparison to EQ, GT, GTU.  */
1696  switch (code)
1697    {
1698    case EQ:
1699    case GT:
1700    case GTU:
1701      break;
1702
1703    case NE:
1704    case LE:
1705    case LEU:
1706      code = reverse_condition (code);
1707      negate = true;
1708      break;
1709
1710    case GE:
1711    case GEU:
1712      code = reverse_condition (code);
1713      negate = true;
1714      /* FALLTHRU */
1715
1716    case LT:
1717    case LTU:
1718      code = swap_condition (code);
1719      x = op0, op0 = op1, op1 = x;
1720      break;
1721
1722    default:
1723      gcc_unreachable ();
1724    }
1725
1726  /* Unsigned parallel compare is not supported by the hardware.  Play some
1727     tricks to turn this into a signed comparison against 0.  */
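  /* The identities relied on below (illustrative, not emitted literally):
       V2SI:        a >u b  <==>  (a - 0x80000000) >s (b - 0x80000000)
       V8QI, V4HI:  a >u b  <==>  (a -us b) != 0
     where -us denotes unsigned saturating subtraction.  */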
1728  if (code == GTU)
1729    {
1730      switch (mode)
1731	{
1732	case V2SImode:
1733	  {
1734	    rtx t1, t2, mask;
1735
1736	    /* Subtract (-(INT MAX) - 1) from both operands to make
1737	       them signed.  */
1738	    mask = GEN_INT (0x80000000);
1739	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1740	    mask = force_reg (mode, mask);
1741	    t1 = gen_reg_rtx (mode);
1742	    emit_insn (gen_subv2si3 (t1, op0, mask));
1743	    t2 = gen_reg_rtx (mode);
1744	    emit_insn (gen_subv2si3 (t2, op1, mask));
1745	    op0 = t1;
1746	    op1 = t2;
1747	    code = GT;
1748	  }
1749	  break;
1750
1751	case V8QImode:
1752	case V4HImode:
1753	  /* Perform a parallel unsigned saturating subtraction.  */
1754	  x = gen_reg_rtx (mode);
1755	  emit_insn (gen_rtx_SET (VOIDmode, x,
1756				  gen_rtx_US_MINUS (mode, op0, op1)));
1757
1758	  code = EQ;
1759	  op0 = x;
1760	  op1 = CONST0_RTX (mode);
1761	  negate = !negate;
1762	  break;
1763
1764	default:
1765	  gcc_unreachable ();
1766	}
1767    }
1768
1769  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1770  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1771
1772  return negate;
1773}
1774
1775/* Emit an integral vector conditional move.  */
1776
1777void
1778ia64_expand_vecint_cmov (rtx operands[])
1779{
1780  enum machine_mode mode = GET_MODE (operands[0]);
1781  enum rtx_code code = GET_CODE (operands[3]);
1782  bool negate;
1783  rtx cmp, x, ot, of;
1784
1785  cmp = gen_reg_rtx (mode);
1786  negate = ia64_expand_vecint_compare (code, mode, cmp,
1787				       operands[4], operands[5]);
1788
1789  ot = operands[1+negate];
1790  of = operands[2-negate];
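  /* CMP holds all-ones in the lanes where the (possibly inverted) condition
     is true and all-zeros elsewhere; OT and OF are the operands selected in
     the true and false lanes respectively.  */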
1791
1792  if (ot == CONST0_RTX (mode))
1793    {
1794      if (of == CONST0_RTX (mode))
1795	{
1796	  emit_move_insn (operands[0], ot);
1797	  return;
1798	}
1799
1800      x = gen_rtx_NOT (mode, cmp);
1801      x = gen_rtx_AND (mode, x, of);
1802      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1803    }
1804  else if (of == CONST0_RTX (mode))
1805    {
1806      x = gen_rtx_AND (mode, cmp, ot);
1807      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1808    }
1809  else
1810    {
1811      rtx t, f;
1812
1813      t = gen_reg_rtx (mode);
1814      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1815      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1816
1817      f = gen_reg_rtx (mode);
1818      x = gen_rtx_NOT (mode, cmp);
1819      x = gen_rtx_AND (mode, x, operands[2-negate]);
1820      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1821
1822      x = gen_rtx_IOR (mode, t, f);
1823      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1824    }
1825}
1826
1827/* Emit an integral vector min or max operation.  Return true if all done.  */
1828
1829bool
1830ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1831			   rtx operands[])
1832{
1833  rtx xops[6];
1834
1835  /* These four combinations are supported directly.  */
1836  if (mode == V8QImode && (code == UMIN || code == UMAX))
1837    return false;
1838  if (mode == V4HImode && (code == SMIN || code == SMAX))
1839    return false;
1840
1841  /* This combination can be implemented with only saturating subtraction.  */
1842  if (mode == V4HImode && code == UMAX)
1843    {
1844      rtx x, tmp = gen_reg_rtx (mode);
1845
1846      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1848
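      /* umax (a, b) == (a -us b) + b: the saturating subtraction yields
	 a - b when a > b and 0 otherwise, so adding b back produces the
	 maximum of the two operands.  */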
1849      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1850      return true;
1851    }
1852
1853  /* Everything else is implemented via vector comparisons.  */
1854  xops[0] = operands[0];
1855  xops[4] = xops[1] = operands[1];
1856  xops[5] = xops[2] = operands[2];
1857
1858  switch (code)
1859    {
1860    case UMIN:
1861      code = LTU;
1862      break;
1863    case UMAX:
1864      code = GTU;
1865      break;
1866    case SMIN:
1867      code = LT;
1868      break;
1869    case SMAX:
1870      code = GT;
1871      break;
1872    default:
1873      gcc_unreachable ();
1874    }
1875  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1876
1877  ia64_expand_vecint_cmov (xops);
1878  return true;
1879}
1880
1881/* Emit an integral vector widening sum operation.  */
1882
1883void
1884ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1885{
1886  rtx l, h, x, s;
1887  enum machine_mode wmode, mode;
1888  rtx (*unpack_l) (rtx, rtx, rtx);
1889  rtx (*unpack_h) (rtx, rtx, rtx);
1890  rtx (*plus) (rtx, rtx, rtx);
1891
1892  wmode = GET_MODE (operands[0]);
1893  mode = GET_MODE (operands[1]);
1894
1895  switch (mode)
1896    {
1897    case V8QImode:
1898      unpack_l = gen_unpack1_l;
1899      unpack_h = gen_unpack1_h;
1900      plus = gen_addv4hi3;
1901      break;
1902    case V4HImode:
1903      unpack_l = gen_unpack2_l;
1904      unpack_h = gen_unpack2_h;
1905      plus = gen_addv2si3;
1906      break;
1907    default:
1908      gcc_unreachable ();
1909    }
1910
1911  /* Fill in x with the sign extension of each element in op1.  */
1912  if (unsignedp)
1913    x = CONST0_RTX (mode);
1914  else
1915    {
1916      bool neg;
1917
1918      x = gen_reg_rtx (mode);
1919
1920      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1921					CONST0_RTX (mode));
1922      gcc_assert (!neg);
1923    }
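  /* X now holds, per element, either zero (unsigned case) or the all-ones
     result of the parallel "< 0" compare; the unpack insns below interleave
     it with op1 to form the sign- or zero-extended wide elements.  */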
1924
1925  l = gen_reg_rtx (wmode);
1926  h = gen_reg_rtx (wmode);
1927  s = gen_reg_rtx (wmode);
1928
1929  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931  emit_insn (plus (s, l, operands[2]));
1932  emit_insn (plus (operands[0], h, s));
1933}
1934
1935/* Emit a signed or unsigned V8QI dot product operation.  */
1936
1937void
1938ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1939{
1940  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1941
1942  /* Fill in x1 and x2 with the sign extension of each element.  */
1943  if (unsignedp)
1944    x1 = x2 = CONST0_RTX (V8QImode);
1945  else
1946    {
1947      bool neg;
1948
1949      x1 = gen_reg_rtx (V8QImode);
1950      x2 = gen_reg_rtx (V8QImode);
1951
1952      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1953					CONST0_RTX (V8QImode));
1954      gcc_assert (!neg);
1955      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1956					CONST0_RTX (V8QImode));
1957      gcc_assert (!neg);
1958    }
1959
1960  l1 = gen_reg_rtx (V4HImode);
1961  l2 = gen_reg_rtx (V4HImode);
1962  h1 = gen_reg_rtx (V4HImode);
1963  h2 = gen_reg_rtx (V4HImode);
1964
1965  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1966  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1967  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1968  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1969
1970  p1 = gen_reg_rtx (V2SImode);
1971  p2 = gen_reg_rtx (V2SImode);
1972  p3 = gen_reg_rtx (V2SImode);
1973  p4 = gen_reg_rtx (V2SImode);
1974  emit_insn (gen_pmpy2_r (p1, l1, l2));
1975  emit_insn (gen_pmpy2_l (p2, l1, l2));
1976  emit_insn (gen_pmpy2_r (p3, h1, h2));
1977  emit_insn (gen_pmpy2_l (p4, h1, h2));
1978
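  /* P1..P4 hold the four V2SI partial products of the widened halves; the
     additions below accumulate them, together with the incoming accumulator
     in operands[3], into the final dot-product result.  */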
1979  s1 = gen_reg_rtx (V2SImode);
1980  s2 = gen_reg_rtx (V2SImode);
1981  s3 = gen_reg_rtx (V2SImode);
1982  emit_insn (gen_addv2si3 (s1, p1, p2));
1983  emit_insn (gen_addv2si3 (s2, p3, p4));
1984  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1985  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1986}
1987
1988/* Emit the appropriate sequence for a call.  */
1989
1990void
1991ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1992		  int sibcall_p)
1993{
1994  rtx insn, b0;
1995
1996  addr = XEXP (addr, 0);
1997  addr = convert_memory_address (DImode, addr);
1998  b0 = gen_rtx_REG (DImode, R_BR (0));
1999
2000  /* ??? Should do this for functions known to bind local too.  */
2001  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
2002    {
2003      if (sibcall_p)
2004	insn = gen_sibcall_nogp (addr);
2005      else if (! retval)
2006	insn = gen_call_nogp (addr, b0);
2007      else
2008	insn = gen_call_value_nogp (retval, addr, b0);
2009      insn = emit_call_insn (insn);
2010    }
2011  else
2012    {
2013      if (sibcall_p)
2014	insn = gen_sibcall_gp (addr);
2015      else if (! retval)
2016	insn = gen_call_gp (addr, b0);
2017      else
2018	insn = gen_call_value_gp (retval, addr, b0);
2019      insn = emit_call_insn (insn);
2020
2021      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
2022    }
2023
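  /* A sibcall returns through b0 on behalf of our caller, so mark b0 as used
     to keep it live across the jump.  */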
2024  if (sibcall_p)
2025    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
2026
2027  if (TARGET_ABI_OPEN_VMS)
2028    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
2029	     gen_rtx_REG (DImode, GR_REG (25)));
2030}
2031
2032static void
2033reg_emitted (enum ia64_frame_regs r)
2034{
2035  if (emitted_frame_related_regs[r] == 0)
2036    emitted_frame_related_regs[r] = current_frame_info.r[r];
2037  else
2038    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
2039}
2040
2041static int
2042get_reg (enum ia64_frame_regs r)
2043{
2044  reg_emitted (r);
2045  return current_frame_info.r[r];
2046}
2047
2048static bool
2049is_emitted (int regno)
2050{
2051  unsigned int r;
2052
2053  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
2054    if (emitted_frame_related_regs[r] == regno)
2055      return true;
2056  return false;
2057}
2058
2059void
2060ia64_reload_gp (void)
2061{
2062  rtx tmp;
2063
2064  if (current_frame_info.r[reg_save_gp])
2065    {
2066      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
2067    }
2068  else
2069    {
2070      HOST_WIDE_INT offset;
2071      rtx offset_r;
2072
2073      offset = (current_frame_info.spill_cfa_off
2074	        + current_frame_info.spill_size);
2075      if (frame_pointer_needed)
2076        {
2077          tmp = hard_frame_pointer_rtx;
2078          offset = -offset;
2079        }
2080      else
2081        {
2082          tmp = stack_pointer_rtx;
2083          offset = current_frame_info.total_size - offset;
2084        }
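      /* TMP plus OFFSET now addresses the first slot of the GR spill area,
	 which is where the prologue spilled gp; the code below forms that
	 address in pic_offset_table_rtx and reloads gp through it.  */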
2085
2086      offset_r = GEN_INT (offset);
2087      if (satisfies_constraint_I (offset_r))
2088        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
2089      else
2090        {
2091          emit_move_insn (pic_offset_table_rtx, offset_r);
2092          emit_insn (gen_adddi3 (pic_offset_table_rtx,
2093			         pic_offset_table_rtx, tmp));
2094        }
2095
2096      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2097    }
2098
2099  emit_move_insn (pic_offset_table_rtx, tmp);
2100}
2101
2102void
2103ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2104		 rtx scratch_b, int noreturn_p, int sibcall_p)
2105{
2106  rtx insn;
2107  bool is_desc = false;
2108
2109  /* If we find we're calling through a register, then we're actually
2110     calling through a descriptor, so load up the values.  */
2111  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2112    {
2113      rtx tmp;
2114      bool addr_dead_p;
2115
2116      /* ??? We are currently constrained to *not* use peep2, because
2117	 we can legitimately change the global lifetime of the GP
2118	 (in the form of killing where previously live).  This is
2119	 because a call through a descriptor doesn't use the previous
2120	 value of the GP, while a direct call does, and we do not
2121	 commit to either form until the split here.
2122
2123	 That said, this means that we lack precise life info for
2124	 whether ADDR is dead after this call.  This is not terribly
2125	 important, since we can fix things up essentially for free
2126	 with the POST_DEC below, but it's nice to not use it when we
2127	 can immediately tell it's not necessary.  */
2128      addr_dead_p = ((noreturn_p || sibcall_p
2129		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2130					    REGNO (addr)))
2131		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2132
2133      /* Load the code address into scratch_b.  */
2134      tmp = gen_rtx_POST_INC (Pmode, addr);
2135      tmp = gen_rtx_MEM (Pmode, tmp);
2136      emit_move_insn (scratch_r, tmp);
2137      emit_move_insn (scratch_b, scratch_r);
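      /* SCRATCH_B now holds the entry point taken from the first word of the
	 function descriptor; the descriptor's second word, the callee's gp,
	 is loaded just below.  */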
2138
2139      /* Load the GP address.  If ADDR is not dead here, then we must
2140	 revert the change made above via the POST_INCREMENT.  */
2141      if (!addr_dead_p)
2142	tmp = gen_rtx_POST_DEC (Pmode, addr);
2143      else
2144	tmp = addr;
2145      tmp = gen_rtx_MEM (Pmode, tmp);
2146      emit_move_insn (pic_offset_table_rtx, tmp);
2147
2148      is_desc = true;
2149      addr = scratch_b;
2150    }
2151
2152  if (sibcall_p)
2153    insn = gen_sibcall_nogp (addr);
2154  else if (retval)
2155    insn = gen_call_value_nogp (retval, addr, retaddr);
2156  else
2157    insn = gen_call_nogp (addr, retaddr);
2158  emit_call_insn (insn);
2159
2160  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2161    ia64_reload_gp ();
2162}
2163
2164/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2165
2166   This differs from the generic code in that we know about the zero-extending
2167   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2168   also know that ld.acq+cmpxchg.rel equals a full barrier.
2169
2170   The loop we want to generate looks like
2171
2172	cmp_reg = mem;
2173      label:
2174	old_reg = cmp_reg;
2175	new_reg = cmp_reg op val;
2176	cmp_reg = compare-and-swap (mem, old_reg, new_reg);
2177	if (cmp_reg != old_reg)
2178	  goto label;
2179
2180   Note that we only do the plain load from memory once.  Subsequent
2181   iterations use the value loaded by the compare-and-swap pattern.  */
2182
2183void
2184ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2185		       rtx old_dst, rtx new_dst)
2186{
2187  enum machine_mode mode = GET_MODE (mem);
2188  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2189  enum insn_code icode;
2190
2191  /* Special case for using fetchadd.  */
2192  if ((mode == SImode || mode == DImode)
2193      && (code == PLUS || code == MINUS)
2194      && fetchadd_operand (val, mode))
2195    {
2196      if (code == MINUS)
2197	val = GEN_INT (-INTVAL (val));
2198
2199      if (!old_dst)
2200        old_dst = gen_reg_rtx (mode);
2201
2202      emit_insn (gen_memory_barrier ());
2203
2204      if (mode == SImode)
2205	icode = CODE_FOR_fetchadd_acq_si;
2206      else
2207	icode = CODE_FOR_fetchadd_acq_di;
2208      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2209
2210      if (new_dst)
2211	{
2212	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2213					 true, OPTAB_WIDEN);
2214	  if (new_reg != new_dst)
2215	    emit_move_insn (new_dst, new_reg);
2216	}
2217      return;
2218    }
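  /* Values outside fetchadd's small immediate range, as well as all other
     operations and modes, fall through to the ld.acq/cmpxchg.rel retry loop
     below.  (Illustrative summary; the exact instructions come from the
     ia64.md patterns used.)  */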
2219
2220  /* Because of the volatile mem read, we get an ld.acq, which is the
2221     front half of the full barrier.  The end half is the cmpxchg.rel.  */
2222  gcc_assert (MEM_VOLATILE_P (mem));
2223
2224  old_reg = gen_reg_rtx (DImode);
2225  cmp_reg = gen_reg_rtx (DImode);
2226  label = gen_label_rtx ();
2227
2228  if (mode != DImode)
2229    {
2230      val = simplify_gen_subreg (DImode, val, mode, 0);
2231      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2232    }
2233  else
2234    emit_move_insn (cmp_reg, mem);
2235
2236  emit_label (label);
2237
2238  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2239  emit_move_insn (old_reg, cmp_reg);
2240  emit_move_insn (ar_ccv, cmp_reg);
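  /* ar.ccv must hold the compare value zero-extended to 64 bits (see the
     zero-extending note in the function comment), which is why the loop is
     carried out in DImode.  */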
2241
2242  if (old_dst)
2243    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2244
2245  new_reg = cmp_reg;
2246  if (code == NOT)
2247    {
2248      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
2249				     true, OPTAB_DIRECT);
2250      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
2251    }
2252  else
2253    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2254				   true, OPTAB_DIRECT);
2255
2256  if (mode != DImode)
2257    new_reg = gen_lowpart (mode, new_reg);
2258  if (new_dst)
2259    emit_move_insn (new_dst, new_reg);
2260
2261  switch (mode)
2262    {
2263    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2264    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2265    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2266    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2267    default:
2268      gcc_unreachable ();
2269    }
2270
2271  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2272
2273  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2274}
2275
2276/* Begin the assembly file.  */
2277
2278static void
2279ia64_file_start (void)
2280{
2281  /* Variable tracking should be run after all optimizations which change order
2282     of insns.  It also needs a valid CFG.  This can't be done in
2283     ia64_override_options, because flag_var_tracking is finalized after
2284     that.  */
2285  ia64_flag_var_tracking = flag_var_tracking;
2286  flag_var_tracking = 0;
2287
2288  default_file_start ();
2289  emit_safe_across_calls ();
2290}
2291
2292void
2293emit_safe_across_calls (void)
2294{
2295  unsigned int rs, re;
2296  int out_state;
2297
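  /* This emits, e.g., ".pred.safe_across_calls p1-p5,p16-p63": each range
     names predicate registers that are not call-used and are therefore
     preserved across calls.  (The exact ranges depend on call_used_regs.)  */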
2298  rs = 1;
2299  out_state = 0;
2300  while (1)
2301    {
2302      while (rs < 64 && call_used_regs[PR_REG (rs)])
2303	rs++;
2304      if (rs >= 64)
2305	break;
2306      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2307	continue;
2308      if (out_state == 0)
2309	{
2310	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2311	  out_state = 1;
2312	}
2313      else
2314	fputc (',', asm_out_file);
2315      if (re == rs + 1)
2316	fprintf (asm_out_file, "p%u", rs);
2317      else
2318	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2319      rs = re + 1;
2320    }
2321  if (out_state)
2322    fputc ('\n', asm_out_file);
2323}
2324
2325/* Globalize a declaration.  */
2326
2327static void
2328ia64_globalize_decl_name (FILE * stream, tree decl)
2329{
2330  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2331  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
2332  if (version_attr)
2333    {
2334      tree v = TREE_VALUE (TREE_VALUE (version_attr));
2335      const char *p = TREE_STRING_POINTER (v);
2336      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
2337    }
2338  targetm.asm_out.globalize_label (stream, name);
2339  if (TREE_CODE (decl) == FUNCTION_DECL)
2340    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
2341}
2342
2343/* Helper function for ia64_compute_frame_size: find an appropriate general
2344   register to spill the special register R to.  GR registers already
2345   allocated by this routine are tracked in current_frame_info.gr_used_mask.
2346   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2347
2348static int
2349find_gr_spill (enum ia64_frame_regs r, int try_locals)
2350{
2351  int regno;
2352
2353  if (emitted_frame_related_regs[r] != 0)
2354    {
2355      regno = emitted_frame_related_regs[r];
2356      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
2357	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
2358        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2359      else if (current_function_is_leaf
2360               && regno >= GR_REG (1) && regno <= GR_REG (31))
2361        current_frame_info.gr_used_mask |= 1 << regno;
2362
2363      return regno;
2364    }
2365
2366  /* If this is a leaf function, first try an otherwise unused
2367     call-clobbered register.  */
2368  if (current_function_is_leaf)
2369    {
2370      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2371	if (! df_regs_ever_live_p (regno)
2372	    && call_used_regs[regno]
2373	    && ! fixed_regs[regno]
2374	    && ! global_regs[regno]
2375	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
2376            && ! is_emitted (regno))
2377	  {
2378	    current_frame_info.gr_used_mask |= 1 << regno;
2379	    return regno;
2380	  }
2381    }
2382
2383  if (try_locals)
2384    {
2385      regno = current_frame_info.n_local_regs;
2386      /* If there is a frame pointer, then we can't use loc79, because
2387	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2388	 reg_name switching code in ia64_expand_prologue.  */
2389      while (regno < (80 - frame_pointer_needed))
2390	if (! is_emitted (LOC_REG (regno++)))
2391	  {
2392	    current_frame_info.n_local_regs = regno;
2393	    return LOC_REG (regno - 1);
2394	  }
2395    }
2396
2397  /* Failed to find a general register to spill to.  Must use stack.  */
2398  return 0;
2399}
2400
2401/* In order to make for nice schedules, we try to allocate every temporary
2402   to a different register.  We must of course stay away from call-saved,
2403   fixed, and global registers.  We must also stay away from registers
2404   allocated in current_frame_info.gr_used_mask, since those include regs
2405   used all through the prologue.
2406
2407   Any register allocated here must be used immediately.  The idea is to
2408   aid scheduling, not to solve data flow problems.  */
2409
2410static int last_scratch_gr_reg;
2411
2412static int
2413next_scratch_gr_reg (void)
2414{
2415  int i, regno;
2416
2417  for (i = 0; i < 32; ++i)
2418    {
2419      regno = (last_scratch_gr_reg + i + 1) & 31;
2420      if (call_used_regs[regno]
2421	  && ! fixed_regs[regno]
2422	  && ! global_regs[regno]
2423	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2424	{
2425	  last_scratch_gr_reg = regno;
2426	  return regno;
2427	}
2428    }
2429
2430  /* There must be _something_ available.  */
2431  gcc_unreachable ();
2432}
2433
2434/* Helper function for ia64_compute_frame_size, called through
2435   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2436
2437static void
2438mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2439{
2440  unsigned int regno = REGNO (reg);
2441  if (regno < 32)
2442    {
2443      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2444      for (i = 0; i < n; ++i)
2445	current_frame_info.gr_used_mask |= 1 << (regno + i);
2446    }
2447}
2448
2449
2450/* Compute the frame layout for the current function and record it in
2451   current_frame_info (total size, spilled registers, etc.).  SIZE is the
2452   number of bytes of space needed for local variables.  */
2453
2454static void
2455ia64_compute_frame_size (HOST_WIDE_INT size)
2456{
2457  HOST_WIDE_INT total_size;
2458  HOST_WIDE_INT spill_size = 0;
2459  HOST_WIDE_INT extra_spill_size = 0;
2460  HOST_WIDE_INT pretend_args_size;
2461  HARD_REG_SET mask;
2462  int n_spilled = 0;
2463  int spilled_gr_p = 0;
2464  int spilled_fr_p = 0;
2465  unsigned int regno;
2466  int min_regno;
2467  int max_regno;
2468  int i;
2469
2470  if (current_frame_info.initialized)
2471    return;
2472
2473  memset (&current_frame_info, 0, sizeof current_frame_info);
2474  CLEAR_HARD_REG_SET (mask);
2475
2476  /* Don't allocate scratches to the return register.  */
2477  diddle_return_value (mark_reg_gr_used_mask, NULL);
2478
2479  /* Don't allocate scratches to the EH scratch registers.  */
2480  if (cfun->machine->ia64_eh_epilogue_sp)
2481    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482  if (cfun->machine->ia64_eh_epilogue_bsp)
2483    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2484
2485  /* Find the size of the register stack frame.  We have only 80 local
2486     registers, because we reserve 8 for the inputs and 8 for the
2487     outputs.  */
2488
2489  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490     since we'll be adjusting that down later.  */
2491  regno = LOC_REG (78) + ! frame_pointer_needed;
2492  for (; regno >= LOC_REG (0); regno--)
2493    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2494      break;
2495  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2496
2497  /* For functions marked with the syscall_linkage attribute, we must mark
2498     all eight input registers as in use, so that locals aren't visible to
2499     the caller.  */
2500
2501  if (cfun->machine->n_varargs > 0
2502      || lookup_attribute ("syscall_linkage",
2503			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2504    current_frame_info.n_input_regs = 8;
2505  else
2506    {
2507      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2508	if (df_regs_ever_live_p (regno))
2509	  break;
2510      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2511    }
2512
2513  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2514    if (df_regs_ever_live_p (regno))
2515      break;
2516  i = regno - OUT_REG (0) + 1;
2517
2518#ifndef PROFILE_HOOK
2519  /* When -p profiling, we need one output register for the mcount argument.
2520     Likewise for -a profiling for the bb_init_func argument.  For -ax
2521     profiling, we need two output registers for the two bb_init_trace_func
2522     arguments.  */
2523  if (crtl->profile)
2524    i = MAX (i, 1);
2525#endif
2526  current_frame_info.n_output_regs = i;
2527
2528  /* ??? No rotating register support yet.  */
2529  current_frame_info.n_rotate_regs = 0;
2530
2531  /* Discover which registers need spilling, and how much room that
2532     will take.  Begin with floating point and general registers,
2533     which will always wind up on the stack.  */
2534
2535  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2536    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2537      {
2538	SET_HARD_REG_BIT (mask, regno);
2539	spill_size += 16;
2540	n_spilled += 1;
2541	spilled_fr_p = 1;
2542      }
2543
2544  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2545    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2546      {
2547	SET_HARD_REG_BIT (mask, regno);
2548	spill_size += 8;
2549	n_spilled += 1;
2550	spilled_gr_p = 1;
2551      }
2552
2553  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2554    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2555      {
2556	SET_HARD_REG_BIT (mask, regno);
2557	spill_size += 8;
2558	n_spilled += 1;
2559      }
2560
2561  /* Now come all special registers that might get saved in other
2562     general registers.  */
2563
2564  if (frame_pointer_needed)
2565    {
2566      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
2567      /* If we did not get a register, then we take LOC79.  This is guaranteed
2568	 to be free, even if regs_ever_live is already set, because this is
2569	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2570	 as we don't count loc79 above.  */
2571      if (current_frame_info.r[reg_fp] == 0)
2572	{
2573	  current_frame_info.r[reg_fp] = LOC_REG (79);
2574	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
2575	}
2576    }
2577
2578  if (! current_function_is_leaf)
2579    {
2580      /* Emit a save of BR0 if we call other functions.  Do this even
2581	 if this function doesn't return, as EH depends on this to be
2582	 able to unwind the stack.  */
2583      SET_HARD_REG_BIT (mask, BR_REG (0));
2584
2585      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
2586      if (current_frame_info.r[reg_save_b0] == 0)
2587	{
2588	  extra_spill_size += 8;
2589	  n_spilled += 1;
2590	}
2591
2592      /* Similarly for ar.pfs.  */
2593      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2594      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
2595      if (current_frame_info.r[reg_save_ar_pfs] == 0)
2596	{
2597	  extra_spill_size += 8;
2598	  n_spilled += 1;
2599	}
2600
2601      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2602	 registers are clobbered, so we fall back to the stack.  */
2603      current_frame_info.r[reg_save_gp]
2604	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
2605      if (current_frame_info.r[reg_save_gp] == 0)
2606	{
2607	  SET_HARD_REG_BIT (mask, GR_REG (1));
2608	  spill_size += 8;
2609	  n_spilled += 1;
2610	}
2611    }
2612  else
2613    {
2614      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
2615	{
2616	  SET_HARD_REG_BIT (mask, BR_REG (0));
2617	  extra_spill_size += 8;
2618	  n_spilled += 1;
2619	}
2620
2621      if (df_regs_ever_live_p (AR_PFS_REGNUM))
2622	{
2623	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2624 	  current_frame_info.r[reg_save_ar_pfs]
2625            = find_gr_spill (reg_save_ar_pfs, 1);
2626	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
2627	    {
2628	      extra_spill_size += 8;
2629	      n_spilled += 1;
2630	    }
2631	}
2632    }
2633
2634  /* Unwind descriptor hackery: things are most efficient if we allocate
2635     consecutive GR save registers for RP, PFS, FP in that order. However,
2636     it is absolutely critical that FP get the only hard register that's
2637     guaranteed to be free, so we allocated it first.  If all three did
2638     happen to be allocated to hard regs, and are consecutive, rearrange them
2639     into the preferred order now.
2640
2641     If we have already emitted code for any of those registers,
2642     then it's already too late to change.  */
2643  min_regno = MIN (current_frame_info.r[reg_fp],
2644		   MIN (current_frame_info.r[reg_save_b0],
2645			current_frame_info.r[reg_save_ar_pfs]));
2646  max_regno = MAX (current_frame_info.r[reg_fp],
2647		   MAX (current_frame_info.r[reg_save_b0],
2648			current_frame_info.r[reg_save_ar_pfs]));
2649  if (min_regno > 0
2650      && min_regno + 2 == max_regno
2651      && (current_frame_info.r[reg_fp] == min_regno + 1
2652	  || current_frame_info.r[reg_save_b0] == min_regno + 1
2653	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
2654      && (emitted_frame_related_regs[reg_save_b0] == 0
2655	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
2656      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
2657	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
2658      && (emitted_frame_related_regs[reg_fp] == 0
2659	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
2660    {
2661      current_frame_info.r[reg_save_b0] = min_regno;
2662      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
2663      current_frame_info.r[reg_fp] = min_regno + 2;
2664    }
2665
2666  /* See if we need to store the predicate register block.  */
2667  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2668    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2669      break;
2670  if (regno <= PR_REG (63))
2671    {
2672      SET_HARD_REG_BIT (mask, PR_REG (0));
2673      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
2674      if (current_frame_info.r[reg_save_pr] == 0)
2675	{
2676	  extra_spill_size += 8;
2677	  n_spilled += 1;
2678	}
2679
2680      /* ??? Mark them all as used so that register renaming and such
2681	 are free to use them.  */
2682      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2683	df_set_regs_ever_live (regno, true);
2684    }
2685
2686  /* If we're forced to use st8.spill, we're forced to save and restore
2687     ar.unat as well.  The check for existing liveness allows inline asm
2688     to touch ar.unat.  */
2689  if (spilled_gr_p || cfun->machine->n_varargs
2690      || df_regs_ever_live_p (AR_UNAT_REGNUM))
2691    {
2692      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
2693      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2694      current_frame_info.r[reg_save_ar_unat]
2695        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
2696      if (current_frame_info.r[reg_save_ar_unat] == 0)
2697	{
2698	  extra_spill_size += 8;
2699	  n_spilled += 1;
2700	}
2701    }
2702
2703  if (df_regs_ever_live_p (AR_LC_REGNUM))
2704    {
2705      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2706      current_frame_info.r[reg_save_ar_lc]
2707        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
2708      if (current_frame_info.r[reg_save_ar_lc] == 0)
2709	{
2710	  extra_spill_size += 8;
2711	  n_spilled += 1;
2712	}
2713    }
2714
2715  /* If we have an odd number of words of pretend arguments written to
2716     the stack, then the FR save area will be unaligned.  We round the
2717     size of this area up to keep things 16 byte aligned.  */
2718  if (spilled_fr_p)
2719    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
2720  else
2721    pretend_args_size = crtl->args.pretend_args_size;
2722
2723  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2724		+ crtl->outgoing_args_size);
2725  total_size = IA64_STACK_ALIGN (total_size);
2726
2727  /* We always use the 16-byte scratch area provided by the caller, but
2728     if we are a leaf function, there's no one to which we need to provide
2729     a scratch area.  */
2730  if (current_function_is_leaf)
2731    total_size = MAX (0, total_size - 16);
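  /* Rough example: 40 bytes of locals plus one spilled FR (16 bytes) and two
     spilled GRs (8 bytes each) give spill_size == 32, so total_size is
     IA64_STACK_ALIGN (40 + 32 + outgoing args) with no pretend args, less
     the 16-byte scratch area for a leaf function.  (Illustrative numbers.)  */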
2732
2733  current_frame_info.total_size = total_size;
2734  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2735  current_frame_info.spill_size = spill_size;
2736  current_frame_info.extra_spill_size = extra_spill_size;
2737  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2738  current_frame_info.n_spilled = n_spilled;
2739  current_frame_info.initialized = reload_completed;
2740}
2741
2742/* Worker function for TARGET_CAN_ELIMINATE.  */
2743
2744bool
2745ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
2746{
2747  return (to == BR_REG (0) ? current_function_is_leaf : true);
2748}
2749
2750/* Compute the initial difference between the specified pair of registers.  */
2751
2752HOST_WIDE_INT
2753ia64_initial_elimination_offset (int from, int to)
2754{
2755  HOST_WIDE_INT offset;
2756
2757  ia64_compute_frame_size (get_frame_size ());
2758  switch (from)
2759    {
2760    case FRAME_POINTER_REGNUM:
2761      switch (to)
2762	{
2763	case HARD_FRAME_POINTER_REGNUM:
2764	  if (current_function_is_leaf)
2765	    offset = -current_frame_info.total_size;
2766	  else
2767	    offset = -(current_frame_info.total_size
2768		       - crtl->outgoing_args_size - 16);
2769	  break;
2770
2771	case STACK_POINTER_REGNUM:
2772	  if (current_function_is_leaf)
2773	    offset = 0;
2774	  else
2775	    offset = 16 + crtl->outgoing_args_size;
2776	  break;
2777
2778	default:
2779	  gcc_unreachable ();
2780	}
2781      break;
2782
2783    case ARG_POINTER_REGNUM:
2784      /* Arguments start above the 16 byte save area, unless stdarg,
2785	 in which case we store through the 16 byte save area.  */
2786      switch (to)
2787	{
2788	case HARD_FRAME_POINTER_REGNUM:
2789	  offset = 16 - crtl->args.pretend_args_size;
2790	  break;
2791
2792	case STACK_POINTER_REGNUM:
2793	  offset = (current_frame_info.total_size
2794		    + 16 - crtl->args.pretend_args_size);
2795	  break;
2796
2797	default:
2798	  gcc_unreachable ();
2799	}
2800      break;
2801
2802    default:
2803      gcc_unreachable ();
2804    }
2805
2806  return offset;
2807}
2808
2809/* If there are more than a trivial number of register spills, we use
2810   two interleaved iterators so that we can get two memory references
2811   per insn group.
2812
2813   In order to simplify things in the prologue and epilogue expanders,
2814   we use helper functions to fix up the memory references after the
2815   fact with the appropriate offsets to a POST_MODIFY memory mode.
2816   The following data structure tracks the state of the two iterators
2817   while insns are being emitted.  */
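/* For example, when two spills through the same iterator land in adjacent
   slots, the earlier memory reference is rewritten to a POST_MODIFY address,
   roughly "st8.spill [iter] = reg, 8" in the emitted assembly, so the
   iterator already points at the next slot and no separate add is needed.
   (Illustrative; the actual displacement is whatever the CFA offsets give.)  */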
2818
2819struct spill_fill_data
2820{
2821  rtx init_after;		/* point at which to emit initializations */
2822  rtx init_reg[2];		/* initial base register */
2823  rtx iter_reg[2];		/* the iterator registers */
2824  rtx *prev_addr[2];		/* address of last memory use */
2825  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2826  HOST_WIDE_INT prev_off[2];	/* last offset */
2827  int n_iter;			/* number of iterators in use */
2828  int next_iter;		/* next iterator to use */
2829  unsigned int save_gr_used_mask;
2830};
2831
2832static struct spill_fill_data spill_fill_data;
2833
2834static void
2835setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2836{
2837  int i;
2838
2839  spill_fill_data.init_after = get_last_insn ();
2840  spill_fill_data.init_reg[0] = init_reg;
2841  spill_fill_data.init_reg[1] = init_reg;
2842  spill_fill_data.prev_addr[0] = NULL;
2843  spill_fill_data.prev_addr[1] = NULL;
2844  spill_fill_data.prev_insn[0] = NULL;
2845  spill_fill_data.prev_insn[1] = NULL;
2846  spill_fill_data.prev_off[0] = cfa_off;
2847  spill_fill_data.prev_off[1] = cfa_off;
2848  spill_fill_data.next_iter = 0;
2849  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2850
2851  spill_fill_data.n_iter = 1 + (n_spills > 2);
2852  for (i = 0; i < spill_fill_data.n_iter; ++i)
2853    {
2854      int regno = next_scratch_gr_reg ();
2855      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2856      current_frame_info.gr_used_mask |= 1 << regno;
2857    }
2858}
2859
2860static void
2861finish_spill_pointers (void)
2862{
2863  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2864}
2865
2866static rtx
2867spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2868{
2869  int iter = spill_fill_data.next_iter;
2870  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2871  rtx disp_rtx = GEN_INT (disp);
2872  rtx mem;
2873
2874  if (spill_fill_data.prev_addr[iter])
2875    {
2876      if (satisfies_constraint_N (disp_rtx))
2877	{
2878	  *spill_fill_data.prev_addr[iter]
2879	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2880				   gen_rtx_PLUS (DImode,
2881						 spill_fill_data.iter_reg[iter],
2882						 disp_rtx));
2883	  add_reg_note (spill_fill_data.prev_insn[iter],
2884			REG_INC, spill_fill_data.iter_reg[iter]);
2885	}
2886      else
2887	{
2888	  /* ??? Could use register post_modify for loads.  */
2889	  if (!satisfies_constraint_I (disp_rtx))
2890	    {
2891	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2892	      emit_move_insn (tmp, disp_rtx);
2893	      disp_rtx = tmp;
2894	    }
2895	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2896				 spill_fill_data.iter_reg[iter], disp_rtx));
2897	}
2898    }
2899  /* Micro-optimization: if we've created a frame pointer, it's at
2900     CFA 0, which may allow the real iterator to be initialized lower,
2901     slightly increasing parallelism.  Also, if there are few saves
2902     it may eliminate the iterator entirely.  */
2903  else if (disp == 0
2904	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905	   && frame_pointer_needed)
2906    {
2907      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2908      set_mem_alias_set (mem, get_varargs_alias_set ());
2909      return mem;
2910    }
2911  else
2912    {
2913      rtx seq, insn;
2914
2915      if (disp == 0)
2916	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917			 spill_fill_data.init_reg[iter]);
2918      else
2919	{
2920	  start_sequence ();
2921
2922	  if (!satisfies_constraint_I (disp_rtx))
2923	    {
2924	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925	      emit_move_insn (tmp, disp_rtx);
2926	      disp_rtx = tmp;
2927	    }
2928
2929	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930				 spill_fill_data.init_reg[iter],
2931				 disp_rtx));
2932
2933	  seq = get_insns ();
2934	  end_sequence ();
2935	}
2936
2937      /* Careful for being the first insn in a sequence.  */
2938      if (spill_fill_data.init_after)
2939	insn = emit_insn_after (seq, spill_fill_data.init_after);
2940      else
2941	{
2942	  rtx first = get_insns ();
2943	  if (first)
2944	    insn = emit_insn_before (seq, first);
2945	  else
2946	    insn = emit_insn (seq);
2947	}
2948      spill_fill_data.init_after = insn;
2949    }
2950
2951  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2952
2953  /* ??? Not all of the spills are for varargs, but some of them are.
2954     The rest of the spills belong in an alias set of their own.  But
2955     it doesn't actually hurt to include them here.  */
2956  set_mem_alias_set (mem, get_varargs_alias_set ());
2957
2958  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959  spill_fill_data.prev_off[iter] = cfa_off;
2960
2961  if (++iter >= spill_fill_data.n_iter)
2962    iter = 0;
2963  spill_fill_data.next_iter = iter;
2964
2965  return mem;
2966}
2967
2968static void
2969do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2970	  rtx frame_reg)
2971{
2972  int iter = spill_fill_data.next_iter;
2973  rtx mem, insn;
2974
2975  mem = spill_restore_mem (reg, cfa_off);
2976  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2977  spill_fill_data.prev_insn[iter] = insn;
2978
2979  if (frame_reg)
2980    {
2981      rtx base;
2982      HOST_WIDE_INT off;
2983
2984      RTX_FRAME_RELATED_P (insn) = 1;
2985
2986      /* Don't even pretend that the unwind code can intuit its way
2987	 through a pair of interleaved post_modify iterators.  Just
2988	 provide the correct answer.  */
2989
2990      if (frame_pointer_needed)
2991	{
2992	  base = hard_frame_pointer_rtx;
2993	  off = - cfa_off;
2994	}
2995      else
2996	{
2997	  base = stack_pointer_rtx;
2998	  off = current_frame_info.total_size - cfa_off;
2999	}
3000
3001      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002		    gen_rtx_SET (VOIDmode,
3003				 gen_rtx_MEM (GET_MODE (reg),
3004					      plus_constant (base, off)),
3005				 frame_reg));
3006    }
3007}
3008
3009static void
3010do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3011{
3012  int iter = spill_fill_data.next_iter;
3013  rtx insn;
3014
3015  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016				GEN_INT (cfa_off)));
3017  spill_fill_data.prev_insn[iter] = insn;
3018}
3019
3020/* Wrapper functions that discard the CONST_INT spill offset.  These
3021   exist so that we can give gr_spill/gr_fill the offset they need and
3022   use a consistent function interface.  */
3023
3024static rtx
3025gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3026{
3027  return gen_movdi (dest, src);
3028}
3029
3030static rtx
3031gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3032{
3033  return gen_fr_spill (dest, src);
3034}
3035
3036static rtx
3037gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3038{
3039  return gen_fr_restore (dest, src);
3040}
3041
3042/* Called after register allocation to add any instructions needed for the
3043   prologue.  Using a prologue insn is favored compared to putting all of the
3044   instructions in output_function_prologue(), since it allows the scheduler
3045   to intermix instructions with the saves of the call-saved registers.  In
3046   some cases, it might be necessary to emit a barrier instruction as the last
3047   insn to prevent such scheduling.
3048
3049   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050   so that the debug info generation code can handle them properly.
3051
3052   The register save area is laid out like so:
3053   cfa+16
3054	[ varargs spill area ]
3055	[ fr register spill area ]
3056	[ br register spill area ]
3057	[ ar register spill area ]
3058	[ pr register spill area ]
3059	[ gr register spill area ] */
3060
3061/* ??? We get inefficient code when the frame size is larger than can fit in
3062   an adds instruction.  */
3063
3064void
3065ia64_expand_prologue (void)
3066{
3067  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3069  rtx reg, alt_reg;
3070
3071  ia64_compute_frame_size (get_frame_size ());
3072  last_scratch_gr_reg = 15;
3073
3074  if (dump_file)
3075    {
3076      fprintf (dump_file, "ia64 frame related registers "
3077               "recorded in current_frame_info.r[]:\n");
3078#define PRINTREG(a) if (current_frame_info.r[a]) \
3079        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3080      PRINTREG(reg_fp);
3081      PRINTREG(reg_save_b0);
3082      PRINTREG(reg_save_pr);
3083      PRINTREG(reg_save_ar_pfs);
3084      PRINTREG(reg_save_ar_unat);
3085      PRINTREG(reg_save_ar_lc);
3086      PRINTREG(reg_save_gp);
3087#undef PRINTREG
3088    }
3089
3090  /* If there is no epilogue, then we don't need some prologue insns.
3091     We need to avoid emitting the dead prologue insns, because flow
3092     will complain about them.  */
3093  if (optimize)
3094    {
3095      edge e;
3096      edge_iterator ei;
3097
3098      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3099	if ((e->flags & EDGE_FAKE) == 0
3100	    && (e->flags & EDGE_FALLTHRU) != 0)
3101	  break;
3102      epilogue_p = (e != NULL);
3103    }
3104  else
3105    epilogue_p = 1;
3106
3107  /* Set the local, input, and output register names.  We need to do this
3108     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109     half.  If we use in/loc/out register names, then we get assembler errors
3110     in crtn.S because there is no alloc insn or regstk directive in there.  */
3111  if (! TARGET_REG_NAMES)
3112    {
3113      int inputs = current_frame_info.n_input_regs;
3114      int locals = current_frame_info.n_local_regs;
3115      int outputs = current_frame_info.n_output_regs;
3116
3117      for (i = 0; i < inputs; i++)
3118	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119      for (i = 0; i < locals; i++)
3120	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121      for (i = 0; i < outputs; i++)
3122	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3123    }
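  /* E.g. with 2 inputs and 3 locals this maps in0/in1 to r32/r33 and
     loc0..loc2 to r34..r36, matching the stacked-register layout that the
     alloc insn below establishes.  */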
3124
3125  /* Set the frame pointer register name.  The regnum is logically loc79,
3126     but of course we'll not have allocated that many locals.  Rather than
3127     worrying about renumbering the existing rtxs, we adjust the name.  */
3128  /* ??? This code means that we can never use one local register when
3129     there is a frame pointer.  loc79 gets wasted in this case, as it is
3130     renamed to a register that will never be used.  See also the try_locals
3131     code in find_gr_spill.  */
3132  if (current_frame_info.r[reg_fp])
3133    {
3134      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3135      reg_names[HARD_FRAME_POINTER_REGNUM]
3136	= reg_names[current_frame_info.r[reg_fp]];
3137      reg_names[current_frame_info.r[reg_fp]] = tmp;
3138    }
3139
3140  /* We don't need an alloc instruction if we've used no outputs or locals.  */
3141  if (current_frame_info.n_local_regs == 0
3142      && current_frame_info.n_output_regs == 0
3143      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
3144      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3145    {
3146      /* If there is no alloc, but there are input registers used, then we
3147	 need a .regstk directive.  */
3148      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
3149      ar_pfs_save_reg = NULL_RTX;
3150    }
3151  else
3152    {
3153      current_frame_info.need_regstk = 0;
3154
3155      if (current_frame_info.r[reg_save_ar_pfs])
3156        {
3157	  regno = current_frame_info.r[reg_save_ar_pfs];
3158	  reg_emitted (reg_save_ar_pfs);
3159	}
3160      else
3161	regno = next_scratch_gr_reg ();
3162      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
3163
3164      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
3165				   GEN_INT (current_frame_info.n_input_regs),
3166				   GEN_INT (current_frame_info.n_local_regs),
3167				   GEN_INT (current_frame_info.n_output_regs),
3168				   GEN_INT (current_frame_info.n_rotate_regs)));
3169      RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0);
3170    }
3171
3172  /* Set up frame pointer, stack pointer, and spill iterators.  */
3173
3174  n_varargs = cfun->machine->n_varargs;
3175  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3176			stack_pointer_rtx, 0);
3177
3178  if (frame_pointer_needed)
3179    {
3180      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3181      RTX_FRAME_RELATED_P (insn) = 1;
3182    }
3183
3184  if (current_frame_info.total_size != 0)
3185    {
3186      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3187      rtx offset;
3188
3189      if (satisfies_constraint_I (frame_size_rtx))
3190	offset = frame_size_rtx;
3191      else
3192	{
3193	  regno = next_scratch_gr_reg ();
3194	  offset = gen_rtx_REG (DImode, regno);
3195	  emit_move_insn (offset, frame_size_rtx);
3196	}
3197
3198      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3199				    stack_pointer_rtx, offset));
3200
3201      if (! frame_pointer_needed)
3202	{
3203	  RTX_FRAME_RELATED_P (insn) = 1;
3204	  if (GET_CODE (offset) != CONST_INT)
3205	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3206			  gen_rtx_SET (VOIDmode,
3207				       stack_pointer_rtx,
3208				       gen_rtx_PLUS (DImode,
3209						     stack_pointer_rtx,
3210						     frame_size_rtx)));
3211	}
3212
3213      /* ??? At this point we must generate a magic insn that appears to
3214	 modify the stack pointer, the frame pointer, and all spill
3215	 iterators.  This would allow the most scheduling freedom.  For
3216	 now, just hard stop.  */
3217      emit_insn (gen_blockage ());
3218    }
3219
3220  /* Must copy out ar.unat before doing any integer spills.  */
3221  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3222    {
3223      if (current_frame_info.r[reg_save_ar_unat])
3224        {
3225	  ar_unat_save_reg
3226	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3227	  reg_emitted (reg_save_ar_unat);
3228	}
3229      else
3230	{
3231	  alt_regno = next_scratch_gr_reg ();
3232	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3233	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3234	}
3235
3236      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3237      insn = emit_move_insn (ar_unat_save_reg, reg);
3238      RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0);
3239
3240      /* Even if we're not going to generate an epilogue, we still
3241	 need to save the register so that EH works.  */
3242      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
3243	emit_insn (gen_prologue_use (ar_unat_save_reg));
3244    }
3245  else
3246    ar_unat_save_reg = NULL_RTX;
3247
3248  /* Spill all varargs registers.  Do this before spilling any GR registers,
3249     since we want the UNAT bits for the GR registers to override the UNAT
3250     bits from varargs, which we don't care about.  */
3251
3252  cfa_off = -16;
3253  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3254    {
3255      reg = gen_rtx_REG (DImode, regno);
3256      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3257    }
3258
3259  /* Locate the bottom of the register save area.  */
3260  cfa_off = (current_frame_info.spill_cfa_off
3261	     + current_frame_info.spill_size
3262	     + current_frame_info.extra_spill_size);
3263
3264  /* Save the predicate register block either in a register or in memory.  */
3265  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3266    {
3267      reg = gen_rtx_REG (DImode, PR_REG (0));
3268      if (current_frame_info.r[reg_save_pr] != 0)
3269	{
3270	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3271	  reg_emitted (reg_save_pr);
3272	  insn = emit_move_insn (alt_reg, reg);
3273
3274	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3275	     64 hard registers.  */
3276	  RTX_FRAME_RELATED_P (insn) = 1;
3277	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3278			gen_rtx_SET (VOIDmode, alt_reg, reg));
3279
3280	  /* Even if we're not going to generate an epilogue, we still
3281	     need to save the register so that EH works.  */
3282	  if (! epilogue_p)
3283	    emit_insn (gen_prologue_use (alt_reg));
3284	}
3285      else
3286	{
3287	  alt_regno = next_scratch_gr_reg ();
3288	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3289	  insn = emit_move_insn (alt_reg, reg);
3290	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3291	  cfa_off -= 8;
3292	}
3293    }
3294
3295  /* Handle AR regs in numerical order.  All of them get special handling.  */
3296  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3297      && current_frame_info.r[reg_save_ar_unat] == 0)
3298    {
3299      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3300      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3301      cfa_off -= 8;
3302    }
3303
3304  /* The alloc insn already copied ar.pfs into a general register.  The
3305     only thing we have to do now is copy that register to a stack slot
3306     if we'd not allocated a local register for the job.  */
3307  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3308      && current_frame_info.r[reg_save_ar_pfs] == 0)
3309    {
3310      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3311      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3312      cfa_off -= 8;
3313    }
3314
3315  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3316    {
3317      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3318      if (current_frame_info.r[reg_save_ar_lc] != 0)
3319	{
3320	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3321	  reg_emitted (reg_save_ar_lc);
3322	  insn = emit_move_insn (alt_reg, reg);
3323	  RTX_FRAME_RELATED_P (insn) = 1;
3324
3325	  /* Even if we're not going to generate an epilogue, we still
3326	     need to save the register so that EH works.  */
3327	  if (! epilogue_p)
3328	    emit_insn (gen_prologue_use (alt_reg));
3329	}
3330      else
3331	{
3332	  alt_regno = next_scratch_gr_reg ();
3333	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3334	  emit_move_insn (alt_reg, reg);
3335	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3336	  cfa_off -= 8;
3337	}
3338    }
3339
3340  /* Save the return pointer.  */
3341  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3342    {
3343      reg = gen_rtx_REG (DImode, BR_REG (0));
3344      if (current_frame_info.r[reg_save_b0] != 0)
3345	{
3346          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3347          reg_emitted (reg_save_b0);
3348	  insn = emit_move_insn (alt_reg, reg);
3349	  RTX_FRAME_RELATED_P (insn) = 1;
3350
3351	  /* Even if we're not going to generate an epilogue, we still
3352	     need to save the register so that EH works.  */
3353	  if (! epilogue_p)
3354	    emit_insn (gen_prologue_use (alt_reg));
3355	}
3356      else
3357	{
3358	  alt_regno = next_scratch_gr_reg ();
3359	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3360	  emit_move_insn (alt_reg, reg);
3361	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3362	  cfa_off -= 8;
3363	}
3364    }
3365
3366  if (current_frame_info.r[reg_save_gp])
3367    {
3368      reg_emitted (reg_save_gp);
3369      insn = emit_move_insn (gen_rtx_REG (DImode,
3370					  current_frame_info.r[reg_save_gp]),
3371			     pic_offset_table_rtx);
3372    }
3373
3374  /* We should now be at the base of the gr/br/fr spill area.  */
3375  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376			  + current_frame_info.spill_size));
3377
3378  /* Spill all general registers.  */
3379  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3380    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3381      {
3382	reg = gen_rtx_REG (DImode, regno);
3383	do_spill (gen_gr_spill, reg, cfa_off, reg);
3384	cfa_off -= 8;
3385      }
3386
3387  /* Spill the rest of the BR registers.  */
3388  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3389    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3390      {
3391	alt_regno = next_scratch_gr_reg ();
3392	alt_reg = gen_rtx_REG (DImode, alt_regno);
3393	reg = gen_rtx_REG (DImode, regno);
3394	emit_move_insn (alt_reg, reg);
3395	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3396	cfa_off -= 8;
3397      }
3398
3399  /* Align the frame and spill all FR registers.  */
3400  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3401    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3402      {
3403        gcc_assert (!(cfa_off & 15));
3404	reg = gen_rtx_REG (XFmode, regno);
3405	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3406	cfa_off -= 16;
3407      }
3408
3409  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3410
3411  finish_spill_pointers ();
3412}
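
/* Illustrative summary of the save area laid out by ia64_expand_prologue
   above: starting from the top of the spill area (spill_cfa_off
   + spill_size + extra_spill_size) and working downward, the memory slots
   hold the predicate block, ar.unat, ar.pfs, ar.lc and b0 (each only when
   no dedicated GR was reserved for it), then the spilled general
   registers, the branch registers and, 16-byte aligned, the floating
   point registers.  ia64_expand_epilogue below walks the same offsets in
   the same order when restoring.  */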
3413
3414/* Called after register allocation to add any instructions needed for the
3415   epilogue.  Using an epilogue insn is favored compared to putting all of the
3416   instructions in output_function_epilogue(), since it allows the scheduler
3417   to intermix instructions with the restores of the caller saved registers.
3418   In some cases, it might be necessary to emit a barrier instruction as the
3419   last insn to prevent such scheduling.  */
3420
3421void
3422ia64_expand_epilogue (int sibcall_p)
3423{
3424  rtx insn, reg, alt_reg, ar_unat_save_reg;
3425  int regno, alt_regno, cfa_off;
3426
3427  ia64_compute_frame_size (get_frame_size ());
3428
3429  /* If there is a frame pointer, then we use it instead of the stack
3430     pointer, so that the stack pointer does not need to be valid when
3431     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3432  if (frame_pointer_needed)
3433    setup_spill_pointers (current_frame_info.n_spilled,
3434			  hard_frame_pointer_rtx, 0);
3435  else
3436    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3437			  current_frame_info.total_size);
3438
3439  if (current_frame_info.total_size != 0)
3440    {
3441      /* ??? At this point we must generate a magic insn that appears to
3442         modify the spill iterators and the frame pointer.  This would
3443	 allow the most scheduling freedom.  For now, just hard stop.  */
3444      emit_insn (gen_blockage ());
3445    }
3446
3447  /* Locate the bottom of the register save area.  */
3448  cfa_off = (current_frame_info.spill_cfa_off
3449	     + current_frame_info.spill_size
3450	     + current_frame_info.extra_spill_size);
3451
3452  /* Restore the predicate registers.  */
3453  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3454    {
3455      if (current_frame_info.r[reg_save_pr] != 0)
3456        {
3457	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
3458	  reg_emitted (reg_save_pr);
3459	}
3460      else
3461	{
3462	  alt_regno = next_scratch_gr_reg ();
3463	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3464	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3465	  cfa_off -= 8;
3466	}
3467      reg = gen_rtx_REG (DImode, PR_REG (0));
3468      emit_move_insn (reg, alt_reg);
3469    }
3470
3471  /* Restore the application registers.  */
3472
3473  /* Load the saved unat from the stack, but do not restore it until
3474     after the GRs have been restored.  */
3475  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3476    {
3477      if (current_frame_info.r[reg_save_ar_unat] != 0)
3478        {
3479          ar_unat_save_reg
3480	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
3481	  reg_emitted (reg_save_ar_unat);
3482	}
3483      else
3484	{
3485	  alt_regno = next_scratch_gr_reg ();
3486	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3487	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3488	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3489	  cfa_off -= 8;
3490	}
3491    }
3492  else
3493    ar_unat_save_reg = NULL_RTX;
3494
3495  if (current_frame_info.r[reg_save_ar_pfs] != 0)
3496    {
3497      reg_emitted (reg_save_ar_pfs);
3498      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
3499      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3500      emit_move_insn (reg, alt_reg);
3501    }
3502  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3503    {
3504      alt_regno = next_scratch_gr_reg ();
3505      alt_reg = gen_rtx_REG (DImode, alt_regno);
3506      do_restore (gen_movdi_x, alt_reg, cfa_off);
3507      cfa_off -= 8;
3508      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3509      emit_move_insn (reg, alt_reg);
3510    }
3511
3512  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3513    {
3514      if (current_frame_info.r[reg_save_ar_lc] != 0)
3515        {
3516	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
3517          reg_emitted (reg_save_ar_lc);
3518	}
3519      else
3520	{
3521	  alt_regno = next_scratch_gr_reg ();
3522	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3523	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3524	  cfa_off -= 8;
3525	}
3526      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3527      emit_move_insn (reg, alt_reg);
3528    }
3529
3530  /* Restore the return pointer.  */
3531  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3532    {
3533      if (current_frame_info.r[reg_save_b0] != 0)
3534        {
3535         alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3536         reg_emitted (reg_save_b0);
3537        }
3538      else
3539	{
3540	  alt_regno = next_scratch_gr_reg ();
3541	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3542	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3543	  cfa_off -= 8;
3544	}
3545      reg = gen_rtx_REG (DImode, BR_REG (0));
3546      emit_move_insn (reg, alt_reg);
3547    }
3548
3549  /* We should now be at the base of the gr/br/fr spill area.  */
3550  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3551			  + current_frame_info.spill_size));
3552
3553  /* The GP may be stored on the stack in the prologue, but it's
3554     never restored in the epilogue.  Skip the stack slot.  */
3555  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3556    cfa_off -= 8;
3557
3558  /* Restore all general registers.  */
3559  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3560    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3561      {
3562	reg = gen_rtx_REG (DImode, regno);
3563	do_restore (gen_gr_restore, reg, cfa_off);
3564	cfa_off -= 8;
3565      }
3566
3567  /* Restore the branch registers.  */
3568  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3569    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3570      {
3571	alt_regno = next_scratch_gr_reg ();
3572	alt_reg = gen_rtx_REG (DImode, alt_regno);
3573	do_restore (gen_movdi_x, alt_reg, cfa_off);
3574	cfa_off -= 8;
3575	reg = gen_rtx_REG (DImode, regno);
3576	emit_move_insn (reg, alt_reg);
3577      }
3578
3579  /* Restore floating point registers.  */
3580  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3581    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3582      {
3583        gcc_assert (!(cfa_off & 15));
3584	reg = gen_rtx_REG (XFmode, regno);
3585	do_restore (gen_fr_restore_x, reg, cfa_off);
3586	cfa_off -= 16;
3587      }
3588
3589  /* Restore ar.unat for real.  */
3590  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3591    {
3592      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3593      emit_move_insn (reg, ar_unat_save_reg);
3594    }
3595
3596  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3597
3598  finish_spill_pointers ();
3599
3600  if (current_frame_info.total_size
3601      || cfun->machine->ia64_eh_epilogue_sp
3602      || frame_pointer_needed)
3603    {
3604      /* ??? At this point we must generate a magic insn that appears to
3605         modify the spill iterators, the stack pointer, and the frame
3606	 pointer.  This would allow the most scheduling freedom.  For now,
3607	 just hard stop.  */
3608      emit_insn (gen_blockage ());
3609    }
3610
3611  if (cfun->machine->ia64_eh_epilogue_sp)
3612    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3613  else if (frame_pointer_needed)
3614    {
3615      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3616      RTX_FRAME_RELATED_P (insn) = 1;
3617    }
3618  else if (current_frame_info.total_size)
3619    {
3620      rtx offset, frame_size_rtx;
3621
3622      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3623      if (satisfies_constraint_I (frame_size_rtx))
3624	offset = frame_size_rtx;
3625      else
3626	{
3627	  regno = next_scratch_gr_reg ();
3628	  offset = gen_rtx_REG (DImode, regno);
3629	  emit_move_insn (offset, frame_size_rtx);
3630	}
3631
3632      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3633				    offset));
3634
3635      RTX_FRAME_RELATED_P (insn) = 1;
3636      if (GET_CODE (offset) != CONST_INT)
3637	add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3638		      gen_rtx_SET (VOIDmode,
3639				   stack_pointer_rtx,
3640				   gen_rtx_PLUS (DImode,
3641						 stack_pointer_rtx,
3642						 frame_size_rtx)));
3643    }
3644
3645  if (cfun->machine->ia64_eh_epilogue_bsp)
3646    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3647
3648  if (! sibcall_p)
3649    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3650  else
3651    {
3652      int fp = GR_REG (2);
3653      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is
3654	 the first available call-clobbered register.  If a frame pointer
3655	 register was allocated, we may have swapped the names of r2 and
3656	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
3657	 string "r2" when emitting the register name for the assembler.  */
3658      if (current_frame_info.r[reg_fp]
3659          && current_frame_info.r[reg_fp] == GR_REG (2))
3660	fp = HARD_FRAME_POINTER_REGNUM;
3661
3662      /* We must emit an alloc to force the input registers to become output
3663	 registers.  Otherwise, if the callee tries to pass its parameters
3664	 through to another call without an intervening alloc, then these
3665	 values get lost.  */
3666      /* ??? We don't need to preserve all input registers.  We only need to
3667	 preserve those input registers used as arguments to the sibling call.
3668	 It is unclear how to compute that number here.  */
3669      if (current_frame_info.n_input_regs != 0)
3670	{
3671	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3672	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3673				const0_rtx, const0_rtx,
3674				n_inputs, const0_rtx));
3675	  RTX_FRAME_RELATED_P (insn) = 1;
3676	}
3677    }
3678}
3679
3680/* Return 1 if br.ret can do all the work required to return from a
3681   function.  */
3682
3683int
3684ia64_direct_return (void)
3685{
3686  if (reload_completed && ! frame_pointer_needed)
3687    {
3688      ia64_compute_frame_size (get_frame_size ());
3689
3690      return (current_frame_info.total_size == 0
3691	      && current_frame_info.n_spilled == 0
3692	      && current_frame_info.r[reg_save_b0] == 0
3693	      && current_frame_info.r[reg_save_pr] == 0
3694	      && current_frame_info.r[reg_save_ar_pfs] == 0
3695	      && current_frame_info.r[reg_save_ar_unat] == 0
3696	      && current_frame_info.r[reg_save_ar_lc] == 0);
3697    }
3698  return 0;
3699}
3700
3701/* Return the magic cookie that we use to hold the return address
3702   during early compilation.  */
3703
3704rtx
3705ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3706{
3707  if (count != 0)
3708    return NULL;
3709  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3710}
3711
3712/* Split this value after reload, now that we know where the return
3713   address is saved.  */
3714
3715void
3716ia64_split_return_addr_rtx (rtx dest)
3717{
3718  rtx src;
3719
3720  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3721    {
3722      if (current_frame_info.r[reg_save_b0] != 0)
3723        {
3724	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
3725	  reg_emitted (reg_save_b0);
3726	}
3727      else
3728	{
3729	  HOST_WIDE_INT off;
3730	  unsigned int regno;
3731	  rtx off_r;
3732
3733	  /* Compute offset from CFA for BR0.  */
3734	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3735	  off = (current_frame_info.spill_cfa_off
3736		 + current_frame_info.spill_size);
3737	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3738	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3739	      off -= 8;
3740
3741	  /* Convert CFA offset to a register based offset.  */
3742	  if (frame_pointer_needed)
3743	    src = hard_frame_pointer_rtx;
3744	  else
3745	    {
3746	      src = stack_pointer_rtx;
3747	      off += current_frame_info.total_size;
3748	    }
3749
3750	  /* Load address into scratch register.  */
3751	  off_r = GEN_INT (off);
3752	  if (satisfies_constraint_I (off_r))
3753	    emit_insn (gen_adddi3 (dest, src, off_r));
3754	  else
3755	    {
3756	      emit_move_insn (dest, off_r);
3757	      emit_insn (gen_adddi3 (dest, src, dest));
3758	    }
3759
3760	  src = gen_rtx_MEM (Pmode, dest);
3761	}
3762    }
3763  else
3764    src = gen_rtx_REG (DImode, BR_REG (0));
3765
3766  emit_move_insn (dest, src);
3767}
3768
3769int
3770ia64_hard_regno_rename_ok (int from, int to)
3771{
3772  /* Don't clobber any of the registers we reserved for the prologue.  */
3773  unsigned int r;
3774
3775  for (r = reg_fp; r <= reg_save_ar_lc; r++)
3776    if (to == current_frame_info.r[r]
3777        || from == current_frame_info.r[r]
3778        || to == emitted_frame_related_regs[r]
3779        || from == emitted_frame_related_regs[r])
3780      return 0;
3781
3782  /* Don't use output registers outside the register frame.  */
3783  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3784    return 0;
3785
3786  /* Retain even/oddness on predicate register pairs.  */
3787  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3788    return (from & 1) == (to & 1);
3789
3790  return 1;
3791}
3792
3793/* Target hook for assembling integer objects.  Handle word-sized
3794   aligned objects and detect the cases when @fptr is needed.  */
3795
3796static bool
3797ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3798{
3799  if (size == POINTER_SIZE / BITS_PER_UNIT
3800      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3801      && GET_CODE (x) == SYMBOL_REF
3802      && SYMBOL_REF_FUNCTION_P (x))
3803    {
3804      static const char * const directive[2][2] = {
3805	  /* 64-bit pointer */  /* 32-bit pointer */
3806	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3807	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3808      };
3809      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3810      output_addr_const (asm_out_file, x);
3811      fputs (")\n", asm_out_file);
3812      return true;
3813    }
3814  return default_assemble_integer (x, size, aligned_p);
3815}
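
/* For illustration: with the default 64-bit pointers, and neither
   TARGET_NO_PIC nor TARGET_AUTO_PIC in effect, an aligned pointer to a
   function `foo' is emitted as "data8 @fptr(foo)" and an unaligned one as
   "data8.ua @fptr(foo)"; everything else falls through to
   default_assemble_integer.  */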
3816
3817/* Emit the function prologue.  */
3818
3819static void
3820ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3821{
3822  int mask, grsave, grsave_prev;
3823
3824  if (current_frame_info.need_regstk)
3825    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3826	     current_frame_info.n_input_regs,
3827	     current_frame_info.n_local_regs,
3828	     current_frame_info.n_output_regs,
3829	     current_frame_info.n_rotate_regs);
3830
3831  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3832    return;
3833
3834  /* Emit the .prologue directive.  */
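  /* The mask bits encode which registers are saved in consecutive general
     registers starting at GRSAVE: 8 = b0 (rp), 4 = ar.pfs, 2 = the frame
     pointer and 1 = pr -- hence the grsave_prev + 1 checks below.  */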
3835
3836  mask = 0;
3837  grsave = grsave_prev = 0;
3838  if (current_frame_info.r[reg_save_b0] != 0)
3839    {
3840      mask |= 8;
3841      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
3842    }
3843  if (current_frame_info.r[reg_save_ar_pfs] != 0
3844      && (grsave_prev == 0
3845	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
3846    {
3847      mask |= 4;
3848      if (grsave_prev == 0)
3849	grsave = current_frame_info.r[reg_save_ar_pfs];
3850      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
3851    }
3852  if (current_frame_info.r[reg_fp] != 0
3853      && (grsave_prev == 0
3854	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
3855    {
3856      mask |= 2;
3857      if (grsave_prev == 0)
3858	grsave = HARD_FRAME_POINTER_REGNUM;
3859      grsave_prev = current_frame_info.r[reg_fp];
3860    }
3861  if (current_frame_info.r[reg_save_pr] != 0
3862      && (grsave_prev == 0
3863	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
3864    {
3865      mask |= 1;
3866      if (grsave_prev == 0)
3867	grsave = current_frame_info.r[reg_save_pr];
3868    }
3869
3870  if (mask && TARGET_GNU_AS)
3871    fprintf (file, "\t.prologue %d, %d\n", mask,
3872	     ia64_dbx_register_number (grsave));
3873  else
3874    fputs ("\t.prologue\n", file);
3875
3876  /* Emit a .spill directive, if necessary, to relocate the base of
3877     the register spill area.  */
3878  if (current_frame_info.spill_cfa_off != -16)
3879    fprintf (file, "\t.spill %ld\n",
3880	     (long) (current_frame_info.spill_cfa_off
3881		     + current_frame_info.spill_size));
3882}
3883
3884/* Emit the .body directive at the scheduled end of the prologue.  */
3885
3886static void
3887ia64_output_function_end_prologue (FILE *file)
3888{
3889  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3890    return;
3891
3892  fputs ("\t.body\n", file);
3893}
3894
3895/* Emit the function epilogue.  */
3896
3897static void
3898ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3899			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3900{
3901  int i;
3902
3903  if (current_frame_info.r[reg_fp])
3904    {
3905      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3906      reg_names[HARD_FRAME_POINTER_REGNUM]
3907	= reg_names[current_frame_info.r[reg_fp]];
3908      reg_names[current_frame_info.r[reg_fp]] = tmp;
3909      reg_emitted (reg_fp);
3910    }
3911  if (! TARGET_REG_NAMES)
3912    {
3913      for (i = 0; i < current_frame_info.n_input_regs; i++)
3914	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3915      for (i = 0; i < current_frame_info.n_local_regs; i++)
3916	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3917      for (i = 0; i < current_frame_info.n_output_regs; i++)
3918	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3919    }
3920
3921  current_frame_info.initialized = 0;
3922}
3923
3924int
3925ia64_dbx_register_number (int regno)
3926{
3927  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3928     from its home at loc79 to something inside the register frame.  We
3929     must perform the same renumbering here for the debug info.  */
3930  if (current_frame_info.r[reg_fp])
3931    {
3932      if (regno == HARD_FRAME_POINTER_REGNUM)
3933	regno = current_frame_info.r[reg_fp];
3934      else if (regno == current_frame_info.r[reg_fp])
3935	regno = HARD_FRAME_POINTER_REGNUM;
3936    }
3937
3938  if (IN_REGNO_P (regno))
3939    return 32 + regno - IN_REG (0);
3940  else if (LOC_REGNO_P (regno))
3941    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3942  else if (OUT_REGNO_P (regno))
3943    return (32 + current_frame_info.n_input_regs
3944	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3945  else
3946    return regno;
3947}
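
/* For example: in a function with 2 input and 3 local registers, in0 and
   in1 map to debug register numbers 32 and 33, loc0..loc2 to 34..36 and
   out0 to 37, matching the runtime order of the stacked register frame;
   all other registers keep their hard register number.  */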
3948
3949/* Implement TARGET_TRAMPOLINE_INIT.
3950
3951   The trampoline should set the static chain pointer to value placed
3952   into the trampoline and should branch to the specified routine.
3953   To make the normal indirect-subroutine calling convention work,
3954   the trampoline must look like a function descriptor; the first
3955   word being the target address and the second being the target's
3956   global pointer.
3957
3958   We abuse the concept of a global pointer by arranging for it
3959   to point to the data we need to load.  The complete trampoline
3960   has the following form:
3961
3962		+-------------------+ \
3963	TRAMP:	| __ia64_trampoline | |
3964		+-------------------+  > fake function descriptor
3965		| TRAMP+16          | |
3966		+-------------------+ /
3967		| target descriptor |
3968		+-------------------+
3969		| static link	    |
3970		+-------------------+
3971*/
3972
3973static void
3974ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
3975{
3976  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3977  rtx addr, addr_reg, tramp, eight = GEN_INT (8);
3978
3979  /* The Intel assembler requires that the global __ia64_trampoline symbol
3980     be declared explicitly.  */
3981  if (!TARGET_GNU_AS)
3982    {
3983      static bool declared_ia64_trampoline = false;
3984
3985      if (!declared_ia64_trampoline)
3986	{
3987	  declared_ia64_trampoline = true;
3988	  (*targetm.asm_out.globalize_label) (asm_out_file,
3989					      "__ia64_trampoline");
3990	}
3991    }
3992
3993  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3994  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
3995  fnaddr = convert_memory_address (Pmode, fnaddr);
3996  static_chain = convert_memory_address (Pmode, static_chain);
3997
3998  /* Load up our iterator.  */
3999  addr_reg = copy_to_reg (addr);
4000  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);
4001
4002  /* The first two words are the fake descriptor:
4003     __ia64_trampoline, ADDR+16.  */
4004  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
4005  if (TARGET_ABI_OPEN_VMS)
4006    {
4007      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
4008	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
4009	 relocation against function symbols to make it identical to the
4010	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
4011	 strict ELF and dereference to get the bare code address.  */
4012      rtx reg = gen_reg_rtx (Pmode);
4013      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4014      emit_move_insn (reg, tramp);
4015      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4016      tramp = reg;
4017   }
4018  emit_move_insn (m_tramp, tramp);
4019  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4020  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4021
4022  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
4023  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4024  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4025
4026  /* The third word is the target descriptor.  */
4027  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4028  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4029  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4030
4031  /* The fourth word is the static chain.  */
4032  emit_move_insn (m_tramp, static_chain);
4033}
4034
4035/* Do any needed setup for a variadic function.  CUM has not been updated
4036   for the last named argument which has type TYPE and mode MODE.
4037
4038   We generate the actual spill instructions during prologue generation.  */
4039
4040static void
4041ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4042			     tree type, int * pretend_size,
4043			     int second_time ATTRIBUTE_UNUSED)
4044{
4045  CUMULATIVE_ARGS next_cum = *cum;
4046
4047  /* Skip the current argument.  */
4048  ia64_function_arg_advance (&next_cum, mode, type, 1);
4049
4050  if (next_cum.words < MAX_ARGUMENT_SLOTS)
4051    {
4052      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4053      *pretend_size = n * UNITS_PER_WORD;
4054      cfun->machine->n_varargs = n;
4055    }
4056}
4057
4058/* Check whether TYPE is a homogeneous floating point aggregate.  If
4059   it is, return the mode of the floating point type that appears
4060   in all leaves.  If it is not, return VOIDmode.
4061
4062   An aggregate is a homogeneous floating point aggregate if all
4063   fields/elements in it have the same floating point type (e.g.,
4064   SFmode).  128-bit quad-precision floats are excluded.
4065
4066   Variable sized aggregates should never arrive here, since we should
4067   have already decided to pass them by reference.  Top-level zero-sized
4068   aggregates are excluded because our parallels crash the middle-end.  */
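
/* Some illustrative cases: struct { float x, y, z; } yields SFmode and
   struct { double d[4]; } yields DFmode, while a struct that mixes float
   and double fields, or one that contains a quad-precision (TFmode)
   field, yields VOIDmode.  */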
4069
4070static enum machine_mode
4071hfa_element_mode (const_tree type, bool nested)
4072{
4073  enum machine_mode element_mode = VOIDmode;
4074  enum machine_mode mode;
4075  enum tree_code code = TREE_CODE (type);
4076  int know_element_mode = 0;
4077  tree t;
4078
4079  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4080    return VOIDmode;
4081
4082  switch (code)
4083    {
4084    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
4085    case BOOLEAN_TYPE:	case POINTER_TYPE:
4086    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
4087    case LANG_TYPE:		case FUNCTION_TYPE:
4088      return VOIDmode;
4089
4090      /* Fortran complex types are supposed to be HFAs, so we need to handle
4091	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
4092	 types though.  */
4093    case COMPLEX_TYPE:
4094      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4095	  && TYPE_MODE (type) != TCmode)
4096	return GET_MODE_INNER (TYPE_MODE (type));
4097      else
4098	return VOIDmode;
4099
4100    case REAL_TYPE:
4101      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4102	 mode if this is contained within an aggregate.  */
4103      if (nested && TYPE_MODE (type) != TFmode)
4104	return TYPE_MODE (type);
4105      else
4106	return VOIDmode;
4107
4108    case ARRAY_TYPE:
4109      return hfa_element_mode (TREE_TYPE (type), 1);
4110
4111    case RECORD_TYPE:
4112    case UNION_TYPE:
4113    case QUAL_UNION_TYPE:
4114      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
4115	{
4116	  if (TREE_CODE (t) != FIELD_DECL)
4117	    continue;
4118
4119	  mode = hfa_element_mode (TREE_TYPE (t), 1);
4120	  if (know_element_mode)
4121	    {
4122	      if (mode != element_mode)
4123		return VOIDmode;
4124	    }
4125	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
4126	    return VOIDmode;
4127	  else
4128	    {
4129	      know_element_mode = 1;
4130	      element_mode = mode;
4131	    }
4132	}
4133      return element_mode;
4134
4135    default:
4136      /* If we reach here, we probably have some front-end specific type
4137	 that the backend doesn't know about.  This can happen via the
4138	 aggregate_value_p call in init_function_start.  All we can do is
4139	 ignore unknown tree types.  */
4140      return VOIDmode;
4141    }
4142
4143  return VOIDmode;
4144}
4145
4146/* Return the number of words required to hold a quantity of TYPE and MODE
4147   when passed as an argument.  */
4148static int
4149ia64_function_arg_words (tree type, enum machine_mode mode)
4150{
4151  int words;
4152
4153  if (mode == BLKmode)
4154    words = int_size_in_bytes (type);
4155  else
4156    words = GET_MODE_SIZE (mode);
4157
4158  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
4159}
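
/* For example, assuming 8-byte argument slots: a DImode value needs
   (8 + 7) / 8 = 1 slot and a 20-byte BLKmode aggregate needs
   (20 + 7) / 8 = 3 slots.  */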
4160
4161/* Return the number of registers that should be skipped so the current
4162   argument (described by TYPE and WORDS) will be properly aligned.
4163
4164   Integer and float arguments larger than 8 bytes start at the next
4165   even boundary.  Aggregates larger than 8 bytes start at the next
4166   even boundary if the aggregate has 16 byte alignment.  Note that
4167   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
4168   but are still to be aligned in registers.
4169
4170   ??? The ABI does not specify how to handle aggregates with
4171   alignment from 9 to 15 bytes, or greater than 16.  We handle them
4172   all as if they had 16 byte alignment.  Such aggregates can occur
4173   only if gcc extensions are used.  */
4174static int
4175ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
4176{
4177  /* No registers are skipped on VMS.  */
4178  if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
4179    return 0;
4180
4181  if (type
4182      && TREE_CODE (type) != INTEGER_TYPE
4183      && TREE_CODE (type) != REAL_TYPE)
4184    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
4185  else
4186    return words > 1;
4187}
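
/* For example: when cum->words is odd, a 16-byte-aligned aggregate or a
   16-byte scalar skips one slot so that it starts on an even slot, while
   an int or a double (a single word) never does; when cum->words is even,
   or on VMS, no slot is skipped.  */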
4188
4189/* Return rtx for register where argument is passed, or zero if it is passed
4190   on the stack.  */
4191/* ??? 128-bit quad-precision floats are always passed in general
4192   registers.  */
4193
4194rtx
4195ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
4196		   int named, int incoming)
4197{
4198  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
4199  int words = ia64_function_arg_words (type, mode);
4200  int offset = ia64_function_arg_offset (cum, type, words);
4201  enum machine_mode hfa_mode = VOIDmode;
4202
4203  /* For OpenVMS, emit the instruction setting up the argument register here,
4204     where we know it will be emitted together with the other argument setup
4205     insns.  This is not conceptually the best place to do this, but it is
4206     the easiest, as we have convenient access to cumulative args info.  */
4207
4208  if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node
4209      && named == 1)
4210    {
4211      unsigned HOST_WIDE_INT regval = cum->words;
4212      int i;
4213
4214      for (i = 0; i < 8; i++)
4215	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);
4216
4217      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
4218		      GEN_INT (regval));
4219    }
4220
4221  /* If all argument slots are used, then it must go on the stack.  */
4222  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4223    return 0;
4224
4225  /* Check for and handle homogeneous FP aggregates.  */
4226  if (type)
4227    hfa_mode = hfa_element_mode (type, 0);
4228
4229  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4230     and unprototyped hfas are passed specially.  */
4231  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4232    {
4233      rtx loc[16];
4234      int i = 0;
4235      int fp_regs = cum->fp_regs;
4236      int int_regs = cum->words + offset;
4237      int hfa_size = GET_MODE_SIZE (hfa_mode);
4238      int byte_size;
4239      int args_byte_size;
4240
4241      /* If prototyped, pass it in FR regs then GR regs.
4242	 If not prototyped, pass it in both FR and GR regs.
4243
4244	 If this is an SFmode aggregate, then it is possible to run out of
4245	 FR regs while GR regs are still left.  In that case, we pass the
4246	 remaining part in the GR regs.  */
4247
4248      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4249	 of the argument, the last FP register, or the last argument slot.  */
4250
4251      byte_size = ((mode == BLKmode)
4252		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4253      args_byte_size = int_regs * UNITS_PER_WORD;
4254      offset = 0;
4255      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4256	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4257	{
4258	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4259				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4260							      + fp_regs)),
4261				      GEN_INT (offset));
4262	  offset += hfa_size;
4263	  args_byte_size += hfa_size;
4264	  fp_regs++;
4265	}
4266
4267      /* If no prototype, then the whole thing must go in GR regs.  */
4268      if (! cum->prototype)
4269	offset = 0;
4270      /* If this is an SFmode aggregate, then we might have some left over
4271	 that needs to go in GR regs.  */
4272      else if (byte_size != offset)
4273	int_regs += offset / UNITS_PER_WORD;
4274
4275      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4276
4277      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4278	{
4279	  enum machine_mode gr_mode = DImode;
4280	  unsigned int gr_size;
4281
4282	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4283	     then this goes in a GR reg left adjusted/little endian, right
4284	     adjusted/big endian.  */
4285	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4286	     always right adjusted/little endian.  */
4287	  if (offset & 0x4)
4288	    gr_mode = SImode;
4289	  /* If we have an even 4 byte hunk because the aggregate is a
4290	     multiple of 4 bytes in size, then this goes in a GR reg right
4291	     adjusted/little endian.  */
4292	  else if (byte_size - offset == 4)
4293	    gr_mode = SImode;
4294
4295	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4296				      gen_rtx_REG (gr_mode, (basereg
4297							     + int_regs)),
4298				      GEN_INT (offset));
4299
4300	  gr_size = GET_MODE_SIZE (gr_mode);
4301	  offset += gr_size;
4302	  if (gr_size == UNITS_PER_WORD
4303	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4304	    int_regs++;
4305	  else if (gr_size > UNITS_PER_WORD)
4306	    int_regs += gr_size / UNITS_PER_WORD;
4307	}
4308      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4309    }
4310
4311  /* On OpenVMS a variable argument is passed in either Rn or Fn.  */
4312  else if (TARGET_ABI_OPEN_VMS && named == 0)
4313    {
4314      if (FLOAT_MODE_P (mode))
4315	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words);
4316      else
4317	return gen_rtx_REG (mode, basereg + cum->words);
4318    }
4319
4320  /* Integral and aggregates go in general registers.  If we have run out of
4321     FR registers, then FP values must also go in general registers.  This can
4322     happen when we have a SFmode HFA.  */
4323  else if (mode == TFmode || mode == TCmode
4324	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4325    {
4326      int byte_size = ((mode == BLKmode)
4327                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4328      if (BYTES_BIG_ENDIAN
4329	&& (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4330	&& byte_size < UNITS_PER_WORD
4331	&& byte_size > 0)
4332	{
4333	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4334					  gen_rtx_REG (DImode,
4335						       (basereg + cum->words
4336							+ offset)),
4337					  const0_rtx);
4338	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4339	}
4340      else
4341	return gen_rtx_REG (mode, basereg + cum->words + offset);
4342
4343    }
4344
4345  /* If there is a prototype, then FP values go in a FR register when
4346     named, and in a GR register when unnamed.  */
4347  else if (cum->prototype)
4348    {
4349      if (named)
4350	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4351      /* In big-endian mode, an anonymous SFmode value must be represented
4352         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4353	 the value into the high half of the general register.  */
4354      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4355	return gen_rtx_PARALLEL (mode,
4356		 gen_rtvec (1,
4357                   gen_rtx_EXPR_LIST (VOIDmode,
4358		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4359				      const0_rtx)));
4360      else
4361	return gen_rtx_REG (mode, basereg + cum->words + offset);
4362    }
4363  /* If there is no prototype, then FP values go in both FR and GR
4364     registers.  */
4365  else
4366    {
4367      /* See comment above.  */
4368      enum machine_mode inner_mode =
4369	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4370
4371      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4372				      gen_rtx_REG (mode, (FR_ARG_FIRST
4373							  + cum->fp_regs)),
4374				      const0_rtx);
4375      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4376				      gen_rtx_REG (inner_mode,
4377						   (basereg + cum->words
4378						    + offset)),
4379				      const0_rtx);
4380
4381      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4382    }
4383}
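
/* An illustrative case: a named, prototyped struct { float x, y, z; }
   argument seen with cum->fp_regs == 0 and cum->words == 0 comes back as
   a PARALLEL placing SFmode pieces in f8, f9 and f10 at byte offsets 0, 4
   and 8; only once the FR argument registers (or the argument slots) run
   out does the remainder of an SFmode HFA go into general registers.  */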
4384
4385/* Return the number of bytes, at the beginning of the argument, that must be
4386   put in registers.  0 if the argument is entirely in registers or entirely
4387   in memory.  */
4388
4389static int
4390ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4391			tree type, bool named ATTRIBUTE_UNUSED)
4392{
4393  int words = ia64_function_arg_words (type, mode);
4394  int offset = ia64_function_arg_offset (cum, type, words);
4395
4396  /* If all argument slots are used, then it must go on the stack.  */
4397  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4398    return 0;
4399
4400  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4401     it fits within the 8 argument slots, then it goes entirely in
4402     registers.  If it extends past the last argument slot, then the rest
4403     goes on the stack.  */
4404
4405  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4406    return 0;
4407
4408  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4409}
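
/* For example, assuming 8-byte slots: with cum->words == 6 and no
   alignment skip, a 24-byte aggregate has its first (8 - 6) * 8 = 16
   bytes passed in the last two argument slots and the remaining 8 bytes
   placed on the stack.  */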
4410
4411/* Return ivms_arg_type based on machine_mode.  */
4412
4413static enum ivms_arg_type
4414ia64_arg_type (enum machine_mode mode)
4415{
4416  switch (mode)
4417    {
4418    case SFmode:
4419      return FS;
4420    case DFmode:
4421      return FT;
4422    default:
4423      return I64;
4424    }
4425}
4426
4427/* Update CUM to point after this argument.  This is patterned after
4428   ia64_function_arg.  */
4429
4430void
4431ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4432			   tree type, int named)
4433{
4434  int words = ia64_function_arg_words (type, mode);
4435  int offset = ia64_function_arg_offset (cum, type, words);
4436  enum machine_mode hfa_mode = VOIDmode;
4437
4438  /* If all arg slots are already full, then there is nothing to do.  */
4439  if (cum->words >= MAX_ARGUMENT_SLOTS)
4440    {
4441      cum->words += words + offset;
4442      return;
4443    }
4444
4445  cum->atypes[cum->words] = ia64_arg_type (mode);
4446  cum->words += words + offset;
4447
4448  /* Check for and handle homogeneous FP aggregates.  */
4449  if (type)
4450    hfa_mode = hfa_element_mode (type, 0);
4451
4452  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4453     and unprototyped hfas are passed specially.  */
4454  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4455    {
4456      int fp_regs = cum->fp_regs;
4457      /* This is the original value of cum->words + offset.  */
4458      int int_regs = cum->words - words;
4459      int hfa_size = GET_MODE_SIZE (hfa_mode);
4460      int byte_size;
4461      int args_byte_size;
4462
4463      /* If prototyped, pass it in FR regs then GR regs.
4464	 If not prototyped, pass it in both FR and GR regs.
4465
4466	 If this is an SFmode aggregate, then it is possible to run out of
4467	 FR regs while GR regs are still left.  In that case, we pass the
4468	 remaining part in the GR regs.  */
4469
4470      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4471	 of the argument, the last FP register, or the last argument slot.  */
4472
4473      byte_size = ((mode == BLKmode)
4474		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4475      args_byte_size = int_regs * UNITS_PER_WORD;
4476      offset = 0;
4477      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4478	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4479	{
4480	  offset += hfa_size;
4481	  args_byte_size += hfa_size;
4482	  fp_regs++;
4483	}
4484
4485      cum->fp_regs = fp_regs;
4486    }
4487
4488  /* On OpenVMS a variable argument is passed in either Rn or Fn.  */
4489  else if (TARGET_ABI_OPEN_VMS && named == 0)
4490    {
4491      cum->int_regs = cum->words;
4492      cum->fp_regs = cum->words;
4493    }
4494
4495  /* Integral and aggregates go in general registers.  So do TFmode FP values.
4496     If we have run out of FR registers, then other FP values must also go in
4497     general registers.  This can happen when we have a SFmode HFA.  */
4498  else if (mode == TFmode || mode == TCmode
4499           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4500    cum->int_regs = cum->words;
4501
4502  /* If there is a prototype, then FP values go in a FR register when
4503     named, and in a GR register when unnamed.  */
4504  else if (cum->prototype)
4505    {
4506      if (! named)
4507	cum->int_regs = cum->words;
4508      else
4509	/* ??? Complex types should not reach here.  */
4510	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4511    }
4512  /* If there is no prototype, then FP values go in both FR and GR
4513     registers.  */
4514  else
4515    {
4516      /* ??? Complex types should not reach here.  */
4517      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4518      cum->int_regs = cum->words;
4519    }
4520}
4521
4522/* Arguments with alignment larger than 8 bytes start at the next even
4523   boundary.  On ILP32 HPUX, TFmode arguments start on the next even boundary
4524   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
4525
4526int
4527ia64_function_arg_boundary (enum machine_mode mode, tree type)
4528{
4529
4530  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4531    return PARM_BOUNDARY * 2;
4532
4533  if (type)
4534    {
4535      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4536        return PARM_BOUNDARY * 2;
4537      else
4538        return PARM_BOUNDARY;
4539    }
4540
4541  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4542    return PARM_BOUNDARY * 2;
4543  else
4544    return PARM_BOUNDARY;
4545}
4546
4547/* True if it is OK to do sibling call optimization for the specified
4548   call expression EXP.  DECL will be the called function, or NULL if
4549   this is an indirect call.  */
4550static bool
4551ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4552{
4553  /* We can't perform a sibcall if the current function has the syscall_linkage
4554     attribute.  */
4555  if (lookup_attribute ("syscall_linkage",
4556			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4557    return false;
4558
4559  /* We must always return with our current GP.  This means we can
4560     only sibcall to functions defined in the current module unless
4561     TARGET_CONST_GP is set to true.  */
4562  return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
4563}
4564
4565
4566/* Implement va_arg.  */
4567
4568static tree
4569ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4570		      gimple_seq *post_p)
4571{
4572  /* Variable sized types are passed by reference.  */
4573  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4574    {
4575      tree ptrtype = build_pointer_type (type);
4576      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4577      return build_va_arg_indirect_ref (addr);
4578    }
4579
4580  /* Aggregate arguments with alignment larger than 8 bytes start at
4581     the next even boundary.  Integer and floating point arguments
4582     do so if they are larger than 8 bytes, whether or not they are
4583     also aligned larger than 8 bytes.  */
4584  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4585      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4586    {
4587      tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist,
4588		       size_int (2 * UNITS_PER_WORD - 1));
4589      t = fold_convert (sizetype, t);
4590      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4591		  size_int (-2 * UNITS_PER_WORD));
4592      t = fold_convert (TREE_TYPE (valist), t);
4593      gimplify_assign (unshare_expr (valist), t, pre_p);
4594    }
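
  /* For instance, with 8-byte words the computation above is
     (valist + 15) & -16, so for a 16-byte-aligned type a va_list value of
     the form 16n + 8 is bumped to 16 (n + 1) before the standard
     expansion below reads the argument.  */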
4595
4596  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4597}
4598
4599/* Return true if the function return value is returned in memory.  Return
4600   false if it is in a register.  */
4601
4602static bool
4603ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
4604{
4605  enum machine_mode mode;
4606  enum machine_mode hfa_mode;
4607  HOST_WIDE_INT byte_size;
4608
4609  mode = TYPE_MODE (valtype);
4610  byte_size = GET_MODE_SIZE (mode);
4611  if (mode == BLKmode)
4612    {
4613      byte_size = int_size_in_bytes (valtype);
4614      if (byte_size < 0)
4615	return true;
4616    }
4617
4618  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
4619
4620  hfa_mode = hfa_element_mode (valtype, 0);
4621  if (hfa_mode != VOIDmode)
4622    {
4623      int hfa_size = GET_MODE_SIZE (hfa_mode);
4624
4625      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4626	return true;
4627      else
4628	return false;
4629    }
4630  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4631    return true;
4632  else
4633    return false;
4634}
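
/* Illustrative cutoffs: an HFA of eight doubles (64 bytes) is still
   returned in the FP registers, while nine doubles force a memory return;
   for non-HFA values the limit is UNITS_PER_WORD * MAX_INT_RETURN_SLOTS
   bytes (32 bytes with the usual four integer return slots).  */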
4635
4636/* Return rtx for register that holds the function return value.  */
4637
4638rtx
4639ia64_function_value (const_tree valtype, const_tree func)
4640{
4641  enum machine_mode mode;
4642  enum machine_mode hfa_mode;
4643  int unsignedp;
4644
4645  mode = TYPE_MODE (valtype);
4646  hfa_mode = hfa_element_mode (valtype, 0);
4647
4648  if (hfa_mode != VOIDmode)
4649    {
4650      rtx loc[8];
4651      int i;
4652      int hfa_size;
4653      int byte_size;
4654      int offset;
4655
4656      hfa_size = GET_MODE_SIZE (hfa_mode);
4657      byte_size = ((mode == BLKmode)
4658		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4659      offset = 0;
4660      for (i = 0; offset < byte_size; i++)
4661	{
4662	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4663				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4664				      GEN_INT (offset));
4665	  offset += hfa_size;
4666	}
4667      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4668    }
4669  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4670    return gen_rtx_REG (mode, FR_ARG_FIRST);
4671  else
4672    {
4673      bool need_parallel = false;
4674
4675      /* In big-endian mode, we need to manage the layout of aggregates
4676	 in the registers so that we get the bits properly aligned in
4677	 the highpart of the registers.  */
4678      if (BYTES_BIG_ENDIAN
4679	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4680	need_parallel = true;
4681
4682      /* Something like struct S { long double x; char a[0] } is not an
4683	 HFA structure, and therefore doesn't go in fp registers.  But
4684	 the middle-end will give it XFmode anyway, and XFmode values
4685	 don't normally fit in integer registers.  So we need to smuggle
4686	 the value inside a parallel.  */
4687      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4688	need_parallel = true;
4689
4690      if (need_parallel)
4691	{
4692	  rtx loc[8];
4693	  int offset;
4694	  int bytesize;
4695	  int i;
4696
4697	  offset = 0;
4698	  bytesize = int_size_in_bytes (valtype);
4699	  /* An empty PARALLEL is invalid here, but the return value
4700	     doesn't matter for empty structs.  */
4701	  if (bytesize == 0)
4702	    return gen_rtx_REG (mode, GR_RET_FIRST);
4703	  for (i = 0; offset < bytesize; i++)
4704	    {
4705	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4706					  gen_rtx_REG (DImode,
4707						       GR_RET_FIRST + i),
4708					  GEN_INT (offset));
4709	      offset += UNITS_PER_WORD;
4710	    }
4711	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4712	}
4713
4714      mode = ia64_promote_function_mode (valtype, mode, &unsignedp,
4715					 func ? TREE_TYPE (func) : NULL_TREE,
4716					 true);
4717
4718      return gen_rtx_REG (mode, GR_RET_FIRST);
4719    }
4720}
4721
4722/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4723   We need to emit DTP-relative relocations.  */
4724
4725static void
4726ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4727{
4728  gcc_assert (size == 4 || size == 8);
4729  if (size == 4)
4730    fputs ("\tdata4.ua\t@dtprel(", file);
4731  else
4732    fputs ("\tdata8.ua\t@dtprel(", file);
4733  output_addr_const (file, x);
4734  fputs (")", file);
4735}
4736
4737/* Print a memory address as an operand to reference that memory location.  */
4738
4739/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4740   also call this from ia64_print_operand for memory addresses.  */
4741
4742void
4743ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4744			    rtx address ATTRIBUTE_UNUSED)
4745{
4746}
4747
4748/* Print an operand to an assembler instruction.
4749   C	Swap and print a comparison operator.
4750   D	Print an FP comparison operator.
4751   E    Print 32 - constant, for SImode shifts as extract.
4752   e    Print 64 - constant, for DImode rotates.
4753   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4754        a floating point register emitted normally.
4755   G	A floating point constant.
4756   I	Invert a predicate register by adding 1.
4757   J    Select the proper predicate register for a condition.
4758   j    Select the inverse predicate register for a condition.
4759   O	Append .acq for volatile load.
4760   P	Postincrement of a MEM.
4761   Q	Append .rel for volatile store.
4762   R	Print .s .d or nothing for a single, double or no truncation.
4763   S	Shift amount for shladd instruction.
4764   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4765	for Intel assembler.
4766   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4767	for Intel assembler.
4768   X	A pair of floating point registers.
4769   r	Print register name, or constant 0 as r0.  HP compatibility for
4770	Linux kernel.
4771   v    Print vector constant value as an 8-byte integer value.  */
4772
4773void
4774ia64_print_operand (FILE * file, rtx x, int code)
4775{
4776  const char *str;
4777
4778  switch (code)
4779    {
4780    case 0:
4781      /* Handled below.  */
4782      break;
4783
4784    case 'C':
4785      {
4786	enum rtx_code c = swap_condition (GET_CODE (x));
4787	fputs (GET_RTX_NAME (c), file);
4788	return;
4789      }
4790
4791    case 'D':
4792      switch (GET_CODE (x))
4793	{
4794	case NE:
4795	  str = "neq";
4796	  break;
4797	case UNORDERED:
4798	  str = "unord";
4799	  break;
4800	case ORDERED:
4801	  str = "ord";
4802	  break;
4803	case UNLT:
4804	  str = "nge";
4805	  break;
4806	case UNLE:
4807	  str = "ngt";
4808	  break;
4809	case UNGT:
4810	  str = "nle";
4811	  break;
4812	case UNGE:
4813	  str = "nlt";
4814	  break;
4815	default:
4816	  str = GET_RTX_NAME (GET_CODE (x));
4817	  break;
4818	}
4819      fputs (str, file);
4820      return;
4821
4822    case 'E':
4823      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4824      return;
4825
4826    case 'e':
4827      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4828      return;
4829
4830    case 'F':
4831      if (x == CONST0_RTX (GET_MODE (x)))
4832	str = reg_names [FR_REG (0)];
4833      else if (x == CONST1_RTX (GET_MODE (x)))
4834	str = reg_names [FR_REG (1)];
4835      else
4836	{
4837	  gcc_assert (GET_CODE (x) == REG);
4838	  str = reg_names [REGNO (x)];
4839	}
4840      fputs (str, file);
4841      return;
4842
4843    case 'G':
4844      {
4845	long val[4];
4846	REAL_VALUE_TYPE rv;
4847	REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
4848	real_to_target (val, &rv, GET_MODE (x));
4849	if (GET_MODE (x) == SFmode)
4850	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
4851	else if (GET_MODE (x) == DFmode)
4852	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
4853					  & 0xffffffff,
4854					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
4855					  & 0xffffffff);
4856	else
4857	  output_operand_lossage ("invalid %%G mode");
4858      }
4859      return;
4860
4861    case 'I':
4862      fputs (reg_names [REGNO (x) + 1], file);
4863      return;
4864
4865    case 'J':
4866    case 'j':
4867      {
4868	unsigned int regno = REGNO (XEXP (x, 0));
4869	if (GET_CODE (x) == EQ)
4870	  regno += 1;
4871	if (code == 'j')
4872	  regno ^= 1;
4873        fputs (reg_names [regno], file);
4874      }
4875      return;
4876
4877    case 'O':
4878      if (MEM_VOLATILE_P (x))
4879	fputs(".acq", file);
4880      return;
4881
4882    case 'P':
4883      {
4884	HOST_WIDE_INT value;
4885
4886	switch (GET_CODE (XEXP (x, 0)))
4887	  {
4888	  default:
4889	    return;
4890
4891	  case POST_MODIFY:
4892	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4893	    if (GET_CODE (x) == CONST_INT)
4894	      value = INTVAL (x);
4895	    else
4896	      {
4897		gcc_assert (GET_CODE (x) == REG);
4898		fprintf (file, ", %s", reg_names[REGNO (x)]);
4899		return;
4900	      }
4901	    break;
4902
4903	  case POST_INC:
4904	    value = GET_MODE_SIZE (GET_MODE (x));
4905	    break;
4906
4907	  case POST_DEC:
4908	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4909	    break;
4910	  }
4911
4912	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4913	return;
4914      }
4915
4916    case 'Q':
4917      if (MEM_VOLATILE_P (x))
4918	fputs(".rel", file);
4919      return;
4920
4921    case 'R':
4922      if (x == CONST0_RTX (GET_MODE (x)))
4923	fputs(".s", file);
4924      else if (x == CONST1_RTX (GET_MODE (x)))
4925	fputs(".d", file);
4926      else if (x == CONST2_RTX (GET_MODE (x)))
4927	;
4928      else
4929	output_operand_lossage ("invalid %%R value");
4930      return;
4931
4932    case 'S':
4933      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4934      return;
4935
4936    case 'T':
4937      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4938	{
4939	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4940	  return;
4941	}
4942      break;
4943
4944    case 'U':
4945      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4946	{
4947	  const char *prefix = "0x";
4948	  if (INTVAL (x) & 0x80000000)
4949	    {
4950	      fprintf (file, "0xffffffff");
4951	      prefix = "";
4952	    }
4953	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4954	  return;
4955	}
4956      break;
4957
4958    case 'X':
4959      {
4960	unsigned int regno = REGNO (x);
4961	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4962      }
4963      return;
4964
4965    case 'r':
4966      /* If this operand is the constant zero, write it as register zero.
4967	 Any register, zero, or CONST_INT value is OK here.  */
4968      if (GET_CODE (x) == REG)
4969	fputs (reg_names[REGNO (x)], file);
4970      else if (x == CONST0_RTX (GET_MODE (x)))
4971	fputs ("r0", file);
4972      else if (GET_CODE (x) == CONST_INT)
4973	output_addr_const (file, x);
4974      else
4975	output_operand_lossage ("invalid %%r value");
4976      return;
4977
4978    case 'v':
4979      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4980      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4981      break;
4982
4983    case '+':
4984      {
4985	const char *which;
4986
4987	/* For conditional branches, returns or calls, substitute
4988	   sptk, dptk, dpnt, or spnt for %s.  */
4989	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4990	if (x)
4991	  {
4992	    int pred_val = INTVAL (XEXP (x, 0));
4993
4994	    /* Guess the top and bottom 2% statically predicted.  */
4995	    if (pred_val < REG_BR_PROB_BASE / 50
4996		&& br_prob_note_reliable_p (x))
4997	      which = ".spnt";
4998	    else if (pred_val < REG_BR_PROB_BASE / 2)
4999	      which = ".dpnt";
5000	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
5001		     || !br_prob_note_reliable_p (x))
5002	      which = ".dptk";
5003	    else
5004	      which = ".sptk";
5005	  }
5006	else if (GET_CODE (current_output_insn) == CALL_INSN)
5007	  which = ".sptk";
5008	else
5009	  which = ".dptk";
5010
5011	fputs (which, file);
5012	return;
5013      }
5014
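    /* For ',', emit the qualifying predicate of the current insn, if
       any, as a "(pN) " prefix.  */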
5015    case ',':
5016      x = current_insn_predicate;
5017      if (x)
5018	{
5019	  unsigned int regno = REGNO (XEXP (x, 0));
5020	  if (GET_CODE (x) == EQ)
5021	    regno += 1;
5022          fprintf (file, "(%s) ", reg_names [regno]);
5023	}
5024      return;
5025
5026    default:
5027      output_operand_lossage ("ia64_print_operand: unknown code");
5028      return;
5029    }
5030
5031  switch (GET_CODE (x))
5032    {
5033      /* This happens for the spill/restore instructions.  */
5034    case POST_INC:
5035    case POST_DEC:
5036    case POST_MODIFY:
5037      x = XEXP (x, 0);
5038      /* ... fall through ...  */
5039
5040    case REG:
5041      fputs (reg_names [REGNO (x)], file);
5042      break;
5043
5044    case MEM:
5045      {
5046	rtx addr = XEXP (x, 0);
5047	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
5048	  addr = XEXP (addr, 0);
5049	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
5050	break;
5051      }
5052
5053    default:
5054      output_addr_const (file, x);
5055      break;
5056    }
5057
5058  return;
5059}
5060
5061/* Compute a (partial) cost for rtx X.  Return true if the complete
5062   cost has been computed, and false if subexpressions should be
5063   scanned.  In either case, *TOTAL contains the cost result.  */
5064/* ??? This is incomplete.  */
5065
5066static bool
5067ia64_rtx_costs (rtx x, int code, int outer_code, int *total,
5068		bool speed ATTRIBUTE_UNUSED)
5069{
5070  switch (code)
5071    {
5072    case CONST_INT:
5073      switch (outer_code)
5074        {
5075        case SET:
5076	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
5077	  return true;
5078        case PLUS:
5079	  if (satisfies_constraint_I (x))
5080	    *total = 0;
5081	  else if (satisfies_constraint_J (x))
5082	    *total = 1;
5083	  else
5084	    *total = COSTS_N_INSNS (1);
5085	  return true;
5086        default:
5087	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
5088	    *total = 0;
5089	  else
5090	    *total = COSTS_N_INSNS (1);
5091	  return true;
5092	}
5093
5094    case CONST_DOUBLE:
5095      *total = COSTS_N_INSNS (1);
5096      return true;
5097
5098    case CONST:
5099    case SYMBOL_REF:
5100    case LABEL_REF:
5101      *total = COSTS_N_INSNS (3);
5102      return true;
5103
5104    case MULT:
5105      /* For multiplies wider than HImode, we have to go to the FPU,
5106         which normally involves copies.  Plus there's the latency
5107         of the multiply itself, and the latency of the instructions to
5108         transfer integer regs to FP regs.  */
5109      /* ??? Check for FP mode.  */
5110      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
5111        *total = COSTS_N_INSNS (10);
5112      else
5113	*total = COSTS_N_INSNS (2);
5114      return true;
5115
5116    case PLUS:
5117    case MINUS:
5118    case ASHIFT:
5119    case ASHIFTRT:
5120    case LSHIFTRT:
5121      *total = COSTS_N_INSNS (1);
5122      return true;
5123
5124    case DIV:
5125    case UDIV:
5126    case MOD:
5127    case UMOD:
5128      /* We make divide expensive, so that divide-by-constant will be
5129         optimized to a multiply.  */
5130      *total = COSTS_N_INSNS (60);
5131      return true;
5132
5133    default:
5134      return false;
5135    }
5136}
5137
5138/* Calculate the cost of moving data from a register in class FROM to
5139   one in class TO, using MODE.  */
5140
5141int
5142ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
5143			 enum reg_class to)
5144{
5145  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
5146  if (to == ADDL_REGS)
5147    to = GR_REGS;
5148  if (from == ADDL_REGS)
5149    from = GR_REGS;
5150
5151  /* All costs are symmetric, so reduce cases by putting the
5152     lower number class as the destination.  */
5153  if (from < to)
5154    {
5155      enum reg_class tmp = to;
5156      to = from, from = tmp;
5157    }
5158
  /* Moving between FR and GR registers in XFmode must be more expensive
     than 2, so that we get secondary memory reloads.  Between FR_REGS
     themselves, we have to make this at least as expensive as
     MEMORY_MOVE_COST to avoid spectacularly poor register class
     preferencing.  */
5163  if (mode == XFmode || mode == RFmode)
5164    {
5165      if (to != GR_REGS || from != GR_REGS)
5166        return MEMORY_MOVE_COST (mode, to, 0);
5167      else
5168	return 3;
5169    }
5170
5171  switch (to)
5172    {
5173    case PR_REGS:
5174      /* Moving between PR registers takes two insns.  */
5175      if (from == PR_REGS)
5176	return 3;
5177      /* Moving between PR and anything but GR is impossible.  */
5178      if (from != GR_REGS)
5179	return MEMORY_MOVE_COST (mode, to, 0);
5180      break;
5181
5182    case BR_REGS:
5183      /* Moving between BR and anything but GR is impossible.  */
5184      if (from != GR_REGS && from != GR_AND_BR_REGS)
5185	return MEMORY_MOVE_COST (mode, to, 0);
5186      break;
5187
5188    case AR_I_REGS:
5189    case AR_M_REGS:
5190      /* Moving between AR and anything but GR is impossible.  */
5191      if (from != GR_REGS)
5192	return MEMORY_MOVE_COST (mode, to, 0);
5193      break;
5194
5195    case GR_REGS:
5196    case FR_REGS:
5197    case FP_REGS:
5198    case GR_AND_FR_REGS:
5199    case GR_AND_BR_REGS:
5200    case ALL_REGS:
5201      break;
5202
5203    default:
5204      gcc_unreachable ();
5205    }
5206
5207  return 2;
5208}
5209
5210/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on RCLASS
5211   to use when copying X into that class.  */
5212
5213enum reg_class
5214ia64_preferred_reload_class (rtx x, enum reg_class rclass)
5215{
5216  switch (rclass)
5217    {
5218    case FR_REGS:
5219    case FP_REGS:
5220      /* Don't allow volatile mem reloads into floating point registers.
5221	 This is defined to force reload to choose the r/m case instead
5222	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
5223      if (MEM_P (x) && MEM_VOLATILE_P (x))
5224	return NO_REGS;
5225
5226      /* Force all unrecognized constants into the constant pool.  */
5227      if (CONSTANT_P (x))
5228	return NO_REGS;
5229      break;
5230
5231    case AR_M_REGS:
5232    case AR_I_REGS:
5233      if (!OBJECT_P (x))
5234	return NO_REGS;
5235      break;
5236
5237    default:
5238      break;
5239    }
5240
5241  return rclass;
5242}
5243
5244/* This function returns the register class required for a secondary
5245   register when copying between one of the registers in RCLASS, and X,
5246   using MODE.  A return value of NO_REGS means that no secondary register
5247   is required.  */
5248
5249enum reg_class
5250ia64_secondary_reload_class (enum reg_class rclass,
5251			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
5252{
5253  int regno = -1;
5254
5255  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
5256    regno = true_regnum (x);
5257
5258  switch (rclass)
5259    {
5260    case BR_REGS:
5261    case AR_M_REGS:
5262    case AR_I_REGS:
5263      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
5264	 interaction.  We end up with two pseudos with overlapping lifetimes
5265	 both of which are equiv to the same constant, and both which need
5266	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
5267	 changes depending on the path length, which means the qty_first_reg
5268	 check in make_regs_eqv can give different answers at different times.
5269	 At some point I'll probably need a reload_indi pattern to handle
5270	 this.
5271
5272	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
5273	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
5274	 non-general registers for good measure.  */
5275      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
5276	return GR_REGS;
5277
5278      /* This is needed if a pseudo used as a call_operand gets spilled to a
5279	 stack slot.  */
5280      if (GET_CODE (x) == MEM)
5281	return GR_REGS;
5282      break;
5283
5284    case FR_REGS:
5285    case FP_REGS:
5286      /* Need to go through general registers to get to other class regs.  */
5287      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
5288	return GR_REGS;
5289
5290      /* This can happen when a paradoxical subreg is an operand to the
5291	 muldi3 pattern.  */
5292      /* ??? This shouldn't be necessary after instruction scheduling is
5293	 enabled, because paradoxical subregs are not accepted by
5294	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
5295	 stop the paradoxical subreg stupidity in the *_operand functions
5296	 in recog.c.  */
5297      if (GET_CODE (x) == MEM
5298	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
5299	      || GET_MODE (x) == QImode))
5300	return GR_REGS;
5301
5302      /* This can happen because of the ior/and/etc patterns that accept FP
5303	 registers as operands.  If the third operand is a constant, then it
5304	 needs to be reloaded into a FP register.  */
5305      if (GET_CODE (x) == CONST_INT)
5306	return GR_REGS;
5307
5308      /* This can happen because of register elimination in a muldi3 insn.
5309	 E.g. `26107 * (unsigned long)&u'.  */
5310      if (GET_CODE (x) == PLUS)
5311	return GR_REGS;
5312      break;
5313
5314    case PR_REGS:
5315      /* ??? This happens if we cse/gcse a BImode value across a call,
5316	 and the function has a nonlocal goto.  This is because global
5317	 does not allocate call crossing pseudos to hard registers when
5318	 crtl->has_nonlocal_goto is true.  This is relatively
5319	 common for C++ programs that use exceptions.  To reproduce,
5320	 return NO_REGS and compile libstdc++.  */
5321      if (GET_CODE (x) == MEM)
5322	return GR_REGS;
5323
5324      /* This can happen when we take a BImode subreg of a DImode value,
5325	 and that DImode value winds up in some non-GR register.  */
5326      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5327	return GR_REGS;
5328      break;
5329
5330    default:
5331      break;
5332    }
5333
5334  return NO_REGS;
5335}
5336
5337
5338/* Implement targetm.unspec_may_trap_p hook.  */
5339static int
5340ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
5341{
5342  if (GET_CODE (x) == UNSPEC)
5343    {
5344      switch (XINT (x, 1))
5345	{
5346	case UNSPEC_LDA:
5347	case UNSPEC_LDS:
5348	case UNSPEC_LDSA:
5349	case UNSPEC_LDCCLR:
5350	case UNSPEC_CHKACLR:
5351	case UNSPEC_CHKS:
5352	  /* These unspecs are just wrappers.  */
5353	  return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
5354	}
5355    }
5356
5357  return default_unspec_may_trap_p (x, flags);
5358}
5359
5360
5361/* Parse the -mfixed-range= option string.  */
5362
5363static void
5364fix_range (const char *const_str)
5365{
5366  int i, first, last;
5367  char *str, *dash, *comma;
5368
  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
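  /* For example, "-mfixed-range=f32-f127" marks f32 through f127 as
     fixed; several ranges may be given separated by commas, e.g.
     "-mfixed-range=f32-f127,f12-f15" (register names here are purely
     illustrative).  */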
5374
5375  i = strlen (const_str);
5376  str = (char *) alloca (i + 1);
5377  memcpy (str, const_str, i + 1);
5378
5379  while (1)
5380    {
5381      dash = strchr (str, '-');
5382      if (!dash)
5383	{
5384	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5385	  return;
5386	}
5387      *dash = '\0';
5388
5389      comma = strchr (dash + 1, ',');
5390      if (comma)
5391	*comma = '\0';
5392
5393      first = decode_reg_name (str);
5394      if (first < 0)
5395	{
5396	  warning (0, "unknown register name: %s", str);
5397	  return;
5398	}
5399
5400      last = decode_reg_name (dash + 1);
5401      if (last < 0)
5402	{
5403	  warning (0, "unknown register name: %s", dash + 1);
5404	  return;
5405	}
5406
5407      *dash = '-';
5408
5409      if (first > last)
5410	{
5411	  warning (0, "%s-%s is an empty range", str, dash + 1);
5412	  return;
5413	}
5414
5415      for (i = first; i <= last; ++i)
5416	fixed_regs[i] = call_used_regs[i] = 1;
5417
5418      if (!comma)
5419	break;
5420
5421      *comma = ',';
5422      str = comma + 1;
5423    }
5424}
5425
5426/* Implement TARGET_HANDLE_OPTION.  */
5427
5428static bool
5429ia64_handle_option (size_t code, const char *arg, int value)
5430{
5431  switch (code)
5432    {
5433    case OPT_mfixed_range_:
5434      fix_range (arg);
5435      return true;
5436
5437    case OPT_mtls_size_:
5438      if (value != 14 && value != 22 && value != 64)
5439	error ("bad value %<%s%> for -mtls-size= switch", arg);
5440      return true;
5441
5442    case OPT_mtune_:
5443      {
5444	static struct pta
5445	  {
5446	    const char *name;		/* processor name or nickname.  */
5447	    enum processor_type processor;
5448	  }
5449	const processor_alias_table[] =
5450	  {
5451	    {"itanium2", PROCESSOR_ITANIUM2},
5452	    {"mckinley", PROCESSOR_ITANIUM2},
5453	  };
5454	int const pta_size = ARRAY_SIZE (processor_alias_table);
5455	int i;
5456
5457	for (i = 0; i < pta_size; i++)
5458	  if (!strcmp (arg, processor_alias_table[i].name))
5459	    {
5460	      ia64_tune = processor_alias_table[i].processor;
5461	      break;
5462	    }
5463	if (i == pta_size)
5464	  error ("bad value %<%s%> for -mtune= switch", arg);
5465	return true;
5466      }
5467
5468    default:
5469      return true;
5470    }
5471}
5472
5473/* Implement OVERRIDE_OPTIONS.  */
5474
5475void
5476ia64_override_options (void)
5477{
5478  if (TARGET_AUTO_PIC)
5479    target_flags |= MASK_CONST_GP;
5480
  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization,
     so it is enabled only when optimizing for peak performance.  */
5485  if (optimize >= 3)
5486    flag_ira_loop_pressure = 1;
5487
5488
5489  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5490
5491  init_machine_status = ia64_init_machine_status;
5492
5493  if (align_functions <= 0)
5494    align_functions = 64;
5495  if (align_loops <= 0)
5496    align_loops = 32;
5497  if (TARGET_ABI_OPEN_VMS)
5498    flag_no_common = 1;
5499
  ia64_override_options_after_change ();
5501}
5502
5503/* Implement targetm.override_options_after_change.  */
5504
5505static void
5506ia64_override_options_after_change (void)
5507{
5508  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5509  flag_schedule_insns_after_reload = 0;
5510
5511  if (optimize >= 3
5512      && ! sel_sched_switch_set)
5513    {
5514      flag_selective_scheduling2 = 1;
5515      flag_sel_sched_pipelining = 1;
5516    }
5517  if (mflag_sched_control_spec == 2)
5518    {
5519      /* Control speculation is on by default for the selective scheduler,
5520         but not for the Haifa scheduler.  */
5521      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
5522    }
5523  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
5524    {
      /* FIXME: remove this once breaking auto-increment insns apart is
	 implemented as a transformation.  */
5527      flag_auto_inc_dec = 0;
5528    }
5529}
5530
5531/* Initialize the record of emitted frame related registers.  */
5532
void
ia64_init_expanders (void)
5534{
5535  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
5536}
5537
5538static struct machine_function *
5539ia64_init_machine_status (void)
5540{
5541  return GGC_CNEW (struct machine_function);
5542}
5543
5544static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5545static enum attr_type ia64_safe_type (rtx);
5546
5547static enum attr_itanium_class
5548ia64_safe_itanium_class (rtx insn)
5549{
5550  if (recog_memoized (insn) >= 0)
5551    return get_attr_itanium_class (insn);
5552  else if (DEBUG_INSN_P (insn))
5553    return ITANIUM_CLASS_IGNORE;
5554  else
5555    return ITANIUM_CLASS_UNKNOWN;
5556}
5557
5558static enum attr_type
5559ia64_safe_type (rtx insn)
5560{
5561  if (recog_memoized (insn) >= 0)
5562    return get_attr_type (insn);
5563  else
5564    return TYPE_UNKNOWN;
5565}
5566
5567/* The following collection of routines emit instruction group stop bits as
5568   necessary to avoid dependencies.  */
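/* A stop bit is written as ";;" in ia64 assembly and terminates the
   current instruction group.  As a minimal illustration (assembly shown
   only for exposition):

	mov r14 = r15
	;;			// stop bit: ends the instruction group
	add r16 = r14, r17	// may now safely read the new value of r14

   The routines below decide where such stop bits are required.  */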
5569
5570/* Need to track some additional registers as far as serialization is
5571   concerned so we can properly handle br.call and br.ret.  We could
5572   make these registers visible to gcc, but since these registers are
5573   never explicitly used in gcc generated code, it seems wasteful to
5574   do so (plus it would make the call and return patterns needlessly
5575   complex).  */
5576#define REG_RP		(BR_REG (0))
5577#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
5578/* This is used for volatile asms which may require a stop bit immediately
5579   before and after them.  */
5580#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
5581#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
5582#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
5583
5584/* For each register, we keep track of how it has been written in the
5585   current instruction group.
5586
5587   If a register is written unconditionally (no qualifying predicate),
5588   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5589
5590   If a register is written if its qualifying predicate P is true, we
5591   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5592   may be written again by the complement of P (P^1) and when this happens,
5593   WRITE_COUNT gets set to 2.
5594
5595   The result of this is that whenever an insn attempts to write a register
5596   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5597
5598   If a predicate register is written by a floating-point insn, we set
5599   WRITTEN_BY_FP to true.
5600
5601   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5602   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
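/* As a sketch of the bookkeeping above (assuming, as the code below
   does, that pN and pN+1 form a complementary predicate pair):

	(p6) mov r14 = r15	// write_count = 1, first_pred = p6
	(p7) mov r14 = r16	// complement of p6, so no stop bit needed;
				// write_count becomes 2
	     mov r17 = r14	// r14 now has write_count == 2, so a stop
				// bit must be emitted before this read

   The authoritative logic is in rws_access_regno below.  */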
5603
5604#if GCC_VERSION >= 4000
5605#define RWS_FIELD_TYPE __extension__ unsigned short
5606#else
5607#define RWS_FIELD_TYPE unsigned int
5608#endif
5609struct reg_write_state
5610{
5611  RWS_FIELD_TYPE write_count : 2;
5612  RWS_FIELD_TYPE first_pred : 10;
5613  RWS_FIELD_TYPE written_by_fp : 1;
5614  RWS_FIELD_TYPE written_by_and : 1;
5615  RWS_FIELD_TYPE written_by_or : 1;
5616};
5617
5618/* Cumulative info for the current instruction group.  */
5619struct reg_write_state rws_sum[NUM_REGS];
5620#ifdef ENABLE_CHECKING
5621/* Bitmap whether a register has been written in the current insn.  */
5622HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
5623			   / HOST_BITS_PER_WIDEST_FAST_INT];
5624
5625static inline void
5626rws_insn_set (int regno)
5627{
5628  gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno));
5629  SET_HARD_REG_BIT (rws_insn, regno);
5630}
5631
5632static inline int
5633rws_insn_test (int regno)
5634{
5635  return TEST_HARD_REG_BIT (rws_insn, regno);
5636}
5637#else
5638/* When not checking, track just REG_AR_CFM and REG_VOLATILE.  */
5639unsigned char rws_insn[2];
5640
5641static inline void
5642rws_insn_set (int regno)
5643{
5644  if (regno == REG_AR_CFM)
5645    rws_insn[0] = 1;
5646  else if (regno == REG_VOLATILE)
5647    rws_insn[1] = 1;
5648}
5649
5650static inline int
5651rws_insn_test (int regno)
5652{
5653  if (regno == REG_AR_CFM)
5654    return rws_insn[0];
5655  if (regno == REG_VOLATILE)
5656    return rws_insn[1];
5657  return 0;
5658}
5659#endif
5660
5661/* Indicates whether this is the first instruction after a stop bit,
5662   in which case we don't need another stop bit.  Without this,
5663   ia64_variable_issue will die when scheduling an alloc.  */
5664static int first_instruction;
5665
5666/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5667   RTL for one instruction.  */
5668struct reg_flags
5669{
5670  unsigned int is_write : 1;	/* Is register being written?  */
5671  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
5672  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
5673  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
5674  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
5675  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
5676};
5677
5678static void rws_update (int, struct reg_flags, int);
5679static int rws_access_regno (int, struct reg_flags, int);
5680static int rws_access_reg (rtx, struct reg_flags, int);
5681static void update_set_flags (rtx, struct reg_flags *);
5682static int set_src_needs_barrier (rtx, struct reg_flags, int);
5683static int rtx_needs_barrier (rtx, struct reg_flags, int);
5684static void init_insn_group_barriers (void);
5685static int group_barrier_needed (rtx);
5686static int safe_group_barrier_needed (rtx);
5687static int in_safe_group_barrier;
5688
5689/* Update *RWS for REGNO, which is being written by the current instruction,
5690   with predicate PRED, and associated register flags in FLAGS.  */
5691
5692static void
5693rws_update (int regno, struct reg_flags flags, int pred)
5694{
5695  if (pred)
5696    rws_sum[regno].write_count++;
5697  else
5698    rws_sum[regno].write_count = 2;
5699  rws_sum[regno].written_by_fp |= flags.is_fp;
5700  /* ??? Not tracking and/or across differing predicates.  */
5701  rws_sum[regno].written_by_and = flags.is_and;
5702  rws_sum[regno].written_by_or = flags.is_or;
5703  rws_sum[regno].first_pred = pred;
5704}
5705
5706/* Handle an access to register REGNO of type FLAGS using predicate register
5707   PRED.  Update rws_sum array.  Return 1 if this access creates
5708   a dependency with an earlier instruction in the same group.  */
5709
5710static int
5711rws_access_regno (int regno, struct reg_flags flags, int pred)
5712{
5713  int need_barrier = 0;
5714
5715  gcc_assert (regno < NUM_REGS);
5716
5717  if (! PR_REGNO_P (regno))
5718    flags.is_and = flags.is_or = 0;
5719
5720  if (flags.is_write)
5721    {
5722      int write_count;
5723
5724      rws_insn_set (regno);
5725      write_count = rws_sum[regno].write_count;
5726
5727      switch (write_count)
5728	{
5729	case 0:
5730	  /* The register has not been written yet.  */
5731	  if (!in_safe_group_barrier)
5732	    rws_update (regno, flags, pred);
5733	  break;
5734
5735	case 1:
5736	  /* The register has been written via a predicate.  If this is
5737	     not a complementary predicate, then we need a barrier.  */
5738	  /* ??? This assumes that P and P+1 are always complementary
5739	     predicates for P even.  */
5740	  if (flags.is_and && rws_sum[regno].written_by_and)
5741	    ;
5742	  else if (flags.is_or && rws_sum[regno].written_by_or)
5743	    ;
5744	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
5745	    need_barrier = 1;
5746	  if (!in_safe_group_barrier)
5747	    rws_update (regno, flags, pred);
5748	  break;
5749
5750	case 2:
5751	  /* The register has been unconditionally written already.  We
5752	     need a barrier.  */
5753	  if (flags.is_and && rws_sum[regno].written_by_and)
5754	    ;
5755	  else if (flags.is_or && rws_sum[regno].written_by_or)
5756	    ;
5757	  else
5758	    need_barrier = 1;
5759	  if (!in_safe_group_barrier)
5760	    {
5761	      rws_sum[regno].written_by_and = flags.is_and;
5762	      rws_sum[regno].written_by_or = flags.is_or;
5763	    }
5764	  break;
5765
5766	default:
5767	  gcc_unreachable ();
5768	}
5769    }
5770  else
5771    {
5772      if (flags.is_branch)
5773	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */
5776
5777	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5778	    /* RAW dependencies on branch regs are permissible as long
5779	       as the writer is a non-branch instruction.  Since we
5780	       never generate code that uses a branch register written
5781	       by a branch instruction, handling this case is
5782	       easy.  */
5783	    return 0;
5784
5785	  if (REGNO_REG_CLASS (regno) == PR_REGS
5786	      && ! rws_sum[regno].written_by_fp)
5787	    /* The predicates of a branch are available within the
5788	       same insn group as long as the predicate was written by
5789	       something other than a floating-point instruction.  */
5790	    return 0;
5791	}
5792
5793      if (flags.is_and && rws_sum[regno].written_by_and)
5794	return 0;
5795      if (flags.is_or && rws_sum[regno].written_by_or)
5796	return 0;
5797
5798      switch (rws_sum[regno].write_count)
5799	{
5800	case 0:
5801	  /* The register has not been written yet.  */
5802	  break;
5803
5804	case 1:
5805	  /* The register has been written via a predicate.  If this is
5806	     not a complementary predicate, then we need a barrier.  */
5807	  /* ??? This assumes that P and P+1 are always complementary
5808	     predicates for P even.  */
5809	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5810	    need_barrier = 1;
5811	  break;
5812
5813	case 2:
5814	  /* The register has been unconditionally written already.  We
5815	     need a barrier.  */
5816	  need_barrier = 1;
5817	  break;
5818
5819	default:
5820	  gcc_unreachable ();
5821	}
5822    }
5823
5824  return need_barrier;
5825}
5826
5827static int
5828rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5829{
5830  int regno = REGNO (reg);
5831  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5832
5833  if (n == 1)
5834    return rws_access_regno (regno, flags, pred);
5835  else
5836    {
5837      int need_barrier = 0;
5838      while (--n >= 0)
5839	need_barrier |= rws_access_regno (regno + n, flags, pred);
5840      return need_barrier;
5841    }
5842}
5843
/* Examine X, which is a SET rtx, and update the register flags stored
   in *PFLAGS.  */
5846
5847static void
5848update_set_flags (rtx x, struct reg_flags *pflags)
5849{
5850  rtx src = SET_SRC (x);
5851
5852  switch (GET_CODE (src))
5853    {
5854    case CALL:
5855      return;
5856
5857    case IF_THEN_ELSE:
5858      /* There are four cases here:
5859	 (1) The destination is (pc), in which case this is a branch,
5860	 nothing here applies.
5861	 (2) The destination is ar.lc, in which case this is a
5862	 doloop_end_internal,
5863	 (3) The destination is an fp register, in which case this is
5864	 an fselect instruction.
5865	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5866	 this is a check load.
5867	 In all cases, nothing we do in this function applies.  */
5868      return;
5869
5870    default:
5871      if (COMPARISON_P (src)
5872	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5873	/* Set pflags->is_fp to 1 so that we know we're dealing
5874	   with a floating point comparison when processing the
5875	   destination of the SET.  */
5876	pflags->is_fp = 1;
5877
5878      /* Discover if this is a parallel comparison.  We only handle
5879	 and.orcm and or.andcm at present, since we must retain a
5880	 strict inverse on the predicate pair.  */
5881      else if (GET_CODE (src) == AND)
5882	pflags->is_and = 1;
5883      else if (GET_CODE (src) == IOR)
5884	pflags->is_or = 1;
5885
5886      break;
5887    }
5888}
5889
/* Subroutine of rtx_needs_barrier; this function determines whether the
   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   are as in rtx_needs_barrier.  */
5894
5895static int
5896set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5897{
5898  int need_barrier = 0;
5899  rtx dst;
5900  rtx src = SET_SRC (x);
5901
5902  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by the subroutine call.  */
5905    return rtx_needs_barrier (src, flags, pred);
5906  else if (SET_DEST (x) == pc_rtx)
5907    {
5908      /* X is a conditional branch.  */
5909      /* ??? This seems redundant, as the caller sets this bit for
5910	 all JUMP_INSNs.  */
5911      if (!ia64_spec_check_src_p (src))
5912	flags.is_branch = 1;
5913      return rtx_needs_barrier (src, flags, pred);
5914    }
5915
5916  if (ia64_spec_check_src_p (src))
5917    /* Avoid checking one register twice (in condition
5918       and in 'then' section) for ldc pattern.  */
5919    {
5920      gcc_assert (REG_P (XEXP (src, 2)));
5921      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5922
5923      /* We process MEM below.  */
5924      src = XEXP (src, 1);
5925    }
5926
5927  need_barrier |= rtx_needs_barrier (src, flags, pred);
5928
5929  dst = SET_DEST (x);
5930  if (GET_CODE (dst) == ZERO_EXTRACT)
5931    {
5932      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5933      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5934    }
5935  return need_barrier;
5936}
5937
5938/* Handle an access to rtx X of type FLAGS using predicate register
5939   PRED.  Return 1 if this access creates a dependency with an earlier
5940   instruction in the same group.  */
5941
5942static int
5943rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5944{
5945  int i, j;
5946  int is_complemented = 0;
5947  int need_barrier = 0;
5948  const char *format_ptr;
5949  struct reg_flags new_flags;
5950  rtx cond;
5951
5952  if (! x)
5953    return 0;
5954
5955  new_flags = flags;
5956
5957  switch (GET_CODE (x))
5958    {
5959    case SET:
5960      update_set_flags (x, &new_flags);
5961      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5962      if (GET_CODE (SET_SRC (x)) != CALL)
5963	{
5964	  new_flags.is_write = 1;
5965	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5966	}
5967      break;
5968
5969    case CALL:
5970      new_flags.is_write = 0;
5971      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5972
5973      /* Avoid multiple register writes, in case this is a pattern with
5974	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5975      if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
5976	{
5977	  new_flags.is_write = 1;
5978	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5979	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5980	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5981	}
5982      break;
5983
5984    case COND_EXEC:
5985      /* X is a predicated instruction.  */
5986
5987      cond = COND_EXEC_TEST (x);
5988      gcc_assert (!pred);
5989      need_barrier = rtx_needs_barrier (cond, flags, 0);
5990
5991      if (GET_CODE (cond) == EQ)
5992	is_complemented = 1;
5993      cond = XEXP (cond, 0);
5994      gcc_assert (GET_CODE (cond) == REG
5995		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5996      pred = REGNO (cond);
5997      if (is_complemented)
5998	++pred;
5999
6000      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
6001      return need_barrier;
6002
6003    case CLOBBER:
6004    case USE:
      /* CLOBBER and USE are for earlier compiler phases only.  */
6006      break;
6007
6008    case ASM_OPERANDS:
6009    case ASM_INPUT:
6010      /* We always emit stop bits for traditional asms.  We emit stop bits
6011	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
6012      if (GET_CODE (x) != ASM_OPERANDS
6013	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
6014	{
6015	  /* Avoid writing the register multiple times if we have multiple
6016	     asm outputs.  This avoids a failure in rws_access_reg.  */
6017	  if (! rws_insn_test (REG_VOLATILE))
6018	    {
6019	      new_flags.is_write = 1;
6020	      rws_access_regno (REG_VOLATILE, new_flags, pred);
6021	    }
6022	  return 1;
6023	}
6024
      /* For all ASM_OPERANDS, we must traverse the vector of input
	 operands.  We cannot just fall through here since then we
	 would be confused by the ASM_INPUT rtx inside ASM_OPERANDS,
	 which, unlike its normal usage, does not indicate a
	 traditional asm.  */
6029
6030      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
6031	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
6032	  need_barrier = 1;
6033      break;
6034
6035    case PARALLEL:
6036      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6037	{
6038	  rtx pat = XVECEXP (x, 0, i);
6039	  switch (GET_CODE (pat))
6040	    {
6041	    case SET:
6042	      update_set_flags (pat, &new_flags);
6043	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
6044	      break;
6045
6046	    case USE:
6047	    case CALL:
6048	    case ASM_OPERANDS:
6049	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
6050	      break;
6051
6052	    case CLOBBER:
6053	      if (REG_P (XEXP (pat, 0))
6054		  && extract_asm_operands (x) != NULL_RTX
6055		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
6056		{
6057		  new_flags.is_write = 1;
6058		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
6059						     new_flags, pred);
6060		  new_flags = flags;
6061		}
6062	      break;
6063
6064	    case RETURN:
6065	      break;
6066
6067	    default:
6068	      gcc_unreachable ();
6069	    }
6070	}
6071      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
6072	{
6073	  rtx pat = XVECEXP (x, 0, i);
6074	  if (GET_CODE (pat) == SET)
6075	    {
6076	      if (GET_CODE (SET_SRC (pat)) != CALL)
6077		{
6078		  new_flags.is_write = 1;
6079		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
6080						     pred);
6081		}
6082	    }
6083	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
6084	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
6085	}
6086      break;
6087
6088    case SUBREG:
6089      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
6090      break;
6091    case REG:
6092      if (REGNO (x) == AR_UNAT_REGNUM)
6093	{
6094	  for (i = 0; i < 64; ++i)
6095	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
6096	}
6097      else
6098	need_barrier = rws_access_reg (x, flags, pred);
6099      break;
6100
6101    case MEM:
6102      /* Find the regs used in memory address computation.  */
6103      new_flags.is_write = 0;
6104      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6105      break;
6106
6107    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
6108    case SYMBOL_REF:  case LABEL_REF:     case CONST:
6109      break;
6110
6111      /* Operators with side-effects.  */
6112    case POST_INC:    case POST_DEC:
6113      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6114
6115      new_flags.is_write = 0;
6116      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6117      new_flags.is_write = 1;
6118      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6119      break;
6120
6121    case POST_MODIFY:
6122      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
6123
6124      new_flags.is_write = 0;
6125      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
6126      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6127      new_flags.is_write = 1;
6128      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
6129      break;
6130
6131      /* Handle common unary and binary ops for efficiency.  */
6132    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
6133    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
6134    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
6135    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
6136    case NE:       case EQ:      case GE:      case GT:        case LE:
6137    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
6138      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
6139      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
6140      break;
6141
6142    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
6143    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
6144    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
6145    case SQRT:     case FFS:		case POPCOUNT:
6146      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6147      break;
6148
6149    case VEC_SELECT:
6150      /* VEC_SELECT's second argument is a PARALLEL with integers that
6151	 describe the elements selected.  On ia64, those integers are
6152	 always constants.  Avoid walking the PARALLEL so that we don't
6153	 get confused with "normal" parallels and then die.  */
6154      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
6155      break;
6156
6157    case UNSPEC:
6158      switch (XINT (x, 1))
6159	{
6160	case UNSPEC_LTOFF_DTPMOD:
6161	case UNSPEC_LTOFF_DTPREL:
6162	case UNSPEC_DTPREL:
6163	case UNSPEC_LTOFF_TPREL:
6164	case UNSPEC_TPREL:
6165	case UNSPEC_PRED_REL_MUTEX:
6166	case UNSPEC_PIC_CALL:
6167        case UNSPEC_MF:
6168        case UNSPEC_FETCHADD_ACQ:
6169	case UNSPEC_BSP_VALUE:
6170	case UNSPEC_FLUSHRS:
6171	case UNSPEC_BUNDLE_SELECTOR:
6172          break;
6173
6174	case UNSPEC_GR_SPILL:
6175	case UNSPEC_GR_RESTORE:
6176	  {
6177	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
6178	    HOST_WIDE_INT bit = (offset >> 3) & 63;
6179
6180	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6181	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
6182	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
6183					      new_flags, pred);
6184	    break;
6185	  }
6186
6187	case UNSPEC_FR_SPILL:
6188	case UNSPEC_FR_RESTORE:
6189	case UNSPEC_GETF_EXP:
6190	case UNSPEC_SETF_EXP:
6191        case UNSPEC_ADDP4:
6192	case UNSPEC_FR_SQRT_RECIP_APPROX:
6193	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
6194	case UNSPEC_LDA:
6195	case UNSPEC_LDS:
6196	case UNSPEC_LDS_A:
6197	case UNSPEC_LDSA:
6198	case UNSPEC_CHKACLR:
6199        case UNSPEC_CHKS:
6200	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6201	  break;
6202
6203	case UNSPEC_FR_RECIP_APPROX:
6204	case UNSPEC_SHRP:
6205	case UNSPEC_COPYSIGN:
6206	case UNSPEC_FR_RECIP_APPROX_RES:
6207	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
6208	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6209	  break;
6210
6211        case UNSPEC_CMPXCHG_ACQ:
6212	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
6213	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
6214	  break;
6215
6216	default:
6217	  gcc_unreachable ();
6218	}
6219      break;
6220
6221    case UNSPEC_VOLATILE:
6222      switch (XINT (x, 1))
6223	{
6224	case UNSPECV_ALLOC:
6225	  /* Alloc must always be the first instruction of a group.
6226	     We force this by always returning true.  */
6227	  /* ??? We might get better scheduling if we explicitly check for
6228	     input/local/output register dependencies, and modify the
6229	     scheduler so that alloc is always reordered to the start of
6230	     the current group.  We could then eliminate all of the
6231	     first_instruction code.  */
6232	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
6233
6234	  new_flags.is_write = 1;
6235	  rws_access_regno (REG_AR_CFM, new_flags, pred);
6236	  return 1;
6237
6238	case UNSPECV_SET_BSP:
6239	  need_barrier = 1;
6240          break;
6241
6242	case UNSPECV_BLOCKAGE:
6243	case UNSPECV_INSN_GROUP_BARRIER:
6244	case UNSPECV_BREAK:
6245	case UNSPECV_PSAC_ALL:
6246	case UNSPECV_PSAC_NORMAL:
6247	  return 0;
6248
6249	default:
6250	  gcc_unreachable ();
6251	}
6252      break;
6253
6254    case RETURN:
6255      new_flags.is_write = 0;
6256      need_barrier  = rws_access_regno (REG_RP, flags, pred);
6257      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
6258
6259      new_flags.is_write = 1;
6260      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
6261      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
6262      break;
6263
6264    default:
6265      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
6266      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6267	switch (format_ptr[i])
6268	  {
6269	  case '0':	/* unused field */
6270	  case 'i':	/* integer */
6271	  case 'n':	/* note */
6272	  case 'w':	/* wide integer */
6273	  case 's':	/* pointer to string */
6274	  case 'S':	/* optional pointer to string */
6275	    break;
6276
6277	  case 'e':
6278	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
6279	      need_barrier = 1;
6280	    break;
6281
6282	  case 'E':
6283	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
6284	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
6285		need_barrier = 1;
6286	    break;
6287
6288	  default:
6289	    gcc_unreachable ();
6290	  }
6291      break;
6292    }
6293  return need_barrier;
6294}
6295
6296/* Clear out the state for group_barrier_needed at the start of a
6297   sequence of insns.  */
6298
6299static void
6300init_insn_group_barriers (void)
6301{
6302  memset (rws_sum, 0, sizeof (rws_sum));
6303  first_instruction = 1;
6304}
6305
6306/* Given the current state, determine whether a group barrier (a stop bit) is
6307   necessary before INSN.  Return nonzero if so.  This modifies the state to
6308   include the effects of INSN as a side-effect.  */
6309
6310static int
6311group_barrier_needed (rtx insn)
6312{
6313  rtx pat;
6314  int need_barrier = 0;
6315  struct reg_flags flags;
6316
6317  memset (&flags, 0, sizeof (flags));
6318  switch (GET_CODE (insn))
6319    {
6320    case NOTE:
6321    case DEBUG_INSN:
6322      break;
6323
6324    case BARRIER:
6325      /* A barrier doesn't imply an instruction group boundary.  */
6326      break;
6327
6328    case CODE_LABEL:
6329      memset (rws_insn, 0, sizeof (rws_insn));
6330      return 1;
6331
6332    case CALL_INSN:
6333      flags.is_branch = 1;
6334      flags.is_sibcall = SIBLING_CALL_P (insn);
6335      memset (rws_insn, 0, sizeof (rws_insn));
6336
6337      /* Don't bundle a call following another call.  */
6338      if ((pat = prev_active_insn (insn))
6339	  && GET_CODE (pat) == CALL_INSN)
6340	{
6341	  need_barrier = 1;
6342	  break;
6343	}
6344
6345      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
6346      break;
6347
6348    case JUMP_INSN:
6349      if (!ia64_spec_check_p (insn))
6350	flags.is_branch = 1;
6351
6352      /* Don't bundle a jump following a call.  */
6353      if ((pat = prev_active_insn (insn))
6354	  && GET_CODE (pat) == CALL_INSN)
6355	{
6356	  need_barrier = 1;
6357	  break;
6358	}
6359      /* FALLTHRU */
6360
6361    case INSN:
6362      if (GET_CODE (PATTERN (insn)) == USE
6363	  || GET_CODE (PATTERN (insn)) == CLOBBER)
6364	/* Don't care about USE and CLOBBER "insns"---those are used to
6365	   indicate to the optimizer that it shouldn't get rid of
6366	   certain operations.  */
6367	break;
6368
6369      pat = PATTERN (insn);
6370
6371      /* Ug.  Hack hacks hacked elsewhere.  */
6372      switch (recog_memoized (insn))
6373	{
6374	  /* We play dependency tricks with the epilogue in order
6375	     to get proper schedules.  Undo this for dv analysis.  */
6376	case CODE_FOR_epilogue_deallocate_stack:
6377	case CODE_FOR_prologue_allocate_stack:
6378	  pat = XVECEXP (pat, 0, 0);
6379	  break;
6380
6381	  /* The pattern we use for br.cloop confuses the code above.
6382	     The second element of the vector is representative.  */
6383	case CODE_FOR_doloop_end_internal:
6384	  pat = XVECEXP (pat, 0, 1);
6385	  break;
6386
6387	  /* Doesn't generate code.  */
6388	case CODE_FOR_pred_rel_mutex:
6389	case CODE_FOR_prologue_use:
6390	  return 0;
6391
6392	default:
6393	  break;
6394	}
6395
6396      memset (rws_insn, 0, sizeof (rws_insn));
6397      need_barrier = rtx_needs_barrier (pat, flags, 0);
6398
6399      /* Check to see if the previous instruction was a volatile
6400	 asm.  */
6401      if (! need_barrier)
6402	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6403
6404      break;
6405
6406    default:
6407      gcc_unreachable ();
6408    }
6409
6410  if (first_instruction && INSN_P (insn)
6411      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6412      && GET_CODE (PATTERN (insn)) != USE
6413      && GET_CODE (PATTERN (insn)) != CLOBBER)
6414    {
6415      need_barrier = 0;
6416      first_instruction = 0;
6417    }
6418
6419  return need_barrier;
6420}
6421
6422/* Like group_barrier_needed, but do not clobber the current state.  */
6423
6424static int
6425safe_group_barrier_needed (rtx insn)
6426{
6427  int saved_first_instruction;
6428  int t;
6429
6430  saved_first_instruction = first_instruction;
6431  in_safe_group_barrier = 1;
6432
6433  t = group_barrier_needed (insn);
6434
6435  first_instruction = saved_first_instruction;
6436  in_safe_group_barrier = 0;
6437
6438  return t;
6439}
6440
6441/* Scan the current function and insert stop bits as necessary to
6442   eliminate dependencies.  This function assumes that a final
6443   instruction scheduling pass has been run which has already
6444   inserted most of the necessary stop bits.  This function only
6445   inserts new ones at basic block boundaries, since these are
6446   invisible to the scheduler.  */
6447
6448static void
6449emit_insn_group_barriers (FILE *dump)
6450{
6451  rtx insn;
6452  rtx last_label = 0;
6453  int insns_since_last_label = 0;
6454
6455  init_insn_group_barriers ();
6456
6457  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6458    {
6459      if (GET_CODE (insn) == CODE_LABEL)
6460	{
6461	  if (insns_since_last_label)
6462	    last_label = insn;
6463	  insns_since_last_label = 0;
6464	}
6465      else if (GET_CODE (insn) == NOTE
6466	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
6467	{
6468	  if (insns_since_last_label)
6469	    last_label = insn;
6470	  insns_since_last_label = 0;
6471	}
6472      else if (GET_CODE (insn) == INSN
6473	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6474	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6475	{
6476	  init_insn_group_barriers ();
6477	  last_label = 0;
6478	}
6479      else if (NONDEBUG_INSN_P (insn))
6480	{
6481	  insns_since_last_label = 1;
6482
6483	  if (group_barrier_needed (insn))
6484	    {
6485	      if (last_label)
6486		{
6487		  if (dump)
6488		    fprintf (dump, "Emitting stop before label %d\n",
6489			     INSN_UID (last_label));
6490		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6491		  insn = last_label;
6492
6493		  init_insn_group_barriers ();
6494		  last_label = 0;
6495		}
6496	    }
6497	}
6498    }
6499}
6500
/* Like emit_insn_group_barriers, but used when no final scheduling pass
   has been run.  This function therefore has to emit all necessary group
   barriers itself.  */
6503
6504static void
6505emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6506{
6507  rtx insn;
6508
6509  init_insn_group_barriers ();
6510
6511  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6512    {
6513      if (GET_CODE (insn) == BARRIER)
6514	{
6515	  rtx last = prev_active_insn (insn);
6516
6517	  if (! last)
6518	    continue;
6519	  if (GET_CODE (last) == JUMP_INSN
6520	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6521	    last = prev_active_insn (last);
6522	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6523	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6524
6525	  init_insn_group_barriers ();
6526	}
6527      else if (NONDEBUG_INSN_P (insn))
6528	{
6529	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6530	    init_insn_group_barriers ();
6531	  else if (group_barrier_needed (insn))
6532	    {
6533	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6534	      init_insn_group_barriers ();
6535	      group_barrier_needed (insn);
6536	    }
6537	}
6538    }
6539}
6540
6541
6542
6543/* Instruction scheduling support.  */
6544
6545#define NR_BUNDLES 10
6546
6547/* A list of names of all available bundles.  */
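/* Each bundle name encodes the slot types of the bundle: m = memory,
   i = integer, f = floating-point, b = branch, and "lx" = a long
   (movl-style) operation occupying two slots.  */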
6548
6549static const char *bundle_name [NR_BUNDLES] =
6550{
6551  ".mii",
6552  ".mmi",
6553  ".mfi",
6554  ".mmf",
6555#if NR_BUNDLES == 10
6556  ".bbb",
6557  ".mbb",
6558#endif
6559  ".mib",
6560  ".mmb",
6561  ".mfb",
6562  ".mlx"
6563};
6564
6565/* Nonzero if we should insert stop bits into the schedule.  */
6566
6567int ia64_final_schedule = 0;
6568
6569/* Codes of the corresponding queried units: */
6570
6571static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6572static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6573
6574static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6575static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6576
6577static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6578
6579/* The following variable value is an insn group barrier.  */
6580
6581static rtx dfa_stop_insn;
6582
6583/* The following variable value is the last issued insn.  */
6584
6585static rtx last_scheduled_insn;
6586
/* The following variable is a pointer to a DFA state used as a
   temporary variable.  */
6589
6590static state_t temp_dfa_state = NULL;
6591
/* The following variable is the DFA state after issuing the last
   insn.  */
6594
6595static state_t prev_cycle_state = NULL;
6596
/* An element of the following array is TRUE if the corresponding insn
   requires a stop bit to be added before it.  */
6599
6600static char *stops_p = NULL;
6601
/* The following variable is used to set up the array mentioned above.  */
6603
6604static int stop_before_p = 0;
6605
/* The following variable is the length of the arrays `clocks' and
   `add_cycles'.  */
6608
6609static int clocks_length;
6610
/* The following variable is the number of data speculations in progress.  */
6612static int pending_data_specs = 0;
6613
/* Number of memory references in the current and the three following
   processor cycles.  */
6615static char mem_ops_in_group[4];
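/* The array above is used as a circular buffer indexed by the processor
   cycle modulo 4; see record_memory_reference.  */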
6616
/* The current processor cycle (from the scheduler's point of view).  */
6618static int current_cycle;
6619
6620static rtx ia64_single_set (rtx);
6621static void ia64_emit_insn_before (rtx, rtx);
6622
6623/* Map a bundle number to its pseudo-op.  */
6624
6625const char *
6626get_bundle_name (int b)
6627{
6628  return bundle_name[b];
6629}
6630
6631
6632/* Return the maximum number of instructions a cpu can issue.  */
6633
6634static int
6635ia64_issue_rate (void)
6636{
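  /* Itanium 2 can issue up to two bundles, i.e. six instructions,
     per clock cycle.  */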
6637  return 6;
6638}
6639
6640/* Helper function - like single_set, but look inside COND_EXEC.  */
6641
6642static rtx
6643ia64_single_set (rtx insn)
6644{
6645  rtx x = PATTERN (insn), ret;
6646  if (GET_CODE (x) == COND_EXEC)
6647    x = COND_EXEC_CODE (x);
6648  if (GET_CODE (x) == SET)
6649    return x;
6650
  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack
     here.  Although they are not classical single sets, the second set
     is there just to protect the insn from being moved past FP-relative
     stack accesses.  */
6654  switch (recog_memoized (insn))
6655    {
6656    case CODE_FOR_prologue_allocate_stack:
6657    case CODE_FOR_epilogue_deallocate_stack:
6658      ret = XVECEXP (x, 0, 0);
6659      break;
6660
6661    default:
6662      ret = single_set_2 (insn, x);
6663      break;
6664    }
6665
6666  return ret;
6667}
6668
/* Adjust the cost of a scheduling dependency.
   Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN.
   COST is the current cost, DW is the dependency weakness.  */
6672static int
6673ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw)
6674{
6675  enum reg_note dep_type = (enum reg_note) dep_type1;
6676  enum attr_itanium_class dep_class;
6677  enum attr_itanium_class insn_class;
6678
6679  insn_class = ia64_safe_itanium_class (insn);
6680  dep_class = ia64_safe_itanium_class (dep_insn);
6681
  /* Treat true memory dependencies separately.  Ignore an apparent true
     dependence between a store and a call (a call contains a MEM whose
     address is a SYMBOL_REF).  */
6684  if (dep_type == REG_DEP_TRUE
6685      && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
6686      && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
6687    return 0;
6688
6689  if (dw == MIN_DEP_WEAK)
6690    /* Store and load are likely to alias, use higher cost to avoid stall.  */
6691    return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST);
6692  else if (dw > MIN_DEP_WEAK)
6693    {
6694      /* Store and load are less likely to alias.  */
6695      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
6696	/* Assume there will be no cache conflict for floating-point data.
6697	   For integer data, L1 conflict penalty is huge (17 cycles), so we
6698	   never assume it will not cause a conflict.  */
6699	return 0;
6700      else
6701	return cost;
6702    }
6703
6704  if (dep_type != REG_DEP_OUTPUT)
6705    return cost;
6706
6707  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6708      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6709    return 0;
6710
6711  return cost;
6712}
6713
6714/* Like emit_insn_before, but skip cycle_display notes.
6715   ??? When cycle display notes are implemented, update this.  */
6716
6717static void
6718ia64_emit_insn_before (rtx insn, rtx before)
6719{
6720  emit_insn_before (insn, before);
6721}
6722
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because
   that decreases latency on Itanium 1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
6727
6728static void
6729ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6730{
6731  rtx insn, next, next_tail;
6732
6733  /* Before reload, which_alternative is not set, which means that
6734     ia64_safe_itanium_class will produce wrong results for (at least)
6735     move instructions.  */
6736  if (!reload_completed)
6737    return;
6738
6739  next_tail = NEXT_INSN (tail);
6740  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6741    if (INSN_P (insn))
6742      insn->call = 0;
6743  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6744    if (INSN_P (insn)
6745	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6746      {
6747	sd_iterator_def sd_it;
6748	dep_t dep;
6749	bool has_mem_op_consumer_p = false;
6750
6751	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
6752	  {
6753	    enum attr_itanium_class c;
6754
6755	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
6756	      continue;
6757
6758	    next = DEP_CON (dep);
6759	    c = ia64_safe_itanium_class (next);
6760	    if ((c == ITANIUM_CLASS_ST
6761		 || c == ITANIUM_CLASS_STF)
6762		&& ia64_st_address_bypass_p (insn, next))
6763	      {
6764		has_mem_op_consumer_p = true;
6765		break;
6766	      }
6767	    else if ((c == ITANIUM_CLASS_LD
6768		      || c == ITANIUM_CLASS_FLD
6769		      || c == ITANIUM_CLASS_FLDP)
6770		     && ia64_ld_address_bypass_p (insn, next))
6771	      {
6772		has_mem_op_consumer_p = true;
6773		break;
6774	      }
6775	  }
6776
6777	insn->call = has_mem_op_consumer_p;
6778      }
6779}
6780
6781/* We're beginning a new block.  Initialize data structures as necessary.  */
6782
6783static void
6784ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6785		 int sched_verbose ATTRIBUTE_UNUSED,
6786		 int max_ready ATTRIBUTE_UNUSED)
6787{
6788#ifdef ENABLE_CHECKING
6789  rtx insn;
6790
6791  if (!sel_sched_p () && reload_completed)
6792    for (insn = NEXT_INSN (current_sched_info->prev_head);
6793	 insn != current_sched_info->next_tail;
6794	 insn = NEXT_INSN (insn))
6795      gcc_assert (!SCHED_GROUP_P (insn));
6796#endif
6797  last_scheduled_insn = NULL_RTX;
6798  init_insn_group_barriers ();
6799
6800  current_cycle = 0;
6801  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
6802}
6803
6804/* We're beginning a scheduling pass.  Check assertion.  */
6805
6806static void
6807ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6808                        int sched_verbose ATTRIBUTE_UNUSED,
6809                        int max_ready ATTRIBUTE_UNUSED)
6810{
6811  gcc_assert (pending_data_specs == 0);
6812}
6813
6814/* Scheduling pass is now finished.  Free/reset static variable.  */
6815static void
6816ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6817			  int sched_verbose ATTRIBUTE_UNUSED)
6818{
6819  gcc_assert (pending_data_specs == 0);
6820}
6821
6822/* Return TRUE if INSN is a load (either normal or speculative, but not a
6823   speculation check), FALSE otherwise.  */
6824static bool
6825is_load_p (rtx insn)
6826{
6827  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6828
6829  return
6830   ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
6831    && get_attr_check_load (insn) == CHECK_LOAD_NO);
6832}
6833
/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
   array (taking into account the 3-cycle cache-reference postponing for
   stores: Intel Itanium 2 Reference Manual for Software Development and
   Optimization, section 6.7.3.1).  */
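/* For example, a load issued at cycle N is counted in slot N % 4, while a
   store issued at cycle N is counted in slot (N + 3) % 4, modelling the
   three extra cycles before the store reaches the cache.  */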
6838static void
6839record_memory_reference (rtx insn)
6840{
6841  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
6842
  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;
    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;
    default:
      break;
    }
6854}
6855
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
6858
6859static int
6860ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6861			int *pn_ready, int clock_var,
6862			int reorder_type)
6863{
6864  int n_asms;
6865  int n_ready = *pn_ready;
6866  rtx *e_ready = ready + n_ready;
6867  rtx *insnp;
6868
6869  if (sched_verbose)
6870    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6871
6872  if (reorder_type == 0)
6873    {
6874      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6875      n_asms = 0;
6876      for (insnp = ready; insnp < e_ready; insnp++)
6877	if (insnp < e_ready)
6878	  {
6879	    rtx insn = *insnp;
6880	    enum attr_type t = ia64_safe_type (insn);
6881	    if (t == TYPE_UNKNOWN)
6882	      {
6883		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6884		    || asm_noperands (PATTERN (insn)) >= 0)
6885		  {
6886		    rtx lowest = ready[n_asms];
6887		    ready[n_asms] = insn;
6888		    *insnp = lowest;
6889		    n_asms++;
6890		  }
6891		else
6892		  {
6893		    rtx highest = ready[n_ready - 1];
6894		    ready[n_ready - 1] = insn;
6895		    *insnp = highest;
6896		    return 1;
6897		  }
6898	      }
6899	  }
6900
6901      if (n_asms < n_ready)
6902	{
6903	  /* Some normal insns to process.  Skip the asms.  */
6904	  ready += n_asms;
6905	  n_ready -= n_asms;
6906	}
6907      else if (n_ready > 0)
6908	return 1;
6909    }
6910
6911  if (ia64_final_schedule)
6912    {
6913      int deleted = 0;
6914      int nr_need_stop = 0;
6915
6916      for (insnp = ready; insnp < e_ready; insnp++)
6917	if (safe_group_barrier_needed (*insnp))
6918	  nr_need_stop++;
6919
6920      if (reorder_type == 1 && n_ready == nr_need_stop)
6921	return 0;
6922      if (reorder_type == 0)
6923	return 1;
6924      insnp = e_ready;
6925      /* Move down everything that needs a stop bit, preserving
6926	 relative order.  */
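      /* The ready list keeps the highest-priority insn at the end of the
	 array (cf. the `highest'/`lowest' swaps above), so rotating an
	 insn down to ready[0] effectively gives it the lowest priority
	 while keeping the rotated insns in their original relative
	 order.  */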
6927      while (insnp-- > ready + deleted)
6928	while (insnp >= ready + deleted)
6929	  {
6930	    rtx insn = *insnp;
6931	    if (! safe_group_barrier_needed (insn))
6932	      break;
6933	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6934	    *ready = insn;
6935	    deleted++;
6936	  }
6937      n_ready -= deleted;
6938      ready += deleted;
6939    }
6940
6941  current_cycle = clock_var;
  if (reload_completed
      && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
6943    {
6944      int moved = 0;
6945
6946      insnp = e_ready;
6947      /* Move down loads/stores, preserving relative order.  */
6948      while (insnp-- > ready + moved)
6949	while (insnp >= ready + moved)
6950	  {
6951	    rtx insn = *insnp;
6952	    if (! is_load_p (insn))
6953	      break;
6954	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6955	    *ready = insn;
6956	    moved++;
6957	  }
6958      n_ready -= moved;
6959      ready += moved;
6960    }
6961
6962  return 1;
6963}
6964
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */
6967
6968static int
6969ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6970		    int clock_var)
6971{
6972  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6973				 pn_ready, clock_var, 0);
6974}
6975
6976/* Like ia64_sched_reorder, but called after issuing each insn.
6977   Override the default sort algorithm to better slot instructions.  */
6978
6979static int
6980ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6981		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6982		     int *pn_ready, int clock_var)
6983{
6984  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6985				 clock_var, 1);
6986}
6987
6988/* We are about to issue INSN.  Return the number of insns left on the
6989   ready queue that can be issued this cycle.  */
6990
6991static int
6992ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6993		     int sched_verbose ATTRIBUTE_UNUSED,
6994		     rtx insn ATTRIBUTE_UNUSED,
6995		     int can_issue_more ATTRIBUTE_UNUSED)
6996{
6997  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d if we don't have to.  */
7000    {
7001      if (DONE_SPEC (insn) & BEGIN_DATA)
7002	pending_data_specs++;
7003      if (CHECK_SPEC (insn) & BEGIN_DATA)
7004	pending_data_specs--;
7005    }
7006
7007  if (DEBUG_INSN_P (insn))
7008    return 1;
7009
7010  last_scheduled_insn = insn;
7011  memcpy (prev_cycle_state, curr_state, dfa_state_size);
7012  if (reload_completed)
7013    {
7014      int needed = group_barrier_needed (insn);
7015
7016      gcc_assert (!needed);
7017      if (GET_CODE (insn) == CALL_INSN)
7018	init_insn_group_barriers ();
7019      stops_p [INSN_UID (insn)] = stop_before_p;
7020      stop_before_p = 0;
7021
7022      record_memory_reference (insn);
7023    }
7024  return 1;
7025}
7026
/* We are choosing an insn from the ready queue.  Return nonzero if INSN
   can be chosen.  */
7029
7030static int
7031ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
7032{
7033  gcc_assert (insn && INSN_P (insn));
7034  return ((!reload_completed
7035	   || !safe_group_barrier_needed (insn))
7036	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn)
7037	  && (!mflag_sched_mem_insns_hard_limit
7038	      || !is_load_p (insn)
7039	      || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns));
7040}
7041
/* We are choosing an insn from the ready queue.  Return TRUE if INSN
   can be chosen with respect to the limit on pending data-speculative
   loads (ALAT usage).  */
7044
7045static bool
7046ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn)
7047{
  gcc_assert (insn && INSN_P (insn));
  /* The size of the ALAT is 32.  Since we perform conservative data
     speculation, we keep the ALAT half-empty.  */
7051  return (pending_data_specs < 16
7052	  || !(TODO_SPEC (insn) & BEGIN_DATA));
7053}
7054
/* The following variable holds a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   advanced.  */
7058
7059static rtx dfa_pre_cycle_insn;
7060
7061/* Returns 1 when a meaningful insn was scheduled between the last group
7062   barrier and LAST.  */
7063static int
7064scheduled_good_insn (rtx last)
7065{
7066  if (last && recog_memoized (last) >= 0)
7067    return 1;
7068
7069  for ( ;
7070       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
7071       && !stops_p[INSN_UID (last)];
7072       last = PREV_INSN (last))
7073    /* We could hit a NOTE_INSN_DELETED here which is actually outside
7074       the ebb we're scheduling.  */
7075    if (INSN_P (last) && recog_memoized (last) >= 0)
7076      return 1;
7077
7078  return 0;
7079}
7080
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK; *SORT_P is cleared when the ready
   queue should not be sorted on the next clock start.  */
7084
7085static int
7086ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
7087		    int clock, int *sort_p)
7088{
7089  int setup_clocks_p = FALSE;
7090
7091  gcc_assert (insn && INSN_P (insn));
7092
7093  if (DEBUG_INSN_P (insn))
7094    return 0;
7095
7096  /* When a group barrier is needed for insn, last_scheduled_insn
7097     should be set.  */
7098  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
7099              || last_scheduled_insn);
7100
7101  if ((reload_completed
7102       && (safe_group_barrier_needed (insn)
7103	   || (mflag_sched_stop_bits_after_every_cycle
7104	       && last_clock != clock
7105	       && last_scheduled_insn
7106	       && scheduled_good_insn (last_scheduled_insn))))
7107      || (last_scheduled_insn
7108	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
7109	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7110	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
7111    {
7112      init_insn_group_barriers ();
7113
7114      if (verbose && dump)
7115	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
7116		 last_clock == clock ? " + cycle advance" : "");
7117
7118      stop_before_p = 1;
7119      current_cycle = clock;
7120      mem_ops_in_group[current_cycle % 4] = 0;
7121
7122      if (last_clock == clock)
7123	{
7124	  state_transition (curr_state, dfa_stop_insn);
7125	  if (TARGET_EARLY_STOP_BITS)
7126	    *sort_p = (last_scheduled_insn == NULL_RTX
7127		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
7128	  else
7129	    *sort_p = 0;
7130	  return 1;
7131	}
7132      else if (reload_completed)
7133	setup_clocks_p = TRUE;
7134
7135      if (last_scheduled_insn)
7136	{
7137	  if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
7138	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
7139	    state_reset (curr_state);
7140	  else
7141	    {
7142	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
7143	      state_transition (curr_state, dfa_stop_insn);
7144	      state_transition (curr_state, dfa_pre_cycle_insn);
7145	      state_transition (curr_state, NULL);
7146	    }
7147	}
7148    }
7149  else if (reload_completed)
7150    setup_clocks_p = TRUE;
7151
7152  return 0;
7153}
7154
7155/* Implement targetm.sched.h_i_d_extended hook.
7156   Extend internal data structures.  */
7157static void
7158ia64_h_i_d_extended (void)
7159{
7160  if (stops_p != NULL)
7161    {
7162      int new_clocks_length = get_max_uid () * 3 / 2;
7163      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
7164      clocks_length = new_clocks_length;
7165    }
7166}
7167
7168
7169/* This structure describes the data used by the backend to guide scheduling.
7170   When the current scheduling point is switched, this data should be saved
7171   and restored later, if the scheduler returns to this point.  */
7172struct _ia64_sched_context
7173{
7174  state_t prev_cycle_state;
7175  rtx last_scheduled_insn;
7176  struct reg_write_state rws_sum[NUM_REGS];
7177  struct reg_write_state rws_insn[NUM_REGS];
7178  int first_instruction;
7179  int pending_data_specs;
7180  int current_cycle;
7181  char mem_ops_in_group[4];
7182};
7183typedef struct _ia64_sched_context *ia64_sched_context_t;
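/* Each field above shadows a file-scope variable of the same name;
   ia64_init_sched_context and ia64_set_sched_context below copy those
   globals into and out of a saved context.  */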
7184
7185/* Allocates a scheduling context.  */
7186static void *
7187ia64_alloc_sched_context (void)
7188{
7189  return xmalloc (sizeof (struct _ia64_sched_context));
7190}
7191
7192/* Initializes the _SC context with clean data, if CLEAN_P, and from
7193   the global context otherwise.  */
7194static void
7195ia64_init_sched_context (void *_sc, bool clean_p)
7196{
7197  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7198
7199  sc->prev_cycle_state = xmalloc (dfa_state_size);
7200  if (clean_p)
7201    {
7202      state_reset (sc->prev_cycle_state);
7203      sc->last_scheduled_insn = NULL_RTX;
7204      memset (sc->rws_sum, 0, sizeof (rws_sum));
7205      memset (sc->rws_insn, 0, sizeof (rws_insn));
7206      sc->first_instruction = 1;
7207      sc->pending_data_specs = 0;
7208      sc->current_cycle = 0;
7209      memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
7210    }
7211  else
7212    {
7213      memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
7214      sc->last_scheduled_insn = last_scheduled_insn;
7215      memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
7216      memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
7217      sc->first_instruction = first_instruction;
7218      sc->pending_data_specs = pending_data_specs;
7219      sc->current_cycle = current_cycle;
7220      memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
7221    }
7222}
7223
7224/* Sets the global scheduling context to the one pointed to by _SC.  */
7225static void
7226ia64_set_sched_context (void *_sc)
7227{
7228  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7229
7230  gcc_assert (sc != NULL);
7231
7232  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
7233  last_scheduled_insn = sc->last_scheduled_insn;
7234  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
7235  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
7236  first_instruction = sc->first_instruction;
7237  pending_data_specs = sc->pending_data_specs;
7238  current_cycle = sc->current_cycle;
7239  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
7240}
7241
7242/* Clears the data in the _SC scheduling context.  */
7243static void
7244ia64_clear_sched_context (void *_sc)
7245{
7246  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
7247
7248  free (sc->prev_cycle_state);
7249  sc->prev_cycle_state = NULL;
7250}
7251
7252/* Frees the _SC scheduling context.  */
7253static void
7254ia64_free_sched_context (void *_sc)
7255{
7256  gcc_assert (_sc != NULL);
7257
7258  free (_sc);
7259}
7260
7261typedef rtx (* gen_func_t) (rtx, rtx);
7262
7263/* Return a function that will generate a load of mode MODE_NO
7264   with speculation types TS.  */
7265static gen_func_t
7266get_spec_load_gen_function (ds_t ts, int mode_no)
7267{
7268  static gen_func_t gen_ld_[] = {
7269    gen_movbi,
7270    gen_movqi_internal,
7271    gen_movhi_internal,
7272    gen_movsi_internal,
7273    gen_movdi_internal,
7274    gen_movsf_internal,
7275    gen_movdf_internal,
7276    gen_movxf_internal,
7277    gen_movti_internal,
7278    gen_zero_extendqidi2,
7279    gen_zero_extendhidi2,
7280    gen_zero_extendsidi2,
7281  };
7282
7283  static gen_func_t gen_ld_a[] = {
7284    gen_movbi_advanced,
7285    gen_movqi_advanced,
7286    gen_movhi_advanced,
7287    gen_movsi_advanced,
7288    gen_movdi_advanced,
7289    gen_movsf_advanced,
7290    gen_movdf_advanced,
7291    gen_movxf_advanced,
7292    gen_movti_advanced,
7293    gen_zero_extendqidi2_advanced,
7294    gen_zero_extendhidi2_advanced,
7295    gen_zero_extendsidi2_advanced,
7296  };
7297  static gen_func_t gen_ld_s[] = {
7298    gen_movbi_speculative,
7299    gen_movqi_speculative,
7300    gen_movhi_speculative,
7301    gen_movsi_speculative,
7302    gen_movdi_speculative,
7303    gen_movsf_speculative,
7304    gen_movdf_speculative,
7305    gen_movxf_speculative,
7306    gen_movti_speculative,
7307    gen_zero_extendqidi2_speculative,
7308    gen_zero_extendhidi2_speculative,
7309    gen_zero_extendsidi2_speculative,
7310  };
7311  static gen_func_t gen_ld_sa[] = {
7312    gen_movbi_speculative_advanced,
7313    gen_movqi_speculative_advanced,
7314    gen_movhi_speculative_advanced,
7315    gen_movsi_speculative_advanced,
7316    gen_movdi_speculative_advanced,
7317    gen_movsf_speculative_advanced,
7318    gen_movdf_speculative_advanced,
7319    gen_movxf_speculative_advanced,
7320    gen_movti_speculative_advanced,
7321    gen_zero_extendqidi2_speculative_advanced,
7322    gen_zero_extendhidi2_speculative_advanced,
7323    gen_zero_extendsidi2_speculative_advanced,
7324  };
7325  static gen_func_t gen_ld_s_a[] = {
7326    gen_movbi_speculative_a,
7327    gen_movqi_speculative_a,
7328    gen_movhi_speculative_a,
7329    gen_movsi_speculative_a,
7330    gen_movdi_speculative_a,
7331    gen_movsf_speculative_a,
7332    gen_movdf_speculative_a,
7333    gen_movxf_speculative_a,
7334    gen_movti_speculative_a,
7335    gen_zero_extendqidi2_speculative_a,
7336    gen_zero_extendhidi2_speculative_a,
7337    gen_zero_extendsidi2_speculative_a,
7338  };
7339
7340  gen_func_t *gen_ld;
7341
7342  if (ts & BEGIN_DATA)
7343    {
7344      if (ts & BEGIN_CONTROL)
7345	gen_ld = gen_ld_sa;
7346      else
7347	gen_ld = gen_ld_a;
7348    }
7349  else if (ts & BEGIN_CONTROL)
7350    {
7351      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
7352	  || ia64_needs_block_p (ts))
7353	gen_ld = gen_ld_s;
7354      else
7355	gen_ld = gen_ld_s_a;
7356    }
7357  else if (ts == 0)
7358    gen_ld = gen_ld_;
7359  else
7360    gcc_unreachable ();
7361
7362  return gen_ld[mode_no];
7363}
7364
/* Constants that help map `enum machine_mode' to an int.  */
7366enum SPEC_MODES
7367  {
7368    SPEC_MODE_INVALID = -1,
7369    SPEC_MODE_FIRST = 0,
7370    SPEC_MODE_FOR_EXTEND_FIRST = 1,
7371    SPEC_MODE_FOR_EXTEND_LAST = 3,
7372    SPEC_MODE_LAST = 8
7373  };
7374
7375enum
7376  {
7377    /* Offset to reach ZERO_EXTEND patterns.  */
7378    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
7379  };
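/* As an illustration of this mapping: SPEC_GEN_EXTEND_OFFSET is
   8 - 1 + 1 = 8, so a zero-extending QImode load gets index
   ia64_mode_to_int (QImode) + 8 = 1 + 8 = 9, which selects
   gen_zero_extendqidi2 (and its _a/_s/_sa variants) in the gen_ld_*
   tables above.  */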
7380
7381/* Return index of the MODE.  */
7382static int
7383ia64_mode_to_int (enum machine_mode mode)
7384{
7385  switch (mode)
7386    {
7387    case BImode: return 0; /* SPEC_MODE_FIRST  */
7388    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
7389    case HImode: return 2;
7390    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
7391    case DImode: return 4;
7392    case SFmode: return 5;
7393    case DFmode: return 6;
7394    case XFmode: return 7;
7395    case TImode:
7396      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
7397	 mentioned in itanium[12].md.  Predicate fp_register_operand also
7398	 needs to be defined.  Bottom line: better disable for now.  */
7399      return SPEC_MODE_INVALID;
7400    default:     return SPEC_MODE_INVALID;
7401    }
7402}
7403
7404/* Provide information about speculation capabilities.  */
7405static void
7406ia64_set_sched_flags (spec_info_t spec_info)
7407{
7408  unsigned int *flags = &(current_sched_info->flags);
7409
7410  if (*flags & SCHED_RGN
7411      || *flags & SCHED_EBB
7412      || *flags & SEL_SCHED)
7413    {
7414      int mask = 0;
7415
7416      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
7417          || (mflag_sched_ar_data_spec && reload_completed))
7418	{
7419	  mask |= BEGIN_DATA;
7420
7421	  if (!sel_sched_p ()
7422	      && ((mflag_sched_br_in_data_spec && !reload_completed)
7423		  || (mflag_sched_ar_in_data_spec && reload_completed)))
7424	    mask |= BE_IN_DATA;
7425	}
7426
7427      if (mflag_sched_control_spec
7428          && (!sel_sched_p ()
7429	      || reload_completed))
7430	{
7431	  mask |= BEGIN_CONTROL;
7432
7433	  if (!sel_sched_p () && mflag_sched_in_control_spec)
7434	    mask |= BE_IN_CONTROL;
7435	}
7436
7437      spec_info->mask = mask;
7438
7439      if (mask)
7440	{
7441	  *flags |= USE_DEPS_LIST | DO_SPECULATION;
7442
7443	  if (mask & BE_IN_SPEC)
7444	    *flags |= NEW_BBS;
7445
7446	  spec_info->flags = 0;
7447
7448	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
7449	    spec_info->flags |= PREFER_NON_DATA_SPEC;
7450
7451	  if (mask & CONTROL_SPEC)
7452	    {
7453	      if (mflag_sched_prefer_non_control_spec_insns)
7454		spec_info->flags |= PREFER_NON_CONTROL_SPEC;
7455
7456	      if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
7457		spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
7458	    }
7459
7460	  if (sched_verbose >= 1)
7461	    spec_info->dump = sched_dump;
7462	  else
7463	    spec_info->dump = 0;
7464
7465	  if (mflag_sched_count_spec_in_critical_path)
7466	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
7467	}
7468    }
7469  else
7470    spec_info->mask = 0;
7471}
7472
/* If INSN is an appropriate load, return the index of its mode (as
   computed by ia64_mode_to_int, adjusted for ZERO_EXTEND if needed).
   Return -1 otherwise.  */
7475static int
7476get_mode_no_for_insn (rtx insn)
7477{
7478  rtx reg, mem, mode_rtx;
7479  int mode_no;
7480  bool extend_p;
7481
7482  extract_insn_cached (insn);
7483
7484  /* We use WHICH_ALTERNATIVE only after reload.  This will
7485     guarantee that reload won't touch a speculative insn.  */
7486
7487  if (recog_data.n_operands != 2)
7488    return -1;
7489
7490  reg = recog_data.operand[0];
7491  mem = recog_data.operand[1];
7492
  /* We should use MEM's mode since REG's mode in the presence of
     ZERO_EXTEND will always be DImode.  */
7495  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
7496    /* Process non-speculative ld.  */
7497    {
7498      if (!reload_completed)
7499	{
7500	  /* Do not speculate into regs like ar.lc.  */
7501	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
7502	    return -1;
7503
7504	  if (!MEM_P (mem))
7505	    return -1;
7506
7507	  {
7508	    rtx mem_reg = XEXP (mem, 0);
7509
7510	    if (!REG_P (mem_reg))
7511	      return -1;
7512	  }
7513
7514	  mode_rtx = mem;
7515	}
7516      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
7517	{
7518	  gcc_assert (REG_P (reg) && MEM_P (mem));
7519	  mode_rtx = mem;
7520	}
7521      else
7522	return -1;
7523    }
7524  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
7525	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
7526	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
7527    /* Process speculative ld or ld.c.  */
7528    {
7529      gcc_assert (REG_P (reg) && MEM_P (mem));
7530      mode_rtx = mem;
7531    }
7532  else
7533    {
7534      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);
7535
7536      if (attr_class == ITANIUM_CLASS_CHK_A
7537	  || attr_class == ITANIUM_CLASS_CHK_S_I
7538	  || attr_class == ITANIUM_CLASS_CHK_S_F)
7539	/* Process chk.  */
7540	mode_rtx = reg;
7541      else
7542	return -1;
7543    }
7544
7545  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));
7546
7547  if (mode_no == SPEC_MODE_INVALID)
7548    return -1;
7549
7550  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));
7551
7552  if (extend_p)
7553    {
7554      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
7555	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
7556	return -1;
7557
7558      mode_no += SPEC_GEN_EXTEND_OFFSET;
7559    }
7560
7561  return mode_no;
7562}
7563
7564/* If X is an unspec part of a speculative load, return its code.
7565   Return -1 otherwise.  */
7566static int
7567get_spec_unspec_code (const_rtx x)
7568{
7569  if (GET_CODE (x) != UNSPEC)
7570    return -1;
7571
7572  {
7573    int code;
7574
7575    code = XINT (x, 1);
7576
7577    switch (code)
7578      {
7579      case UNSPEC_LDA:
7580      case UNSPEC_LDS:
7581      case UNSPEC_LDS_A:
7582      case UNSPEC_LDSA:
7583	return code;
7584
7585      default:
7586	return -1;
7587      }
7588  }
7589}
7590
7591/* Implement skip_rtx_p hook.  */
7592static bool
7593ia64_skip_rtx_p (const_rtx x)
7594{
7595  return get_spec_unspec_code (x) != -1;
7596}
7597
7598/* If INSN is a speculative load, return its UNSPEC code.
7599   Return -1 otherwise.  */
7600static int
7601get_insn_spec_code (const_rtx insn)
7602{
7603  rtx pat, reg, mem;
7604
7605  pat = PATTERN (insn);
7606
7607  if (GET_CODE (pat) == COND_EXEC)
7608    pat = COND_EXEC_CODE (pat);
7609
7610  if (GET_CODE (pat) != SET)
7611    return -1;
7612
7613  reg = SET_DEST (pat);
7614  if (!REG_P (reg))
7615    return -1;
7616
7617  mem = SET_SRC (pat);
7618  if (GET_CODE (mem) == ZERO_EXTEND)
7619    mem = XEXP (mem, 0);
7620
7621  return get_spec_unspec_code (mem);
7622}
7623
7624/* If INSN is a speculative load, return a ds with the speculation types.
7625   Otherwise [if INSN is a normal instruction] return 0.  */
7626static ds_t
7627ia64_get_insn_spec_ds (rtx insn)
7628{
7629  int code = get_insn_spec_code (insn);
7630
7631  switch (code)
7632    {
7633    case UNSPEC_LDA:
7634      return BEGIN_DATA;
7635
7636    case UNSPEC_LDS:
7637    case UNSPEC_LDS_A:
7638      return BEGIN_CONTROL;
7639
7640    case UNSPEC_LDSA:
7641      return BEGIN_DATA | BEGIN_CONTROL;
7642
7643    default:
7644      return 0;
7645    }
7646}
7647
7648/* If INSN is a speculative load return a ds with the speculation types that
7649   will be checked.
7650   Otherwise [if INSN is a normal instruction] return 0.  */
7651static ds_t
7652ia64_get_insn_checked_ds (rtx insn)
7653{
7654  int code = get_insn_spec_code (insn);
7655
7656  switch (code)
7657    {
7658    case UNSPEC_LDA:
7659      return BEGIN_DATA | BEGIN_CONTROL;
7660
7661    case UNSPEC_LDS:
7662      return BEGIN_CONTROL;
7663
7664    case UNSPEC_LDS_A:
7665    case UNSPEC_LDSA:
7666      return BEGIN_DATA | BEGIN_CONTROL;
7667
7668    default:
7669      return 0;
7670    }
7671}
7672
/* Return a speculative load pattern for INSN with speculation types TS
   and machine mode index MODE_NO.  The operands are taken from the
   already-extracted recog data for INSN; a COND_EXEC predicate, if any,
   is preserved.  */
7677static rtx
7678ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
7679{
7680  rtx pat, new_pat;
7681  gen_func_t gen_load;
7682
7683  gen_load = get_spec_load_gen_function (ts, mode_no);
7684
7685  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
7686		      copy_rtx (recog_data.operand[1]));
7687
7688  pat = PATTERN (insn);
7689  if (GET_CODE (pat) == COND_EXEC)
7690    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7691				 new_pat);
7692
7693  return new_pat;
7694}
7695
7696static bool
7697insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
7698			      ds_t ds ATTRIBUTE_UNUSED)
7699{
7700  return false;
7701}
7702
7703/* Implement targetm.sched.speculate_insn hook.
7704   Check if the INSN can be TS speculative.
7705   If 'no' - return -1.
7706   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
7707   If current pattern of the INSN already provides TS speculation,
7708   return 0.  */
7709static int
7710ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
7711{
7712  int mode_no;
7713  int res;
7714
7715  gcc_assert (!(ts & ~SPECULATIVE));
7716
7717  if (ia64_spec_check_p (insn))
7718    return -1;
7719
7720  if ((ts & BE_IN_SPEC)
7721      && !insn_can_be_in_speculative_p (insn, ts))
7722    return -1;
7723
7724  mode_no = get_mode_no_for_insn (insn);
7725
7726  if (mode_no != SPEC_MODE_INVALID)
7727    {
7728      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
7729	res = 0;
7730      else
7731	{
7732	  res = 1;
7733	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
7734	}
7735    }
7736  else
7737    res = -1;
7738
7739  return res;
7740}
7741
7742/* Return a function that will generate a check for speculation TS with mode
7743   MODE_NO.
7744   If simple check is needed, pass true for SIMPLE_CHECK_P.
7745   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
7746static gen_func_t
7747get_spec_check_gen_function (ds_t ts, int mode_no,
7748			     bool simple_check_p, bool clearing_check_p)
7749{
7750  static gen_func_t gen_ld_c_clr[] = {
7751    gen_movbi_clr,
7752    gen_movqi_clr,
7753    gen_movhi_clr,
7754    gen_movsi_clr,
7755    gen_movdi_clr,
7756    gen_movsf_clr,
7757    gen_movdf_clr,
7758    gen_movxf_clr,
7759    gen_movti_clr,
7760    gen_zero_extendqidi2_clr,
7761    gen_zero_extendhidi2_clr,
7762    gen_zero_extendsidi2_clr,
7763  };
7764  static gen_func_t gen_ld_c_nc[] = {
7765    gen_movbi_nc,
7766    gen_movqi_nc,
7767    gen_movhi_nc,
7768    gen_movsi_nc,
7769    gen_movdi_nc,
7770    gen_movsf_nc,
7771    gen_movdf_nc,
7772    gen_movxf_nc,
7773    gen_movti_nc,
7774    gen_zero_extendqidi2_nc,
7775    gen_zero_extendhidi2_nc,
7776    gen_zero_extendsidi2_nc,
7777  };
7778  static gen_func_t gen_chk_a_clr[] = {
7779    gen_advanced_load_check_clr_bi,
7780    gen_advanced_load_check_clr_qi,
7781    gen_advanced_load_check_clr_hi,
7782    gen_advanced_load_check_clr_si,
7783    gen_advanced_load_check_clr_di,
7784    gen_advanced_load_check_clr_sf,
7785    gen_advanced_load_check_clr_df,
7786    gen_advanced_load_check_clr_xf,
7787    gen_advanced_load_check_clr_ti,
7788    gen_advanced_load_check_clr_di,
7789    gen_advanced_load_check_clr_di,
7790    gen_advanced_load_check_clr_di,
7791  };
7792  static gen_func_t gen_chk_a_nc[] = {
7793    gen_advanced_load_check_nc_bi,
7794    gen_advanced_load_check_nc_qi,
7795    gen_advanced_load_check_nc_hi,
7796    gen_advanced_load_check_nc_si,
7797    gen_advanced_load_check_nc_di,
7798    gen_advanced_load_check_nc_sf,
7799    gen_advanced_load_check_nc_df,
7800    gen_advanced_load_check_nc_xf,
7801    gen_advanced_load_check_nc_ti,
7802    gen_advanced_load_check_nc_di,
7803    gen_advanced_load_check_nc_di,
7804    gen_advanced_load_check_nc_di,
7805  };
7806  static gen_func_t gen_chk_s[] = {
7807    gen_speculation_check_bi,
7808    gen_speculation_check_qi,
7809    gen_speculation_check_hi,
7810    gen_speculation_check_si,
7811    gen_speculation_check_di,
7812    gen_speculation_check_sf,
7813    gen_speculation_check_df,
7814    gen_speculation_check_xf,
7815    gen_speculation_check_ti,
7816    gen_speculation_check_di,
7817    gen_speculation_check_di,
7818    gen_speculation_check_di,
7819  };
7820
7821  gen_func_t *gen_check;
7822
7823  if (ts & BEGIN_DATA)
7824    {
      /* We don't need recovery because even if this is an ld.sa, the
	 ALAT entry will be allocated only if the NAT bit is set to zero.
	 So it is enough to use ld.c here.  */
7828
7829      if (simple_check_p)
7830	{
7831	  gcc_assert (mflag_sched_spec_ldc);
7832
7833	  if (clearing_check_p)
7834	    gen_check = gen_ld_c_clr;
7835	  else
7836	    gen_check = gen_ld_c_nc;
7837	}
7838      else
7839	{
7840	  if (clearing_check_p)
7841	    gen_check = gen_chk_a_clr;
7842	  else
7843	    gen_check = gen_chk_a_nc;
7844	}
7845    }
7846  else if (ts & BEGIN_CONTROL)
7847    {
7848      if (simple_check_p)
7849	/* We might want to use ld.sa -> ld.c instead of
7850	   ld.s -> chk.s.  */
7851	{
7852	  gcc_assert (!ia64_needs_block_p (ts));
7853
7854	  if (clearing_check_p)
7855	    gen_check = gen_ld_c_clr;
7856	  else
7857	    gen_check = gen_ld_c_nc;
7858	}
7859      else
7860	{
7861	  gen_check = gen_chk_s;
7862	}
7863    }
7864  else
7865    gcc_unreachable ();
7866
7867  gcc_assert (mode_no >= 0);
7868  return gen_check[mode_no];
7869}
7870
/* Return TRUE if a speculative load with speculation types TS needs a
   branchy recovery check rather than a simple one.  */
7872static bool
7873ia64_needs_block_p (ds_t ts)
7874{
7875  if (ts & BEGIN_DATA)
7876    return !mflag_sched_spec_ldc;
7877
7878  gcc_assert ((ts & BEGIN_CONTROL) != 0);
7879
7880  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
7881}
7882
/* Generate a recovery check for INSN.  If LABEL is nonzero, generate a
   branchy recovery check.  Otherwise, generate a simple check.  */
7886static rtx
7887ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
7888{
7889  rtx op1, pat, check_pat;
7890  gen_func_t gen_check;
7891  int mode_no;
7892
7893  mode_no = get_mode_no_for_insn (insn);
7894  gcc_assert (mode_no >= 0);
7895
7896  if (label)
7897    op1 = label;
7898  else
7899    {
7900      gcc_assert (!ia64_needs_block_p (ds));
7901      op1 = copy_rtx (recog_data.operand[1]);
7902    }
7903
7904  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
7905					   true);
7906
7907  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
7908
7909  pat = PATTERN (insn);
7910  if (GET_CODE (pat) == COND_EXEC)
7911    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7912				   check_pat);
7913
7914  return check_pat;
7915}
7916
/* Return nonzero if X is a speculation (recovery) check.  */
7918static int
7919ia64_spec_check_p (rtx x)
7920{
7921  x = PATTERN (x);
7922  if (GET_CODE (x) == COND_EXEC)
7923    x = COND_EXEC_CODE (x);
7924  if (GET_CODE (x) == SET)
7925    return ia64_spec_check_src_p (SET_SRC (x));
7926  return 0;
7927}
7928
/* If SRC is the SET_SRC of a speculation check, return the check's
   UNSPEC code; return 0 otherwise.  */
7930static int
7931ia64_spec_check_src_p (rtx src)
7932{
7933  if (GET_CODE (src) == IF_THEN_ELSE)
7934    {
7935      rtx t;
7936
7937      t = XEXP (src, 0);
7938      if (GET_CODE (t) == NE)
7939	{
7940	  t = XEXP (t, 0);
7941
7942	  if (GET_CODE (t) == UNSPEC)
7943	    {
7944	      int code;
7945
7946	      code = XINT (t, 1);
7947
7948	      if (code == UNSPEC_LDCCLR
7949		  || code == UNSPEC_LDCNC
7950		  || code == UNSPEC_CHKACLR
7951		  || code == UNSPEC_CHKANC
7952		  || code == UNSPEC_CHKS)
7953		{
7954		  gcc_assert (code != 0);
7955		  return code;
7956		}
7957	    }
7958	}
7959    }
7960  return 0;
7961}
7962
7963
7964/* The following page contains abstract data `bundle states' which are
7965   used for bundling insns (inserting nops and template generation).  */
7966
7967/* The following describes state of insn bundling.  */
7968
7969struct bundle_state
7970{
7971  /* Unique bundle state number to identify them in the debugging
7972     output  */
7973  int unique_num;
7974  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* Number of nops before and after the insn.  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
                   insn)  */
7979  int cost;     /* cost of the state in cycles */
7980  int accumulated_insns_num; /* number of all previous insns including
7981				nops.  L is considered as 2 insns */
7982  int branch_deviation; /* deviation of previous branches from 3rd slots  */
7983  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
7984  struct bundle_state *next;  /* next state with the same insn_num  */
7985  struct bundle_state *originator; /* originator (previous insn state)  */
7986  /* All bundle states are in the following chain.  */
7987  struct bundle_state *allocated_states_chain;
7988  /* The DFA State after issuing the insn and the nops.  */
7989  state_t dfa_state;
7990};
7991
/* The following array maps an insn number to the corresponding bundle
   state.  */
7993
7994static struct bundle_state **index_to_bundle_states;
7995
7996/* The unique number of next bundle state.  */
7997
7998static int bundle_states_num;
7999
8000/* All allocated bundle states are in the following chain.  */
8001
8002static struct bundle_state *allocated_bundle_states_chain;
8003
8004/* All allocated but not used bundle states are in the following
8005   chain.  */
8006
8007static struct bundle_state *free_bundle_state_chain;
8008
8009
8010/* The following function returns a free bundle state.  */
8011
8012static struct bundle_state *
8013get_free_bundle_state (void)
8014{
8015  struct bundle_state *result;
8016
8017  if (free_bundle_state_chain != NULL)
8018    {
8019      result = free_bundle_state_chain;
8020      free_bundle_state_chain = result->next;
8021    }
8022  else
8023    {
8024      result = XNEW (struct bundle_state);
8025      result->dfa_state = xmalloc (dfa_state_size);
8026      result->allocated_states_chain = allocated_bundle_states_chain;
8027      allocated_bundle_states_chain = result;
8028    }
8029  result->unique_num = bundle_states_num++;
  return result;
}
8033
8034/* The following function frees given bundle state.  */
8035
8036static void
8037free_bundle_state (struct bundle_state *state)
8038{
8039  state->next = free_bundle_state_chain;
8040  free_bundle_state_chain = state;
8041}
8042
8043/* Start work with abstract data `bundle states'.  */
8044
8045static void
8046initiate_bundle_states (void)
8047{
8048  bundle_states_num = 0;
8049  free_bundle_state_chain = NULL;
8050  allocated_bundle_states_chain = NULL;
8051}
8052
8053/* Finish work with abstract data `bundle states'.  */
8054
8055static void
8056finish_bundle_states (void)
8057{
8058  struct bundle_state *curr_state, *next_state;
8059
8060  for (curr_state = allocated_bundle_states_chain;
8061       curr_state != NULL;
8062       curr_state = next_state)
8063    {
8064      next_state = curr_state->allocated_states_chain;
8065      free (curr_state->dfa_state);
8066      free (curr_state);
8067    }
8068}
8069
8070/* Hash table of the bundle states.  The key is dfa_state and insn_num
8071   of the bundle states.  */
8072
8073static htab_t bundle_state_table;
8074
8075/* The function returns hash of BUNDLE_STATE.  */
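/* The hash mixes every byte of the DFA state with a position-dependent
   shift and adds insn_num, so that two states considered equal by
   bundle_state_eq_p always hash to the same value.  */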
8076
8077static unsigned
8078bundle_state_hash (const void *bundle_state)
8079{
8080  const struct bundle_state *const state
8081    = (const struct bundle_state *) bundle_state;
8082  unsigned result, i;
8083
8084  for (result = i = 0; i < dfa_state_size; i++)
8085    result += (((unsigned char *) state->dfa_state) [i]
8086	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
8087  return result + state->insn_num;
8088}
8089
8090/* The function returns nonzero if the bundle state keys are equal.  */
8091
8092static int
8093bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
8094{
8095  const struct bundle_state *const state1
8096    = (const struct bundle_state *) bundle_state_1;
8097  const struct bundle_state *const state2
8098    = (const struct bundle_state *) bundle_state_2;
8099
8100  return (state1->insn_num == state2->insn_num
8101	  && memcmp (state1->dfa_state, state2->dfa_state,
8102		     dfa_state_size) == 0);
8103}
8104
/* Insert BUNDLE_STATE into the hash table.  The function returns
   nonzero if the bundle state has been inserted into the table.  The
   table always contains the best bundle state for a given key.  */
8108
8109static int
8110insert_bundle_state (struct bundle_state *bundle_state)
8111{
8112  void **entry_ptr;
8113
8114  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
8115  if (*entry_ptr == NULL)
8116    {
8117      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
8118      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
8119      *entry_ptr = (void *) bundle_state;
8120      return TRUE;
8121    }
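  /* An equivalent state already exists: keep whichever is better.  The
     comparison below is lexicographic: lower cost first, then fewer
     accumulated insns (including nops), then smaller branch deviation,
     and finally fewer stop bits in the middle of bundles.  */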
8122  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
8123	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
8124	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
8125		   > bundle_state->accumulated_insns_num
8126		   || (((struct bundle_state *)
8127			*entry_ptr)->accumulated_insns_num
8128		       == bundle_state->accumulated_insns_num
8129		       && (((struct bundle_state *)
8130			    *entry_ptr)->branch_deviation
8131			   > bundle_state->branch_deviation
8132			   || (((struct bundle_state *)
8133				*entry_ptr)->branch_deviation
8134			       == bundle_state->branch_deviation
8135			       && ((struct bundle_state *)
8136				   *entry_ptr)->middle_bundle_stops
8137			       > bundle_state->middle_bundle_stops))))))
8138
8139    {
8140      struct bundle_state temp;
8141
8142      temp = *(struct bundle_state *) *entry_ptr;
8143      *(struct bundle_state *) *entry_ptr = *bundle_state;
8144      ((struct bundle_state *) *entry_ptr)->next = temp.next;
8145      *bundle_state = temp;
8146    }
8147  return FALSE;
8148}
8149
8150/* Start work with the hash table.  */
8151
8152static void
8153initiate_bundle_state_table (void)
8154{
8155  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
8156				    (htab_del) 0);
8157}
8158
8159/* Finish work with the hash table.  */
8160
8161static void
8162finish_bundle_state_table (void)
8163{
8164  htab_delete (bundle_state_table);
8165}
8166
8167
8168
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */
8171
8172static rtx ia64_nop;
8173
/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8177
8178static int
8179try_issue_nops (struct bundle_state *curr_state, int nops_num)
8180{
8181  int i;
8182
8183  for (i = 0; i < nops_num; i++)
8184    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
8185      {
8186	free_bundle_state (curr_state);
8187	return FALSE;
8188      }
8189  return TRUE;
8190}
8191
/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */
8195
8196static int
8197try_issue_insn (struct bundle_state *curr_state, rtx insn)
8198{
8199  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
8200    {
8201      free_bundle_state (curr_state);
8202      return FALSE;
8203    }
8204  return TRUE;
8205}
8206
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting from state ORIGINATOR without advancing the processor cycle.
   If TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If successful, the function creates a new bundle state and
   inserts it into the hash table and into `index_to_bundle_states'.  */
8213
8214static void
8215issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
8216		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
8217{
8218  struct bundle_state *curr_state;
8219
8220  curr_state = get_free_bundle_state ();
8221  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
8222  curr_state->insn = insn;
8223  curr_state->insn_num = originator->insn_num + 1;
8224  curr_state->cost = originator->cost;
8225  curr_state->originator = originator;
8226  curr_state->before_nops_num = before_nops_num;
8227  curr_state->after_nops_num = 0;
8228  curr_state->accumulated_insns_num
8229    = originator->accumulated_insns_num + before_nops_num;
8230  curr_state->branch_deviation = originator->branch_deviation;
8231  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
8232  gcc_assert (insn);
8233  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
8234    {
8235      gcc_assert (GET_MODE (insn) != TImode);
8236      if (!try_issue_nops (curr_state, before_nops_num))
8237	return;
8238      if (!try_issue_insn (curr_state, insn))
8239	return;
8240      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
8241      if (curr_state->accumulated_insns_num % 3 != 0)
8242	curr_state->middle_bundle_stops++;
8243      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
8244	  && curr_state->accumulated_insns_num % 3 != 0)
8245	{
8246	  free_bundle_state (curr_state);
8247	  return;
8248	}
8249    }
8250  else if (GET_MODE (insn) != TImode)
8251    {
8252      if (!try_issue_nops (curr_state, before_nops_num))
8253	return;
8254      if (!try_issue_insn (curr_state, insn))
8255	return;
8256      curr_state->accumulated_insns_num++;
8257      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
8258		  && asm_noperands (PATTERN (insn)) < 0);
8259
8260      if (ia64_safe_type (insn) == TYPE_L)
8261	curr_state->accumulated_insns_num++;
8262    }
8263  else
8264    {
8265      /* If this is an insn that must be first in a group, then don't allow
8266	 nops to be emitted before it.  Currently, alloc is the only such
8267	 supported instruction.  */
8268      /* ??? The bundling automatons should handle this for us, but they do
8269	 not yet have support for the first_insn attribute.  */
8270      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
8271	{
8272	  free_bundle_state (curr_state);
8273	  return;
8274	}
8275
8276      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
8277      state_transition (curr_state->dfa_state, NULL);
8278      curr_state->cost++;
8279      if (!try_issue_nops (curr_state, before_nops_num))
8280	return;
8281      if (!try_issue_insn (curr_state, insn))
8282	return;
8283      curr_state->accumulated_insns_num++;
8284      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
8285	  || asm_noperands (PATTERN (insn)) >= 0)
8286	{
8287	  /* Finish bundle containing asm insn.  */
8288	  curr_state->after_nops_num
8289	    = 3 - curr_state->accumulated_insns_num % 3;
8290	  curr_state->accumulated_insns_num
8291	    += 3 - curr_state->accumulated_insns_num % 3;
8292	}
8293      else if (ia64_safe_type (insn) == TYPE_L)
8294	curr_state->accumulated_insns_num++;
8295    }
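  /* A branch ideally goes into the 3rd slot of its bundle; record how far
     this one lands from that slot (its slot index within the bundle is
     (accumulated_insns_num - 1) % 3).  */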
8296  if (ia64_safe_type (insn) == TYPE_B)
8297    curr_state->branch_deviation
8298      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
8299  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
8300    {
8301      if (!only_bundle_end_p && insert_bundle_state (curr_state))
8302	{
8303	  state_t dfa_state;
8304	  struct bundle_state *curr_state1;
8305	  struct bundle_state *allocated_states_chain;
8306
8307	  curr_state1 = get_free_bundle_state ();
8308	  dfa_state = curr_state1->dfa_state;
8309	  allocated_states_chain = curr_state1->allocated_states_chain;
8310	  *curr_state1 = *curr_state;
8311	  curr_state1->dfa_state = dfa_state;
8312	  curr_state1->allocated_states_chain = allocated_states_chain;
8313	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
8314		  dfa_state_size);
8315	  curr_state = curr_state1;
8316	}
8317      if (!try_issue_nops (curr_state,
8318			   3 - curr_state->accumulated_insns_num % 3))
8319	return;
8320      curr_state->after_nops_num
8321	= 3 - curr_state->accumulated_insns_num % 3;
8322      curr_state->accumulated_insns_num
8323	+= 3 - curr_state->accumulated_insns_num % 3;
8324    }
8325  if (!insert_bundle_state (curr_state))
8326    free_bundle_state (curr_state);
8327  return;
8328}
8329
/* The following function returns the position (the number of slots
   already filled) in the two-bundle window for the given STATE.  */
8332
8333static int
8334get_max_pos (state_t state)
8335{
8336  if (cpu_unit_reservation_p (state, pos_6))
8337    return 6;
8338  else if (cpu_unit_reservation_p (state, pos_5))
8339    return 5;
8340  else if (cpu_unit_reservation_p (state, pos_4))
8341    return 4;
8342  else if (cpu_unit_reservation_p (state, pos_3))
8343    return 3;
8344  else if (cpu_unit_reservation_p (state, pos_2))
8345    return 2;
8346  else if (cpu_unit_reservation_p (state, pos_1))
8347    return 1;
8348  else
8349    return 0;
8350}
8351
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with two
   values of position: 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.  */
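/* The code returned here is the bundle template number used by the
   bundle selectors; assuming the usual NR_BUNDLES ordering, the codes
   correspond to .mii (0), .mmi (1), .mfi (2), .mmf (3), .bbb (4),
   .mbb (5), .mib (6), .mmb (7), .mfb (8) and .mlx (9).  */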
8361
8362static int
8363get_template (state_t state, int pos)
8364{
8365  switch (pos)
8366    {
8367    case 3:
8368      if (cpu_unit_reservation_p (state, _0mmi_))
8369	return 1;
8370      else if (cpu_unit_reservation_p (state, _0mii_))
8371	return 0;
8372      else if (cpu_unit_reservation_p (state, _0mmb_))
8373	return 7;
8374      else if (cpu_unit_reservation_p (state, _0mib_))
8375	return 6;
8376      else if (cpu_unit_reservation_p (state, _0mbb_))
8377	return 5;
8378      else if (cpu_unit_reservation_p (state, _0bbb_))
8379	return 4;
8380      else if (cpu_unit_reservation_p (state, _0mmf_))
8381	return 3;
8382      else if (cpu_unit_reservation_p (state, _0mfi_))
8383	return 2;
8384      else if (cpu_unit_reservation_p (state, _0mfb_))
8385	return 8;
8386      else if (cpu_unit_reservation_p (state, _0mlx_))
8387	return 9;
8388      else
8389	gcc_unreachable ();
8390    case 6:
8391      if (cpu_unit_reservation_p (state, _1mmi_))
8392	return 1;
8393      else if (cpu_unit_reservation_p (state, _1mii_))
8394	return 0;
8395      else if (cpu_unit_reservation_p (state, _1mmb_))
8396	return 7;
8397      else if (cpu_unit_reservation_p (state, _1mib_))
8398	return 6;
8399      else if (cpu_unit_reservation_p (state, _1mbb_))
8400	return 5;
8401      else if (cpu_unit_reservation_p (state, _1bbb_))
8402	return 4;
8403      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
8404	return 3;
8405      else if (cpu_unit_reservation_p (state, _1mfi_))
8406	return 2;
8407      else if (cpu_unit_reservation_p (state, _1mfb_))
8408	return 8;
8409      else if (cpu_unit_reservation_p (state, _1mlx_))
8410	return 9;
8411      else
8412	gcc_unreachable ();
8413    default:
8414      gcc_unreachable ();
8415    }
8416}
8417
8418/* True when INSN is important for bundling.  */
8419static bool
8420important_for_bundling_p (rtx insn)
8421{
8422  return (INSN_P (insn)
8423	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8424	  && GET_CODE (PATTERN (insn)) != USE
8425	  && GET_CODE (PATTERN (insn)) != CLOBBER);
8426}
8427
/* The following function returns the first insn important for insn
   bundling, searching from INSN up to (but not including) TAIL.  */
8430
8431static rtx
8432get_next_important_insn (rtx insn, rtx tail)
8433{
8434  for (; insn && insn != tail; insn = NEXT_INSN (insn))
8435    if (important_for_bundling_p (insn))
8436      return insn;
8437  return NULL_RTX;
8438}
8439
8440/* Add a bundle selector TEMPLATE0 before INSN.  */
8441
8442static void
8443ia64_add_bundle_selector_before (int template0, rtx insn)
8444{
8445  rtx b = gen_bundle_selector (GEN_INT (template0));
8446
8447  ia64_emit_insn_before (b, insn);
8448#if NR_BUNDLES == 10
8449  if ((template0 == 4 || template0 == 5)
8450      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
8451    {
8452      int i;
8453      rtx note = NULL_RTX;
8454
      /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
	 first or second slot.  If it is and has a REG_EH_REGION note,
	 copy the note to the following nops, as br.call sets rp to the
	 address of the following bundle and therefore an EH region end
	 must be on a bundle boundary.  */
8460      insn = PREV_INSN (insn);
8461      for (i = 0; i < 3; i++)
8462	{
8463	  do
8464	    insn = next_active_insn (insn);
8465	  while (GET_CODE (insn) == INSN
8466		 && get_attr_empty (insn) == EMPTY_YES);
8467	  if (GET_CODE (insn) == CALL_INSN)
8468	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
8469	  else if (note)
8470	    {
8471	      int code;
8472
8473	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
8474			  || code == CODE_FOR_nop_b);
8475	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
8476		note = NULL_RTX;
8477	      else
8478		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
8479	    }
8480	}
8481    }
8482#endif
8483}
8484
/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very quickly.

   Unfortunately it is not possible to get information about inserted
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn, possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying the information about
   new cycle ticks taken from the insn scheduling.  To make the
   algorithm practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by a structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insn
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions which cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of an EBB (extended basic block), we choose the
   best insn sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through the
   EBB.  */
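/* To illustrate the forward pass: every surviving bundle_state for the
   previous insn is extended by issuing the current insn preceded by
   zero, one or (for some insn types) two nops, via the
   issue_nops_and_insn calls below; states that end up with the same key
   (DFA state, insn_num) are merged by insert_bundle_state, which keeps
   only the best one.  */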
8521
8522static void
8523bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
8524{
8525  struct bundle_state *curr_state, *next_state, *best_state;
8526  rtx insn, next_insn;
8527  int insn_num;
8528  int i, bundle_end_p, only_bundle_end_p, asm_p;
8529  int pos = 0, max_pos, template0, template1;
8530  rtx b;
8531  rtx nop;
8532  enum attr_type type;
8533
8534  insn_num = 0;
8535  /* Count insns in the EBB.  */
8536  for (insn = NEXT_INSN (prev_head_insn);
8537       insn && insn != tail;
8538       insn = NEXT_INSN (insn))
8539    if (INSN_P (insn))
8540      insn_num++;
8541  if (insn_num == 0)
8542    return;
8543  bundling_p = 1;
8544  dfa_clean_insn_cache ();
8545  initiate_bundle_state_table ();
8546  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
8547  /* First (forward) pass -- generation of bundle states.  */
8548  curr_state = get_free_bundle_state ();
8549  curr_state->insn = NULL;
8550  curr_state->before_nops_num = 0;
8551  curr_state->after_nops_num = 0;
8552  curr_state->insn_num = 0;
8553  curr_state->cost = 0;
8554  curr_state->accumulated_insns_num = 0;
8555  curr_state->branch_deviation = 0;
8556  curr_state->middle_bundle_stops = 0;
8557  curr_state->next = NULL;
8558  curr_state->originator = NULL;
8559  state_reset (curr_state->dfa_state);
8560  index_to_bundle_states [0] = curr_state;
8561  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be
     ignored.  */
8563  for (insn = NEXT_INSN (prev_head_insn);
8564       insn != tail;
8565       insn = NEXT_INSN (insn))
8566    if (INSN_P (insn)
8567	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
8568	    || GET_CODE (PATTERN (insn)) == USE
8569	    || GET_CODE (PATTERN (insn)) == CLOBBER)
8570	&& GET_MODE (insn) == TImode)
8571      {
8572	PUT_MODE (insn, VOIDmode);
8573	for (next_insn = NEXT_INSN (insn);
8574	     next_insn != tail;
8575	     next_insn = NEXT_INSN (next_insn))
8576	  if (INSN_P (next_insn)
8577	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
8578	      && GET_CODE (PATTERN (next_insn)) != USE
8579	      && GET_CODE (PATTERN (next_insn)) != CLOBBER
8580	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
8581	    {
8582	      PUT_MODE (next_insn, TImode);
8583	      break;
8584	    }
8585      }
8586  /* Forward pass: generation of bundle states.  */
8587  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8588       insn != NULL_RTX;
8589       insn = next_insn)
8590    {
8591      gcc_assert (INSN_P (insn)
8592		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8593		  && GET_CODE (PATTERN (insn)) != USE
8594		  && GET_CODE (PATTERN (insn)) != CLOBBER);
8595      type = ia64_safe_type (insn);
8596      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8597      insn_num++;
8598      index_to_bundle_states [insn_num] = NULL;
8599      for (curr_state = index_to_bundle_states [insn_num - 1];
8600	   curr_state != NULL;
8601	   curr_state = next_state)
8602	{
8603	  pos = curr_state->accumulated_insns_num % 3;
8604	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
8608	  only_bundle_end_p
8609	    = (next_insn != NULL_RTX
8610	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
8611	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
8612	  /* We may fill up the current bundle if it is the cycle end
8613	     without a group barrier.  */
8614	  bundle_end_p
8615	    = (only_bundle_end_p || next_insn == NULL_RTX
8616	       || (GET_MODE (next_insn) == TImode
8617		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
8618	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
8619	      || type == TYPE_S)
8620	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
8621				 only_bundle_end_p);
8622	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
8623			       only_bundle_end_p);
8624	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
8625			       only_bundle_end_p);
8626	}
8627      gcc_assert (index_to_bundle_states [insn_num]);
8628      for (curr_state = index_to_bundle_states [insn_num];
8629	   curr_state != NULL;
8630	   curr_state = curr_state->next)
8631	if (verbose >= 2 && dump)
8632	  {
	    /* This structure is taken from the generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
8637	    struct DFA_chip
8638	    {
8639	      unsigned short one_automaton_state;
8640	      unsigned short oneb_automaton_state;
8641	      unsigned short two_automaton_state;
8642	      unsigned short twob_automaton_state;
8643	    };
8644
8645	    fprintf
8646	      (dump,
8647	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
8648	       curr_state->unique_num,
8649	       (curr_state->originator == NULL
8650		? -1 : curr_state->originator->unique_num),
8651	       curr_state->cost,
8652	       curr_state->before_nops_num, curr_state->after_nops_num,
8653	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
8654	       curr_state->middle_bundle_stops,
8655	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8656	       INSN_UID (insn));
8657	  }
8658    }
8659
8660  /* We should find a solution because the 2nd insn scheduling has
8661     found one.  */
8662  gcc_assert (index_to_bundle_states [insn_num]);
8663  /* Find a state corresponding to the best insn sequence.  */
8664  best_state = NULL;
8665  for (curr_state = index_to_bundle_states [insn_num];
8666       curr_state != NULL;
8667       curr_state = curr_state->next)
    /* We consider only the states whose last bundle is completely
       filled.  Among these we prefer insn sequences with minimal
       cost, then with the minimal number of inserted nops, and
       finally with branch insns placed in the 3rd slots.  */
8672    if (curr_state->accumulated_insns_num % 3 == 0
8673	&& (best_state == NULL || best_state->cost > curr_state->cost
8674	    || (best_state->cost == curr_state->cost
8675		&& (curr_state->accumulated_insns_num
8676		    < best_state->accumulated_insns_num
8677		    || (curr_state->accumulated_insns_num
8678			== best_state->accumulated_insns_num
8679			&& (curr_state->branch_deviation
8680			    < best_state->branch_deviation
8681			    || (curr_state->branch_deviation
8682				== best_state->branch_deviation
8683				&& curr_state->middle_bundle_stops
8684				< best_state->middle_bundle_stops)))))))
8685      best_state = curr_state;
8686  /* Second (backward) pass: adding nops and templates.  */
8687  gcc_assert (best_state);
8688  insn_num = best_state->before_nops_num;
8689  template0 = template1 = -1;
8690  for (curr_state = best_state;
8691       curr_state->originator != NULL;
8692       curr_state = curr_state->originator)
8693    {
8694      insn = curr_state->insn;
8695      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
8696	       || asm_noperands (PATTERN (insn)) >= 0);
8697      insn_num++;
8698      if (verbose >= 2 && dump)
8699	{
8700	  struct DFA_chip
8701	  {
8702	    unsigned short one_automaton_state;
8703	    unsigned short oneb_automaton_state;
8704	    unsigned short two_automaton_state;
8705	    unsigned short twob_automaton_state;
8706	  };
8707
8708	  fprintf
8709	    (dump,
8710	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
8711	     curr_state->unique_num,
8712	     (curr_state->originator == NULL
8713	      ? -1 : curr_state->originator->unique_num),
8714	     curr_state->cost,
8715	     curr_state->before_nops_num, curr_state->after_nops_num,
8716	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
8717	     curr_state->middle_bundle_stops,
8718	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
8719	     INSN_UID (insn));
8720	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
8724      max_pos = get_max_pos (curr_state->dfa_state);
8725      if (max_pos == 6
	  /* A negative template number here means that the processor
	     has already done one bundle rotation.  */
8728	  || (max_pos == 3 && template0 < 0))
8729	{
8730	  /* We are at the end of the window -- find template(s) for
8731	     its bundle(s).  */
8732	  pos = max_pos;
8733	  if (max_pos == 3)
8734	    template0 = get_template (curr_state->dfa_state, 3);
8735	  else
8736	    {
8737	      template1 = get_template (curr_state->dfa_state, 3);
8738	      template0 = get_template (curr_state->dfa_state, 6);
8739	    }
8740	}
8741      if (max_pos > 3 && template1 < 0)
	/* This may happen when there is a stop inside a bundle.  */
8743	{
8744	  gcc_assert (pos <= 3);
8745	  template1 = get_template (curr_state->dfa_state, 3);
8746	  pos += 3;
8747	}
8748      if (!asm_p)
8749	/* Emit nops after the current insn.  */
8750	for (i = 0; i < curr_state->after_nops_num; i++)
8751	  {
8752	    nop = gen_nop ();
8753	    emit_insn_after (nop, insn);
8754	    pos--;
8755	    gcc_assert (pos >= 0);
8756	    if (pos % 3 == 0)
8757	      {
8758		/* We are at the start of a bundle: emit the template
8759		   (it should be defined).  */
8760		gcc_assert (template0 >= 0);
8761		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two-bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (a negative value).  */
8765		template0 = template1;
8766		template1 = -1;
8767	      }
8768	  }
      /* Move the position backward in the window.  A group barrier
	 occupies no slot.  An asm insn takes a whole bundle.  */
8771      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8772	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8773	  && asm_noperands (PATTERN (insn)) < 0)
8774	pos--;
      /* A long insn takes 2 slots.  */
8776      if (ia64_safe_type (insn) == TYPE_L)
8777	pos--;
8778      gcc_assert (pos >= 0);
8779      if (pos % 3 == 0
8780	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8781	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8782	  && asm_noperands (PATTERN (insn)) < 0)
8783	{
8784	  /* The current insn is at the bundle start: emit the
8785	     template.  */
8786	  gcc_assert (template0 >= 0);
8787	  ia64_add_bundle_selector_before (template0, insn);
8788	  b = PREV_INSN (insn);
8789	  insn = b;
8790	  /* See comment above in analogous place for emitting nops
8791	     after the insn.  */
8792	  template0 = template1;
8793	  template1 = -1;
8794	}
      /* Emit nops before the current insn.  */
8796      for (i = 0; i < curr_state->before_nops_num; i++)
8797	{
8798	  nop = gen_nop ();
8799	  ia64_emit_insn_before (nop, insn);
8800	  nop = PREV_INSN (insn);
8801	  insn = nop;
8802	  pos--;
8803	  gcc_assert (pos >= 0);
8804	  if (pos % 3 == 0)
8805	    {
8806	      /* See comment above in analogous place for emitting nops
8807		 after the insn.  */
8808	      gcc_assert (template0 >= 0);
8809	      ia64_add_bundle_selector_before (template0, insn);
8810	      b = PREV_INSN (insn);
8811	      insn = b;
8812	      template0 = template1;
8813	      template1 = -1;
8814	    }
8815	}
8816    }
8817
8818#ifdef ENABLE_CHECKING
8819  {
8820    /* Assert right calculation of middle_bundle_stops.  */
8821    int num = best_state->middle_bundle_stops;
8822    bool start_bundle = true, end_bundle = false;
8823
8824    for (insn = NEXT_INSN (prev_head_insn);
8825	 insn && insn != tail;
8826	 insn = NEXT_INSN (insn))
8827      {
8828	if (!INSN_P (insn))
8829	  continue;
8830	if (recog_memoized (insn) == CODE_FOR_bundle_selector)
8831	  start_bundle = true;
8832	else
8833	  {
8834	    rtx next_insn;
8835
8836	    for (next_insn = NEXT_INSN (insn);
8837		 next_insn && next_insn != tail;
8838		 next_insn = NEXT_INSN (next_insn))
8839	      if (INSN_P (next_insn)
8840		  && (ia64_safe_itanium_class (next_insn)
8841		      != ITANIUM_CLASS_IGNORE
8842		      || recog_memoized (next_insn)
8843		      == CODE_FOR_bundle_selector)
8844		  && GET_CODE (PATTERN (next_insn)) != USE
8845		  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
8846		break;
8847
8848	    end_bundle = next_insn == NULL_RTX
8849	     || next_insn == tail
8850	     || (INSN_P (next_insn)
8851		 && recog_memoized (next_insn)
8852		 == CODE_FOR_bundle_selector);
8853	    if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
8854		&& !start_bundle && !end_bundle
8855		&& next_insn
8856		&& GET_CODE (PATTERN (next_insn)) != ASM_INPUT
8857		&& asm_noperands (PATTERN (next_insn)) < 0)
8858	      num--;
8859
8860	    start_bundle = false;
8861	  }
8862      }
8863
8864    gcc_assert (num == 0);
8865  }
8866#endif
8867
8868  free (index_to_bundle_states);
8869  finish_bundle_state_table ();
8870  bundling_p = 0;
8871  dfa_clean_insn_cache ();
8872}
8873
8874/* The following function is called at the end of scheduling BB or
8875   EBB.  After reload, it inserts stop bits and does insn bundling.  */
8876
8877static void
8878ia64_sched_finish (FILE *dump, int sched_verbose)
8879{
8880  if (sched_verbose)
8881    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;

  final_emit_insn_group_barriers (dump);
  bundling (dump, sched_verbose, current_sched_info->prev_head,
	    current_sched_info->next_tail);
  if (sched_verbose && dump)
    fprintf (dump, "//    finishing %d-%d\n",
	     INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
	     INSN_UID (PREV_INSN (current_sched_info->next_tail)));
}
8897
8898/* The following function inserts stop bits in scheduled BB or EBB.  */
8899
8900static void
8901final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8902{
8903  rtx insn;
8904  int need_barrier_p = 0;
8905  int seen_good_insn = 0;
8906  rtx prev_insn = NULL_RTX;
8907
8908  init_insn_group_barriers ();
8909
8910  for (insn = NEXT_INSN (current_sched_info->prev_head);
8911       insn != current_sched_info->next_tail;
8912       insn = NEXT_INSN (insn))
8913    {
8914      if (GET_CODE (insn) == BARRIER)
8915	{
8916	  rtx last = prev_active_insn (insn);
8917
8918	  if (! last)
8919	    continue;
8920	  if (GET_CODE (last) == JUMP_INSN
8921	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8922	    last = prev_active_insn (last);
8923	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8924	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8925
8926	  init_insn_group_barriers ();
8927	  seen_good_insn = 0;
8928	  need_barrier_p = 0;
8929	  prev_insn = NULL_RTX;
8930	}
8931      else if (NONDEBUG_INSN_P (insn))
8932	{
8933	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8934	    {
8935	      init_insn_group_barriers ();
8936	      seen_good_insn = 0;
8937	      need_barrier_p = 0;
8938	      prev_insn = NULL_RTX;
8939	    }
8940	  else if (need_barrier_p || group_barrier_needed (insn)
8941		   || (mflag_sched_stop_bits_after_every_cycle
8942		       && GET_MODE (insn) == TImode
8943		       && seen_good_insn))
8944	    {
8945	      if (TARGET_EARLY_STOP_BITS)
8946		{
8947		  rtx last;
8948
8949		  for (last = insn;
8950		       last != current_sched_info->prev_head;
8951		       last = PREV_INSN (last))
8952		    if (INSN_P (last) && GET_MODE (last) == TImode
8953			&& stops_p [INSN_UID (last)])
8954		      break;
8955		  if (last == current_sched_info->prev_head)
8956		    last = insn;
8957		  last = prev_active_insn (last);
8958		  if (last
8959		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8960		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8961				     last);
8962		  init_insn_group_barriers ();
8963		  for (last = NEXT_INSN (last);
8964		       last != insn;
8965		       last = NEXT_INSN (last))
8966		    if (INSN_P (last))
8967		      {
8968			group_barrier_needed (last);
8969			if (recog_memoized (last) >= 0
8970			    && important_for_bundling_p (last))
8971			  seen_good_insn = 1;
8972		      }
8973		}
8974	      else
8975		{
8976		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8977				    insn);
8978		  init_insn_group_barriers ();
8979		  seen_good_insn = 0;
8980		}
8981	      group_barrier_needed (insn);
8982	      if (recog_memoized (insn) >= 0
8983		  && important_for_bundling_p (insn))
8984		seen_good_insn = 1;
8985	      prev_insn = NULL_RTX;
8986	    }
8987	  else if (recog_memoized (insn) >= 0
8988		   && important_for_bundling_p (insn))
8989	    {
8990	      prev_insn = insn;
8991	      seen_good_insn = 1;
8992	    }
8993	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
8994			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
8995			    || asm_noperands (PATTERN (insn)) >= 0);
8996	}
8997    }
8998}
8999
9000
9001
/* The following function returns the lookahead depth used by the
   first-cycle multipass DFA insn scheduler: 6 after reload (during
   bundling) and 4 before.  */
9004
9005static int
9006ia64_first_cycle_multipass_dfa_lookahead (void)
9007{
9008  return (reload_completed ? 6 : 4);
9009}
9010
/* The following function initializes the variables `dfa_pre_cycle_insn'
   and `dfa_stop_insn'.  */
9012
9013static void
9014ia64_init_dfa_pre_cycle_insn (void)
9015{
9016  if (temp_dfa_state == NULL)
9017    {
9018      dfa_state_size = state_size ();
9019      temp_dfa_state = xmalloc (dfa_state_size);
9020      prev_cycle_state = xmalloc (dfa_state_size);
9021    }
9022  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
9023  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
9024  recog_memoized (dfa_pre_cycle_insn);
9025  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
9026  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
9027  recog_memoized (dfa_stop_insn);
9028}
9029
9030/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
9031   used by the DFA insn scheduler.  */
9032
9033static rtx
9034ia64_dfa_pre_cycle_insn (void)
9035{
9036  return dfa_pre_cycle_insn;
9037}
9038
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */
9041
9042int
9043ia64_st_address_bypass_p (rtx producer, rtx consumer)
9044{
9045  rtx dest, reg, mem;
9046
9047  gcc_assert (producer && consumer);
9048  dest = ia64_single_set (producer);
9049  gcc_assert (dest);
9050  reg = SET_DEST (dest);
9051  gcc_assert (reg);
9052  if (GET_CODE (reg) == SUBREG)
9053    reg = SUBREG_REG (reg);
9054  gcc_assert (GET_CODE (reg) == REG);
9055
9056  dest = ia64_single_set (consumer);
9057  gcc_assert (dest);
9058  mem = SET_DEST (dest);
9059  gcc_assert (mem && GET_CODE (mem) == MEM);
9060  return reg_mentioned_p (reg, mem);
9061}
9062
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */
9065
9066int
9067ia64_ld_address_bypass_p (rtx producer, rtx consumer)
9068{
9069  rtx dest, src, reg, mem;
9070
9071  gcc_assert (producer && consumer);
9072  dest = ia64_single_set (producer);
9073  gcc_assert (dest);
9074  reg = SET_DEST (dest);
9075  gcc_assert (reg);
9076  if (GET_CODE (reg) == SUBREG)
9077    reg = SUBREG_REG (reg);
9078  gcc_assert (GET_CODE (reg) == REG);
9079
9080  src = ia64_single_set (consumer);
9081  gcc_assert (src);
9082  mem = SET_SRC (src);
9083  gcc_assert (mem);
9084
9085  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
9086    mem = XVECEXP (mem, 0, 0);
9087  else if (GET_CODE (mem) == IF_THEN_ELSE)
9088    /* ??? Is this bypass necessary for ld.c?  */
9089    {
9090      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
9091      mem = XEXP (mem, 1);
9092    }
9093
9094  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
9095    mem = XEXP (mem, 0);
9096
9097  if (GET_CODE (mem) == UNSPEC)
9098    {
9099      int c = XINT (mem, 1);
9100
9101      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
9102		  || c == UNSPEC_LDSA);
9103      mem = XVECEXP (mem, 0, 0);
9104    }
9105
9106  /* Note that LO_SUM is used for GOT loads.  */
9107  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
9108
9109  return reg_mentioned_p (reg, mem);
9110}
9111
/* The following function returns TRUE if INSN produces an address
   for a load/store insn.  We will place such insns into an M slot
   because that decreases their latency.  */

int
ia64_produce_address_p (rtx insn)
{
  /* The `call' field of the insn is reused elsewhere in this file as
     a flag marking insns that produce such addresses.  */
  return insn->call;
}
9121
9122
9123/* Emit pseudo-ops for the assembler to describe predicate relations.
9124   At present this assumes that we only consider predicate pairs to
9125   be mutex, and that the assembler can deduce proper values from
9126   straight-line code.  */
9127
9128static void
9129emit_predicate_relation_info (void)
9130{
9131  basic_block bb;
9132
9133  FOR_EACH_BB_REVERSE (bb)
9134    {
9135      int r;
9136      rtx head = BB_HEAD (bb);
9137
9138      /* We only need such notes at code labels.  */
9139      if (GET_CODE (head) != CODE_LABEL)
9140	continue;
9141      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
9142	head = NEXT_INSN (head);
9143
9144      /* Skip p0, which may be thought to be live due to (reg:DI p0)
9145	 grabbing the entire block of predicate registers.  */
9146      for (r = PR_REG (2); r < PR_REG (64); r += 2)
9147	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
9148	  {
9149	    rtx p = gen_rtx_REG (BImode, r);
9150	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
9151	    if (head == BB_END (bb))
9152	      BB_END (bb) = n;
9153	    head = n;
9154	  }
9155    }
9156
9157  /* Look for conditional calls that do not return, and protect predicate
9158     relations around them.  Otherwise the assembler will assume the call
9159     returns, and complain about uses of call-clobbered predicates after
9160     the call.  */
9161  FOR_EACH_BB_REVERSE (bb)
9162    {
9163      rtx insn = BB_HEAD (bb);
9164
9165      while (1)
9166	{
9167	  if (GET_CODE (insn) == CALL_INSN
9168	      && GET_CODE (PATTERN (insn)) == COND_EXEC
9169	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
9170	    {
9171	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
9172	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
9173	      if (BB_HEAD (bb) == insn)
9174		BB_HEAD (bb) = b;
9175	      if (BB_END (bb) == insn)
9176		BB_END (bb) = a;
9177	    }
9178
9179	  if (insn == BB_END (bb))
9180	    break;
9181	  insn = NEXT_INSN (insn);
9182	}
9183    }
9184}
9185
9186/* Perform machine dependent operations on the rtl chain INSNS.  */
9187
9188static void
9189ia64_reorg (void)
9190{
9191  /* We are freeing block_for_insn in the toplev to keep compatibility
9192     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
9193  compute_bb_for_insn ();
9194
9195  /* If optimizing, we'll have split before scheduling.  */
9196  if (optimize == 0)
9197    split_all_insns ();
9198
9199  if (optimize && ia64_flag_schedule_insns2
9200      && dbg_cnt (ia64_sched2))
9201    {
9202      timevar_push (TV_SCHED2);
9203      ia64_final_schedule = 1;
9204
9205      initiate_bundle_states ();
9206      ia64_nop = make_insn_raw (gen_nop ());
9207      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
9208      recog_memoized (ia64_nop);
9209      clocks_length = get_max_uid () + 1;
9210      stops_p = XCNEWVEC (char, clocks_length);
9211
9212      if (ia64_tune == PROCESSOR_ITANIUM2)
9213	{
9214	  pos_1 = get_cpu_unit_code ("2_1");
9215	  pos_2 = get_cpu_unit_code ("2_2");
9216	  pos_3 = get_cpu_unit_code ("2_3");
9217	  pos_4 = get_cpu_unit_code ("2_4");
9218	  pos_5 = get_cpu_unit_code ("2_5");
9219	  pos_6 = get_cpu_unit_code ("2_6");
9220	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
9221	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
9222	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
9223	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
9224	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
9225	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
9226	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
9227	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
9228	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
9229	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
9230	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
9231	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
9232	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
9233	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
9234	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
9235	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
9236	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
9237	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
9238	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
9239	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
9240	}
9241      else
9242	{
9243	  pos_1 = get_cpu_unit_code ("1_1");
9244	  pos_2 = get_cpu_unit_code ("1_2");
9245	  pos_3 = get_cpu_unit_code ("1_3");
9246	  pos_4 = get_cpu_unit_code ("1_4");
9247	  pos_5 = get_cpu_unit_code ("1_5");
9248	  pos_6 = get_cpu_unit_code ("1_6");
9249	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
9250	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
9251	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
9252	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
9253	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
9254	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
9255	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
9256	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
9257	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
9258	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
9259	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
9260	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
9261	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
9262	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
9263	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
9264	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
9265	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
9266	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
9267	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
9268	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
9269	}
9270
9271      if (flag_selective_scheduling2
9272	  && !maybe_skip_selective_scheduling ())
9273        run_selective_scheduling ();
9274      else
9275	schedule_ebbs ();
9276
      /* Redo alignment computation, as it might have gone wrong.  */
9278      compute_alignments ();
9279
9280      /* We cannot reuse this one because it has been corrupted by the
9281	 evil glat.  */
9282      finish_bundle_states ();
9283      free (stops_p);
9284      stops_p = NULL;
9285      emit_insn_group_barriers (dump_file);
9286
9287      ia64_final_schedule = 0;
9288      timevar_pop (TV_SCHED2);
9289    }
9290  else
9291    emit_all_insn_group_barriers (dump_file);
9292
9293  df_analyze ();
9294
  /* A call must not be the last instruction in a function, so that the
     return address is still within the function and unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
9298  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
9299    {
9300      rtx insn;
9301      int saw_stop = 0;
9302
9303      insn = get_last_insn ();
9304      if (! INSN_P (insn))
9305        insn = prev_active_insn (insn);
9306      if (insn)
9307	{
9308	  /* Skip over insns that expand to nothing.  */
9309	  while (GET_CODE (insn) == INSN
9310		 && get_attr_empty (insn) == EMPTY_YES)
9311	    {
9312	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
9313		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
9314		saw_stop = 1;
9315	      insn = prev_active_insn (insn);
9316	    }
9317	  if (GET_CODE (insn) == CALL_INSN)
9318	    {
9319	      if (! saw_stop)
9320		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9321	      emit_insn (gen_break_f ());
9322	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
9323	    }
9324	}
9325    }
9326
9327  emit_predicate_relation_info ();
9328
9329  if (ia64_flag_var_tracking)
9330    {
9331      timevar_push (TV_VAR_TRACKING);
9332      variable_tracking_main ();
9333      timevar_pop (TV_VAR_TRACKING);
9334    }
9335  df_finish_pass (false);
9336}
9337
9338/* Return true if REGNO is used by the epilogue.  */
9339
9340int
9341ia64_epilogue_uses (int regno)
9342{
9343  switch (regno)
9344    {
9345    case R_GR (1):
9346      /* With a call to a function in another module, we will write a new
9347	 value to "gp".  After returning from such a call, we need to make
9348	 sure the function restores the original gp-value, even if the
9349	 function itself does not use the gp anymore.  */
9350      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
9351
9352    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
9353    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
9354      /* For functions defined with the syscall_linkage attribute, all
9355	 input registers are marked as live at all function exits.  This
9356	 prevents the register allocator from using the input registers,
9357	 which in turn makes it possible to restart a system call after
9358	 an interrupt without having to save/restore the input registers.
9359	 This also prevents kernel data from leaking to application code.  */
9360      return lookup_attribute ("syscall_linkage",
9361	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
9362
9363    case R_BR (0):
9364      /* Conditional return patterns can't represent the use of `b0' as
9365         the return address, so we force the value live this way.  */
9366      return 1;
9367
9368    case AR_PFS_REGNUM:
9369      /* Likewise for ar.pfs, which is used by br.ret.  */
9370      return 1;
9371
9372    default:
9373      return 0;
9374    }
9375}
9376
9377/* Return true if REGNO is used by the frame unwinder.  */
9378
9379int
9380ia64_eh_uses (int regno)
9381{
9382  unsigned int r;
9383
9384  if (! reload_completed)
9385    return 0;
9386
9387  if (regno == 0)
9388    return 0;
9389
9390  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
9391    if (regno == current_frame_info.r[r]
9392       || regno == emitted_frame_related_regs[r])
9393      return 1;
9394
9395  return 0;
9396}
9397
9398/* Return true if this goes in small data/bss.  */
9399
/* ??? We could also support our own long data here, generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */
9404
9405static bool
9406ia64_in_small_data_p (const_tree exp)
9407{
9408  if (TARGET_NO_SDATA)
9409    return false;
9410
9411  /* We want to merge strings, so we never consider them small data.  */
9412  if (TREE_CODE (exp) == STRING_CST)
9413    return false;
9414
9415  /* Functions are never small data.  */
9416  if (TREE_CODE (exp) == FUNCTION_DECL)
9417    return false;
9418
9419  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
9420    {
9421      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
9422
9423      if (strcmp (section, ".sdata") == 0
9424	  || strncmp (section, ".sdata.", 7) == 0
9425	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
9426	  || strcmp (section, ".sbss") == 0
9427	  || strncmp (section, ".sbss.", 6) == 0
9428	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
9429	return true;
9430    }
9431  else
9432    {
9433      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
9434
9435      /* If this is an incomplete type with size 0, then we can't put it
9436	 in sdata because it might be too big when completed.  */
9437      if (size > 0 && size <= ia64_section_threshold)
9438	return true;
9439    }
9440
9441  return false;
9442}
9443
9444/* Output assembly directives for prologue regions.  */
9445
9446/* The current basic block number.  */
9447
9448static bool last_block;
9449
9450/* True if we need a copy_state command at the start of the next block.  */
9451
9452static bool need_copy_state;
9453
9454#ifndef MAX_ARTIFICIAL_LABEL_BYTES
9455# define MAX_ARTIFICIAL_LABEL_BYTES 30
9456#endif
9457
9458/* Emit a debugging label after a call-frame-related insn.  We'd
9459   rather output the label right away, but we'd have to output it
9460   after, not before, the instruction, and the instruction has not
9461   been output yet.  So we emit the label after the insn, delete it to
9462   avoid introducing basic blocks, and mark it as preserved, such that
9463   it is still output, given that it is referenced in debug info.  */
9464
9465static const char *
9466ia64_emit_deleted_label_after_insn (rtx insn)
9467{
9468  char label[MAX_ARTIFICIAL_LABEL_BYTES];
9469  rtx lb = gen_label_rtx ();
9470  rtx label_insn = emit_label_after (lb, insn);
9471
9472  LABEL_PRESERVE_P (lb) = 1;
9473
9474  delete_insn (label_insn);
9475
9476  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
9477
9478  return xstrdup (label);
9479}
9480
9481/* Define the CFA after INSN with the steady-state definition.  */
9482
9483static void
9484ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
9485{
9486  rtx fp = frame_pointer_needed
9487    ? hard_frame_pointer_rtx
9488    : stack_pointer_rtx;
9489  const char *label = ia64_emit_deleted_label_after_insn (insn);
9490
9491  if (!frame)
9492    return;
9493
9494  dwarf2out_def_cfa
9495    (label, REGNO (fp),
9496     ia64_initial_elimination_offset
9497     (REGNO (arg_pointer_rtx), REGNO (fp))
9498     + ARG_POINTER_CFA_OFFSET (current_function_decl));
9499}
9500
9501/* The generic dwarf2 frame debug info generator does not define a
9502   separate region for the very end of the epilogue, so refrain from
9503   doing so in the IA64-specific code as well.  */
9504
9505#define IA64_CHANGE_CFA_IN_EPILOGUE 0
9506
9507/* The function emits unwind directives for the start of an epilogue.  */
9508
9509static void
9510process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
9511{
9512  /* If this isn't the last block of the function, then we need to label the
9513     current state, and copy it back in at the start of the next block.  */
9514
9515  if (!last_block)
9516    {
9517      if (unwind)
9518	fprintf (asm_out_file, "\t.label_state %d\n",
9519		 ++cfun->machine->state_num);
9520      need_copy_state = true;
9521    }
9522
9523  if (unwind)
9524    fprintf (asm_out_file, "\t.restore sp\n");
9525  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9526    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
9527		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
9528}
9529
/* This function processes a SET pattern, looking for the specific
   forms that require emitting an assembly directive for unwinding.  */
9532
9533static int
9534process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
9535{
9536  rtx src = SET_SRC (pat);
9537  rtx dest = SET_DEST (pat);
9538  int src_regno, dest_regno;
9539
9540  /* Look for the ALLOC insn.  */
9541  if (GET_CODE (src) == UNSPEC_VOLATILE
9542      && XINT (src, 1) == UNSPECV_ALLOC
9543      && GET_CODE (dest) == REG)
9544    {
9545      dest_regno = REGNO (dest);
9546
9547      /* If this is the final destination for ar.pfs, then this must
9548	 be the alloc in the prologue.  */
9549      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
9550	{
9551	  if (unwind)
9552	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
9553		     ia64_dbx_register_number (dest_regno));
9554	}
9555      else
9556	{
9557	  /* This must be an alloc before a sibcall.  We must drop the
9558	     old frame info.  The easiest way to drop the old frame
9559	     info is to ensure we had a ".restore sp" directive
9560	     followed by a new prologue.  If the procedure doesn't
9561	     have a memory-stack frame, we'll issue a dummy ".restore
9562	     sp" now.  */
9563	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* If we haven't done process_epilogue () yet, do it now.  */
9565	    process_epilogue (asm_out_file, insn, unwind, frame);
9566	  if (unwind)
9567	    fprintf (asm_out_file, "\t.prologue\n");
9568	}
9569      return 1;
9570    }
9571
9572  /* Look for SP = ....  */
9573  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
9574    {
9575      if (GET_CODE (src) == PLUS)
9576        {
9577	  rtx op0 = XEXP (src, 0);
9578	  rtx op1 = XEXP (src, 1);
9579
9580	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
9581
9582	  if (INTVAL (op1) < 0)
9583	    {
9584	      gcc_assert (!frame_pointer_needed);
9585	      if (unwind)
9586		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
9587			 -INTVAL (op1));
9588	      ia64_dwarf2out_def_steady_cfa (insn, frame);
9589	    }
9590	  else
9591	    process_epilogue (asm_out_file, insn, unwind, frame);
9592	}
9593      else
9594	{
9595	  gcc_assert (GET_CODE (src) == REG
9596		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
9597	  process_epilogue (asm_out_file, insn, unwind, frame);
9598	}
9599
9600      return 1;
9601    }
9602
9603  /* Register move we need to look at.  */
9604  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
9605    {
9606      src_regno = REGNO (src);
9607      dest_regno = REGNO (dest);
9608
9609      switch (src_regno)
9610	{
9611	case BR_REG (0):
9612	  /* Saving return address pointer.  */
9613	  gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
9614	  if (unwind)
9615	    fprintf (asm_out_file, "\t.save rp, r%d\n",
9616		     ia64_dbx_register_number (dest_regno));
9617	  return 1;
9618
9619	case PR_REG (0):
9620	  gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
9621	  if (unwind)
9622	    fprintf (asm_out_file, "\t.save pr, r%d\n",
9623		     ia64_dbx_register_number (dest_regno));
9624	  return 1;
9625
9626	case AR_UNAT_REGNUM:
9627	  gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
9628	  if (unwind)
9629	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
9630		     ia64_dbx_register_number (dest_regno));
9631	  return 1;
9632
9633	case AR_LC_REGNUM:
9634	  gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
9635	  if (unwind)
9636	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
9637		     ia64_dbx_register_number (dest_regno));
9638	  return 1;
9639
9640	case STACK_POINTER_REGNUM:
9641	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
9642		      && frame_pointer_needed);
9643	  if (unwind)
9644	    fprintf (asm_out_file, "\t.vframe r%d\n",
9645		     ia64_dbx_register_number (dest_regno));
9646	  ia64_dwarf2out_def_steady_cfa (insn, frame);
9647	  return 1;
9648
9649	default:
9650	  /* Everything else should indicate being stored to memory.  */
9651	  gcc_unreachable ();
9652	}
9653    }
9654
9655  /* Memory store we need to look at.  */
9656  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
9657    {
9658      long off;
9659      rtx base;
9660      const char *saveop;
9661
9662      if (GET_CODE (XEXP (dest, 0)) == REG)
9663	{
9664	  base = XEXP (dest, 0);
9665	  off = 0;
9666	}
9667      else
9668	{
9669	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
9670		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
9671	  base = XEXP (XEXP (dest, 0), 0);
9672	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
9673	}
9674
9675      if (base == hard_frame_pointer_rtx)
9676	{
9677	  saveop = ".savepsp";
9678	  off = - off;
9679	}
9680      else
9681	{
9682	  gcc_assert (base == stack_pointer_rtx);
9683	  saveop = ".savesp";
9684	}
9685
9686      src_regno = REGNO (src);
9687      switch (src_regno)
9688	{
9689	case BR_REG (0):
9690	  gcc_assert (!current_frame_info.r[reg_save_b0]);
9691	  if (unwind)
9692	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
9693	  return 1;
9694
9695	case PR_REG (0):
9696	  gcc_assert (!current_frame_info.r[reg_save_pr]);
9697	  if (unwind)
9698	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
9699	  return 1;
9700
9701	case AR_LC_REGNUM:
9702	  gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
9703	  if (unwind)
9704	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9705	  return 1;
9706
9707	case AR_PFS_REGNUM:
9708	  gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
9709	  if (unwind)
9710	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9711	  return 1;
9712
9713	case AR_UNAT_REGNUM:
9714	  gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
9715	  if (unwind)
9716	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9717	  return 1;
9718
9719	case GR_REG (4):
9720	case GR_REG (5):
9721	case GR_REG (6):
9722	case GR_REG (7):
9723	  if (unwind)
9724	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
9725		     1 << (src_regno - GR_REG (4)));
9726	  return 1;
9727
9728	case BR_REG (1):
9729	case BR_REG (2):
9730	case BR_REG (3):
9731	case BR_REG (4):
9732	case BR_REG (5):
9733	  if (unwind)
9734	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
9735		     1 << (src_regno - BR_REG (1)));
9736	  return 1;
9737
9738	case FR_REG (2):
9739	case FR_REG (3):
9740	case FR_REG (4):
9741	case FR_REG (5):
9742	  if (unwind)
9743	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
9744		     1 << (src_regno - FR_REG (2)));
9745	  return 1;
9746
9747	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9748	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9749	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9750	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9751	  if (unwind)
9752	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9753		     1 << (src_regno - FR_REG (12)));
9754	  return 1;
9755
9756	default:
9757	  return 0;
9758	}
9759    }
9760
9761  return 0;
9762}
9763
9764
9765/* This function looks at a single insn and emits any directives
9766   required to unwind this insn.  */
9767void
9768process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9769{
9770  bool unwind = (flag_unwind_tables
9771		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9772  bool frame = dwarf2out_do_frame ();
9773
9774  if (unwind || frame)
9775    {
9776      rtx pat;
9777
9778      if (NOTE_INSN_BASIC_BLOCK_P (insn))
9779	{
9780	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9781
9782	  /* Restore unwind state from immediately before the epilogue.  */
9783	  if (need_copy_state)
9784	    {
9785	      if (unwind)
9786		{
9787		  fprintf (asm_out_file, "\t.body\n");
9788		  fprintf (asm_out_file, "\t.copy_state %d\n",
9789			   cfun->machine->state_num);
9790		}
9791	      if (IA64_CHANGE_CFA_IN_EPILOGUE)
9792		ia64_dwarf2out_def_steady_cfa (insn, frame);
9793	      need_copy_state = false;
9794	    }
9795	}
9796
9797      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9798	return;
9799
9800      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9801      if (pat)
9802	pat = XEXP (pat, 0);
9803      else
9804	pat = PATTERN (insn);
9805
9806      switch (GET_CODE (pat))
9807        {
9808	case SET:
9809	  process_set (asm_out_file, pat, insn, unwind, frame);
9810	  break;
9811
9812	case PARALLEL:
9813	  {
9814	    int par_index;
9815	    int limit = XVECLEN (pat, 0);
9816	    for (par_index = 0; par_index < limit; par_index++)
9817	      {
9818		rtx x = XVECEXP (pat, 0, par_index);
9819		if (GET_CODE (x) == SET)
9820		  process_set (asm_out_file, x, insn, unwind, frame);
9821	      }
9822	    break;
9823	  }
9824
9825	default:
9826	  gcc_unreachable ();
9827	}
9828    }
9829}
9830
9831
9832enum ia64_builtins
9833{
9834  IA64_BUILTIN_BSP,
9835  IA64_BUILTIN_COPYSIGNQ,
9836  IA64_BUILTIN_FABSQ,
9837  IA64_BUILTIN_FLUSHRS,
9838  IA64_BUILTIN_INFQ,
9839  IA64_BUILTIN_HUGE_VALQ
9840};
9841
9842void
9843ia64_init_builtins (void)
9844{
9845  tree fpreg_type;
9846  tree float80_type;
9847
9848  /* The __fpreg type.  */
9849  fpreg_type = make_node (REAL_TYPE);
9850  TYPE_PRECISION (fpreg_type) = 82;
9851  layout_type (fpreg_type);
9852  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9853
9854  /* The __float80 type.  */
9855  float80_type = make_node (REAL_TYPE);
9856  TYPE_PRECISION (float80_type) = 80;
9857  layout_type (float80_type);
9858  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9859
9860  /* The __float128 type.  */
9861  if (!TARGET_HPUX)
9862    {
9863      tree ftype, decl;
9864      tree float128_type = make_node (REAL_TYPE);
9865
9866      TYPE_PRECISION (float128_type) = 128;
9867      layout_type (float128_type);
9868      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9869
9870      /* TFmode support builtins.  */
9871      ftype = build_function_type (float128_type, void_list_node);
9872      add_builtin_function ("__builtin_infq", ftype,
9873			    IA64_BUILTIN_INFQ, BUILT_IN_MD,
9874			    NULL, NULL_TREE);
9875
9876      add_builtin_function ("__builtin_huge_valq", ftype,
9877			    IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
9878			    NULL, NULL_TREE);
9879
9880      ftype = build_function_type_list (float128_type,
9881					float128_type,
9882					NULL_TREE);
9883      decl = add_builtin_function ("__builtin_fabsq", ftype,
9884				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
9885				   "__fabstf2", NULL_TREE);
9886      TREE_READONLY (decl) = 1;
9887
9888      ftype = build_function_type_list (float128_type,
9889					float128_type,
9890					float128_type,
9891					NULL_TREE);
9892      decl = add_builtin_function ("__builtin_copysignq", ftype,
9893				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
9894				   "__copysigntf3", NULL_TREE);
9895      TREE_READONLY (decl) = 1;
9896    }
9897  else
9898    /* Under HPUX, this is a synonym for "long double".  */
9899    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9900					       "__float128");
9901
9902  /* Fwrite on VMS is non-standard.  */
9903  if (TARGET_ABI_OPEN_VMS)
9904    {
9905      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
9906      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
9907    }
9908
9909#define def_builtin(name, type, code)					\
9910  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
9911		       NULL, NULL_TREE)
9912
9913  def_builtin ("__builtin_ia64_bsp",
9914	       build_function_type (ptr_type_node, void_list_node),
9915	       IA64_BUILTIN_BSP);
9916
9917  def_builtin ("__builtin_ia64_flushrs",
9918	       build_function_type (void_type_node, void_list_node),
9919	       IA64_BUILTIN_FLUSHRS);
9920
9921#undef def_builtin
9922
9923  if (TARGET_HPUX)
9924    {
9925      if (built_in_decls [BUILT_IN_FINITE])
9926	set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
9927	  "_Isfinite");
9928      if (built_in_decls [BUILT_IN_FINITEF])
9929	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
9930	  "_Isfinitef");
9931      if (built_in_decls [BUILT_IN_FINITEL])
9932	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
9933	  "_Isfinitef128");
9934    }
9935}
9936
9937rtx
9938ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9939		     enum machine_mode mode ATTRIBUTE_UNUSED,
9940		     int ignore ATTRIBUTE_UNUSED)
9941{
9942  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9943  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9944
9945  switch (fcode)
9946    {
9947    case IA64_BUILTIN_BSP:
9948      if (! target || ! register_operand (target, DImode))
9949	target = gen_reg_rtx (DImode);
9950      emit_insn (gen_bsp_value (target));
9951#ifdef POINTERS_EXTEND_UNSIGNED
9952      target = convert_memory_address (ptr_mode, target);
9953#endif
9954      return target;
9955
9956    case IA64_BUILTIN_FLUSHRS:
9957      emit_insn (gen_flushrs ());
9958      return const0_rtx;
9959
9960    case IA64_BUILTIN_INFQ:
9961    case IA64_BUILTIN_HUGE_VALQ:
9962      {
9963	REAL_VALUE_TYPE inf;
9964	rtx tmp;
9965
9966	real_inf (&inf);
9967	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
9968
9969	tmp = validize_mem (force_const_mem (mode, tmp));
9970
9971	if (target == 0)
9972	  target = gen_reg_rtx (mode);
9973
9974	emit_move_insn (target, tmp);
9975	return target;
9976      }
9977
9978    case IA64_BUILTIN_FABSQ:
9979    case IA64_BUILTIN_COPYSIGNQ:
9980      return expand_call (exp, target, ignore);
9981
9982    default:
9983      gcc_unreachable ();
9984    }
9985
9986  return NULL_RTX;
9987}
9988
/* For HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */
9991
9992enum direction
9993ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
9994{
9995   /* Exception to normal case for structures/unions/etc.  */
9996
9997   if (type && AGGREGATE_TYPE_P (type)
9998       && int_size_in_bytes (type) < UNITS_PER_WORD)
9999     return upward;
10000
10001   /* Fall back to the default.  */
10002   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
10003}
10004
10005/* Emit text to declare externally defined variables and functions, because
10006   the Intel assembler does not support undefined externals.  */
10007
10008void
10009ia64_asm_output_external (FILE *file, tree decl, const char *name)
10010{
10011  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
10012     set in order to avoid putting out names that are never really
10013     used. */
10014  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
10015    {
10016      /* maybe_assemble_visibility will return 1 if the assembler
10017	 visibility directive is output.  */
10018      int need_visibility = ((*targetm.binds_local_p) (decl)
10019			     && maybe_assemble_visibility (decl));
10020
10021#ifdef DO_CRTL_NAMES
10022      DO_CRTL_NAMES;
10023#endif
10024
10025      /* GNU as does not need anything here, but the HP linker does
10026	 need something for external functions.  */
10027      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
10028	  && TREE_CODE (decl) == FUNCTION_DECL)
10029	  (*targetm.asm_out.globalize_decl_name) (file, decl);
10030      else if (need_visibility && !TARGET_GNU_AS)
10031	(*targetm.asm_out.globalize_label) (file, name);
10032    }
10033}
10034
/* Set SImode div/mod functions, since init_integral_libfuncs only
   initializes modes of word_mode and larger.  Rename the TFmode libfuncs
   using the HPUX conventions.  __divtf3 is used for XFmode; we need to
   keep it for backward compatibility.  */
10039
10040static void
10041ia64_init_libfuncs (void)
10042{
10043  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
10044  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
10045  set_optab_libfunc (smod_optab, SImode, "__modsi3");
10046  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
10047
10048  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
10049  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
10050  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
10051  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
10052  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
10053
10054  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
10055  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
10056  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
10057  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
10058  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
10059  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
10060
10061  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
10062  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
10063  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
10064  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
10065  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
10066
10067  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
10068  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
10069  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
10070  /* HP-UX 11.23 libc does not have a function for unsigned
10071     SImode-to-TFmode conversion.  */
10072  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
10073}
10074
10075/* Rename all the TFmode libfuncs using the HPUX conventions.  */
10076
10077static void
10078ia64_hpux_init_libfuncs (void)
10079{
10080  ia64_init_libfuncs ();
10081
10082  /* The HP SI millicode division and mod functions expect DI arguments.
10083     By turning them off completely we avoid using both libgcc and the
10084     non-standard millicode routines and use the HP DI millicode routines
10085     instead.  */
10086
10087  set_optab_libfunc (sdiv_optab, SImode, 0);
10088  set_optab_libfunc (udiv_optab, SImode, 0);
10089  set_optab_libfunc (smod_optab, SImode, 0);
10090  set_optab_libfunc (umod_optab, SImode, 0);
10091
10092  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
10093  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
10094  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
10095  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
10096
10097  /* HP-UX libc has TF min/max/abs routines in it.  */
10098  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
10099  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
10100  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
10101
10102  /* ia64_expand_compare uses this.  */
10103  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
10104
10105  /* These should never be used.  */
10106  set_optab_libfunc (eq_optab, TFmode, 0);
10107  set_optab_libfunc (ne_optab, TFmode, 0);
10108  set_optab_libfunc (gt_optab, TFmode, 0);
10109  set_optab_libfunc (ge_optab, TFmode, 0);
10110  set_optab_libfunc (lt_optab, TFmode, 0);
10111  set_optab_libfunc (le_optab, TFmode, 0);
10112}
10113
10114/* Rename the division and modulus functions in VMS.  */
10115
10116static void
10117ia64_vms_init_libfuncs (void)
10118{
10119  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
10120  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
10121  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
10122  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
10123  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
10124  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
10125  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
10126  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
10127  abort_libfunc = init_one_libfunc ("decc$abort");
10128  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
10129#ifdef MEM_LIBFUNCS_INIT
10130  MEM_LIBFUNCS_INIT;
10131#endif
10132}
10133
10134/* Rename the TFmode libfuncs available from soft-fp in glibc using
10135   the HPUX conventions.  */
10136
10137static void
10138ia64_sysv4_init_libfuncs (void)
10139{
10140  ia64_init_libfuncs ();
10141
10142  /* These functions are not part of the HPUX TFmode interface.  We
10143     use them instead of _U_Qfcmp, which doesn't work the way we
10144     expect.  */
10145  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
10146  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
10147  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
10148  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
10149  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
10150  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
10151
10152  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
10153     glibc doesn't have them.  */
10154}
10155
10156/* Use soft-fp.  */
10157
10158static void
10159ia64_soft_fp_init_libfuncs (void)
10160{
10161}
10162
10163static bool
10164ia64_vms_valid_pointer_mode (enum machine_mode mode)
10165{
10166  return (mode == SImode || mode == DImode);
10167}
10168
10169/* For HPUX, it is illegal to have relocations in shared segments.  */
10170
10171static int
10172ia64_hpux_reloc_rw_mask (void)
10173{
10174  return 3;
10175}
10176
/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */
10180
10181static int
10182ia64_reloc_rw_mask (void)
10183{
10184  return flag_pic ? 3 : 2;
10185}
10186
10187/* Return the section to use for X.  The only special thing we do here
10188   is to honor small data.  */
10189
10190static section *
10191ia64_select_rtx_section (enum machine_mode mode, rtx x,
10192			 unsigned HOST_WIDE_INT align)
10193{
10194  if (GET_MODE_SIZE (mode) > 0
10195      && GET_MODE_SIZE (mode) <= ia64_section_threshold
10196      && !TARGET_NO_SDATA)
10197    return sdata_section;
10198  else
10199    return default_elf_select_rtx_section (mode, x, align);
10200}
10201
10202static unsigned int
10203ia64_section_type_flags (tree decl, const char *name, int reloc)
10204{
10205  unsigned int flags = 0;
10206
10207  if (strcmp (name, ".sdata") == 0
10208      || strncmp (name, ".sdata.", 7) == 0
10209      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
10210      || strncmp (name, ".sdata2.", 8) == 0
10211      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
10212      || strcmp (name, ".sbss") == 0
10213      || strncmp (name, ".sbss.", 6) == 0
10214      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
10215    flags = SECTION_SMALL;
10216
10217#if TARGET_ABI_OPEN_VMS
10218  if (decl && DECL_ATTRIBUTES (decl)
10219      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
10220    flags |= SECTION_VMS_OVERLAY;
10221#endif
10222
10223  flags |= default_section_type_flags (decl, name, reloc);
10224  return flags;
10225}
10226
10227/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
10228   structure type and that the address of that type should be passed
10229   in out0, rather than in r8.  */
10230
10231static bool
10232ia64_struct_retval_addr_is_first_parm_p (tree fntype)
10233{
10234  tree ret_type = TREE_TYPE (fntype);
10235
10236  /* The Itanium C++ ABI requires that out0, rather than r8, be used
10237     as the structure return address parameter, if the return value
10238     type has a non-trivial copy constructor or destructor.  It is not
10239     clear if this same convention should be used for other
10240     programming languages.  Until G++ 3.4, we incorrectly used r8 for
10241     these return values.  */
10242  return (abi_version_at_least (2)
10243	  && ret_type
10244	  && TYPE_MODE (ret_type) == BLKmode
10245	  && TREE_ADDRESSABLE (ret_type)
10246	  && strcmp (lang_hooks.name, "GNU C++") == 0);
10247}
10248
10249/* Output the assembler code for a thunk function.  THUNK_DECL is the
10250   declaration for the thunk function itself, FUNCTION is the decl for
10251   the target function.  DELTA is an immediate constant offset to be
10252   added to THIS.  If VCALL_OFFSET is nonzero, the word at
10253   *(*this + vcall_offset) should be added to THIS.  */
10254
10255static void
10256ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10257		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10258		      tree function)
10259{
10260  rtx this_rtx, insn, funexp;
10261  unsigned int this_parmno;
10262  unsigned int this_regno;
10263  rtx delta_rtx;
10264
10265  reload_completed = 1;
10266  epilogue_completed = 1;
10267
10268  /* Set things up as ia64_expand_prologue might.  */
10269  last_scratch_gr_reg = 15;
10270
10271  memset (&current_frame_info, 0, sizeof (current_frame_info));
10272  current_frame_info.spill_cfa_off = -16;
10273  current_frame_info.n_input_regs = 1;
10274  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
10275
10276  /* Mark the end of the (empty) prologue.  */
10277  emit_note (NOTE_INSN_PROLOGUE_END);
10278
10279  /* Figure out whether "this" will be the first parameter (the
10280     typical case) or the second parameter (as happens when the
10281     virtual function returns certain class objects).  */
10282  this_parmno
10283    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
10284       ? 1 : 0);
10285  this_regno = IN_REG (this_parmno);
10286  if (!TARGET_REG_NAMES)
10287    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
10288
10289  this_rtx = gen_rtx_REG (Pmode, this_regno);
10290
10291  /* Apply the constant offset, if required.  */
10292  delta_rtx = GEN_INT (delta);
10293  if (TARGET_ILP32)
10294    {
10295      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
10296      REG_POINTER (tmp) = 1;
10297      if (delta && satisfies_constraint_I (delta_rtx))
10298	{
10299	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
10300	  delta = 0;
10301	}
10302      else
10303	emit_insn (gen_ptr_extend (this_rtx, tmp));
10304    }
10305  if (delta)
10306    {
10307      if (!satisfies_constraint_I (delta_rtx))
10308	{
10309	  rtx tmp = gen_rtx_REG (Pmode, 2);
10310	  emit_move_insn (tmp, delta_rtx);
10311	  delta_rtx = tmp;
10312	}
10313      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
10314    }
10315
10316  /* Apply the offset from the vtable, if required.  */
10317  if (vcall_offset)
10318    {
10319      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
10320      rtx tmp = gen_rtx_REG (Pmode, 2);
10321
10322      if (TARGET_ILP32)
10323	{
10324	  rtx t = gen_rtx_REG (ptr_mode, 2);
10325	  REG_POINTER (t) = 1;
10326	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
10327	  if (satisfies_constraint_I (vcall_offset_rtx))
10328	    {
10329	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
10330	      vcall_offset = 0;
10331	    }
10332	  else
10333	    emit_insn (gen_ptr_extend (tmp, t));
10334	}
10335      else
10336	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
10337
10338      if (vcall_offset)
10339	{
10340	  if (!satisfies_constraint_J (vcall_offset_rtx))
10341	    {
10342	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
10343	      emit_move_insn (tmp2, vcall_offset_rtx);
10344	      vcall_offset_rtx = tmp2;
10345	    }
10346	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
10347	}
10348
10349      if (TARGET_ILP32)
10350	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
10351      else
10352	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
10353
10354      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
10355    }
10356
10357  /* Generate a tail call to the target function.  */
10358  if (! TREE_USED (function))
10359    {
10360      assemble_external (function);
10361      TREE_USED (function) = 1;
10362    }
10363  funexp = XEXP (DECL_RTL (function), 0);
10364  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
10365  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
10366  insn = get_last_insn ();
10367  SIBLING_CALL_P (insn) = 1;
10368
10369  /* Code generation for calls relies on splitting.  */
10370  reload_completed = 1;
10371  epilogue_completed = 1;
10372  try_split (PATTERN (insn), insn, 0);
10373
10374  emit_barrier ();
10375
10376  /* Run just enough of rest_of_compilation to get the insns emitted.
10377     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
10379     assemble_start_function and assemble_end_function.  */
10380
10381  insn_locators_alloc ();
10382  emit_all_insn_group_barriers (NULL);
10383  insn = get_insns ();
10384  shorten_branches (insn);
10385  final_start_function (insn, file, 1);
10386  final (insn, file, 1);
10387  final_end_function ();
10388
10389  reload_completed = 0;
10390  epilogue_completed = 0;
10391}
10392
10393/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
10394
10395static rtx
10396ia64_struct_value_rtx (tree fntype,
10397		       int incoming ATTRIBUTE_UNUSED)
10398{
  if (TARGET_ABI_OPEN_VMS
      || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
10401    return NULL_RTX;
10402  return gen_rtx_REG (Pmode, GR_REG (8));
10403}
10404
10405static bool
10406ia64_scalar_mode_supported_p (enum machine_mode mode)
10407{
10408  switch (mode)
10409    {
10410    case QImode:
10411    case HImode:
10412    case SImode:
10413    case DImode:
10414    case TImode:
10415      return true;
10416
10417    case SFmode:
10418    case DFmode:
10419    case XFmode:
10420    case RFmode:
10421      return true;
10422
10423    case TFmode:
10424      return true;
10425
10426    default:
10427      return false;
10428    }
10429}
10430
10431static bool
10432ia64_vector_mode_supported_p (enum machine_mode mode)
10433{
10434  switch (mode)
10435    {
10436    case V8QImode:
10437    case V4HImode:
10438    case V2SImode:
10439      return true;
10440
10441    case V2SFmode:
10442      return true;
10443
10444    default:
10445      return false;
10446    }
10447}
10448
10449/* Implement the FUNCTION_PROFILER macro.  */
10450
10451void
10452ia64_output_function_profiler (FILE *file, int labelno)
10453{
10454  bool indirect_call;
10455
10456  /* If the function needs a static chain and the static chain
10457     register is r15, we use an indirect call so as to bypass
10458     the PLT stub in case the executable is dynamically linked,
10459     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */
10461
10462  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
10463    {
10464      gcc_assert (STATIC_CHAIN_REGNUM == 15);
10465      indirect_call = true;
10466    }
10467  else
10468    indirect_call = false;
10469
10470  if (TARGET_GNU_AS)
10471    fputs ("\t.prologue 4, r40\n", file);
10472  else
10473    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
10474  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
10475
10476  if (NO_PROFILE_COUNTERS)
10477    fputs ("\tmov out3 = r0\n", file);
10478  else
10479    {
10480      char buf[20];
10481      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10482
10483      if (TARGET_AUTO_PIC)
10484	fputs ("\tmovl out3 = @gprel(", file);
10485      else
10486	fputs ("\taddl out3 = @ltoff(", file);
10487      assemble_name (file, buf);
10488      if (TARGET_AUTO_PIC)
10489	fputs (")\n", file);
10490      else
10491	fputs ("), r1\n", file);
10492    }
10493
10494  if (indirect_call)
10495    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
10496  fputs ("\t;;\n", file);
10497
10498  fputs ("\t.save rp, r42\n", file);
10499  fputs ("\tmov out2 = b0\n", file);
10500  if (indirect_call)
10501    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
10502  fputs ("\t.body\n", file);
10503  fputs ("\tmov out1 = r1\n", file);
10504  if (indirect_call)
10505    {
10506      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
10507      fputs ("\tmov b6 = r16\n", file);
10508      fputs ("\tld8 r1 = [r14]\n", file);
10509      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
10510    }
10511  else
10512    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
10513}
10514
10515static GTY(()) rtx mcount_func_rtx;
10516static rtx
10517gen_mcount_func_rtx (void)
10518{
10519  if (!mcount_func_rtx)
10520    mcount_func_rtx = init_one_libfunc ("_mcount");
10521  return mcount_func_rtx;
10522}
10523
10524void
10525ia64_profile_hook (int labelno)
10526{
10527  rtx label, ip;
10528
10529  if (NO_PROFILE_COUNTERS)
10530    label = const0_rtx;
10531  else
10532    {
10533      char buf[30];
10534      const char *label_name;
10535      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10536      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
10537      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
10538      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
10539    }
10540  ip = gen_reg_rtx (Pmode);
10541  emit_insn (gen_ip_value (ip));
10542  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
10543                     VOIDmode, 3,
10544		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
10545		     ip, Pmode,
10546		     label, Pmode);
10547}
10548
10549/* Return the mangling of TYPE if it is an extended fundamental type.  */
10550
10551static const char *
10552ia64_mangle_type (const_tree type)
10553{
10554  type = TYPE_MAIN_VARIANT (type);
10555
10556  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
10557      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
10558    return NULL;
10559
10560  /* On HP-UX, "long double" is mangled as "e" so __float128 is
10561     mangled as "e".  */
10562  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
10563    return "g";
10564  /* On HP-UX, "e" is not available as a mangling of __float80 so use
10565     an extended mangling.  Elsewhere, "e" is available since long
10566     double is 80 bits.  */
10567  if (TYPE_MODE (type) == XFmode)
10568    return TARGET_HPUX ? "u9__float80" : "e";
10569  if (TYPE_MODE (type) == RFmode)
10570    return "u7__fpreg";
10571  return NULL;
10572}
10573
10574/* Return the diagnostic message string if conversion from FROMTYPE to
10575   TOTYPE is not allowed, NULL otherwise.  */
10576static const char *
10577ia64_invalid_conversion (const_tree fromtype, const_tree totype)
10578{
10579  /* Reject nontrivial conversion to or from __fpreg.  */
10580  if (TYPE_MODE (fromtype) == RFmode
10581      && TYPE_MODE (totype) != RFmode
10582      && TYPE_MODE (totype) != VOIDmode)
10583    return N_("invalid conversion from %<__fpreg%>");
10584  if (TYPE_MODE (totype) == RFmode
10585      && TYPE_MODE (fromtype) != RFmode)
10586    return N_("invalid conversion to %<__fpreg%>");
10587  return NULL;
10588}
10589
10590/* Return the diagnostic message string if the unary operation OP is
10591   not permitted on TYPE, NULL otherwise.  */
10592static const char *
10593ia64_invalid_unary_op (int op, const_tree type)
10594{
10595  /* Reject operations on __fpreg other than unary + or &.  */
10596  if (TYPE_MODE (type) == RFmode
10597      && op != CONVERT_EXPR
10598      && op != ADDR_EXPR)
10599    return N_("invalid operation on %<__fpreg%>");
10600  return NULL;
10601}
10602
10603/* Return the diagnostic message string if the binary operation OP is
10604   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
10605static const char *
10606ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
10607{
10608  /* Reject operations on __fpreg.  */
10609  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
10610    return N_("invalid operation on %<__fpreg%>");
10611  return NULL;
10612}
10613
/* Override the default values of optimization-related parameters.  */
10615void
10616ia64_optimization_options (int level ATTRIBUTE_UNUSED,
10617                           int size ATTRIBUTE_UNUSED)
10618{
10619  /* Let the scheduler form additional regions.  */
10620  set_param_value ("max-sched-extend-regions-iters", 2);
10621
10622  /* Set the default values for cache-related parameters.  */
10623  set_param_value ("simultaneous-prefetches", 6);
10624  set_param_value ("l1-cache-line-size", 32);
10625
10626  set_param_value("sched-mem-true-dep-cost", 4);
10627}
10628
/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234, put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */
10633
10634static tree
10635ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
10636                                 tree name ATTRIBUTE_UNUSED,
10637                                 tree args,
10638                                 int flags ATTRIBUTE_UNUSED,
10639                                 bool *no_add_attrs)
10640{
10641  tree arg = TREE_VALUE (args);
10642
10643  if (TREE_CODE (arg) != STRING_CST)
10644    {
10645      error("version attribute is not a string");
10646      *no_add_attrs = true;
10647      return NULL_TREE;
10648    }
10649  return NULL_TREE;
10650}
10651
10652/* Target hook for c_mode_for_suffix.  */
10653
10654static enum machine_mode
10655ia64_c_mode_for_suffix (char suffix)
10656{
10657  if (suffix == 'q')
10658    return TFmode;
10659  if (suffix == 'w')
10660    return XFmode;
10661
10662  return VOIDmode;
10663}
10664
10665static enum machine_mode
10666ia64_promote_function_mode (const_tree type,
10667			    enum machine_mode mode,
10668			    int *punsignedp,
10669			    const_tree funtype,
10670			    int for_return)
10671{
  /* Special processing is required only for OpenVMS.  */
10673
10674  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode (type, mode, punsignedp, funtype,
					  for_return);
10677
10678  /* HP OpenVMS Calling Standard dated June, 2004, that describes
10679     HP OpenVMS I64 Version 8.2EFT,
10680     chapter 4 "OpenVMS I64 Conventions"
10681     section 4.7 "Procedure Linkage"
10682     subsection 4.7.5.2, "Normal Register Parameters"
10683
10684     "Unsigned integral (except unsigned 32-bit), set, and VAX floating-point
10685     values passed in registers are zero-filled; signed integral values as
10686     well as unsigned 32-bit integral values are sign-extended to 64 bits.
10687     For all other types passed in the general registers, unused bits are
10688     undefined."  */
10689
10690  if (!AGGREGATE_TYPE_P (type)
10691      && GET_MODE_CLASS (mode) == MODE_INT
10692      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
10693    {
10694      if (mode == SImode)
10695	*punsignedp = 0;
10696      return DImode;
10697    }
10698  else
10699    return promote_mode (type, mode, punsignedp);
10700}
10701
10702static GTY(()) rtx ia64_dconst_0_5_rtx;
10703
10704rtx
10705ia64_dconst_0_5 (void)
10706{
10707  if (! ia64_dconst_0_5_rtx)
10708    {
10709      REAL_VALUE_TYPE rv;
10710      real_from_string (&rv, "0.5");
10711      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
10712    }
10713  return ia64_dconst_0_5_rtx;
10714}
10715
10716static GTY(()) rtx ia64_dconst_0_375_rtx;
10717
10718rtx
10719ia64_dconst_0_375 (void)
10720{
10721  if (! ia64_dconst_0_375_rtx)
10722    {
10723      REAL_VALUE_TYPE rv;
10724      real_from_string (&rv, "0.375");
10725      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
10726    }
10727  return ia64_dconst_0_375_rtx;
10728}
10729
10730
10731#include "gt-ia64.h"
10732