ia64.c revision 169690
1/* Definitions of target machine for GNU compiler.
2   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
3   Free Software Foundation, Inc.
4   Contributed by James E. Wilson <wilson@cygnus.com> and
5		  David Mosberger <davidm@hpl.hp.com>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify
10it under the terms of the GNU General Public License as published by
11the Free Software Foundation; either version 2, or (at your option)
12any later version.
13
14GCC is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING.  If not, write to
21the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22Boston, MA 02110-1301, USA.  */
23
24#include "config.h"
25#include "system.h"
26#include "coretypes.h"
27#include "tm.h"
28#include "rtl.h"
29#include "tree.h"
30#include "regs.h"
31#include "hard-reg-set.h"
32#include "real.h"
33#include "insn-config.h"
34#include "conditions.h"
35#include "output.h"
36#include "insn-attr.h"
37#include "flags.h"
38#include "recog.h"
39#include "expr.h"
40#include "optabs.h"
41#include "except.h"
42#include "function.h"
43#include "ggc.h"
44#include "basic-block.h"
45#include "toplev.h"
46#include "sched-int.h"
47#include "timevar.h"
48#include "target.h"
49#include "target-def.h"
50#include "tm_p.h"
51#include "hashtab.h"
52#include "langhooks.h"
53#include "cfglayout.h"
54#include "tree-gimple.h"
55#include "intl.h"
56#include "debug.h"
57#include "params.h"
58
59/* This is used for communication between ASM_OUTPUT_LABEL and
60   ASM_OUTPUT_LABELREF.  */
61int ia64_asm_output_label = 0;
62
63/* Define the information needed to generate branch and scc insns.  This is
64   stored from the compare operation.  */
65struct rtx_def * ia64_compare_op0;
66struct rtx_def * ia64_compare_op1;
67
68/* Register names for ia64_expand_prologue.  */
69static const char * const ia64_reg_numbers[96] =
70{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
71  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
72  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
73  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
74  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
75  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
76  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
77  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
78  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
79  "r104","r105","r106","r107","r108","r109","r110","r111",
80  "r112","r113","r114","r115","r116","r117","r118","r119",
81  "r120","r121","r122","r123","r124","r125","r126","r127"};
82
83/* ??? These strings could be shared with REGISTER_NAMES.  */
84static const char * const ia64_input_reg_names[8] =
85{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
86
87/* ??? These strings could be shared with REGISTER_NAMES.  */
88static const char * const ia64_local_reg_names[80] =
89{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
90  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
91  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
92  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
93  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
94  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
95  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
96  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
97  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
98  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };
99
100/* ??? These strings could be shared with REGISTER_NAMES.  */
101static const char * const ia64_output_reg_names[8] =
102{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
103
104/* Which cpu are we scheduling for.  */
105enum processor_type ia64_tune = PROCESSOR_ITANIUM2;
106
107/* Determines whether we run our final scheduling pass or not.  We always
108   avoid the normal second scheduling pass.  */
109static int ia64_flag_schedule_insns2;
110
111/* Determines whether we run variable tracking in machine dependent
112   reorganization.  */
113static int ia64_flag_var_tracking;
114
115/* Variables which are this size or smaller are put in the sdata/sbss
116   sections.  */
117
118unsigned int ia64_section_threshold;
119
120/* The following variable is used by the DFA insn scheduler.  The value is
121   TRUE if we do insn bundling instead of insn scheduling.  */
122int bundling_p = 0;
123
124/* Structure to be filled in by ia64_compute_frame_size with register
125   save masks and offsets for the current function.  */
126
127struct ia64_frame_info
128{
129  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
130				   the caller's scratch area.  */
131  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
132  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
133  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
134  HARD_REG_SET mask;		/* mask of saved registers.  */
135  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
136				   registers or long-term scratches.  */
137  int n_spilled;		/* number of spilled registers.  */
138  int reg_fp;			/* register for fp.  */
139  int reg_save_b0;		/* save register for b0.  */
140  int reg_save_pr;		/* save register for prs.  */
141  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
142  int reg_save_ar_unat;		/* save register for ar.unat.  */
143  int reg_save_ar_lc;		/* save register for ar.lc.  */
144  int reg_save_gp;		/* save register for gp.  */
145  int n_input_regs;		/* number of input registers used.  */
146  int n_local_regs;		/* number of local registers used.  */
147  int n_output_regs;		/* number of output registers used.  */
148  int n_rotate_regs;		/* number of rotating registers used.  */
149
150  char need_regstk;		/* true if a .regstk directive needed.  */
151  char initialized;		/* true if the data is finalized.  */
152};
153
154/* Current frame information calculated by ia64_compute_frame_size.  */
155static struct ia64_frame_info current_frame_info;
156
157static int ia64_first_cycle_multipass_dfa_lookahead (void);
158static void ia64_dependencies_evaluation_hook (rtx, rtx);
159static void ia64_init_dfa_pre_cycle_insn (void);
160static rtx ia64_dfa_pre_cycle_insn (void);
161static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
162static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx);
163static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
164static void ia64_h_i_d_extended (void);
165static int ia64_mode_to_int (enum machine_mode);
166static void ia64_set_sched_flags (spec_info_t);
167static int ia64_speculate_insn (rtx, ds_t, rtx *);
168static rtx ia64_gen_spec_insn (rtx, ds_t, int, bool, bool);
169static bool ia64_needs_block_p (rtx);
170static rtx ia64_gen_check (rtx, rtx, bool);
171static int ia64_spec_check_p (rtx);
172static int ia64_spec_check_src_p (rtx);
173static rtx gen_tls_get_addr (void);
174static rtx gen_thread_pointer (void);
175static int find_gr_spill (int);
176static int next_scratch_gr_reg (void);
177static void mark_reg_gr_used_mask (rtx, void *);
178static void ia64_compute_frame_size (HOST_WIDE_INT);
179static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
180static void finish_spill_pointers (void);
181static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
182static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
183static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
184static rtx gen_movdi_x (rtx, rtx, rtx);
185static rtx gen_fr_spill_x (rtx, rtx, rtx);
186static rtx gen_fr_restore_x (rtx, rtx, rtx);
187
188static enum machine_mode hfa_element_mode (tree, bool);
189static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
190					 tree, int *, int);
191static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
192				   tree, bool);
193static bool ia64_function_ok_for_sibcall (tree, tree);
194static bool ia64_return_in_memory (tree, tree);
195static bool ia64_rtx_costs (rtx, int, int, int *);
196static void fix_range (const char *);
197static bool ia64_handle_option (size_t, const char *, int);
198static struct machine_function * ia64_init_machine_status (void);
199static void emit_insn_group_barriers (FILE *);
200static void emit_all_insn_group_barriers (FILE *);
201static void final_emit_insn_group_barriers (FILE *);
202static void emit_predicate_relation_info (void);
203static void ia64_reorg (void);
204static bool ia64_in_small_data_p (tree);
205static void process_epilogue (FILE *, rtx, bool, bool);
206static int process_set (FILE *, rtx, rtx, bool, bool);
207
208static bool ia64_assemble_integer (rtx, unsigned int, int);
209static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
210static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
211static void ia64_output_function_end_prologue (FILE *);
212
213static int ia64_issue_rate (void);
214static int ia64_adjust_cost_2 (rtx, int, rtx, int);
215static void ia64_sched_init (FILE *, int, int);
216static void ia64_sched_init_global (FILE *, int, int);
217static void ia64_sched_finish_global (FILE *, int);
218static void ia64_sched_finish (FILE *, int);
219static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
220static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
221static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
222static int ia64_variable_issue (FILE *, int, rtx, int);
223
224static struct bundle_state *get_free_bundle_state (void);
225static void free_bundle_state (struct bundle_state *);
226static void initiate_bundle_states (void);
227static void finish_bundle_states (void);
228static unsigned bundle_state_hash (const void *);
229static int bundle_state_eq_p (const void *, const void *);
230static int insert_bundle_state (struct bundle_state *);
231static void initiate_bundle_state_table (void);
232static void finish_bundle_state_table (void);
233static int try_issue_nops (struct bundle_state *, int);
234static int try_issue_insn (struct bundle_state *, rtx);
235static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
236static int get_max_pos (state_t);
237static int get_template (state_t, int);
238
239static rtx get_next_important_insn (rtx, rtx);
240static void bundling (FILE *, int, rtx, rtx);
241
242static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
243				  HOST_WIDE_INT, tree);
244static void ia64_file_start (void);
245
246static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
247static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
248static section *ia64_select_rtx_section (enum machine_mode, rtx,
249					 unsigned HOST_WIDE_INT);
250static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
251     ATTRIBUTE_UNUSED;
252static unsigned int ia64_section_type_flags (tree, const char *, int);
253static void ia64_hpux_add_extern_decl (tree decl)
254     ATTRIBUTE_UNUSED;
255static void ia64_hpux_file_end (void)
256     ATTRIBUTE_UNUSED;
257static void ia64_init_libfuncs (void)
258     ATTRIBUTE_UNUSED;
259static void ia64_hpux_init_libfuncs (void)
260     ATTRIBUTE_UNUSED;
261static void ia64_sysv4_init_libfuncs (void)
262     ATTRIBUTE_UNUSED;
263static void ia64_vms_init_libfuncs (void)
264     ATTRIBUTE_UNUSED;
265
266static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
267static void ia64_encode_section_info (tree, rtx, int);
268static rtx ia64_struct_value_rtx (tree, int);
269static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *);
270static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
271static bool ia64_vector_mode_supported_p (enum machine_mode mode);
272static bool ia64_cannot_force_const_mem (rtx);
273static const char *ia64_mangle_fundamental_type (tree);
274static const char *ia64_invalid_conversion (tree, tree);
275static const char *ia64_invalid_unary_op (int, tree);
276static const char *ia64_invalid_binary_op (int, tree, tree);
277
278/* Table of valid machine attributes.  */
279static const struct attribute_spec ia64_attribute_table[] =
280{
281  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
282  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
283  { "model",	       1, 1, true, false, false, ia64_handle_model_attribute },
284  { NULL,	       0, 0, false, false, false, NULL }
285};
286
287/* Initialize the GCC target structure.  */
288#undef TARGET_ATTRIBUTE_TABLE
289#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
290
291#undef TARGET_INIT_BUILTINS
292#define TARGET_INIT_BUILTINS ia64_init_builtins
293
294#undef TARGET_EXPAND_BUILTIN
295#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
296
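/* The IA-64 assembler expects data1/data2/data4/data8 (and their .ua
   unaligned variants) for emitting 1-, 2-, 4-, and 8-byte integers,
   which is what the macros below select.  */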
297#undef TARGET_ASM_BYTE_OP
298#define TARGET_ASM_BYTE_OP "\tdata1\t"
299#undef TARGET_ASM_ALIGNED_HI_OP
300#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
301#undef TARGET_ASM_ALIGNED_SI_OP
302#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
303#undef TARGET_ASM_ALIGNED_DI_OP
304#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
305#undef TARGET_ASM_UNALIGNED_HI_OP
306#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
307#undef TARGET_ASM_UNALIGNED_SI_OP
308#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
309#undef TARGET_ASM_UNALIGNED_DI_OP
310#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
311#undef TARGET_ASM_INTEGER
312#define TARGET_ASM_INTEGER ia64_assemble_integer
313
314#undef TARGET_ASM_FUNCTION_PROLOGUE
315#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
316#undef TARGET_ASM_FUNCTION_END_PROLOGUE
317#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
318#undef TARGET_ASM_FUNCTION_EPILOGUE
319#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
320
321#undef TARGET_IN_SMALL_DATA_P
322#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p
323
324#undef TARGET_SCHED_ADJUST_COST_2
325#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
326#undef TARGET_SCHED_ISSUE_RATE
327#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
328#undef TARGET_SCHED_VARIABLE_ISSUE
329#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
330#undef TARGET_SCHED_INIT
331#define TARGET_SCHED_INIT ia64_sched_init
332#undef TARGET_SCHED_FINISH
333#define TARGET_SCHED_FINISH ia64_sched_finish
334#undef TARGET_SCHED_INIT_GLOBAL
335#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
336#undef TARGET_SCHED_FINISH_GLOBAL
337#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
338#undef TARGET_SCHED_REORDER
339#define TARGET_SCHED_REORDER ia64_sched_reorder
340#undef TARGET_SCHED_REORDER2
341#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
342
343#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
344#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
345
346#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
347#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
348
349#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
350#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
351#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
352#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
353
354#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
355#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
356  ia64_first_cycle_multipass_dfa_lookahead_guard
357
358#undef TARGET_SCHED_DFA_NEW_CYCLE
359#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
360
361#undef TARGET_SCHED_H_I_D_EXTENDED
362#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
363
364#undef TARGET_SCHED_SET_SCHED_FLAGS
365#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
366
367#undef TARGET_SCHED_SPECULATE_INSN
368#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
369
370#undef TARGET_SCHED_NEEDS_BLOCK_P
371#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
372
373#undef TARGET_SCHED_GEN_CHECK
374#define TARGET_SCHED_GEN_CHECK ia64_gen_check
375
376#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
377#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
378  ia64_first_cycle_multipass_dfa_lookahead_guard_spec
379
380#undef TARGET_FUNCTION_OK_FOR_SIBCALL
381#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
382#undef TARGET_ARG_PARTIAL_BYTES
383#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
384
385#undef TARGET_ASM_OUTPUT_MI_THUNK
386#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
387#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
388#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
389
390#undef TARGET_ASM_FILE_START
391#define TARGET_ASM_FILE_START ia64_file_start
392
393#undef TARGET_RTX_COSTS
394#define TARGET_RTX_COSTS ia64_rtx_costs
395#undef TARGET_ADDRESS_COST
396#define TARGET_ADDRESS_COST hook_int_rtx_0
397
398#undef TARGET_MACHINE_DEPENDENT_REORG
399#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
400
401#undef TARGET_ENCODE_SECTION_INFO
402#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
403
404#undef  TARGET_SECTION_TYPE_FLAGS
405#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags
406
407#ifdef HAVE_AS_TLS
408#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
409#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
410#endif
411
412/* ??? ABI doesn't allow us to define this.  */
413#if 0
414#undef TARGET_PROMOTE_FUNCTION_ARGS
415#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
416#endif
417
418/* ??? ABI doesn't allow us to define this.  */
419#if 0
420#undef TARGET_PROMOTE_FUNCTION_RETURN
421#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
422#endif
423
424/* ??? Investigate.  */
425#if 0
426#undef TARGET_PROMOTE_PROTOTYPES
427#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
428#endif
429
430#undef TARGET_STRUCT_VALUE_RTX
431#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
432#undef TARGET_RETURN_IN_MEMORY
433#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
434#undef TARGET_SETUP_INCOMING_VARARGS
435#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
436#undef TARGET_STRICT_ARGUMENT_NAMING
437#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
438#undef TARGET_MUST_PASS_IN_STACK
439#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
440
441#undef TARGET_GIMPLIFY_VA_ARG_EXPR
442#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
443
444#undef TARGET_UNWIND_EMIT
445#define TARGET_UNWIND_EMIT process_for_unwind_directive
446
447#undef TARGET_SCALAR_MODE_SUPPORTED_P
448#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
449#undef TARGET_VECTOR_MODE_SUPPORTED_P
450#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
451
452/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
453   in an order different from the specified program order.  */
454#undef TARGET_RELAXED_ORDERING
455#define TARGET_RELAXED_ORDERING true
456
457#undef TARGET_DEFAULT_TARGET_FLAGS
458#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
459#undef TARGET_HANDLE_OPTION
460#define TARGET_HANDLE_OPTION ia64_handle_option
461
462#undef TARGET_CANNOT_FORCE_CONST_MEM
463#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
464
465#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
466#define TARGET_MANGLE_FUNDAMENTAL_TYPE ia64_mangle_fundamental_type
467
468#undef TARGET_INVALID_CONVERSION
469#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
470#undef TARGET_INVALID_UNARY_OP
471#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
472#undef TARGET_INVALID_BINARY_OP
473#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
474
475struct gcc_target targetm = TARGET_INITIALIZER;
476
477typedef enum
478  {
479    ADDR_AREA_NORMAL,	/* normal address area */
480    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
481  }
482ia64_addr_area;
483
484static GTY(()) tree small_ident1;
485static GTY(()) tree small_ident2;
486
487static void
488init_idents (void)
489{
490  if (small_ident1 == 0)
491    {
492      small_ident1 = get_identifier ("small");
493      small_ident2 = get_identifier ("__small__");
494    }
495}
496
497/* Retrieve the address area that has been chosen for the given decl.  */
498
499static ia64_addr_area
500ia64_get_addr_area (tree decl)
501{
502  tree model_attr;
503
504  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
505  if (model_attr)
506    {
507      tree id;
508
509      init_idents ();
510      id = TREE_VALUE (TREE_VALUE (model_attr));
511      if (id == small_ident1 || id == small_ident2)
512	return ADDR_AREA_SMALL;
513    }
514  return ADDR_AREA_NORMAL;
515}
516
517static tree
518ia64_handle_model_attribute (tree *node, tree name, tree args,
519			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
520{
521  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
522  ia64_addr_area area;
523  tree arg, decl = *node;
524
525  init_idents ();
526  arg = TREE_VALUE (args);
527  if (arg == small_ident1 || arg == small_ident2)
528    {
529      addr_area = ADDR_AREA_SMALL;
530    }
531  else
532    {
533      warning (OPT_Wattributes, "invalid argument of %qs attribute",
534	       IDENTIFIER_POINTER (name));
535      *no_add_attrs = true;
536    }
537
538  switch (TREE_CODE (decl))
539    {
540    case VAR_DECL:
541      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
542	   == FUNCTION_DECL)
543	  && !TREE_STATIC (decl))
544	{
545	  error ("%Jan address area attribute cannot be specified for "
546		 "local variables", decl);
547	  *no_add_attrs = true;
548	}
549      area = ia64_get_addr_area (decl);
550      if (area != ADDR_AREA_NORMAL && addr_area != area)
551	{
552	  error ("address area of %q+D conflicts with previous "
553		 "declaration", decl);
554	  *no_add_attrs = true;
555	}
556      break;
557
558    case FUNCTION_DECL:
559      error ("%Jaddress area attribute cannot be specified for functions",
560	     decl);
561      *no_add_attrs = true;
562      break;
563
564    default:
565      warning (OPT_Wattributes, "%qs attribute ignored",
566	       IDENTIFIER_POINTER (name));
567      *no_add_attrs = true;
568      break;
569    }
570
571  return NULL_TREE;
572}
573
574static void
575ia64_encode_addr_area (tree decl, rtx symbol)
576{
577  int flags;
578
579  flags = SYMBOL_REF_FLAGS (symbol);
580  switch (ia64_get_addr_area (decl))
581    {
582    case ADDR_AREA_NORMAL: break;
583    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
584    default: gcc_unreachable ();
585    }
586  SYMBOL_REF_FLAGS (symbol) = flags;
587}
588
589static void
590ia64_encode_section_info (tree decl, rtx rtl, int first)
591{
592  default_encode_section_info (decl, rtl, first);
593
594  /* Careful not to prod global register variables.  */
595  if (TREE_CODE (decl) == VAR_DECL
596      && GET_CODE (DECL_RTL (decl)) == MEM
597      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
598      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
599    ia64_encode_addr_area (decl, XEXP (rtl, 0));
600}
601
602/* Implement CONST_OK_FOR_LETTER_P.  */
603
604bool
605ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c)
606{
607  switch (c)
608    {
609    case 'I':
610      return CONST_OK_FOR_I (value);
611    case 'J':
612      return CONST_OK_FOR_J (value);
613    case 'K':
614      return CONST_OK_FOR_K (value);
615    case 'L':
616      return CONST_OK_FOR_L (value);
617    case 'M':
618      return CONST_OK_FOR_M (value);
619    case 'N':
620      return CONST_OK_FOR_N (value);
621    case 'O':
622      return CONST_OK_FOR_O (value);
623    case 'P':
624      return CONST_OK_FOR_P (value);
625    default:
626      return false;
627    }
628}
629
630/* Implement CONST_DOUBLE_OK_FOR_LETTER_P.  */
631
632bool
633ia64_const_double_ok_for_letter_p (rtx value, char c)
634{
635  switch (c)
636    {
637    case 'G':
638      return CONST_DOUBLE_OK_FOR_G (value);
639    default:
640      return false;
641    }
642}
643
644/* Implement EXTRA_CONSTRAINT.  */
645
646bool
647ia64_extra_constraint (rtx value, char c)
648{
649  switch (c)
650    {
651    case 'Q':
652      /* Non-volatile memory for FP_REG loads/stores.  */
653      return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value);
654
655    case 'R':
656      /* 1..4 for shladd arguments.  */
657      return (GET_CODE (value) == CONST_INT
658	      && INTVAL (value) >= 1 && INTVAL (value) <= 4);
659
660    case 'S':
661      /* Non-post-inc memory for asms and other unsavory creatures.  */
662      return (GET_CODE (value) == MEM
663	      && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC
664	      && (reload_in_progress || memory_operand (value, VOIDmode)));
665
666    case 'T':
667      /* Symbol ref to small-address-area.  */
668      return small_addr_symbolic_operand (value, VOIDmode);
669
670    case 'U':
671      /* Vector zero.  */
672      return value == CONST0_RTX (GET_MODE (value));
673
674    case 'W':
675      /* An integer vector, such that conversion to an integer yields a
676	 value appropriate for an integer 'J' constraint.  */
677      if (GET_CODE (value) == CONST_VECTOR
678	  && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT)
679	{
680	  value = simplify_subreg (DImode, value, GET_MODE (value), 0);
681	  return ia64_const_ok_for_letter_p (INTVAL (value), 'J');
682	}
683      return false;
684
685    case 'Y':
686      /* A V2SF vector containing elements that satisfy 'G'.  */
687      return
688	(GET_CODE (value) == CONST_VECTOR
689	 && GET_MODE (value) == V2SFmode
690	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G')
691	 && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G'));
692
693    default:
694      return false;
695    }
696}
697
698/* Return 1 if the operands of a move are ok.  */
699
700int
701ia64_move_ok (rtx dst, rtx src)
702{
703  /* If we're under init_recog_no_volatile, we'll not be able to use
704     memory_operand.  So check the code directly and don't worry about
705     the validity of the underlying address, which should have been
706     checked elsewhere anyway.  */
707  if (GET_CODE (dst) != MEM)
708    return 1;
709  if (GET_CODE (src) == MEM)
710    return 0;
711  if (register_operand (src, VOIDmode))
712    return 1;
713
714  /* Otherwise, this must be a constant, and it must be either 0, 0.0, or 1.0.  */
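  /* (Constraint "G", and hence CONST_DOUBLE_OK_FOR_G, accepts 0.0 and 1.0,
     the values available in the hard-wired registers f0 and f1.)  */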
715  if (INTEGRAL_MODE_P (GET_MODE (dst)))
716    return src == const0_rtx;
717  else
718    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
719}
720
721/* Return 1 if the operands are ok for a floating point load pair.  */
722
723int
724ia64_load_pair_ok (rtx dst, rtx src)
725{
726  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
727    return 0;
728  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
729    return 0;
730  switch (GET_CODE (XEXP (src, 0)))
731    {
732    case REG:
733    case POST_INC:
734      break;
735    case POST_DEC:
736      return 0;
737    case POST_MODIFY:
738      {
739	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
740
741	if (GET_CODE (adjust) != CONST_INT
742	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
743	  return 0;
744      }
745      break;
746    default:
747      abort ();
748    }
749  return 1;
750}
751
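/* Return nonzero if exactly one of OP1 and OP2 satisfies
   basereg_operand.  */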
752int
753addp4_optimize_ok (rtx op1, rtx op2)
754{
755  return (basereg_operand (op1, GET_MODE(op1)) !=
756	  basereg_operand (op2, GET_MODE(op2)));
757}
758
759/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
760   Return the length of the field, or <= 0 on failure.  */
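/* For example, with ROP == 0x7f8 and RSHIFT == 3 the shift leaves
   op == 0xff, and exact_log2 (0xff + 1) yields a field length of 8.
   If the shifted mask is not a solid block of low-order 1s, op + 1 is
   not a power of two and exact_log2 returns a negative value.  */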
761
762int
763ia64_depz_field_mask (rtx rop, rtx rshift)
764{
765  unsigned HOST_WIDE_INT op = INTVAL (rop);
766  unsigned HOST_WIDE_INT shift = INTVAL (rshift);
767
768  /* Get rid of the zero bits we're shifting in.  */
769  op >>= shift;
770
771  /* We must now have a solid block of 1's at bit 0.  */
772  return exact_log2 (op + 1);
773}
774
775/* Return the TLS model to use for ADDR.  */
776
777static enum tls_model
778tls_symbolic_operand_type (rtx addr)
779{
780  enum tls_model tls_kind = 0;
781
782  if (GET_CODE (addr) == CONST)
783    {
784      if (GET_CODE (XEXP (addr, 0)) == PLUS
785	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
786        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
787    }
788  else if (GET_CODE (addr) == SYMBOL_REF)
789    tls_kind = SYMBOL_REF_TLS_MODEL (addr);
790
791  return tls_kind;
792}
793
794/* Return true if X is a constant that is valid for some immediate
795   field in an instruction.  */
796
797bool
798ia64_legitimate_constant_p (rtx x)
799{
800  switch (GET_CODE (x))
801    {
802    case CONST_INT:
803    case LABEL_REF:
804      return true;
805
806    case CONST_DOUBLE:
807      if (GET_MODE (x) == VOIDmode)
808	return true;
809      return CONST_DOUBLE_OK_FOR_G (x);
810
811    case CONST:
812    case SYMBOL_REF:
813      /* ??? Short term workaround for PR 28490.  We must make the code here
814	 match the code in ia64_expand_move and move_operand, even though they
815	 are both technically wrong.  */
816      if (tls_symbolic_operand_type (x) == 0)
817	{
818	  HOST_WIDE_INT addend = 0;
819	  rtx op = x;
820
821	  if (GET_CODE (op) == CONST
822	      && GET_CODE (XEXP (op, 0)) == PLUS
823	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
824	    {
825	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
826	      op = XEXP (XEXP (op, 0), 0);
827	    }
828
829          if (any_offset_symbol_operand (op, GET_MODE (op))
830              || function_operand (op, GET_MODE (op)))
831            return true;
832	  if (aligned_offset_symbol_operand (op, GET_MODE (op)))
833	    return (addend & 0x3fff) == 0;
834	  return false;
835	}
836      return false;
837
838    case CONST_VECTOR:
839      {
840	enum machine_mode mode = GET_MODE (x);
841
842	if (mode == V2SFmode)
843	  return ia64_extra_constraint (x, 'Y');
844
845	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
846		&& GET_MODE_SIZE (mode) <= 8);
847      }
848
849    default:
850      return false;
851    }
852}
853
854/* Don't allow TLS addresses to get spilled to memory.  */
855
856static bool
857ia64_cannot_force_const_mem (rtx x)
858{
859  return tls_symbolic_operand_type (x) != 0;
860}
861
862/* Expand a symbolic constant load.  */
863
864bool
865ia64_expand_load_address (rtx dest, rtx src)
866{
867  gcc_assert (GET_CODE (dest) == REG);
868
869  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
870     having to pointer-extend the value afterward.  Other forms of address
871     computation below are also more natural to compute as 64-bit quantities.
872     If we've been given an SImode destination register, change it.  */
873  if (GET_MODE (dest) != Pmode)
874    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest), 0);
875
876  if (TARGET_NO_PIC)
877    return false;
878  if (small_addr_symbolic_operand (src, VOIDmode))
879    return false;
880
881  if (TARGET_AUTO_PIC)
882    emit_insn (gen_load_gprel64 (dest, src));
883  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
884    emit_insn (gen_load_fptr (dest, src));
885  else if (sdata_symbolic_operand (src, VOIDmode))
886    emit_insn (gen_load_gprel (dest, src));
887  else
888    {
889      HOST_WIDE_INT addend = 0;
890      rtx tmp;
891
892      /* We did split constant offsets in ia64_expand_move, and we did try
893	 to keep them split in move_operand, but we also allowed reload to
894	 rematerialize arbitrary constants rather than spill the value to
895	 the stack and reload it.  So we have to be prepared here to split
896	 them apart again.  */
897      if (GET_CODE (src) == CONST)
898	{
899	  HOST_WIDE_INT hi, lo;
900
901	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
902	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
903	  hi = hi - lo;
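          /* The masking above sign-extends the low 14 bits of the offset:
             e.g. an offset of 0x12345 splits into lo == -0x1cbb and
             hi == 0x14000, so lo fits in a signed 14-bit add immediate
             while hi stays a multiple of 0x4000.  */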
904
905	  if (lo != 0)
906	    {
907	      addend = lo;
908	      src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
909	    }
910	}
911
912      tmp = gen_rtx_HIGH (Pmode, src);
913      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
914      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
915
916      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
917      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
918
919      if (addend)
920	{
921	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
922	  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
923	}
924    }
925
926  return true;
927}
928
929static GTY(()) rtx gen_tls_tga;
930static rtx
931gen_tls_get_addr (void)
932{
933  if (!gen_tls_tga)
934    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
935  return gen_tls_tga;
936}
937
938static GTY(()) rtx thread_pointer_rtx;
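/* Per the IA-64 software conventions, r13 ("tp") holds the thread
   pointer.  */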
939static rtx
940gen_thread_pointer (void)
941{
942  if (!thread_pointer_rtx)
943    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
944  return thread_pointer_rtx;
945}
946
947static rtx
948ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
949			 rtx orig_op1, HOST_WIDE_INT addend)
950{
951  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
952  rtx orig_op0 = op0;
953  HOST_WIDE_INT addend_lo, addend_hi;
954
955  switch (tls_kind)
956    {
957    case TLS_MODEL_GLOBAL_DYNAMIC:
958      start_sequence ();
959
960      tga_op1 = gen_reg_rtx (Pmode);
961      emit_insn (gen_load_dtpmod (tga_op1, op1));
962
963      tga_op2 = gen_reg_rtx (Pmode);
964      emit_insn (gen_load_dtprel (tga_op2, op1));
965
966      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
967					 LCT_CONST, Pmode, 2, tga_op1,
968					 Pmode, tga_op2, Pmode);
969
970      insns = get_insns ();
971      end_sequence ();
972
973      if (GET_MODE (op0) != Pmode)
974	op0 = tga_ret;
975      emit_libcall_block (insns, op0, tga_ret, op1);
976      break;
977
978    case TLS_MODEL_LOCAL_DYNAMIC:
979      /* ??? This isn't the completely proper way to do local-dynamic.
980	 If the call to __tls_get_addr is used only by a single symbol,
981	 then we should (somehow) move the dtprel to the second arg
982	 to avoid the extra add.  */
983      start_sequence ();
984
985      tga_op1 = gen_reg_rtx (Pmode);
986      emit_insn (gen_load_dtpmod (tga_op1, op1));
987
988      tga_op2 = const0_rtx;
989
990      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
991					 LCT_CONST, Pmode, 2, tga_op1,
992					 Pmode, tga_op2, Pmode);
993
994      insns = get_insns ();
995      end_sequence ();
996
997      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
998				UNSPEC_LD_BASE);
999      tmp = gen_reg_rtx (Pmode);
1000      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
1001
1002      if (!register_operand (op0, Pmode))
1003	op0 = gen_reg_rtx (Pmode);
1004      if (TARGET_TLS64)
1005	{
1006	  emit_insn (gen_load_dtprel (op0, op1));
1007	  emit_insn (gen_adddi3 (op0, tmp, op0));
1008	}
1009      else
1010	emit_insn (gen_add_dtprel (op0, op1, tmp));
1011      break;
1012
1013    case TLS_MODEL_INITIAL_EXEC:
1014      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1015      addend_hi = addend - addend_lo;
1016
1017      op1 = plus_constant (op1, addend_hi);
1018      addend = addend_lo;
1019
1020      tmp = gen_reg_rtx (Pmode);
1021      emit_insn (gen_load_tprel (tmp, op1));
1022
1023      if (!register_operand (op0, Pmode))
1024	op0 = gen_reg_rtx (Pmode);
1025      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
1026      break;
1027
1028    case TLS_MODEL_LOCAL_EXEC:
1029      if (!register_operand (op0, Pmode))
1030	op0 = gen_reg_rtx (Pmode);
1031
1032      op1 = orig_op1;
1033      addend = 0;
1034      if (TARGET_TLS64)
1035	{
1036	  emit_insn (gen_load_tprel (op0, op1));
1037	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
1038	}
1039      else
1040	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
1041      break;
1042
1043    default:
1044      gcc_unreachable ();
1045    }
1046
1047  if (addend)
1048    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
1049			       orig_op0, 1, OPTAB_DIRECT);
1050  if (orig_op0 == op0)
1051    return NULL_RTX;
1052  if (GET_MODE (orig_op0) == Pmode)
1053    return op0;
1054  return gen_lowpart (GET_MODE (orig_op0), op0);
1055}
1056
1057rtx
1058ia64_expand_move (rtx op0, rtx op1)
1059{
1060  enum machine_mode mode = GET_MODE (op0);
1061
1062  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
1063    op1 = force_reg (mode, op1);
1064
1065  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
1066    {
1067      HOST_WIDE_INT addend = 0;
1068      enum tls_model tls_kind;
1069      rtx sym = op1;
1070
1071      if (GET_CODE (op1) == CONST
1072	  && GET_CODE (XEXP (op1, 0)) == PLUS
1073	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
1074	{
1075	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
1076	  sym = XEXP (XEXP (op1, 0), 0);
1077	}
1078
1079      tls_kind = tls_symbolic_operand_type (sym);
1080      if (tls_kind)
1081	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
1082
1083      if (any_offset_symbol_operand (sym, mode))
1084	addend = 0;
1085      else if (aligned_offset_symbol_operand (sym, mode))
1086	{
1087	  HOST_WIDE_INT addend_lo, addend_hi;
1088
1089	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
1090	  addend_hi = addend - addend_lo;
1091
1092	  if (addend_lo != 0)
1093	    {
1094	      op1 = plus_constant (sym, addend_hi);
1095	      addend = addend_lo;
1096	    }
1097	  else
1098	    addend = 0;
1099	}
1100      else
1101	op1 = sym;
1102
1103      if (reload_completed)
1104	{
1105	  /* We really should have taken care of this offset earlier.  */
1106	  gcc_assert (addend == 0);
1107	  if (ia64_expand_load_address (op0, op1))
1108	    return NULL_RTX;
1109	}
1110
1111      if (addend)
1112	{
1113	  rtx subtarget = no_new_pseudos ? op0 : gen_reg_rtx (mode);
1114
1115	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
1116
1117	  op1 = expand_simple_binop (mode, PLUS, subtarget,
1118				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
1119	  if (op0 == op1)
1120	    return NULL_RTX;
1121	}
1122    }
1123
1124  return op1;
1125}
1126
1127/* Split a move from OP1 to OP0 conditional on COND.  */
1128
1129void
1130ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
1131{
1132  rtx insn, first = get_last_insn ();
1133
1134  emit_move_insn (op0, op1);
1135
1136  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
1137    if (INSN_P (insn))
1138      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
1139					  PATTERN (insn));
1140}
1141
1142/* Split a post-reload TImode or TFmode reference into two DImode
1143   components.  This is made extra difficult by the fact that we do
1144   not get any scratch registers to work with, because reload cannot
1145   be prevented from giving us a scratch that overlaps the register
1146   pair involved.  So instead, when addressing memory, we tweak the
1147   pointer register up and back down with POST_INCs.  Or up and not
1148   back down when we can get away with it.
1149
1150   REVERSED is true when the loads must be done in reversed order
1151   (high word first) for correctness.  DEAD is true when the pointer
1152   dies with the second insn we generate and therefore the second
1153   address must not carry a postmodify.
1154
1155   May return an insn which is to be emitted after the moves.  */
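/* For example, a TImode load from (mem (reg R)) is split into a
   DImode load through a POST_INC of R followed by a DImode load
   through a POST_DEC of R, leaving R unchanged; when DEAD is set the
   second load simply uses R without a postmodify.  */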
1156
1157static rtx
1158ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
1159{
1160  rtx fixup = 0;
1161
1162  switch (GET_CODE (in))
1163    {
1164    case REG:
1165      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
1166      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
1167      break;
1168
1169    case CONST_INT:
1170    case CONST_DOUBLE:
1171      /* Cannot occur reversed.  */
1172      gcc_assert (!reversed);
1173
1174      if (GET_MODE (in) != TFmode)
1175	split_double (in, &out[0], &out[1]);
1176      else
1177	/* split_double does not understand how to split a TFmode
1178	   quantity into a pair of DImode constants.  */
1179	{
1180	  REAL_VALUE_TYPE r;
1181	  unsigned HOST_WIDE_INT p[2];
1182	  long l[4];  /* TFmode is 128 bits */
1183
1184	  REAL_VALUE_FROM_CONST_DOUBLE (r, in);
1185	  real_to_target (l, &r, TFmode);
1186
1187	  if (FLOAT_WORDS_BIG_ENDIAN)
1188	    {
1189	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
1190	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
1191	    }
1192	  else
1193	    {
1194	      p[0] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
1195	      p[1] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
1196	    }
1197	  out[0] = GEN_INT (p[0]);
1198	  out[1] = GEN_INT (p[1]);
1199	}
1200      break;
1201
1202    case MEM:
1203      {
1204	rtx base = XEXP (in, 0);
1205	rtx offset;
1206
1207	switch (GET_CODE (base))
1208	  {
1209	  case REG:
1210	    if (!reversed)
1211	      {
1212		out[0] = adjust_automodify_address
1213		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1214		out[1] = adjust_automodify_address
1215		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
1216	      }
1217	    else
1218	      {
1219		/* Reversal requires a pre-increment, which can only
1220		   be done as a separate insn.  */
1221		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
1222		out[0] = adjust_automodify_address
1223		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
1224		out[1] = adjust_address (in, DImode, 0);
1225	      }
1226	    break;
1227
1228	  case POST_INC:
1229	    gcc_assert (!reversed && !dead);
1230
1231	    /* Just do the increment in two steps.  */
1232	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
1233	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
1234	    break;
1235
1236	  case POST_DEC:
1237	    gcc_assert (!reversed && !dead);
1238
1239	    /* Add 8, subtract 24.  */
1240	    base = XEXP (base, 0);
1241	    out[0] = adjust_automodify_address
1242	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1243	    out[1] = adjust_automodify_address
1244	      (in, DImode,
1245	       gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
1246	       8);
1247	    break;
1248
1249	  case POST_MODIFY:
1250	    gcc_assert (!reversed && !dead);
1251
1252	    /* Extract and adjust the modification.  This case is
1253	       trickier than the others, because we might have an
1254	       index register, or we might have a combined offset that
1255	       doesn't fit a signed 9-bit displacement field.  We can
1256	       assume the incoming expression is already legitimate.  */
1257	    offset = XEXP (base, 1);
1258	    base = XEXP (base, 0);
1259
1260	    out[0] = adjust_automodify_address
1261	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
1262
1263	    if (GET_CODE (XEXP (offset, 1)) == REG)
1264	      {
1265		/* Can't adjust the postmodify to match.  Emit the
1266		   original, then a separate addition insn.  */
1267		out[1] = adjust_automodify_address (in, DImode, 0, 8);
1268		fixup = gen_adddi3 (base, base, GEN_INT (-8));
1269	      }
1270	    else
1271	      {
1272		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
1273		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
1274		  {
1275		    /* Again the postmodify cannot be made to match,
1276		       but in this case it's more efficient to get rid
1277		       of the postmodify entirely and fix up with an
1278		       add insn.  */
1279		    out[1] = adjust_automodify_address (in, DImode, base, 8);
1280		    fixup = gen_adddi3
1281		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
1282		  }
1283		else
1284		  {
1285		    /* Combined offset still fits in the displacement field.
1286		       (We cannot overflow it at the high end.)  */
1287		    out[1] = adjust_automodify_address
1288		      (in, DImode, gen_rtx_POST_MODIFY
1289		       (Pmode, base, gen_rtx_PLUS
1290			(Pmode, base,
1291			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
1292		       8);
1293		  }
1294	      }
1295	    break;
1296
1297	  default:
1298	    gcc_unreachable ();
1299	  }
1300	break;
1301      }
1302
1303    default:
1304      gcc_unreachable ();
1305    }
1306
1307  return fixup;
1308}
1309
1310/* Split a TImode or TFmode move instruction after reload.
1311   This is used by *movtf_internal and *movti_internal.  */
1312void
1313ia64_split_tmode_move (rtx operands[])
1314{
1315  rtx in[2], out[2], insn;
1316  rtx fixup[2];
1317  bool dead = false;
1318  bool reversed = false;
1319
1320  /* It is possible for reload to decide to overwrite a pointer with
1321     the value it points to.  In that case we have to do the loads in
1322     the appropriate order so that the pointer is not destroyed too
1323     early.  Also we must not generate a postmodify for that second
1324     load, or rws_access_regno will die.  */
1325  if (GET_CODE (operands[1]) == MEM
1326      && reg_overlap_mentioned_p (operands[0], operands[1]))
1327    {
1328      rtx base = XEXP (operands[1], 0);
1329      while (GET_CODE (base) != REG)
1330	base = XEXP (base, 0);
1331
1332      if (REGNO (base) == REGNO (operands[0]))
1333	reversed = true;
1334      dead = true;
1335    }
1336  /* Another reason to do the moves in reversed order is if the first
1337     element of the target register pair is also the second element of
1338     the source register pair.  */
1339  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
1340      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
1341    reversed = true;
1342
1343  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
1344  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);
1345
1346#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
1347  if (GET_CODE (EXP) == MEM						\
1348      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
1349	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
1350	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
1351    REG_NOTES (INSN) = gen_rtx_EXPR_LIST (REG_INC,			\
1352					  XEXP (XEXP (EXP, 0), 0),	\
1353					  REG_NOTES (INSN))
1354
1355  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
1356  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
1357  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);
1358
1359  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
1360  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
1361  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);
1362
1363  if (fixup[0])
1364    emit_insn (fixup[0]);
1365  if (fixup[1])
1366    emit_insn (fixup[1]);
1367
1368#undef MAYBE_ADD_REG_INC_NOTE
1369}
1370
1371/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
1372   through memory plus an extra GR scratch register.  Except that you can
1373   either get the first from SECONDARY_MEMORY_NEEDED or the second from
1374   SECONDARY_RELOAD_CLASS, but not both.
1375
1376   We got into problems in the first place by allowing a construct like
1377   (subreg:XF (reg:TI)), which we got from a union containing a long double.
1378   This solution attempts to prevent this situation from occurring.  When
1379   we see something like the above, we spill the inner register to memory.  */
1380
1381static rtx
1382spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
1383{
1384  if (GET_CODE (in) == SUBREG
1385      && GET_MODE (SUBREG_REG (in)) == TImode
1386      && GET_CODE (SUBREG_REG (in)) == REG)
1387    {
1388      rtx memt = assign_stack_temp (TImode, 16, 0);
1389      emit_move_insn (memt, SUBREG_REG (in));
1390      return adjust_address (memt, mode, 0);
1391    }
1392  else if (force && GET_CODE (in) == REG)
1393    {
1394      rtx memx = assign_stack_temp (mode, 16, 0);
1395      emit_move_insn (memx, in);
1396      return memx;
1397    }
1398  else
1399    return in;
1400}
1401
1402/* Expand the movxf or movrf pattern (MODE says which) with the given
1403   OPERANDS, returning true if the pattern should then invoke
1404   DONE.  */
1405
1406bool
1407ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
1408{
1409  rtx op0 = operands[0];
1410
1411  if (GET_CODE (op0) == SUBREG)
1412    op0 = SUBREG_REG (op0);
1413
1414  /* We must support XFmode loads into general registers for stdarg/vararg,
1415     unprototyped calls, and a rare case where a long double is passed as
1416     an argument after a float HFA fills the FP registers.  We split them into
1417     DImode loads for convenience.  We also need to support XFmode stores
1418     for the last case.  This case does not happen for stdarg/vararg routines,
1419     because we do a block store to memory of unnamed arguments.  */
1420
1421  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
1422    {
1423      rtx out[2];
1424
1425      /* We're hoping to transform everything that deals with XFmode
1426	 quantities and GR registers early in the compiler.  */
1427      gcc_assert (!no_new_pseudos);
1428
1429      /* Struct to register can just use TImode instead.  */
1430      if ((GET_CODE (operands[1]) == SUBREG
1431	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
1432	  || (GET_CODE (operands[1]) == REG
1433	      && GR_REGNO_P (REGNO (operands[1]))))
1434	{
1435	  rtx op1 = operands[1];
1436
1437	  if (GET_CODE (op1) == SUBREG)
1438	    op1 = SUBREG_REG (op1);
1439	  else
1440	    op1 = gen_rtx_REG (TImode, REGNO (op1));
1441
1442	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
1443	  return true;
1444	}
1445
1446      if (GET_CODE (operands[1]) == CONST_DOUBLE)
1447	{
1448	  /* Don't word-swap when reading in the constant.  */
1449	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
1450			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
1451					   0, mode));
1452	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
1453			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
1454					   0, mode));
1455	  return true;
1456	}
1457
1458      /* If the quantity is in a register not known to be GR, spill it.  */
1459      if (register_operand (operands[1], mode))
1460	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
1461
1462      gcc_assert (GET_CODE (operands[1]) == MEM);
1463
1464      /* Don't word-swap when reading in the value.  */
1465      out[0] = gen_rtx_REG (DImode, REGNO (op0));
1466      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
1467
1468      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
1469      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
1470      return true;
1471    }
1472
1473  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
1474    {
1475      /* We're hoping to transform everything that deals with XFmode
1476	 quantities and GR registers early in the compiler.  */
1477      gcc_assert (!no_new_pseudos);
1478
1479      /* Op0 can't be a GR_REG here, as that case is handled above.
1480	 If op0 is a register, then we spill op1, so that we now have a
1481	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
1482	 to force the spill.  */
1483      if (register_operand (operands[0], mode))
1484	{
1485	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
1486	  op1 = gen_rtx_SUBREG (mode, op1, 0);
1487	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
1488	}
1489
1490      else
1491	{
1492	  rtx in[2];
1493
1494	  gcc_assert (GET_CODE (operands[0]) == MEM);
1495
1496	  /* Don't word-swap when writing out the value.  */
1497	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
1498	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
1499
1500	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
1501	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
1502	  return true;
1503	}
1504    }
1505
1506  if (!reload_in_progress && !reload_completed)
1507    {
1508      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
1509
1510      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
1511	{
1512	  rtx memt, memx, in = operands[1];
1513	  if (CONSTANT_P (in))
1514	    in = validize_mem (force_const_mem (mode, in));
1515	  if (GET_CODE (in) == MEM)
1516	    memt = adjust_address (in, TImode, 0);
1517	  else
1518	    {
1519	      memt = assign_stack_temp (TImode, 16, 0);
1520	      memx = adjust_address (memt, mode, 0);
1521	      emit_move_insn (memx, in);
1522	    }
1523	  emit_move_insn (op0, memt);
1524	  return true;
1525	}
1526
1527      if (!ia64_move_ok (operands[0], operands[1]))
1528	operands[1] = force_reg (mode, operands[1]);
1529    }
1530
1531  return false;
1532}
1533
1534/* Emit comparison instruction if necessary, returning the expression
1535   that holds the compare result in the proper mode.  */
1536
1537static GTY(()) rtx cmptf_libfunc;
1538
1539rtx
1540ia64_expand_compare (enum rtx_code code, enum machine_mode mode)
1541{
1542  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
1543  rtx cmp;
1544
1545  /* If we have a BImode input, then we already have a compare result, and
1546     do not need to emit another comparison.  */
1547  if (GET_MODE (op0) == BImode)
1548    {
1549      gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
1550      cmp = op0;
1551    }
1552  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
1553     magic number as its third argument indicating what to do.
1554     The return value is an integer to be compared against zero.  */
1555  else if (GET_MODE (op0) == TFmode)
1556    {
1557      enum qfcmp_magic {
1558	QCMP_INV = 1,	/* Raise FP_INVALID on SNaN as a side effect.  */
1559	QCMP_UNORD = 2,
1560	QCMP_EQ = 4,
1561	QCMP_LT = 8,
1562	QCMP_GT = 16
1563      } magic;
1564      enum rtx_code ncode;
1565      rtx ret, insns;
1566
1567      gcc_assert (cmptf_libfunc && GET_MODE (op1) == TFmode);
1568      switch (code)
1569	{
1570	  /* 1 = equal, 0 = not equal.  Equality operators do
1571	     not raise FP_INVALID when given an SNaN operand.  */
1572	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
1573	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
1574	  /* isunordered() from C99.  */
1575	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
1576	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
1577	  /* Relational operators raise FP_INVALID when given
1578	     an SNaN operand.  */
1579	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
1580	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1581	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
1582	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
1583	  /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
1584	     Expanders for buneq etc. would have to be added to ia64.md
1585	     for this to be useful.  */
1586	default: gcc_unreachable ();
1587	}
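      /* So, for example, the LE case becomes a call
	 _U_Qfcmp (op0, op1, QCMP_LT | QCMP_EQ | QCMP_INV), whose
	 integer result is then tested against zero with NE.  */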
1588
1589      start_sequence ();
1590
1591      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
1592				     op0, TFmode, op1, TFmode,
1593				     GEN_INT (magic), DImode);
1594      cmp = gen_reg_rtx (BImode);
1595      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1596			      gen_rtx_fmt_ee (ncode, BImode,
1597					      ret, const0_rtx)));
1598
1599      insns = get_insns ();
1600      end_sequence ();
1601
1602      emit_libcall_block (insns, cmp, cmp,
1603			  gen_rtx_fmt_ee (code, BImode, op0, op1));
1604      code = NE;
1605    }
1606  else
1607    {
1608      cmp = gen_reg_rtx (BImode);
1609      emit_insn (gen_rtx_SET (VOIDmode, cmp,
1610			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
1611      code = NE;
1612    }
1613
1614  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
1615}
1616
1617/* Generate an integral vector comparison.  Return true if the condition has
1618   been reversed, and so the sense of the comparison should be inverted.  */
1619
1620static bool
1621ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
1622			    rtx dest, rtx op0, rtx op1)
1623{
1624  bool negate = false;
1625  rtx x;
1626
1627  /* Canonicalize the comparison to EQ, GT, GTU.  */
1628  switch (code)
1629    {
1630    case EQ:
1631    case GT:
1632    case GTU:
1633      break;
1634
1635    case NE:
1636    case LE:
1637    case LEU:
1638      code = reverse_condition (code);
1639      negate = true;
1640      break;
1641
1642    case GE:
1643    case GEU:
1644      code = reverse_condition (code);
1645      negate = true;
1646      /* FALLTHRU */
1647
1648    case LT:
1649    case LTU:
1650      code = swap_condition (code);
1651      x = op0, op0 = op1, op1 = x;
1652      break;
1653
1654    default:
1655      gcc_unreachable ();
1656    }
1657
1658  /* Unsigned parallel compare is not supported by the hardware.  Play some
1659     tricks to turn this into a signed comparison against 0.  */
1660  if (code == GTU)
1661    {
1662      switch (mode)
1663	{
1664	case V2SImode:
1665	  {
1666	    rtx t1, t2, mask;
1667
1668	    /* Perform a parallel modulo subtraction.  */
1669	    t1 = gen_reg_rtx (V2SImode);
1670	    emit_insn (gen_subv2si3 (t1, op0, op1));
1671
1672	    /* Extract the original sign bit of op0.  */
1673	    mask = GEN_INT (-0x80000000);
1674	    mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
1675	    mask = force_reg (V2SImode, mask);
1676	    t2 = gen_reg_rtx (V2SImode);
1677	    emit_insn (gen_andv2si3 (t2, op0, mask));
1678
1679	    /* XOR it back into the result of the subtraction.  This results
1680	       in the sign bit set iff we saw unsigned underflow.  */
1681	    x = gen_reg_rtx (V2SImode);
1682	    emit_insn (gen_xorv2si3 (x, t1, t2));
1683
1684	    code = GT;
1685	    op0 = x;
1686	    op1 = CONST0_RTX (mode);
1687	  }
1688	  break;
1689
1690	case V8QImode:
1691	case V4HImode:
1692	  /* Perform a parallel unsigned saturating subtraction.  */
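	  /* The saturating difference op0 - op1 is nonzero exactly when
	     op0 >u op1, so the GTU test becomes an EQ-with-zero test whose
	     sense is then flipped via NEGATE below.  */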
1693	  x = gen_reg_rtx (mode);
1694	  emit_insn (gen_rtx_SET (VOIDmode, x,
1695				  gen_rtx_US_MINUS (mode, op0, op1)));
1696
1697	  code = EQ;
1698	  op0 = x;
1699	  op1 = CONST0_RTX (mode);
1700	  negate = !negate;
1701	  break;
1702
1703	default:
1704	  gcc_unreachable ();
1705	}
1706    }
1707
1708  x = gen_rtx_fmt_ee (code, mode, op0, op1);
1709  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
1710
1711  return negate;
1712}
1713
1714/* Emit an integral vector conditional move.  */
1715
1716void
1717ia64_expand_vecint_cmov (rtx operands[])
1718{
1719  enum machine_mode mode = GET_MODE (operands[0]);
1720  enum rtx_code code = GET_CODE (operands[3]);
1721  bool negate;
1722  rtx cmp, x, ot, of;
1723
1724  cmp = gen_reg_rtx (mode);
1725  negate = ia64_expand_vecint_compare (code, mode, cmp,
1726				       operands[4], operands[5]);
1727
1728  ot = operands[1+negate];
1729  of = operands[2-negate];
1730
1731  if (ot == CONST0_RTX (mode))
1732    {
1733      if (of == CONST0_RTX (mode))
1734	{
1735	  emit_move_insn (operands[0], ot);
1736	  return;
1737	}
1738
1739      x = gen_rtx_NOT (mode, cmp);
1740      x = gen_rtx_AND (mode, x, of);
1741      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1742    }
1743  else if (of == CONST0_RTX (mode))
1744    {
1745      x = gen_rtx_AND (mode, cmp, ot);
1746      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1747    }
1748  else
1749    {
1750      rtx t, f;
1751
1752      t = gen_reg_rtx (mode);
1753      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
1754      emit_insn (gen_rtx_SET (VOIDmode, t, x));
1755
1756      f = gen_reg_rtx (mode);
1757      x = gen_rtx_NOT (mode, cmp);
1758      x = gen_rtx_AND (mode, x, operands[2-negate]);
1759      emit_insn (gen_rtx_SET (VOIDmode, f, x));
1760
1761      x = gen_rtx_IOR (mode, t, f);
1762      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
1763    }
1764}
1765
1766/* Emit an integral vector min or max operation.  Return true if all done.  */
1767
1768bool
1769ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
1770			   rtx operands[])
1771{
1772  rtx xops[6];
1773
1774  /* These four combinations are supported directly.  */
1775  if (mode == V8QImode && (code == UMIN || code == UMAX))
1776    return false;
1777  if (mode == V4HImode && (code == SMIN || code == SMAX))
1778    return false;
1779
1780  /* This combination can be implemented with only saturating subtraction.  */
1781  if (mode == V4HImode && code == UMAX)
1782    {
1783      rtx x, tmp = gen_reg_rtx (mode);
1784
1785      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1786      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1787
1788      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1789      return true;
1790    }
1791
1792  /* Everything else implemented via vector comparisons.  */
1793  xops[0] = operands[0];
1794  xops[4] = xops[1] = operands[1];
1795  xops[5] = xops[2] = operands[2];
1796
1797  switch (code)
1798    {
1799    case UMIN:
1800      code = LTU;
1801      break;
1802    case UMAX:
1803      code = GTU;
1804      break;
1805    case SMIN:
1806      code = LT;
1807      break;
1808    case SMAX:
1809      code = GT;
1810      break;
1811    default:
1812      gcc_unreachable ();
1813    }
1814  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1815
1816  ia64_expand_vecint_cmov (xops);
1817  return true;
1818}
1819
1820/* Emit an integral vector widening sum operation.  */
1821
1822void
1823ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1824{
1825  rtx l, h, x, s;
1826  enum machine_mode wmode, mode;
1827  rtx (*unpack_l) (rtx, rtx, rtx);
1828  rtx (*unpack_h) (rtx, rtx, rtx);
1829  rtx (*plus) (rtx, rtx, rtx);
1830
1831  wmode = GET_MODE (operands[0]);
1832  mode = GET_MODE (operands[1]);
1833
1834  switch (mode)
1835    {
1836    case V8QImode:
1837      unpack_l = gen_unpack1_l;
1838      unpack_h = gen_unpack1_h;
1839      plus = gen_addv4hi3;
1840      break;
1841    case V4HImode:
1842      unpack_l = gen_unpack2_l;
1843      unpack_h = gen_unpack2_h;
1844      plus = gen_addv2si3;
1845      break;
1846    default:
1847      gcc_unreachable ();
1848    }
1849
1850  /* Fill in x with the sign extension of each element in op1.  */
1851  if (unsignedp)
1852    x = CONST0_RTX (mode);
1853  else
1854    {
1855      bool neg;
1856
1857      x = gen_reg_rtx (mode);
1858
1859      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1860					CONST0_RTX (mode));
1861      gcc_assert (!neg);
1862    }
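  /* Each element of X is now either all zeros or all ones -- exactly the
     sign-extension half that the unpack insns below need in order to widen
     each element of op1.  */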
1863
1864  l = gen_reg_rtx (wmode);
1865  h = gen_reg_rtx (wmode);
1866  s = gen_reg_rtx (wmode);
1867
1868  emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1869  emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1870  emit_insn (plus (s, l, operands[2]));
1871  emit_insn (plus (operands[0], h, s));
1872}
1873
1874/* Emit a signed or unsigned V8QI dot product operation.  */
1875
1876void
1877ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1878{
1879  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1880
1881  /* Fill in x1 and x2 with the sign extension of each element.  */
1882  if (unsignedp)
1883    x1 = x2 = CONST0_RTX (V8QImode);
1884  else
1885    {
1886      bool neg;
1887
1888      x1 = gen_reg_rtx (V8QImode);
1889      x2 = gen_reg_rtx (V8QImode);
1890
1891      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
1892					CONST0_RTX (V8QImode));
1893      gcc_assert (!neg);
1894      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
1895					CONST0_RTX (V8QImode));
1896      gcc_assert (!neg);
1897    }
1898
1899  l1 = gen_reg_rtx (V4HImode);
1900  l2 = gen_reg_rtx (V4HImode);
1901  h1 = gen_reg_rtx (V4HImode);
1902  h2 = gen_reg_rtx (V4HImode);
1903
1904  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
1905  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
1906  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
1907  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
1908
1909  p1 = gen_reg_rtx (V2SImode);
1910  p2 = gen_reg_rtx (V2SImode);
1911  p3 = gen_reg_rtx (V2SImode);
1912  p4 = gen_reg_rtx (V2SImode);
1913  emit_insn (gen_pmpy2_r (p1, l1, l2));
1914  emit_insn (gen_pmpy2_l (p2, l1, l2));
1915  emit_insn (gen_pmpy2_r (p3, h1, h2));
1916  emit_insn (gen_pmpy2_l (p4, h1, h2));
1917
1918  s1 = gen_reg_rtx (V2SImode);
1919  s2 = gen_reg_rtx (V2SImode);
1920  s3 = gen_reg_rtx (V2SImode);
1921  emit_insn (gen_addv2si3 (s1, p1, p2));
1922  emit_insn (gen_addv2si3 (s2, p3, p4));
1923  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
1924  emit_insn (gen_addv2si3 (operands[0], s2, s3));
1925}
1926
1927/* Emit the appropriate sequence for a call.  */
1928
1929void
1930ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
1931		  int sibcall_p)
1932{
1933  rtx insn, b0;
1934
1935  addr = XEXP (addr, 0);
1936  addr = convert_memory_address (DImode, addr);
1937  b0 = gen_rtx_REG (DImode, R_BR (0));
1938
1939  /* ??? Should do this for functions known to bind local too.  */
1940  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
1941    {
1942      if (sibcall_p)
1943	insn = gen_sibcall_nogp (addr);
1944      else if (! retval)
1945	insn = gen_call_nogp (addr, b0);
1946      else
1947	insn = gen_call_value_nogp (retval, addr, b0);
1948      insn = emit_call_insn (insn);
1949    }
1950  else
1951    {
1952      if (sibcall_p)
1953	insn = gen_sibcall_gp (addr);
1954      else if (! retval)
1955	insn = gen_call_gp (addr, b0);
1956      else
1957	insn = gen_call_value_gp (retval, addr, b0);
1958      insn = emit_call_insn (insn);
1959
1960      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
1961    }
1962
1963  if (sibcall_p)
1964    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
1965}
1966
1967void
1968ia64_reload_gp (void)
1969{
1970  rtx tmp;
1971
1972  if (current_frame_info.reg_save_gp)
1973    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
1974  else
1975    {
1976      HOST_WIDE_INT offset;
1977
1978      offset = (current_frame_info.spill_cfa_off
1979	        + current_frame_info.spill_size);
1980      if (frame_pointer_needed)
1981        {
1982          tmp = hard_frame_pointer_rtx;
1983          offset = -offset;
1984        }
1985      else
1986        {
1987          tmp = stack_pointer_rtx;
1988          offset = current_frame_info.total_size - offset;
1989        }
1990
1991      if (CONST_OK_FOR_I (offset))
1992        emit_insn (gen_adddi3 (pic_offset_table_rtx,
1993			       tmp, GEN_INT (offset)));
1994      else
1995        {
1996          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
1997          emit_insn (gen_adddi3 (pic_offset_table_rtx,
1998			         pic_offset_table_rtx, tmp));
1999        }
2000
2001      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
2002    }
2003
2004  emit_move_insn (pic_offset_table_rtx, tmp);
2005}
2006
2007void
2008ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
2009		 rtx scratch_b, int noreturn_p, int sibcall_p)
2010{
2011  rtx insn;
2012  bool is_desc = false;
2013
2014  /* If we find we're calling through a register, then we're actually
2015     calling through a descriptor, so load up the values.  */
2016  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
2017    {
2018      rtx tmp;
2019      bool addr_dead_p;
2020
2021      /* ??? We are currently constrained to *not* use peep2, because
2022	 we can legitimately change the global lifetime of the GP
2023	 (in the form of killing where previously live).  This is
2024	 because a call through a descriptor doesn't use the previous
2025	 value of the GP, while a direct call does, and we do not
2026	 commit to either form until the split here.
2027
2028	 That said, this means that we lack precise life info for
2029	 whether ADDR is dead after this call.  This is not terribly
2030	 important, since we can fix things up essentially for free
2031	 with the POST_DEC below, but it's nice to not use it when we
2032	 can immediately tell it's not necessary.  */
2033      addr_dead_p = ((noreturn_p || sibcall_p
2034		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
2035					    REGNO (addr)))
2036		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
2037
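      /* An ia64 function descriptor is a pair of 8-byte words: the entry
	 point address followed by the gp value for the callee.  The
	 POST_INC below steps ADDR from the first word to the second.  */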
2038      /* Load the code address into scratch_b.  */
2039      tmp = gen_rtx_POST_INC (Pmode, addr);
2040      tmp = gen_rtx_MEM (Pmode, tmp);
2041      emit_move_insn (scratch_r, tmp);
2042      emit_move_insn (scratch_b, scratch_r);
2043
2044      /* Load the GP address.  If ADDR is not dead here, then we must
2045	 revert the change made above via the POST_INCREMENT.  */
2046      if (!addr_dead_p)
2047	tmp = gen_rtx_POST_DEC (Pmode, addr);
2048      else
2049	tmp = addr;
2050      tmp = gen_rtx_MEM (Pmode, tmp);
2051      emit_move_insn (pic_offset_table_rtx, tmp);
2052
2053      is_desc = true;
2054      addr = scratch_b;
2055    }
2056
2057  if (sibcall_p)
2058    insn = gen_sibcall_nogp (addr);
2059  else if (retval)
2060    insn = gen_call_value_nogp (retval, addr, retaddr);
2061  else
2062    insn = gen_call_nogp (addr, retaddr);
2063  emit_call_insn (insn);
2064
2065  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
2066    ia64_reload_gp ();
2067}
2068
2069/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
2070
2071   This differs from the generic code in that we know about the zero-extending
2072   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
2073   also know that ld.acq+cmpxchg.rel equals a full barrier.
2074
2075   The loop we want to generate looks like
2076
2077	cmp_reg = mem;
2078      label:
2079        old_reg = cmp_reg;
2080	new_reg = cmp_reg op val;
2081	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
2082	if (cmp_reg != old_reg)
2083	  goto label;
2084
2085   Note that we only do the plain load from memory once.  Subsequent
2086   iterations use the value loaded by the compare-and-swap pattern.  */
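/* A rough sketch of what this expands to for, say, a DImode atomic AND
   (register numbers and branch hints are illustrative only; stops and
   bundle templates are omitted):

	ld8.acq      r32 = [r14]
     .Lretry:
	mov          r33 = r32		// old_reg
	mov          ar.ccv = r32
	and          r34 = r32, r35	// new_reg = cmp_reg & val
	cmpxchg8.rel r32 = [r14], r34, ar.ccv
	cmp.ne       p6, p7 = r32, r33
   (p6)	br.cond.spnt .Lretry  */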
2087
2088void
2089ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
2090		       rtx old_dst, rtx new_dst)
2091{
2092  enum machine_mode mode = GET_MODE (mem);
2093  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
2094  enum insn_code icode;
2095
2096  /* Special case for using fetchadd.  */
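  /* (The ia64 fetchadd instruction can only encode the increments
     -16, -8, -4, -1, 1, 4, 8 and 16, which is what the fetchadd_operand
     predicate is assumed to check for here.)  */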
2097  if ((mode == SImode || mode == DImode)
2098      && (code == PLUS || code == MINUS)
2099      && fetchadd_operand (val, mode))
2100    {
2101      if (code == MINUS)
2102	val = GEN_INT (-INTVAL (val));
2103
2104      if (!old_dst)
2105        old_dst = gen_reg_rtx (mode);
2106
2107      emit_insn (gen_memory_barrier ());
2108
2109      if (mode == SImode)
2110	icode = CODE_FOR_fetchadd_acq_si;
2111      else
2112	icode = CODE_FOR_fetchadd_acq_di;
2113      emit_insn (GEN_FCN (icode) (old_dst, mem, val));
2114
2115      if (new_dst)
2116	{
2117	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
2118					 true, OPTAB_WIDEN);
2119	  if (new_reg != new_dst)
2120	    emit_move_insn (new_dst, new_reg);
2121	}
2122      return;
2123    }
2124
2125  /* Because of the volatile mem read, we get an ld.acq, which is the
2126     front half of the full barrier.  The end half is the cmpxchg.rel.  */
2127  gcc_assert (MEM_VOLATILE_P (mem));
2128
2129  old_reg = gen_reg_rtx (DImode);
2130  cmp_reg = gen_reg_rtx (DImode);
2131  label = gen_label_rtx ();
2132
2133  if (mode != DImode)
2134    {
2135      val = simplify_gen_subreg (DImode, val, mode, 0);
2136      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
2137    }
2138  else
2139    emit_move_insn (cmp_reg, mem);
2140
2141  emit_label (label);
2142
2143  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
2144  emit_move_insn (old_reg, cmp_reg);
2145  emit_move_insn (ar_ccv, cmp_reg);
2146
2147  if (old_dst)
2148    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
2149
2150  new_reg = cmp_reg;
2151  if (code == NOT)
2152    {
2153      new_reg = expand_simple_unop (DImode, NOT, new_reg, NULL_RTX, true);
2154      code = AND;
2155    }
2156  new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
2157				 true, OPTAB_DIRECT);
2158
2159  if (mode != DImode)
2160    new_reg = gen_lowpart (mode, new_reg);
2161  if (new_dst)
2162    emit_move_insn (new_dst, new_reg);
2163
2164  switch (mode)
2165    {
2166    case QImode:  icode = CODE_FOR_cmpxchg_rel_qi;  break;
2167    case HImode:  icode = CODE_FOR_cmpxchg_rel_hi;  break;
2168    case SImode:  icode = CODE_FOR_cmpxchg_rel_si;  break;
2169    case DImode:  icode = CODE_FOR_cmpxchg_rel_di;  break;
2170    default:
2171      gcc_unreachable ();
2172    }
2173
2174  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
2175
2176  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
2177}
2178
2179/* Begin the assembly file.  */
2180
2181static void
2182ia64_file_start (void)
2183{
2184  /* Variable tracking should be run after all optimizations which change order
2185     of insns.  It also needs a valid CFG.  This can't be done in
2186     ia64_override_options, because flag_var_tracking is finalized after
2187     that.  */
2188  ia64_flag_var_tracking = flag_var_tracking;
2189  flag_var_tracking = 0;
2190
2191  default_file_start ();
2192  emit_safe_across_calls ();
2193}
2194
2195void
2196emit_safe_across_calls (void)
2197{
2198  unsigned int rs, re;
2199  int out_state;
2200
2201  rs = 1;
2202  out_state = 0;
2203  while (1)
2204    {
2205      while (rs < 64 && call_used_regs[PR_REG (rs)])
2206	rs++;
2207      if (rs >= 64)
2208	break;
2209      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
2210	continue;
2211      if (out_state == 0)
2212	{
2213	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
2214	  out_state = 1;
2215	}
2216      else
2217	fputc (',', asm_out_file);
2218      if (re == rs + 1)
2219	fprintf (asm_out_file, "p%u", rs);
2220      else
2221	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
2222      rs = re + 1;
2223    }
2224  if (out_state)
2225    fputc ('\n', asm_out_file);
2226}
2227
2228/* Helper function for ia64_compute_frame_size: find an appropriate general
2229   register to spill some special register to.  The GR0 to GR31 bits already
2230   allocated by this routine are tracked in current_frame_info.gr_used_mask.
2231   TRY_LOCALS is true if we should attempt to locate a local regnum.  */
2232
2233static int
2234find_gr_spill (int try_locals)
2235{
2236  int regno;
2237
2238  /* If this is a leaf function, first try an otherwise unused
2239     call-clobbered register.  */
2240  if (current_function_is_leaf)
2241    {
2242      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2243	if (! regs_ever_live[regno]
2244	    && call_used_regs[regno]
2245	    && ! fixed_regs[regno]
2246	    && ! global_regs[regno]
2247	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2248	  {
2249	    current_frame_info.gr_used_mask |= 1 << regno;
2250	    return regno;
2251	  }
2252    }
2253
2254  if (try_locals)
2255    {
2256      regno = current_frame_info.n_local_regs;
2257      /* If there is a frame pointer, then we can't use loc79, because
2258	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
2259	 reg_name switching code in ia64_expand_prologue.  */
2260      if (regno < (80 - frame_pointer_needed))
2261	{
2262	  current_frame_info.n_local_regs = regno + 1;
2263	  return LOC_REG (0) + regno;
2264	}
2265    }
2266
2267  /* Failed to find a general register to spill to.  Must use stack.  */
2268  return 0;
2269}
2270
2271/* In order to make for nice schedules, we try to allocate every temporary
2272   to a different register.  We must of course stay away from call-saved,
2273   fixed, and global registers.  We must also stay away from registers
2274   allocated in current_frame_info.gr_used_mask, since those include regs
2275   used all through the prologue.
2276
2277   Any register allocated here must be used immediately.  The idea is to
2278   aid scheduling, not to solve data flow problems.  */
2279
2280static int last_scratch_gr_reg;
2281
2282static int
2283next_scratch_gr_reg (void)
2284{
2285  int i, regno;
2286
2287  for (i = 0; i < 32; ++i)
2288    {
2289      regno = (last_scratch_gr_reg + i + 1) & 31;
2290      if (call_used_regs[regno]
2291	  && ! fixed_regs[regno]
2292	  && ! global_regs[regno]
2293	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
2294	{
2295	  last_scratch_gr_reg = regno;
2296	  return regno;
2297	}
2298    }
2299
2300  /* There must be _something_ available.  */
2301  gcc_unreachable ();
2302}
2303
2304/* Helper function for ia64_compute_frame_size, called through
2305   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
2306
2307static void
2308mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
2309{
2310  unsigned int regno = REGNO (reg);
2311  if (regno < 32)
2312    {
2313      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
2314      for (i = 0; i < n; ++i)
2315	current_frame_info.gr_used_mask |= 1 << (regno + i);
2316    }
2317}
2318
2319/* Compute the frame layout of the current function and record it in
2320   current_frame_info.  SIZE is the number of bytes of space needed for
2321   local variables.  */
2322
2323static void
2324ia64_compute_frame_size (HOST_WIDE_INT size)
2325{
2326  HOST_WIDE_INT total_size;
2327  HOST_WIDE_INT spill_size = 0;
2328  HOST_WIDE_INT extra_spill_size = 0;
2329  HOST_WIDE_INT pretend_args_size;
2330  HARD_REG_SET mask;
2331  int n_spilled = 0;
2332  int spilled_gr_p = 0;
2333  int spilled_fr_p = 0;
2334  unsigned int regno;
2335  int i;
2336
2337  if (current_frame_info.initialized)
2338    return;
2339
2340  memset (&current_frame_info, 0, sizeof current_frame_info);
2341  CLEAR_HARD_REG_SET (mask);
2342
2343  /* Don't allocate scratches to the return register.  */
2344  diddle_return_value (mark_reg_gr_used_mask, NULL);
2345
2346  /* Don't allocate scratches to the EH scratch registers.  */
2347  if (cfun->machine->ia64_eh_epilogue_sp)
2348    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2349  if (cfun->machine->ia64_eh_epilogue_bsp)
2350    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2351
2352  /* Find the size of the register stack frame.  We have only 80 local
2353     registers, because we reserve 8 for the inputs and 8 for the
2354     outputs.  */
2355
2356  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2357     since we'll be adjusting that down later.  */
2358  regno = LOC_REG (78) + ! frame_pointer_needed;
2359  for (; regno >= LOC_REG (0); regno--)
2360    if (regs_ever_live[regno])
2361      break;
2362  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2363
2364  /* For functions marked with the syscall_linkage attribute, we must mark
2365     all eight input registers as in use, so that locals aren't visible to
2366     the caller.  */
2367
2368  if (cfun->machine->n_varargs > 0
2369      || lookup_attribute ("syscall_linkage",
2370			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2371    current_frame_info.n_input_regs = 8;
2372  else
2373    {
2374      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2375	if (regs_ever_live[regno])
2376	  break;
2377      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2378    }
2379
2380  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2381    if (regs_ever_live[regno])
2382      break;
2383  i = regno - OUT_REG (0) + 1;
2384
2385#ifndef PROFILE_HOOK
2386  /* When -p profiling, we need one output register for the mcount argument.
2387     Likewise for -a profiling for the bb_init_func argument.  For -ax
2388     profiling, we need two output registers for the two bb_init_trace_func
2389     arguments.  */
2390  if (current_function_profile)
2391    i = MAX (i, 1);
2392#endif
2393  current_frame_info.n_output_regs = i;
2394
2395  /* ??? No rotating register support yet.  */
2396  current_frame_info.n_rotate_regs = 0;
2397
2398  /* Discover which registers need spilling, and how much room that
2399     will take.  Begin with floating point and general registers,
2400     which will always wind up on the stack.  */
2401
2402  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2403    if (regs_ever_live[regno] && ! call_used_regs[regno])
2404      {
2405	SET_HARD_REG_BIT (mask, regno);
2406	spill_size += 16;
2407	n_spilled += 1;
2408	spilled_fr_p = 1;
2409      }
2410
2411  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2412    if (regs_ever_live[regno] && ! call_used_regs[regno])
2413      {
2414	SET_HARD_REG_BIT (mask, regno);
2415	spill_size += 8;
2416	n_spilled += 1;
2417	spilled_gr_p = 1;
2418      }
2419
2420  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2421    if (regs_ever_live[regno] && ! call_used_regs[regno])
2422      {
2423	SET_HARD_REG_BIT (mask, regno);
2424	spill_size += 8;
2425	n_spilled += 1;
2426      }
2427
2428  /* Now come all special registers that might get saved in other
2429     general registers.  */
2430
2431  if (frame_pointer_needed)
2432    {
2433      current_frame_info.reg_fp = find_gr_spill (1);
2434      /* If we did not get a register, then we take LOC79.  This is guaranteed
2435	 to be free, even if regs_ever_live is already set, because this is
2436	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
2437	 as we don't count loc79 above.  */
2438      if (current_frame_info.reg_fp == 0)
2439	{
2440	  current_frame_info.reg_fp = LOC_REG (79);
2441	  current_frame_info.n_local_regs++;
2442	}
2443    }
2444
2445  if (! current_function_is_leaf)
2446    {
2447      /* Emit a save of BR0 if we call other functions.  Do this even
2448	 if this function doesn't return, as EH depends on this to be
2449	 able to unwind the stack.  */
2450      SET_HARD_REG_BIT (mask, BR_REG (0));
2451
2452      current_frame_info.reg_save_b0 = find_gr_spill (1);
2453      if (current_frame_info.reg_save_b0 == 0)
2454	{
2455	  extra_spill_size += 8;
2456	  n_spilled += 1;
2457	}
2458
2459      /* Similarly for ar.pfs.  */
2460      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2461      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2462      if (current_frame_info.reg_save_ar_pfs == 0)
2463	{
2464	  extra_spill_size += 8;
2465	  n_spilled += 1;
2466	}
2467
2468      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
2469	 registers are clobbered, so we fall back to the stack.  */
2470      current_frame_info.reg_save_gp
2471	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
2472      if (current_frame_info.reg_save_gp == 0)
2473	{
2474	  SET_HARD_REG_BIT (mask, GR_REG (1));
2475	  spill_size += 8;
2476	  n_spilled += 1;
2477	}
2478    }
2479  else
2480    {
2481      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
2482	{
2483	  SET_HARD_REG_BIT (mask, BR_REG (0));
2484	  extra_spill_size += 8;
2485	  n_spilled += 1;
2486	}
2487
2488      if (regs_ever_live[AR_PFS_REGNUM])
2489	{
2490	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
2491	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
2492	  if (current_frame_info.reg_save_ar_pfs == 0)
2493	    {
2494	      extra_spill_size += 8;
2495	      n_spilled += 1;
2496	    }
2497	}
2498    }
2499
2500  /* Unwind descriptor hackery: things are most efficient if we allocate
2501     consecutive GR save registers for RP, PFS, FP in that order. However,
2502     it is absolutely critical that FP get the only hard register that's
2503     guaranteed to be free, so we allocated it first.  If all three did
2504     happen to be allocated hard regs, and are consecutive, rearrange them
2505     into the preferred order now.  */
2506  if (current_frame_info.reg_fp != 0
2507      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
2508      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
2509    {
2510      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
2511      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
2512      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
2513    }
2514
2515  /* See if we need to store the predicate register block.  */
2516  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2517    if (regs_ever_live[regno] && ! call_used_regs[regno])
2518      break;
2519  if (regno <= PR_REG (63))
2520    {
2521      SET_HARD_REG_BIT (mask, PR_REG (0));
2522      current_frame_info.reg_save_pr = find_gr_spill (1);
2523      if (current_frame_info.reg_save_pr == 0)
2524	{
2525	  extra_spill_size += 8;
2526	  n_spilled += 1;
2527	}
2528
2529      /* ??? Mark them all as used so that register renaming and such
2530	 are free to use them.  */
2531      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
2532	regs_ever_live[regno] = 1;
2533    }
2534
2535  /* If we're forced to use st8.spill, we're forced to save and restore
2536     ar.unat as well.  The check for existing liveness allows inline asm
2537     to touch ar.unat.  */
2538  if (spilled_gr_p || cfun->machine->n_varargs
2539      || regs_ever_live[AR_UNAT_REGNUM])
2540    {
2541      regs_ever_live[AR_UNAT_REGNUM] = 1;
2542      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
2543      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
2544      if (current_frame_info.reg_save_ar_unat == 0)
2545	{
2546	  extra_spill_size += 8;
2547	  n_spilled += 1;
2548	}
2549    }
2550
2551  if (regs_ever_live[AR_LC_REGNUM])
2552    {
2553      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
2554      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
2555      if (current_frame_info.reg_save_ar_lc == 0)
2556	{
2557	  extra_spill_size += 8;
2558	  n_spilled += 1;
2559	}
2560    }
2561
2562  /* If we have an odd number of words of pretend arguments written to
2563     the stack, then the FR save area will be unaligned.  We round the
2564     size of this area up to keep things 16 byte aligned.  */
2565  if (spilled_fr_p)
2566    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
2567  else
2568    pretend_args_size = current_function_pretend_args_size;
2569
2570  total_size = (spill_size + extra_spill_size + size + pretend_args_size
2571		+ current_function_outgoing_args_size);
2572  total_size = IA64_STACK_ALIGN (total_size);
2573
2574  /* We always use the 16-byte scratch area provided by the caller, but
2575     if we are a leaf function, there's no one to which we need to provide
2576     a scratch area.  */
2577  if (current_function_is_leaf)
2578    total_size = MAX (0, total_size - 16);
2579
2580  current_frame_info.total_size = total_size;
2581  current_frame_info.spill_cfa_off = pretend_args_size - 16;
2582  current_frame_info.spill_size = spill_size;
2583  current_frame_info.extra_spill_size = extra_spill_size;
2584  COPY_HARD_REG_SET (current_frame_info.mask, mask);
2585  current_frame_info.n_spilled = n_spilled;
2586  current_frame_info.initialized = reload_completed;
2587}
2588
2589/* Compute the initial difference between the specified pair of registers.  */
2590
2591HOST_WIDE_INT
2592ia64_initial_elimination_offset (int from, int to)
2593{
2594  HOST_WIDE_INT offset;
2595
2596  ia64_compute_frame_size (get_frame_size ());
2597  switch (from)
2598    {
2599    case FRAME_POINTER_REGNUM:
2600      switch (to)
2601	{
2602	case HARD_FRAME_POINTER_REGNUM:
2603	  if (current_function_is_leaf)
2604	    offset = -current_frame_info.total_size;
2605	  else
2606	    offset = -(current_frame_info.total_size
2607		       - current_function_outgoing_args_size - 16);
2608	  break;
2609
2610	case STACK_POINTER_REGNUM:
2611	  if (current_function_is_leaf)
2612	    offset = 0;
2613	  else
2614	    offset = 16 + current_function_outgoing_args_size;
2615	  break;
2616
2617	default:
2618	  gcc_unreachable ();
2619	}
2620      break;
2621
2622    case ARG_POINTER_REGNUM:
2623      /* Arguments start above the 16 byte save area, unless stdarg,
2624	 in which case we store through the 16 byte save area.  */
2625      switch (to)
2626	{
2627	case HARD_FRAME_POINTER_REGNUM:
2628	  offset = 16 - current_function_pretend_args_size;
2629	  break;
2630
2631	case STACK_POINTER_REGNUM:
2632	  offset = (current_frame_info.total_size
2633		    + 16 - current_function_pretend_args_size);
2634	  break;
2635
2636	default:
2637	  gcc_unreachable ();
2638	}
2639      break;
2640
2641    default:
2642      gcc_unreachable ();
2643    }
2644
2645  return offset;
2646}
2647
2648/* If there are more than a trivial number of register spills, we use
2649   two interleaved iterators so that we can get two memory references
2650   per insn group.
2651
2652   In order to simplify things in the prologue and epilogue expanders,
2653   we use helper functions to fix up the memory references after the
2654   fact with the appropriate offsets to a POST_MODIFY memory mode.
2655   The following data structure tracks the state of the two iterators
2656   while insns are being emitted.  */
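/* For example, with both iterators in use the GR saves in the prologue
   alternate between them, coming out roughly as

	st8.spill [iter0] = r4, 16
	st8.spill [iter1] = r5, 16

   (operands and post-increment amounts are illustrative; the actual
   POST_MODIFY displacements are patched in afterwards by
   spill_restore_mem).  */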
2657
2658struct spill_fill_data
2659{
2660  rtx init_after;		/* point at which to emit initializations */
2661  rtx init_reg[2];		/* initial base register */
2662  rtx iter_reg[2];		/* the iterator registers */
2663  rtx *prev_addr[2];		/* address of last memory use */
2664  rtx prev_insn[2];		/* the insn corresponding to prev_addr */
2665  HOST_WIDE_INT prev_off[2];	/* last offset */
2666  int n_iter;			/* number of iterators in use */
2667  int next_iter;		/* next iterator to use */
2668  unsigned int save_gr_used_mask;
2669};
2670
2671static struct spill_fill_data spill_fill_data;
2672
2673static void
2674setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
2675{
2676  int i;
2677
2678  spill_fill_data.init_after = get_last_insn ();
2679  spill_fill_data.init_reg[0] = init_reg;
2680  spill_fill_data.init_reg[1] = init_reg;
2681  spill_fill_data.prev_addr[0] = NULL;
2682  spill_fill_data.prev_addr[1] = NULL;
2683  spill_fill_data.prev_insn[0] = NULL;
2684  spill_fill_data.prev_insn[1] = NULL;
2685  spill_fill_data.prev_off[0] = cfa_off;
2686  spill_fill_data.prev_off[1] = cfa_off;
2687  spill_fill_data.next_iter = 0;
2688  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
2689
2690  spill_fill_data.n_iter = 1 + (n_spills > 2);
2691  for (i = 0; i < spill_fill_data.n_iter; ++i)
2692    {
2693      int regno = next_scratch_gr_reg ();
2694      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
2695      current_frame_info.gr_used_mask |= 1 << regno;
2696    }
2697}
2698
2699static void
2700finish_spill_pointers (void)
2701{
2702  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
2703}
2704
2705static rtx
2706spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
2707{
2708  int iter = spill_fill_data.next_iter;
2709  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
2710  rtx disp_rtx = GEN_INT (disp);
2711  rtx mem;
2712
2713  if (spill_fill_data.prev_addr[iter])
2714    {
2715      if (CONST_OK_FOR_N (disp))
2716	{
2717	  *spill_fill_data.prev_addr[iter]
2718	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
2719				   gen_rtx_PLUS (DImode,
2720						 spill_fill_data.iter_reg[iter],
2721						 disp_rtx));
2722	  REG_NOTES (spill_fill_data.prev_insn[iter])
2723	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
2724				 REG_NOTES (spill_fill_data.prev_insn[iter]));
2725	}
2726      else
2727	{
2728	  /* ??? Could use register post_modify for loads.  */
2729	  if (! CONST_OK_FOR_I (disp))
2730	    {
2731	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2732	      emit_move_insn (tmp, disp_rtx);
2733	      disp_rtx = tmp;
2734	    }
2735	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2736				 spill_fill_data.iter_reg[iter], disp_rtx));
2737	}
2738    }
2739  /* Micro-optimization: if we've created a frame pointer, it's at
2740     CFA 0, which may allow the real iterator to be initialized lower,
2741     slightly increasing parallelism.  Also, if there are few saves
2742     it may eliminate the iterator entirely.  */
2743  else if (disp == 0
2744	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2745	   && frame_pointer_needed)
2746    {
2747      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2748      set_mem_alias_set (mem, get_varargs_alias_set ());
2749      return mem;
2750    }
2751  else
2752    {
2753      rtx seq, insn;
2754
2755      if (disp == 0)
2756	seq = gen_movdi (spill_fill_data.iter_reg[iter],
2757			 spill_fill_data.init_reg[iter]);
2758      else
2759	{
2760	  start_sequence ();
2761
2762	  if (! CONST_OK_FOR_I (disp))
2763	    {
2764	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2765	      emit_move_insn (tmp, disp_rtx);
2766	      disp_rtx = tmp;
2767	    }
2768
2769	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2770				 spill_fill_data.init_reg[iter],
2771				 disp_rtx));
2772
2773	  seq = get_insns ();
2774	  end_sequence ();
2775	}
2776
2777      /* Be careful about being the first insn in a sequence.  */
2778      if (spill_fill_data.init_after)
2779	insn = emit_insn_after (seq, spill_fill_data.init_after);
2780      else
2781	{
2782	  rtx first = get_insns ();
2783	  if (first)
2784	    insn = emit_insn_before (seq, first);
2785	  else
2786	    insn = emit_insn (seq);
2787	}
2788      spill_fill_data.init_after = insn;
2789
2790      /* If DISP is 0, we may or may not have a further adjustment
2791	 afterward.  If we do, then the load/store insn may be modified
2792	 to be a post-modify.  If we don't, then this copy may be
2793	 eliminated by copyprop_hardreg_forward, which makes this
2794	 insn garbage, which runs afoul of the sanity check in
2795	 propagate_one_insn.  So mark this insn as legal to delete.  */
2796      if (disp == 0)
2797	REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx,
2798					     REG_NOTES (insn));
2799    }
2800
2801  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2802
2803  /* ??? Not all of the spills are for varargs, but some of them are.
2804     The rest of the spills belong in an alias set of their own.  But
2805     it doesn't actually hurt to include them here.  */
2806  set_mem_alias_set (mem, get_varargs_alias_set ());
2807
2808  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2809  spill_fill_data.prev_off[iter] = cfa_off;
2810
2811  if (++iter >= spill_fill_data.n_iter)
2812    iter = 0;
2813  spill_fill_data.next_iter = iter;
2814
2815  return mem;
2816}
2817
2818static void
2819do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2820	  rtx frame_reg)
2821{
2822  int iter = spill_fill_data.next_iter;
2823  rtx mem, insn;
2824
2825  mem = spill_restore_mem (reg, cfa_off);
2826  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2827  spill_fill_data.prev_insn[iter] = insn;
2828
2829  if (frame_reg)
2830    {
2831      rtx base;
2832      HOST_WIDE_INT off;
2833
2834      RTX_FRAME_RELATED_P (insn) = 1;
2835
2836      /* Don't even pretend that the unwind code can intuit its way
2837	 through a pair of interleaved post_modify iterators.  Just
2838	 provide the correct answer.  */
2839
2840      if (frame_pointer_needed)
2841	{
2842	  base = hard_frame_pointer_rtx;
2843	  off = - cfa_off;
2844	}
2845      else
2846	{
2847	  base = stack_pointer_rtx;
2848	  off = current_frame_info.total_size - cfa_off;
2849	}
2850
2851      REG_NOTES (insn)
2852	= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2853		gen_rtx_SET (VOIDmode,
2854			     gen_rtx_MEM (GET_MODE (reg),
2855					  plus_constant (base, off)),
2856			     frame_reg),
2857		REG_NOTES (insn));
2858    }
2859}
2860
2861static void
2862do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
2863{
2864  int iter = spill_fill_data.next_iter;
2865  rtx insn;
2866
2867  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
2868				GEN_INT (cfa_off)));
2869  spill_fill_data.prev_insn[iter] = insn;
2870}
2871
2872/* Wrapper functions that discard the CONST_INT spill offset.  These
2873   exist so that we can give gr_spill/gr_fill the offset they need and
2874   use a consistent function interface.  */
2875
2876static rtx
2877gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2878{
2879  return gen_movdi (dest, src);
2880}
2881
2882static rtx
2883gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2884{
2885  return gen_fr_spill (dest, src);
2886}
2887
2888static rtx
2889gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
2890{
2891  return gen_fr_restore (dest, src);
2892}
2893
2894/* Called after register allocation to add any instructions needed for the
2895   prologue.  Using a prologue insn is preferred to putting all of the
2896   instructions in output_function_prologue(), since it allows the scheduler
2897   to intermix instructions with the saves of the caller saved registers.  In
2898   some cases, it might be necessary to emit a barrier instruction as the last
2899   insn to prevent such scheduling.
2900
2901   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
2902   so that the debug info generation code can handle them properly.
2903
2904   The register save area is laid out like so:
2905   cfa+16
2906	[ varargs spill area ]
2907	[ fr register spill area ]
2908	[ br register spill area ]
2909	[ ar register spill area ]
2910	[ pr register spill area ]
2911	[ gr register spill area ] */
2912
2913/* ??? We get inefficient code when the frame size is larger than can fit in an
2914   adds instruction.  */
2915
2916void
2917ia64_expand_prologue (void)
2918{
2919  rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
2920  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
2921  rtx reg, alt_reg;
2922
2923  ia64_compute_frame_size (get_frame_size ());
2924  last_scratch_gr_reg = 15;
2925
2926  /* If there is no epilogue, then we don't need some prologue insns.
2927     We need to avoid emitting the dead prologue insns, because flow
2928     will complain about them.  */
2929  if (optimize)
2930    {
2931      edge e;
2932      edge_iterator ei;
2933
2934      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
2935	if ((e->flags & EDGE_FAKE) == 0
2936	    && (e->flags & EDGE_FALLTHRU) != 0)
2937	  break;
2938      epilogue_p = (e != NULL);
2939    }
2940  else
2941    epilogue_p = 1;
2942
2943  /* Set the local, input, and output register names.  We need to do this
2944     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
2945     half.  If we use in/loc/out register names, then we get assembler errors
2946     in crtn.S because there is no alloc insn or regstk directive in there.  */
2947  if (! TARGET_REG_NAMES)
2948    {
2949      int inputs = current_frame_info.n_input_regs;
2950      int locals = current_frame_info.n_local_regs;
2951      int outputs = current_frame_info.n_output_regs;
2952
2953      for (i = 0; i < inputs; i++)
2954	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
2955      for (i = 0; i < locals; i++)
2956	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
2957      for (i = 0; i < outputs; i++)
2958	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
2959    }
2960
2961  /* Set the frame pointer register name.  The regnum is logically loc79,
2962     but of course we'll not have allocated that many locals.  Rather than
2963     worrying about renumbering the existing rtxs, we adjust the name.  */
2964  /* ??? This code means that we can never use one local register when
2965     there is a frame pointer.  loc79 gets wasted in this case, as it is
2966     renamed to a register that will never be used.  See also the try_locals
2967     code in find_gr_spill.  */
2968  if (current_frame_info.reg_fp)
2969    {
2970      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
2971      reg_names[HARD_FRAME_POINTER_REGNUM]
2972	= reg_names[current_frame_info.reg_fp];
2973      reg_names[current_frame_info.reg_fp] = tmp;
2974    }
2975
2976  /* We don't need an alloc instruction if we've used no outputs or locals.  */
2977  if (current_frame_info.n_local_regs == 0
2978      && current_frame_info.n_output_regs == 0
2979      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
2980      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
2981    {
2982      /* If there is no alloc, but there are input registers used, then we
2983	 need a .regstk directive.  */
2984      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
2985      ar_pfs_save_reg = NULL_RTX;
2986    }
2987  else
2988    {
2989      current_frame_info.need_regstk = 0;
2990
2991      if (current_frame_info.reg_save_ar_pfs)
2992	regno = current_frame_info.reg_save_ar_pfs;
2993      else
2994	regno = next_scratch_gr_reg ();
2995      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);
2996
2997      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
2998				   GEN_INT (current_frame_info.n_input_regs),
2999				   GEN_INT (current_frame_info.n_local_regs),
3000				   GEN_INT (current_frame_info.n_output_regs),
3001				   GEN_INT (current_frame_info.n_rotate_regs)));
3002      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0);
3003    }
3004
3005  /* Set up frame pointer, stack pointer, and spill iterators.  */
3006
3007  n_varargs = cfun->machine->n_varargs;
3008  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
3009			stack_pointer_rtx, 0);
3010
3011  if (frame_pointer_needed)
3012    {
3013      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
3014      RTX_FRAME_RELATED_P (insn) = 1;
3015    }
3016
3017  if (current_frame_info.total_size != 0)
3018    {
3019      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
3020      rtx offset;
3021
3022      if (CONST_OK_FOR_I (- current_frame_info.total_size))
3023	offset = frame_size_rtx;
3024      else
3025	{
3026	  regno = next_scratch_gr_reg ();
3027	  offset = gen_rtx_REG (DImode, regno);
3028	  emit_move_insn (offset, frame_size_rtx);
3029	}
3030
3031      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
3032				    stack_pointer_rtx, offset));
3033
3034      if (! frame_pointer_needed)
3035	{
3036	  RTX_FRAME_RELATED_P (insn) = 1;
3037	  if (GET_CODE (offset) != CONST_INT)
3038	    {
3039	      REG_NOTES (insn)
3040		= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3041			gen_rtx_SET (VOIDmode,
3042				     stack_pointer_rtx,
3043				     gen_rtx_PLUS (DImode,
3044						   stack_pointer_rtx,
3045						   frame_size_rtx)),
3046			REG_NOTES (insn));
3047	    }
3048	}
3049
3050      /* ??? At this point we must generate a magic insn that appears to
3051	 modify the stack pointer, the frame pointer, and all spill
3052	 iterators.  This would allow the most scheduling freedom.  For
3053	 now, just hard stop.  */
3054      emit_insn (gen_blockage ());
3055    }
3056
3057  /* Must copy out ar.unat before doing any integer spills.  */
3058  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3059    {
3060      if (current_frame_info.reg_save_ar_unat)
3061	ar_unat_save_reg
3062	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3063      else
3064	{
3065	  alt_regno = next_scratch_gr_reg ();
3066	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3067	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3068	}
3069
3070      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3071      insn = emit_move_insn (ar_unat_save_reg, reg);
3072      RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0);
3073
3074      /* Even if we're not going to generate an epilogue, we still
3075	 need to save the register so that EH works.  */
3076      if (! epilogue_p && current_frame_info.reg_save_ar_unat)
3077	emit_insn (gen_prologue_use (ar_unat_save_reg));
3078    }
3079  else
3080    ar_unat_save_reg = NULL_RTX;
3081
3082  /* Spill all varargs registers.  Do this before spilling any GR registers,
3083     since we want the UNAT bits for the GR registers to override the UNAT
3084     bits from varargs, which we don't care about.  */
3085
3086  cfa_off = -16;
3087  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
3088    {
3089      reg = gen_rtx_REG (DImode, regno);
3090      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
3091    }
3092
3093  /* Locate the bottom of the register save area.  */
3094  cfa_off = (current_frame_info.spill_cfa_off
3095	     + current_frame_info.spill_size
3096	     + current_frame_info.extra_spill_size);
3097
3098  /* Save the predicate register block either in a register or in memory.  */
3099  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3100    {
3101      reg = gen_rtx_REG (DImode, PR_REG (0));
3102      if (current_frame_info.reg_save_pr != 0)
3103	{
3104	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3105	  insn = emit_move_insn (alt_reg, reg);
3106
3107	  /* ??? Denote pr spill/fill by a DImode move that modifies all
3108	     64 hard registers.  */
3109	  RTX_FRAME_RELATED_P (insn) = 1;
3110	  REG_NOTES (insn)
3111	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3112			gen_rtx_SET (VOIDmode, alt_reg, reg),
3113			REG_NOTES (insn));
3114
3115	  /* Even if we're not going to generate an epilogue, we still
3116	     need to save the register so that EH works.  */
3117	  if (! epilogue_p)
3118	    emit_insn (gen_prologue_use (alt_reg));
3119	}
3120      else
3121	{
3122	  alt_regno = next_scratch_gr_reg ();
3123	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3124	  insn = emit_move_insn (alt_reg, reg);
3125	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3126	  cfa_off -= 8;
3127	}
3128    }
3129
3130  /* Handle AR regs in numerical order.  All of them get special handling.  */
3131  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
3132      && current_frame_info.reg_save_ar_unat == 0)
3133    {
3134      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3135      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
3136      cfa_off -= 8;
3137    }
3138
3139  /* The alloc insn already copied ar.pfs into a general register.  The
3140     only thing we have to do now is copy that register to a stack slot
3141     if we'd not allocated a local register for the job.  */
3142  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
3143      && current_frame_info.reg_save_ar_pfs == 0)
3144    {
3145      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3146      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
3147      cfa_off -= 8;
3148    }
3149
3150  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3151    {
3152      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3153      if (current_frame_info.reg_save_ar_lc != 0)
3154	{
3155	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3156	  insn = emit_move_insn (alt_reg, reg);
3157	  RTX_FRAME_RELATED_P (insn) = 1;
3158
3159	  /* Even if we're not going to generate an epilogue, we still
3160	     need to save the register so that EH works.  */
3161	  if (! epilogue_p)
3162	    emit_insn (gen_prologue_use (alt_reg));
3163	}
3164      else
3165	{
3166	  alt_regno = next_scratch_gr_reg ();
3167	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3168	  emit_move_insn (alt_reg, reg);
3169	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3170	  cfa_off -= 8;
3171	}
3172    }
3173
3174  /* Save the return pointer.  */
3175  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3176    {
3177      reg = gen_rtx_REG (DImode, BR_REG (0));
3178      if (current_frame_info.reg_save_b0 != 0)
3179	{
3180	  alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3181	  insn = emit_move_insn (alt_reg, reg);
3182	  RTX_FRAME_RELATED_P (insn) = 1;
3183
3184	  /* Even if we're not going to generate an epilogue, we still
3185	     need to save the register so that EH works.  */
3186	  if (! epilogue_p)
3187	    emit_insn (gen_prologue_use (alt_reg));
3188	}
3189      else
3190	{
3191	  alt_regno = next_scratch_gr_reg ();
3192	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3193	  emit_move_insn (alt_reg, reg);
3194	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3195	  cfa_off -= 8;
3196	}
3197    }
3198
3199  if (current_frame_info.reg_save_gp)
3200    {
3201      insn = emit_move_insn (gen_rtx_REG (DImode,
3202					  current_frame_info.reg_save_gp),
3203			     pic_offset_table_rtx);
3204      /* We don't know for sure yet if this is actually needed, since
3205	 we've not split the PIC call patterns.  If all of the calls
3206	 are indirect, and not followed by any uses of the gp, then
3207	 this save is dead.  Allow it to go away.  */
3208      REG_NOTES (insn)
3209	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
3210    }
3211
3212  /* We should now be at the base of the gr/br/fr spill area.  */
3213  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3214			  + current_frame_info.spill_size));
3215
3216  /* Spill all general registers.  */
3217  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3218    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3219      {
3220	reg = gen_rtx_REG (DImode, regno);
3221	do_spill (gen_gr_spill, reg, cfa_off, reg);
3222	cfa_off -= 8;
3223      }
3224
3225  /* Spill the rest of the BR registers.  */
3226  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3227    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3228      {
3229	alt_regno = next_scratch_gr_reg ();
3230	alt_reg = gen_rtx_REG (DImode, alt_regno);
3231	reg = gen_rtx_REG (DImode, regno);
3232	emit_move_insn (alt_reg, reg);
3233	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3234	cfa_off -= 8;
3235      }
3236
3237  /* Align the frame and spill all FR registers.  */
3238  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3239    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3240      {
3241        gcc_assert (!(cfa_off & 15));
3242	reg = gen_rtx_REG (XFmode, regno);
3243	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3244	cfa_off -= 16;
3245      }
3246
3247  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3248
3249  finish_spill_pointers ();
3250}
3251
3252/* Called after register allocation to add any instructions needed for the
3253   epilogue.  Using an epilogue insn is preferred to putting all of the
3254   instructions in output_function_epilogue(), since it allows the scheduler
3255   to intermix instructions with the restores of the caller saved registers.  In
3256   some cases, it might be necessary to emit a barrier instruction as the last
3257   insn to prevent such scheduling.  */
3258
3259void
3260ia64_expand_epilogue (int sibcall_p)
3261{
3262  rtx insn, reg, alt_reg, ar_unat_save_reg;
3263  int regno, alt_regno, cfa_off;
3264
3265  ia64_compute_frame_size (get_frame_size ());
3266
3267  /* If there is a frame pointer, then we use it instead of the stack
3268     pointer, so that the stack pointer does not need to be valid when
3269     the epilogue starts.  See EXIT_IGNORE_STACK.  */
3270  if (frame_pointer_needed)
3271    setup_spill_pointers (current_frame_info.n_spilled,
3272			  hard_frame_pointer_rtx, 0);
3273  else
3274    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3275			  current_frame_info.total_size);
3276
3277  if (current_frame_info.total_size != 0)
3278    {
3279      /* ??? At this point we must generate a magic insn that appears to
3280         modify the spill iterators and the frame pointer.  This would
3281	 allow the most scheduling freedom.  For now, just hard stop.  */
3282      emit_insn (gen_blockage ());
3283    }
3284
3285  /* Locate the bottom of the register save area.  */
3286  cfa_off = (current_frame_info.spill_cfa_off
3287	     + current_frame_info.spill_size
3288	     + current_frame_info.extra_spill_size);
3289
3290  /* Restore the predicate registers.  */
3291  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
3292    {
3293      if (current_frame_info.reg_save_pr != 0)
3294	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr);
3295      else
3296	{
3297	  alt_regno = next_scratch_gr_reg ();
3298	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3299	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3300	  cfa_off -= 8;
3301	}
3302      reg = gen_rtx_REG (DImode, PR_REG (0));
3303      emit_move_insn (reg, alt_reg);
3304    }
3305
3306  /* Restore the application registers.  */
3307
3308  /* Load the saved unat from the stack, but do not restore it until
3309     after the GRs have been restored.  */
3310  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3311    {
3312      if (current_frame_info.reg_save_ar_unat != 0)
3313        ar_unat_save_reg
3314	  = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat);
3315      else
3316	{
3317	  alt_regno = next_scratch_gr_reg ();
3318	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
3319	  current_frame_info.gr_used_mask |= 1 << alt_regno;
3320	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
3321	  cfa_off -= 8;
3322	}
3323    }
3324  else
3325    ar_unat_save_reg = NULL_RTX;
3326
3327  if (current_frame_info.reg_save_ar_pfs != 0)
3328    {
3329      alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs);
3330      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3331      emit_move_insn (reg, alt_reg);
3332    }
3333  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
3334    {
3335      alt_regno = next_scratch_gr_reg ();
3336      alt_reg = gen_rtx_REG (DImode, alt_regno);
3337      do_restore (gen_movdi_x, alt_reg, cfa_off);
3338      cfa_off -= 8;
3339      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
3340      emit_move_insn (reg, alt_reg);
3341    }
3342
3343  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
3344    {
3345      if (current_frame_info.reg_save_ar_lc != 0)
3346	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc);
3347      else
3348	{
3349	  alt_regno = next_scratch_gr_reg ();
3350	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3351	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3352	  cfa_off -= 8;
3353	}
3354      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
3355      emit_move_insn (reg, alt_reg);
3356    }
3357
3358  /* Restore the return pointer.  */
3359  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3360    {
3361      if (current_frame_info.reg_save_b0 != 0)
3362	alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3363      else
3364	{
3365	  alt_regno = next_scratch_gr_reg ();
3366	  alt_reg = gen_rtx_REG (DImode, alt_regno);
3367	  do_restore (gen_movdi_x, alt_reg, cfa_off);
3368	  cfa_off -= 8;
3369	}
3370      reg = gen_rtx_REG (DImode, BR_REG (0));
3371      emit_move_insn (reg, alt_reg);
3372    }
3373
3374  /* We should now be at the base of the gr/br/fr spill area.  */
3375  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3376			  + current_frame_info.spill_size));
3377
3378  /* The GP may be stored on the stack in the prologue, but it's
3379     never restored in the epilogue.  Skip the stack slot.  */
3380  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
3381    cfa_off -= 8;
3382
3383  /* Restore all general registers.  */
3384  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
3385    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3386      {
3387	reg = gen_rtx_REG (DImode, regno);
3388	do_restore (gen_gr_restore, reg, cfa_off);
3389	cfa_off -= 8;
3390      }
3391
3392  /* Restore the branch registers.  */
3393  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3394    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3395      {
3396	alt_regno = next_scratch_gr_reg ();
3397	alt_reg = gen_rtx_REG (DImode, alt_regno);
3398	do_restore (gen_movdi_x, alt_reg, cfa_off);
3399	cfa_off -= 8;
3400	reg = gen_rtx_REG (DImode, regno);
3401	emit_move_insn (reg, alt_reg);
3402      }
3403
3404  /* Restore floating point registers.  */
3405  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3406    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3407      {
3408        gcc_assert (!(cfa_off & 15));
3409	reg = gen_rtx_REG (XFmode, regno);
3410	do_restore (gen_fr_restore_x, reg, cfa_off);
3411	cfa_off -= 16;
3412      }
3413
3414  /* Restore ar.unat for real.  */
3415  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
3416    {
3417      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
3418      emit_move_insn (reg, ar_unat_save_reg);
3419    }
3420
3421  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3422
3423  finish_spill_pointers ();
3424
3425  if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp)
3426    {
3427      /* ??? At this point we must generate a magic insn that appears to
3428         modify the spill iterators, the stack pointer, and the frame
3429	 pointer.  This would allow the most scheduling freedom.  For now,
3430	 just hard stop.  */
3431      emit_insn (gen_blockage ());
3432    }
3433
3434  if (cfun->machine->ia64_eh_epilogue_sp)
3435    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
3436  else if (frame_pointer_needed)
3437    {
3438      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
3439      RTX_FRAME_RELATED_P (insn) = 1;
3440    }
3441  else if (current_frame_info.total_size)
3442    {
3443      rtx offset, frame_size_rtx;
3444
3445      frame_size_rtx = GEN_INT (current_frame_info.total_size);
3446      if (CONST_OK_FOR_I (current_frame_info.total_size))
3447	offset = frame_size_rtx;
3448      else
3449	{
3450	  regno = next_scratch_gr_reg ();
3451	  offset = gen_rtx_REG (DImode, regno);
3452	  emit_move_insn (offset, frame_size_rtx);
3453	}
3454
3455      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
3456				    offset));
3457
3458      RTX_FRAME_RELATED_P (insn) = 1;
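      /* If the adjustment went through a scratch register, the note below
         records the equivalent sp = sp + frame_size expression; this is the
         usual role of REG_FRAME_RELATED_EXPR, so the unwind info still sees
         a constant stack adjustment.  */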
3459      if (GET_CODE (offset) != CONST_INT)
3460	{
3461	  REG_NOTES (insn)
3462	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
3463			gen_rtx_SET (VOIDmode,
3464				     stack_pointer_rtx,
3465				     gen_rtx_PLUS (DImode,
3466						   stack_pointer_rtx,
3467						   frame_size_rtx)),
3468			REG_NOTES (insn));
3469	}
3470    }
3471
3472  if (cfun->machine->ia64_eh_epilogue_bsp)
3473    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
3474
3475  if (! sibcall_p)
3476    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
3477  else
3478    {
3479      int fp = GR_REG (2);
      /* We need a throwaway register here; r0 and r1 are reserved, so r2 is
	 the first available call-clobbered register.  If there was a frame
	 pointer register, we may have swapped the names of r2 and
	 HARD_FRAME_POINTER_REGNUM, so we have to make sure we're using the
	 string "r2" when emitting the register name for the assembler.  */
3485      if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2))
3486	fp = HARD_FRAME_POINTER_REGNUM;
3487
3488      /* We must emit an alloc to force the input registers to become output
3489	 registers.  Otherwise, if the callee tries to pass its parameters
3490	 through to another call without an intervening alloc, then these
3491	 values get lost.  */
3492      /* ??? We don't need to preserve all input registers.  We only need to
3493	 preserve those input registers used as arguments to the sibling call.
3494	 It is unclear how to compute that number here.  */
3495      if (current_frame_info.n_input_regs != 0)
3496	{
3497	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);
3498	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
3499				const0_rtx, const0_rtx,
3500				n_inputs, const0_rtx));
3501	  RTX_FRAME_RELATED_P (insn) = 1;
3502	}
3503    }
3504}
3505
3506/* Return 1 if br.ret can do all the work required to return from a
3507   function.  */
3508
3509int
3510ia64_direct_return (void)
3511{
3512  if (reload_completed && ! frame_pointer_needed)
3513    {
3514      ia64_compute_frame_size (get_frame_size ());
3515
3516      return (current_frame_info.total_size == 0
3517	      && current_frame_info.n_spilled == 0
3518	      && current_frame_info.reg_save_b0 == 0
3519	      && current_frame_info.reg_save_pr == 0
3520	      && current_frame_info.reg_save_ar_pfs == 0
3521	      && current_frame_info.reg_save_ar_unat == 0
3522	      && current_frame_info.reg_save_ar_lc == 0);
3523    }
3524  return 0;
3525}
3526
3527/* Return the magic cookie that we use to hold the return address
3528   during early compilation.  */
3529
3530rtx
3531ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
3532{
3533  if (count != 0)
3534    return NULL;
3535  return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
3536}
3537
3538/* Split this value after reload, now that we know where the return
3539   address is saved.  */
3540
3541void
3542ia64_split_return_addr_rtx (rtx dest)
3543{
3544  rtx src;
3545
3546  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
3547    {
3548      if (current_frame_info.reg_save_b0 != 0)
3549	src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0);
3550      else
3551	{
3552	  HOST_WIDE_INT off;
3553	  unsigned int regno;
3554
3555	  /* Compute offset from CFA for BR0.  */
3556	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
3557	  off = (current_frame_info.spill_cfa_off
3558		 + current_frame_info.spill_size);
3559	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3560	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3561	      off -= 8;
3562
3563	  /* Convert CFA offset to a register based offset.  */
3564	  if (frame_pointer_needed)
3565	    src = hard_frame_pointer_rtx;
3566	  else
3567	    {
3568	      src = stack_pointer_rtx;
3569	      off += current_frame_info.total_size;
3570	    }
3571
3572	  /* Load address into scratch register.  */
3573	  if (CONST_OK_FOR_I (off))
3574	    emit_insn (gen_adddi3 (dest, src, GEN_INT (off)));
3575	  else
3576	    {
3577	      emit_move_insn (dest, GEN_INT (off));
3578	      emit_insn (gen_adddi3 (dest, src, dest));
3579	    }
3580
3581	  src = gen_rtx_MEM (Pmode, dest);
3582	}
3583    }
3584  else
3585    src = gen_rtx_REG (DImode, BR_REG (0));
3586
3587  emit_move_insn (dest, src);
3588}
3589
/* Return nonzero if it is OK to rename hard register FROM to hard register
   TO, given the registers reserved by the prologue and the layout of the
   register frame.  */

int
3591ia64_hard_regno_rename_ok (int from, int to)
3592{
3593  /* Don't clobber any of the registers we reserved for the prologue.  */
3594  if (to == current_frame_info.reg_fp
3595      || to == current_frame_info.reg_save_b0
3596      || to == current_frame_info.reg_save_pr
3597      || to == current_frame_info.reg_save_ar_pfs
3598      || to == current_frame_info.reg_save_ar_unat
3599      || to == current_frame_info.reg_save_ar_lc)
3600    return 0;
3601
3602  if (from == current_frame_info.reg_fp
3603      || from == current_frame_info.reg_save_b0
3604      || from == current_frame_info.reg_save_pr
3605      || from == current_frame_info.reg_save_ar_pfs
3606      || from == current_frame_info.reg_save_ar_unat
3607      || from == current_frame_info.reg_save_ar_lc)
3608    return 0;
3609
3610  /* Don't use output registers outside the register frame.  */
3611  if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
3612    return 0;
3613
3614  /* Retain even/oddness on predicate register pairs.  */
3615  if (PR_REGNO_P (from) && PR_REGNO_P (to))
3616    return (from & 1) == (to & 1);
3617
3618  return 1;
3619}
3620
3621/* Target hook for assembling integer objects.  Handle word-sized
3622   aligned objects and detect the cases when @fptr is needed.  */
3623
3624static bool
3625ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
3626{
3627  if (size == POINTER_SIZE / BITS_PER_UNIT
3628      && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
3629      && GET_CODE (x) == SYMBOL_REF
3630      && SYMBOL_REF_FUNCTION_P (x))
3631    {
3632      static const char * const directive[2][2] = {
3633	  /* 64-bit pointer */  /* 32-bit pointer */
3634	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
3635	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
3636      };
3637      fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
3638      output_addr_const (asm_out_file, x);
3639      fputs (")\n", asm_out_file);
3640      return true;
3641    }
3642  return default_assemble_integer (x, size, aligned_p);
3643}
3644
3645/* Emit the function prologue.  */
3646
3647static void
3648ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3649{
3650  int mask, grsave, grsave_prev;
3651
3652  if (current_frame_info.need_regstk)
3653    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
3654	     current_frame_info.n_input_regs,
3655	     current_frame_info.n_local_regs,
3656	     current_frame_info.n_output_regs,
3657	     current_frame_info.n_rotate_regs);
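  /* The directive above reads, e.g., ".regstk 2, 3, 4, 0" for a function
     with 2 input, 3 local and 4 output registers and no rotating registers
     (values illustrative).  */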
3658
3659  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3660    return;
3661
3662  /* Emit the .prologue directive.  */
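  /* The mask computed below follows the .prologue GR-save convention:
     8 stands for b0, 4 for ar.pfs, 2 for the frame pointer and 1 for the
     predicate registers, each included only when that value is saved in a
     consecutive run of general registers starting at GRSAVE.  This merely
     summarizes the tests that follow.  */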
3663
3664  mask = 0;
3665  grsave = grsave_prev = 0;
3666  if (current_frame_info.reg_save_b0 != 0)
3667    {
3668      mask |= 8;
3669      grsave = grsave_prev = current_frame_info.reg_save_b0;
3670    }
3671  if (current_frame_info.reg_save_ar_pfs != 0
3672      && (grsave_prev == 0
3673	  || current_frame_info.reg_save_ar_pfs == grsave_prev + 1))
3674    {
3675      mask |= 4;
3676      if (grsave_prev == 0)
3677	grsave = current_frame_info.reg_save_ar_pfs;
3678      grsave_prev = current_frame_info.reg_save_ar_pfs;
3679    }
3680  if (current_frame_info.reg_fp != 0
3681      && (grsave_prev == 0
3682	  || current_frame_info.reg_fp == grsave_prev + 1))
3683    {
3684      mask |= 2;
3685      if (grsave_prev == 0)
3686	grsave = HARD_FRAME_POINTER_REGNUM;
3687      grsave_prev = current_frame_info.reg_fp;
3688    }
3689  if (current_frame_info.reg_save_pr != 0
3690      && (grsave_prev == 0
3691	  || current_frame_info.reg_save_pr == grsave_prev + 1))
3692    {
3693      mask |= 1;
3694      if (grsave_prev == 0)
3695	grsave = current_frame_info.reg_save_pr;
3696    }
3697
3698  if (mask && TARGET_GNU_AS)
3699    fprintf (file, "\t.prologue %d, %d\n", mask,
3700	     ia64_dbx_register_number (grsave));
3701  else
3702    fputs ("\t.prologue\n", file);
3703
3704  /* Emit a .spill directive, if necessary, to relocate the base of
3705     the register spill area.  */
3706  if (current_frame_info.spill_cfa_off != -16)
3707    fprintf (file, "\t.spill %ld\n",
3708	     (long) (current_frame_info.spill_cfa_off
3709		     + current_frame_info.spill_size));
3710}
3711
3712/* Emit the .body directive at the scheduled end of the prologue.  */
3713
3714static void
3715ia64_output_function_end_prologue (FILE *file)
3716{
3717  if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS))
3718    return;
3719
3720  fputs ("\t.body\n", file);
3721}
3722
3723/* Emit the function epilogue.  */
3724
3725static void
3726ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
3727			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
3728{
3729  int i;
3730
3731  if (current_frame_info.reg_fp)
3732    {
3733      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
3734      reg_names[HARD_FRAME_POINTER_REGNUM]
3735	= reg_names[current_frame_info.reg_fp];
3736      reg_names[current_frame_info.reg_fp] = tmp;
3737    }
3738  if (! TARGET_REG_NAMES)
3739    {
3740      for (i = 0; i < current_frame_info.n_input_regs; i++)
3741	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
3742      for (i = 0; i < current_frame_info.n_local_regs; i++)
3743	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
3744      for (i = 0; i < current_frame_info.n_output_regs; i++)
3745	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
3746    }
3747
3748  current_frame_info.initialized = 0;
3749}
3750
3751int
3752ia64_dbx_register_number (int regno)
3753{
3754  /* In ia64_expand_prologue we quite literally renamed the frame pointer
3755     from its home at loc79 to something inside the register frame.  We
3756     must perform the same renumbering here for the debug info.  */
3757  if (current_frame_info.reg_fp)
3758    {
3759      if (regno == HARD_FRAME_POINTER_REGNUM)
3760	regno = current_frame_info.reg_fp;
3761      else if (regno == current_frame_info.reg_fp)
3762	regno = HARD_FRAME_POINTER_REGNUM;
3763    }
3764
3765  if (IN_REGNO_P (regno))
3766    return 32 + regno - IN_REG (0);
3767  else if (LOC_REGNO_P (regno))
3768    return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
3769  else if (OUT_REGNO_P (regno))
3770    return (32 + current_frame_info.n_input_regs
3771	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
3772  else
3773    return regno;
3774}
3775
/* Initialize the four 8-byte words of the trampoline at ADDR: the fake
   descriptor (__ia64_trampoline, ADDR+16) followed by the target descriptor
   FNADDR and the STATIC_CHAIN, as detailed in the comments below.  */

void
3777ia64_initialize_trampoline (rtx addr, rtx fnaddr, rtx static_chain)
3778{
3779  rtx addr_reg, eight = GEN_INT (8);
3780
3781  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly.  */
3783  if (!TARGET_GNU_AS)
3784    {
3785      static bool declared_ia64_trampoline = false;
3786
3787      if (!declared_ia64_trampoline)
3788	{
3789	  declared_ia64_trampoline = true;
3790	  (*targetm.asm_out.globalize_label) (asm_out_file,
3791					      "__ia64_trampoline");
3792	}
3793    }
3794
3795  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
3796  addr = convert_memory_address (Pmode, addr);
3797  fnaddr = convert_memory_address (Pmode, fnaddr);
3798  static_chain = convert_memory_address (Pmode, static_chain);
3799
3800  /* Load up our iterator.  */
3801  addr_reg = gen_reg_rtx (Pmode);
3802  emit_move_insn (addr_reg, addr);
3803
3804  /* The first two words are the fake descriptor:
3805     __ia64_trampoline, ADDR+16.  */
3806  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3807		  gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"));
3808  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3809
3810  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg),
3811		  copy_to_reg (plus_constant (addr, 16)));
3812  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3813
3814  /* The third word is the target descriptor.  */
3815  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr);
3816  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
3817
3818  /* The fourth word is the static chain.  */
3819  emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain);
3820}
3821
3822/* Do any needed setup for a variadic function.  CUM has not been updated
3823   for the last named argument which has type TYPE and mode MODE.
3824
3825   We generate the actual spill instructions during prologue generation.  */
3826
3827static void
3828ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3829			     tree type, int * pretend_size,
3830			     int second_time ATTRIBUTE_UNUSED)
3831{
3832  CUMULATIVE_ARGS next_cum = *cum;
3833
3834  /* Skip the current argument.  */
3835  ia64_function_arg_advance (&next_cum, mode, type, 1);
3836
3837  if (next_cum.words < MAX_ARGUMENT_SLOTS)
3838    {
3839      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
3840      *pretend_size = n * UNITS_PER_WORD;
3841      cfun->machine->n_varargs = n;
3842    }
3843}
3844
/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.  */
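
/* For example, struct { float x, y, z; } is an SFmode HFA, whereas
   struct { float x; double y; } mixes element modes and is not
   (examples illustrative of the rules above).  */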
3856
3857static enum machine_mode
3858hfa_element_mode (tree type, bool nested)
3859{
3860  enum machine_mode element_mode = VOIDmode;
3861  enum machine_mode mode;
3862  enum tree_code code = TREE_CODE (type);
3863  int know_element_mode = 0;
3864  tree t;
3865
3866  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
3867    return VOIDmode;
3868
3869  switch (code)
3870    {
3871    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
3872    case BOOLEAN_TYPE:	case POINTER_TYPE:
3873    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
3874    case LANG_TYPE:		case FUNCTION_TYPE:
3875      return VOIDmode;
3876
3877      /* Fortran complex types are supposed to be HFAs, so we need to handle
3878	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
3879	 types though.  */
3880    case COMPLEX_TYPE:
3881      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
3882	  && TYPE_MODE (type) != TCmode)
3883	return GET_MODE_INNER (TYPE_MODE (type));
3884      else
3885	return VOIDmode;
3886
3887    case REAL_TYPE:
3888      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
3889	 mode if this is contained within an aggregate.  */
3890      if (nested && TYPE_MODE (type) != TFmode)
3891	return TYPE_MODE (type);
3892      else
3893	return VOIDmode;
3894
3895    case ARRAY_TYPE:
3896      return hfa_element_mode (TREE_TYPE (type), 1);
3897
3898    case RECORD_TYPE:
3899    case UNION_TYPE:
3900    case QUAL_UNION_TYPE:
3901      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
3902	{
3903	  if (TREE_CODE (t) != FIELD_DECL)
3904	    continue;
3905
3906	  mode = hfa_element_mode (TREE_TYPE (t), 1);
3907	  if (know_element_mode)
3908	    {
3909	      if (mode != element_mode)
3910		return VOIDmode;
3911	    }
3912	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
3913	    return VOIDmode;
3914	  else
3915	    {
3916	      know_element_mode = 1;
3917	      element_mode = mode;
3918	    }
3919	}
3920      return element_mode;
3921
3922    default:
3923      /* If we reach here, we probably have some front-end specific type
3924	 that the backend doesn't know about.  This can happen via the
3925	 aggregate_value_p call in init_function_start.  All we can do is
3926	 ignore unknown tree types.  */
3927      return VOIDmode;
3928    }
3929
3930  return VOIDmode;
3931}
3932
3933/* Return the number of words required to hold a quantity of TYPE and MODE
3934   when passed as an argument.  */
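
/* For example, with 8-byte words a 12-byte BLKmode argument needs
   (12 + 8 - 1) / 8 = 2 argument slots (sizes illustrative).  */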
3935static int
3936ia64_function_arg_words (tree type, enum machine_mode mode)
3937{
3938  int words;
3939
3940  if (mode == BLKmode)
3941    words = int_size_in_bytes (type);
3942  else
3943    words = GET_MODE_SIZE (mode);
3944
3945  return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
3946}
3947
3948/* Return the number of registers that should be skipped so the current
3949   argument (described by TYPE and WORDS) will be properly aligned.
3950
3951   Integer and float arguments larger than 8 bytes start at the next
3952   even boundary.  Aggregates larger than 8 bytes start at the next
3953   even boundary if the aggregate has 16 byte alignment.  Note that
3954   in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
3955   but are still to be aligned in registers.
3956
3957   ??? The ABI does not specify how to handle aggregates with
3958   alignment from 9 to 15 bytes, or greater than 16.  We handle them
3959   all as if they had 16 byte alignment.  Such aggregates can occur
3960   only if gcc extensions are used.  */
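
/* For example, if CUM->words is odd and the next argument is a 16-byte
   aligned aggregate, one register slot is skipped so that the argument
   starts on an even slot (scenario illustrative).  */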
3961static int
3962ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words)
3963{
3964  if ((cum->words & 1) == 0)
3965    return 0;
3966
3967  if (type
3968      && TREE_CODE (type) != INTEGER_TYPE
3969      && TREE_CODE (type) != REAL_TYPE)
3970    return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
3971  else
3972    return words > 1;
3973}
3974
3975/* Return rtx for register where argument is passed, or zero if it is passed
3976   on the stack.  */
3977/* ??? 128-bit quad-precision floats are always passed in general
3978   registers.  */
3979
3980rtx
3981ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
3982		   int named, int incoming)
3983{
3984  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
3985  int words = ia64_function_arg_words (type, mode);
3986  int offset = ia64_function_arg_offset (cum, type, words);
3987  enum machine_mode hfa_mode = VOIDmode;
3988
3989  /* If all argument slots are used, then it must go on the stack.  */
3990  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
3991    return 0;
3992
3993  /* Check for and handle homogeneous FP aggregates.  */
3994  if (type)
3995    hfa_mode = hfa_element_mode (type, 0);
3996
3997  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
3998     and unprototyped hfas are passed specially.  */
3999  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4000    {
4001      rtx loc[16];
4002      int i = 0;
4003      int fp_regs = cum->fp_regs;
4004      int int_regs = cum->words + offset;
4005      int hfa_size = GET_MODE_SIZE (hfa_mode);
4006      int byte_size;
4007      int args_byte_size;
4008
4009      /* If prototyped, pass it in FR regs then GR regs.
4010	 If not prototyped, pass it in both FR and GR regs.
4011
4012	 If this is an SFmode aggregate, then it is possible to run out of
4013	 FR regs while GR regs are still left.  In that case, we pass the
4014	 remaining part in the GR regs.  */
4015
4016      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4017	 of the argument, the last FP register, or the last argument slot.  */
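      /* For instance, a prototyped HFA of three floats would be spread over
	 three FP argument registers at byte offsets 0, 4 and 8, leaving
	 nothing for the GR loop below (example illustrative).  */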
4018
4019      byte_size = ((mode == BLKmode)
4020		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4021      args_byte_size = int_regs * UNITS_PER_WORD;
4022      offset = 0;
4023      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4024	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
4025	{
4026	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4027				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
4028							      + fp_regs)),
4029				      GEN_INT (offset));
4030	  offset += hfa_size;
4031	  args_byte_size += hfa_size;
4032	  fp_regs++;
4033	}
4034
4035      /* If no prototype, then the whole thing must go in GR regs.  */
4036      if (! cum->prototype)
4037	offset = 0;
4038      /* If this is an SFmode aggregate, then we might have some left over
4039	 that needs to go in GR regs.  */
4040      else if (byte_size != offset)
4041	int_regs += offset / UNITS_PER_WORD;
4042
4043      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */
4044
4045      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
4046	{
4047	  enum machine_mode gr_mode = DImode;
4048	  unsigned int gr_size;
4049
4050	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
4051	     then this goes in a GR reg left adjusted/little endian, right
4052	     adjusted/big endian.  */
4053	  /* ??? Currently this is handled wrong, because 4-byte hunks are
4054	     always right adjusted/little endian.  */
4055	  if (offset & 0x4)
4056	    gr_mode = SImode;
4057	  /* If we have an even 4 byte hunk because the aggregate is a
4058	     multiple of 4 bytes in size, then this goes in a GR reg right
4059	     adjusted/little endian.  */
4060	  else if (byte_size - offset == 4)
4061	    gr_mode = SImode;
4062
4063	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4064				      gen_rtx_REG (gr_mode, (basereg
4065							     + int_regs)),
4066				      GEN_INT (offset));
4067
4068	  gr_size = GET_MODE_SIZE (gr_mode);
4069	  offset += gr_size;
4070	  if (gr_size == UNITS_PER_WORD
4071	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4072	    int_regs++;
4073	  else if (gr_size > UNITS_PER_WORD)
4074	    int_regs += gr_size / UNITS_PER_WORD;
4075	}
4076      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4077    }
4078
4079  /* Integral and aggregates go in general registers.  If we have run out of
4080     FR registers, then FP values must also go in general registers.  This can
     happen when we have an SFmode HFA.  */
4082  else if (mode == TFmode || mode == TCmode
4083	   || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4084    {
4085      int byte_size = ((mode == BLKmode)
4086                       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
4091	{
4092	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4093					  gen_rtx_REG (DImode,
4094						       (basereg + cum->words
4095							+ offset)),
4096					  const0_rtx);
4097	  return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4098	}
4099      else
4100	return gen_rtx_REG (mode, basereg + cum->words + offset);
4101
4102    }
4103
4104  /* If there is a prototype, then FP values go in a FR register when
4105     named, and in a GR register when unnamed.  */
4106  else if (cum->prototype)
4107    {
4108      if (named)
4109	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4110      /* In big-endian mode, an anonymous SFmode value must be represented
4111         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4112	 the value into the high half of the general register.  */
4113      else if (BYTES_BIG_ENDIAN && mode == SFmode)
4114	return gen_rtx_PARALLEL (mode,
4115		 gen_rtvec (1,
4116                   gen_rtx_EXPR_LIST (VOIDmode,
4117		     gen_rtx_REG (DImode, basereg + cum->words + offset),
4118				      const0_rtx)));
4119      else
4120	return gen_rtx_REG (mode, basereg + cum->words + offset);
4121    }
4122  /* If there is no prototype, then FP values go in both FR and GR
4123     registers.  */
4124  else
4125    {
4126      /* See comment above.  */
4127      enum machine_mode inner_mode =
4128	(BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4129
4130      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4131				      gen_rtx_REG (mode, (FR_ARG_FIRST
4132							  + cum->fp_regs)),
4133				      const0_rtx);
4134      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4135				      gen_rtx_REG (inner_mode,
4136						   (basereg + cum->words
4137						    + offset)),
4138				      const0_rtx);
4139
4140      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4141    }
4142}
4143
4144/* Return number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
4146   in memory.  */
4147
4148static int
4149ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4150			tree type, bool named ATTRIBUTE_UNUSED)
4151{
4152  int words = ia64_function_arg_words (type, mode);
4153  int offset = ia64_function_arg_offset (cum, type, words);
4154
4155  /* If all argument slots are used, then it must go on the stack.  */
4156  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
4157    return 0;
4158
4159  /* It doesn't matter whether the argument goes in FR or GR regs.  If
4160     it fits within the 8 argument slots, then it goes entirely in
4161     registers.  If it extends past the last argument slot, then the rest
4162     goes on the stack.  */
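
  /* For example, an argument occupying four slots that starts at slot six
     of the eight available slots has two slots (16 bytes) passed in
     registers and the remainder on the stack (numbers illustrative).  */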
4163
4164  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
4165    return 0;
4166
4167  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
4168}
4169
4170/* Update CUM to point after this argument.  This is patterned after
4171   ia64_function_arg.  */
4172
4173void
4174ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4175			   tree type, int named)
4176{
4177  int words = ia64_function_arg_words (type, mode);
4178  int offset = ia64_function_arg_offset (cum, type, words);
4179  enum machine_mode hfa_mode = VOIDmode;
4180
4181  /* If all arg slots are already full, then there is nothing to do.  */
4182  if (cum->words >= MAX_ARGUMENT_SLOTS)
4183    return;
4184
4185  cum->words += words + offset;
4186
4187  /* Check for and handle homogeneous FP aggregates.  */
4188  if (type)
4189    hfa_mode = hfa_element_mode (type, 0);
4190
4191  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
4192     and unprototyped hfas are passed specially.  */
4193  if (hfa_mode != VOIDmode && (! cum->prototype || named))
4194    {
4195      int fp_regs = cum->fp_regs;
4196      /* This is the original value of cum->words + offset.  */
4197      int int_regs = cum->words - words;
4198      int hfa_size = GET_MODE_SIZE (hfa_mode);
4199      int byte_size;
4200      int args_byte_size;
4201
4202      /* If prototyped, pass it in FR regs then GR regs.
4203	 If not prototyped, pass it in both FR and GR regs.
4204
4205	 If this is an SFmode aggregate, then it is possible to run out of
4206	 FR regs while GR regs are still left.  In that case, we pass the
4207	 remaining part in the GR regs.  */
4208
4209      /* Fill the FP regs.  We do this always.  We stop if we reach the end
4210	 of the argument, the last FP register, or the last argument slot.  */
4211
4212      byte_size = ((mode == BLKmode)
4213		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4214      args_byte_size = int_regs * UNITS_PER_WORD;
4215      offset = 0;
4216      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
4217	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
4218	{
4219	  offset += hfa_size;
4220	  args_byte_size += hfa_size;
4221	  fp_regs++;
4222	}
4223
4224      cum->fp_regs = fp_regs;
4225    }
4226
4227  /* Integral and aggregates go in general registers.  So do TFmode FP values.
4228     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have an SFmode HFA.  */
4230  else if (mode == TFmode || mode == TCmode
4231           || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4232    cum->int_regs = cum->words;
4233
4234  /* If there is a prototype, then FP values go in a FR register when
4235     named, and in a GR register when unnamed.  */
4236  else if (cum->prototype)
4237    {
4238      if (! named)
4239	cum->int_regs = cum->words;
4240      else
4241	/* ??? Complex types should not reach here.  */
4242	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4243    }
4244  /* If there is no prototype, then FP values go in both FR and GR
4245     registers.  */
4246  else
4247    {
4248      /* ??? Complex types should not reach here.  */
4249      cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
4250      cum->int_regs = cum->words;
4251    }
4252}
4253
4254/* Arguments with alignment larger than 8 bytes start at the next even
4255   boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
4256   even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
4257
4258int
4259ia64_function_arg_boundary (enum machine_mode mode, tree type)
4260{
4261
4262  if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
4263    return PARM_BOUNDARY * 2;
4264
4265  if (type)
4266    {
4267      if (TYPE_ALIGN (type) > PARM_BOUNDARY)
4268        return PARM_BOUNDARY * 2;
4269      else
4270        return PARM_BOUNDARY;
4271    }
4272
4273  if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
4274    return PARM_BOUNDARY * 2;
4275  else
4276    return PARM_BOUNDARY;
4277}
4278
4279/* True if it is OK to do sibling call optimization for the specified
4280   call expression EXP.  DECL will be the called function, or NULL if
4281   this is an indirect call.  */
4282static bool
4283ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4284{
4285  /* We can't perform a sibcall if the current function has the syscall_linkage
4286     attribute.  */
4287  if (lookup_attribute ("syscall_linkage",
4288			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
4289    return false;
4290
4291  /* We must always return with our current GP.  This means we can
4292     only sibcall to functions defined in the current module.  */
4293  return decl && (*targetm.binds_local_p) (decl);
4294}
4295
4296
4297/* Implement va_arg.  */
4298
4299static tree
4300ia64_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4301{
4302  /* Variable sized types are passed by reference.  */
4303  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
4304    {
4305      tree ptrtype = build_pointer_type (type);
4306      tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
4307      return build_va_arg_indirect_ref (addr);
4308    }
4309
4310  /* Aggregate arguments with alignment larger than 8 bytes start at
4311     the next even boundary.  Integer and floating point arguments
4312     do so if they are larger than 8 bytes, whether or not they are
4313     also aligned larger than 8 bytes.  */
4314  if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
4315      ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
4316    {
4317      tree t = build2 (PLUS_EXPR, TREE_TYPE (valist), valist,
4318		       build_int_cst (NULL_TREE, 2 * UNITS_PER_WORD - 1));
4319      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4320		  build_int_cst (NULL_TREE, -2 * UNITS_PER_WORD));
4321      t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
4322      gimplify_and_add (t, pre_p);
4323    }
4324
4325  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4326}
4327
/* Return 1 if the function return value is returned in memory.  Return 0
   if it is in a register.  */
4330
4331static bool
4332ia64_return_in_memory (tree valtype, tree fntype ATTRIBUTE_UNUSED)
4333{
4334  enum machine_mode mode;
4335  enum machine_mode hfa_mode;
4336  HOST_WIDE_INT byte_size;
4337
4338  mode = TYPE_MODE (valtype);
4339  byte_size = GET_MODE_SIZE (mode);
4340  if (mode == BLKmode)
4341    {
4342      byte_size = int_size_in_bytes (valtype);
4343      if (byte_size < 0)
4344	return true;
4345    }
4346
  /* HFAs with up to 8 elements are returned in the FP argument registers.  */
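
  /* For example, an HFA of eight doubles (64 bytes) is still returned in
     FP registers, whereas a non-HFA structure larger than UNITS_PER_WORD
     * MAX_INT_RETURN_SLOTS bytes goes to memory (example illustrative).  */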
4348
4349  hfa_mode = hfa_element_mode (valtype, 0);
4350  if (hfa_mode != VOIDmode)
4351    {
4352      int hfa_size = GET_MODE_SIZE (hfa_mode);
4353
4354      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
4355	return true;
4356      else
4357	return false;
4358    }
4359  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
4360    return true;
4361  else
4362    return false;
4363}
4364
4365/* Return rtx for register that holds the function return value.  */
4366
4367rtx
4368ia64_function_value (tree valtype, tree func ATTRIBUTE_UNUSED)
4369{
4370  enum machine_mode mode;
4371  enum machine_mode hfa_mode;
4372
4373  mode = TYPE_MODE (valtype);
4374  hfa_mode = hfa_element_mode (valtype, 0);
4375
4376  if (hfa_mode != VOIDmode)
4377    {
4378      rtx loc[8];
4379      int i;
4380      int hfa_size;
4381      int byte_size;
4382      int offset;
4383
4384      hfa_size = GET_MODE_SIZE (hfa_mode);
4385      byte_size = ((mode == BLKmode)
4386		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
4387      offset = 0;
4388      for (i = 0; offset < byte_size; i++)
4389	{
4390	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4391				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
4392				      GEN_INT (offset));
4393	  offset += hfa_size;
4394	}
4395      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4396    }
4397  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
4398    return gen_rtx_REG (mode, FR_ARG_FIRST);
4399  else
4400    {
4401      bool need_parallel = false;
4402
4403      /* In big-endian mode, we need to manage the layout of aggregates
4404	 in the registers so that we get the bits properly aligned in
4405	 the highpart of the registers.  */
4406      if (BYTES_BIG_ENDIAN
4407	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
4408	need_parallel = true;
4409
4410      /* Something like struct S { long double x; char a[0] } is not an
4411	 HFA structure, and therefore doesn't go in fp registers.  But
4412	 the middle-end will give it XFmode anyway, and XFmode values
4413	 don't normally fit in integer registers.  So we need to smuggle
4414	 the value inside a parallel.  */
4415      else if (mode == XFmode || mode == XCmode || mode == RFmode)
4416	need_parallel = true;
4417
4418      if (need_parallel)
4419	{
4420	  rtx loc[8];
4421	  int offset;
4422	  int bytesize;
4423	  int i;
4424
4425	  offset = 0;
4426	  bytesize = int_size_in_bytes (valtype);
4427	  /* An empty PARALLEL is invalid here, but the return value
4428	     doesn't matter for empty structs.  */
4429	  if (bytesize == 0)
4430	    return gen_rtx_REG (mode, GR_RET_FIRST);
4431	  for (i = 0; offset < bytesize; i++)
4432	    {
4433	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4434					  gen_rtx_REG (DImode,
4435						       GR_RET_FIRST + i),
4436					  GEN_INT (offset));
4437	      offset += UNITS_PER_WORD;
4438	    }
4439	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4440	}
4441
4442      return gen_rtx_REG (mode, GR_RET_FIRST);
4443    }
4444}
4445
4446/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
4447   We need to emit DTP-relative relocations.  */
4448
4449static void
4450ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
4451{
4452  gcc_assert (size == 4 || size == 8);
4453  if (size == 4)
4454    fputs ("\tdata4.ua\t@dtprel(", file);
4455  else
4456    fputs ("\tdata8.ua\t@dtprel(", file);
4457  output_addr_const (file, x);
4458  fputs (")", file);
4459}
4460
4461/* Print a memory address as an operand to reference that memory location.  */
4462
4463/* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
4464   also call this from ia64_print_operand for memory addresses.  */
4465
4466void
4467ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
4468			    rtx address ATTRIBUTE_UNUSED)
4469{
4470}
4471
4472/* Print an operand to an assembler instruction.
4473   C	Swap and print a comparison operator.
4474   D	Print an FP comparison operator.
4475   E    Print 32 - constant, for SImode shifts as extract.
4476   e    Print 64 - constant, for DImode rotates.
4477   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
4478        a floating point register emitted normally.
4479   I	Invert a predicate register by adding 1.
4480   J    Select the proper predicate register for a condition.
4481   j    Select the inverse predicate register for a condition.
4482   O	Append .acq for volatile load.
4483   P	Postincrement of a MEM.
4484   Q	Append .rel for volatile store.
4485   S	Shift amount for shladd instruction.
4486   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
4487	for Intel assembler.
4488   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
4489	for Intel assembler.
4490   X	A pair of floating point registers.
4491   r	Print register name, or constant 0 as r0.  HP compatibility for
4492	Linux kernel.
4493   v    Print vector constant value as an 8-byte integer value.  */
4494
4495void
4496ia64_print_operand (FILE * file, rtx x, int code)
4497{
4498  const char *str;
4499
4500  switch (code)
4501    {
4502    case 0:
4503      /* Handled below.  */
4504      break;
4505
4506    case 'C':
4507      {
4508	enum rtx_code c = swap_condition (GET_CODE (x));
4509	fputs (GET_RTX_NAME (c), file);
4510	return;
4511      }
4512
4513    case 'D':
4514      switch (GET_CODE (x))
4515	{
4516	case NE:
4517	  str = "neq";
4518	  break;
4519	case UNORDERED:
4520	  str = "unord";
4521	  break;
4522	case ORDERED:
4523	  str = "ord";
4524	  break;
4525	default:
4526	  str = GET_RTX_NAME (GET_CODE (x));
4527	  break;
4528	}
4529      fputs (str, file);
4530      return;
4531
4532    case 'E':
4533      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
4534      return;
4535
4536    case 'e':
4537      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
4538      return;
4539
4540    case 'F':
4541      if (x == CONST0_RTX (GET_MODE (x)))
4542	str = reg_names [FR_REG (0)];
4543      else if (x == CONST1_RTX (GET_MODE (x)))
4544	str = reg_names [FR_REG (1)];
4545      else
4546	{
4547	  gcc_assert (GET_CODE (x) == REG);
4548	  str = reg_names [REGNO (x)];
4549	}
4550      fputs (str, file);
4551      return;
4552
4553    case 'I':
4554      fputs (reg_names [REGNO (x) + 1], file);
4555      return;
4556
4557    case 'J':
4558    case 'j':
4559      {
4560	unsigned int regno = REGNO (XEXP (x, 0));
4561	if (GET_CODE (x) == EQ)
4562	  regno += 1;
4563	if (code == 'j')
4564	  regno ^= 1;
4565        fputs (reg_names [regno], file);
4566      }
4567      return;
4568
4569    case 'O':
4570      if (MEM_VOLATILE_P (x))
4571	fputs(".acq", file);
4572      return;
4573
4574    case 'P':
4575      {
4576	HOST_WIDE_INT value;
4577
4578	switch (GET_CODE (XEXP (x, 0)))
4579	  {
4580	  default:
4581	    return;
4582
4583	  case POST_MODIFY:
4584	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
4585	    if (GET_CODE (x) == CONST_INT)
4586	      value = INTVAL (x);
4587	    else
4588	      {
4589		gcc_assert (GET_CODE (x) == REG);
4590		fprintf (file, ", %s", reg_names[REGNO (x)]);
4591		return;
4592	      }
4593	    break;
4594
4595	  case POST_INC:
4596	    value = GET_MODE_SIZE (GET_MODE (x));
4597	    break;
4598
4599	  case POST_DEC:
4600	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
4601	    break;
4602	  }
4603
4604	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
4605	return;
4606      }
4607
4608    case 'Q':
4609      if (MEM_VOLATILE_P (x))
4610	fputs(".rel", file);
4611      return;
4612
4613    case 'S':
4614      fprintf (file, "%d", exact_log2 (INTVAL (x)));
4615      return;
4616
4617    case 'T':
4618      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4619	{
4620	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
4621	  return;
4622	}
4623      break;
4624
4625    case 'U':
4626      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
4627	{
4628	  const char *prefix = "0x";
4629	  if (INTVAL (x) & 0x80000000)
4630	    {
4631	      fprintf (file, "0xffffffff");
4632	      prefix = "";
4633	    }
4634	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
4635	  return;
4636	}
4637      break;
4638
4639    case 'X':
4640      {
4641	unsigned int regno = REGNO (x);
4642	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
4643      }
4644      return;
4645
4646    case 'r':
4647      /* If this operand is the constant zero, write it as register zero.
4648	 Any register, zero, or CONST_INT value is OK here.  */
4649      if (GET_CODE (x) == REG)
4650	fputs (reg_names[REGNO (x)], file);
4651      else if (x == CONST0_RTX (GET_MODE (x)))
4652	fputs ("r0", file);
4653      else if (GET_CODE (x) == CONST_INT)
4654	output_addr_const (file, x);
4655      else
4656	output_operand_lossage ("invalid %%r value");
4657      return;
4658
4659    case 'v':
4660      gcc_assert (GET_CODE (x) == CONST_VECTOR);
4661      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
4662      break;
4663
4664    case '+':
4665      {
4666	const char *which;
4667
4668	/* For conditional branches, returns or calls, substitute
4669	   sptk, dptk, dpnt, or spnt for %s.  */
4670	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
4671	if (x)
4672	  {
4673	    int pred_val = INTVAL (XEXP (x, 0));
4674
4675	    /* Guess top and bottom 10% statically predicted.  */
4676	    if (pred_val < REG_BR_PROB_BASE / 50
4677		&& br_prob_note_reliable_p (x))
4678	      which = ".spnt";
4679	    else if (pred_val < REG_BR_PROB_BASE / 2)
4680	      which = ".dpnt";
4681	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
4682		     || !br_prob_note_reliable_p (x))
4683	      which = ".dptk";
4684	    else
4685	      which = ".sptk";
4686	  }
4687	else if (GET_CODE (current_output_insn) == CALL_INSN)
4688	  which = ".sptk";
4689	else
4690	  which = ".dptk";
4691
4692	fputs (which, file);
4693	return;
4694      }
4695
4696    case ',':
4697      x = current_insn_predicate;
4698      if (x)
4699	{
4700	  unsigned int regno = REGNO (XEXP (x, 0));
4701	  if (GET_CODE (x) == EQ)
4702	    regno += 1;
4703          fprintf (file, "(%s) ", reg_names [regno]);
4704	}
4705      return;
4706
4707    default:
4708      output_operand_lossage ("ia64_print_operand: unknown code");
4709      return;
4710    }
4711
4712  switch (GET_CODE (x))
4713    {
4714      /* This happens for the spill/restore instructions.  */
4715    case POST_INC:
4716    case POST_DEC:
4717    case POST_MODIFY:
4718      x = XEXP (x, 0);
4719      /* ... fall through ...  */
4720
4721    case REG:
4722      fputs (reg_names [REGNO (x)], file);
4723      break;
4724
4725    case MEM:
4726      {
4727	rtx addr = XEXP (x, 0);
4728	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
4729	  addr = XEXP (addr, 0);
4730	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
4731	break;
4732      }
4733
4734    default:
4735      output_addr_const (file, x);
4736      break;
4737    }
4738
4739  return;
4740}
4741
4742/* Compute a (partial) cost for rtx X.  Return true if the complete
4743   cost has been computed, and false if subexpressions should be
4744   scanned.  In either case, *TOTAL contains the cost result.  */
4745/* ??? This is incomplete.  */
4746
4747static bool
4748ia64_rtx_costs (rtx x, int code, int outer_code, int *total)
4749{
4750  switch (code)
4751    {
4752    case CONST_INT:
4753      switch (outer_code)
4754        {
4755        case SET:
4756	  *total = CONST_OK_FOR_J (INTVAL (x)) ? 0 : COSTS_N_INSNS (1);
4757	  return true;
4758        case PLUS:
4759	  if (CONST_OK_FOR_I (INTVAL (x)))
4760	    *total = 0;
4761	  else if (CONST_OK_FOR_J (INTVAL (x)))
4762	    *total = 1;
4763	  else
4764	    *total = COSTS_N_INSNS (1);
4765	  return true;
4766        default:
4767	  if (CONST_OK_FOR_K (INTVAL (x)) || CONST_OK_FOR_L (INTVAL (x)))
4768	    *total = 0;
4769	  else
4770	    *total = COSTS_N_INSNS (1);
4771	  return true;
4772	}
4773
4774    case CONST_DOUBLE:
4775      *total = COSTS_N_INSNS (1);
4776      return true;
4777
4778    case CONST:
4779    case SYMBOL_REF:
4780    case LABEL_REF:
4781      *total = COSTS_N_INSNS (3);
4782      return true;
4783
4784    case MULT:
4785      /* For multiplies wider than HImode, we have to go to the FPU,
4786         which normally involves copies.  Plus there's the latency
4787         of the multiply itself, and the latency of the instructions to
4788         transfer integer regs to FP regs.  */
4789      /* ??? Check for FP mode.  */
4790      if (GET_MODE_SIZE (GET_MODE (x)) > 2)
4791        *total = COSTS_N_INSNS (10);
4792      else
4793	*total = COSTS_N_INSNS (2);
4794      return true;
4795
4796    case PLUS:
4797    case MINUS:
4798    case ASHIFT:
4799    case ASHIFTRT:
4800    case LSHIFTRT:
4801      *total = COSTS_N_INSNS (1);
4802      return true;
4803
4804    case DIV:
4805    case UDIV:
4806    case MOD:
4807    case UMOD:
4808      /* We make divide expensive, so that divide-by-constant will be
4809         optimized to a multiply.  */
4810      *total = COSTS_N_INSNS (60);
4811      return true;
4812
4813    default:
4814      return false;
4815    }
4816}
4817
4818/* Calculate the cost of moving data from a register in class FROM to
4819   one in class TO, using MODE.  */
4820
4821int
4822ia64_register_move_cost (enum machine_mode mode, enum reg_class from,
4823			 enum reg_class to)
4824{
4825  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
4826  if (to == ADDL_REGS)
4827    to = GR_REGS;
4828  if (from == ADDL_REGS)
4829    from = GR_REGS;
4830
4831  /* All costs are symmetric, so reduce cases by putting the
4832     lower number class as the destination.  */
4833  if (from < to)
4834    {
4835      enum reg_class tmp = to;
4836      to = from, from = tmp;
4837    }
4838
4839  /* Moving from FR<->GR in XFmode must be more expensive than 2,
4840     so that we get secondary memory reloads.  Between FR_REGS,
4841     we have to make this at least as expensive as MEMORY_MOVE_COST
4842     to avoid spectacularly poor register class preferencing.  */
4843  if (mode == XFmode || mode == RFmode)
4844    {
4845      if (to != GR_REGS || from != GR_REGS)
4846        return MEMORY_MOVE_COST (mode, to, 0);
4847      else
4848	return 3;
4849    }
4850
4851  switch (to)
4852    {
4853    case PR_REGS:
4854      /* Moving between PR registers takes two insns.  */
4855      if (from == PR_REGS)
4856	return 3;
4857      /* Moving between PR and anything but GR is impossible.  */
4858      if (from != GR_REGS)
4859	return MEMORY_MOVE_COST (mode, to, 0);
4860      break;
4861
4862    case BR_REGS:
4863      /* Moving between BR and anything but GR is impossible.  */
4864      if (from != GR_REGS && from != GR_AND_BR_REGS)
4865	return MEMORY_MOVE_COST (mode, to, 0);
4866      break;
4867
4868    case AR_I_REGS:
4869    case AR_M_REGS:
4870      /* Moving between AR and anything but GR is impossible.  */
4871      if (from != GR_REGS)
4872	return MEMORY_MOVE_COST (mode, to, 0);
4873      break;
4874
4875    case GR_REGS:
4876    case FR_REGS:
4877    case FP_REGS:
4878    case GR_AND_FR_REGS:
4879    case GR_AND_BR_REGS:
4880    case ALL_REGS:
4881      break;
4882
4883    default:
4884      gcc_unreachable ();
4885    }
4886
4887  return 2;
4888}
4889
4890/* Implement PREFERRED_RELOAD_CLASS.  Place additional restrictions on CLASS
4891   to use when copying X into that class.  */
4892
4893enum reg_class
4894ia64_preferred_reload_class (rtx x, enum reg_class class)
4895{
4896  switch (class)
4897    {
4898    case FR_REGS:
4899    case FP_REGS:
4900      /* Don't allow volatile mem reloads into floating point registers.
4901	 This is defined to force reload to choose the r/m case instead
4902	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
4903      if (MEM_P (x) && MEM_VOLATILE_P (x))
4904	return NO_REGS;
4905
4906      /* Force all unrecognized constants into the constant pool.  */
4907      if (CONSTANT_P (x))
4908	return NO_REGS;
4909      break;
4910
4911    case AR_M_REGS:
4912    case AR_I_REGS:
4913      if (!OBJECT_P (x))
4914	return NO_REGS;
4915      break;
4916
4917    default:
4918      break;
4919    }
4920
4921  return class;
4922}
4923
4924/* This function returns the register class required for a secondary
4925   register when copying between one of the registers in CLASS, and X,
4926   using MODE.  A return value of NO_REGS means that no secondary register
4927   is required.  */
4928
4929enum reg_class
4930ia64_secondary_reload_class (enum reg_class class,
4931			     enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
4932{
4933  int regno = -1;
4934
4935  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
4936    regno = true_regnum (x);
4937
4938  switch (class)
4939    {
4940    case BR_REGS:
4941    case AR_M_REGS:
4942    case AR_I_REGS:
4943      /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
4944	 interaction.  We end up with two pseudos with overlapping lifetimes
4945	 both of which are equiv to the same constant, and both which need
4946	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
4947	 changes depending on the path length, which means the qty_first_reg
4948	 check in make_regs_eqv can give different answers at different times.
4949	 At some point I'll probably need a reload_indi pattern to handle
4950	 this.
4951
4952	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
4953	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
4954	 non-general registers for good measure.  */
4955      if (regno >= 0 && ! GENERAL_REGNO_P (regno))
4956	return GR_REGS;
4957
4958      /* This is needed if a pseudo used as a call_operand gets spilled to a
4959	 stack slot.  */
4960      if (GET_CODE (x) == MEM)
4961	return GR_REGS;
4962      break;
4963
4964    case FR_REGS:
4965    case FP_REGS:
4966      /* Need to go through general registers to get to other class regs.  */
4967      if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
4968	return GR_REGS;
4969
4970      /* This can happen when a paradoxical subreg is an operand to the
4971	 muldi3 pattern.  */
4972      /* ??? This shouldn't be necessary after instruction scheduling is
4973	 enabled, because paradoxical subregs are not accepted by
4974	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
4975	 stop the paradoxical subreg stupidity in the *_operand functions
4976	 in recog.c.  */
4977      if (GET_CODE (x) == MEM
4978	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
4979	      || GET_MODE (x) == QImode))
4980	return GR_REGS;
4981
4982      /* This can happen because of the ior/and/etc patterns that accept FP
4983	 registers as operands.  If the third operand is a constant, then it
4984	 needs to be reloaded into a FP register.  */
4985      if (GET_CODE (x) == CONST_INT)
4986	return GR_REGS;
4987
4988      /* This can happen because of register elimination in a muldi3 insn.
4989	 E.g. `26107 * (unsigned long)&u'.  */
4990      if (GET_CODE (x) == PLUS)
4991	return GR_REGS;
4992      break;
4993
4994    case PR_REGS:
4995      /* ??? This happens if we cse/gcse a BImode value across a call,
4996	 and the function has a nonlocal goto.  This is because global
4997	 does not allocate call crossing pseudos to hard registers when
4998	 current_function_has_nonlocal_goto is true.  This is relatively
4999	 common for C++ programs that use exceptions.  To reproduce,
5000	 return NO_REGS and compile libstdc++.  */
5001      if (GET_CODE (x) == MEM)
5002	return GR_REGS;
5003
5004      /* This can happen when we take a BImode subreg of a DImode value,
5005	 and that DImode value winds up in some non-GR register.  */
5006      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
5007	return GR_REGS;
5008      break;
5009
5010    default:
5011      break;
5012    }
5013
5014  return NO_REGS;
5015}
5016
5017
5018/* Emit text to declare externally defined variables and functions, because
5019   the Intel assembler does not support undefined externals.  */
5020
5021void
5022ia64_asm_output_external (FILE *file, tree decl, const char *name)
5023{
5024  int save_referenced;
5025
5026  /* GNU as does not need anything here, but the HP linker does need
5027     something for external functions.  */
5028
5029  if (TARGET_GNU_AS
5030      && (!TARGET_HPUX_LD
5031	  || TREE_CODE (decl) != FUNCTION_DECL
5032	  || strstr (name, "__builtin_") == name))
5033    return;
5034
5035  /* ??? The Intel assembler creates a reference that needs to be satisfied by
5036     the linker when we do this, so we need to be careful not to do this for
5037     builtin functions which have no library equivalent.  Unfortunately, we
5038     can't tell here whether or not a function will actually be called by
5039     expand_expr, so we pull in library functions even if we may not need
5040     them later.  */
5041  if (! strcmp (name, "__builtin_next_arg")
5042      || ! strcmp (name, "alloca")
5043      || ! strcmp (name, "__builtin_constant_p")
5044      || ! strcmp (name, "__builtin_args_info"))
5045    return;
5046
5047  if (TARGET_HPUX_LD)
5048    ia64_hpux_add_extern_decl (decl);
5049  else
5050    {
5051      /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and
5052         restore it.  */
5053      save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl));
5054      if (TREE_CODE (decl) == FUNCTION_DECL)
5055        ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
5056      (*targetm.asm_out.globalize_label) (file, name);
5057      TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced;
5058    }
5059}
5060
5061/* Parse the -mfixed-range= option string.  */
5062
5063static void
5064fix_range (const char *const_str)
5065{
5066  int i, first, last;
5067  char *str, *dash, *comma;
5068
5069  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5070     REG2 are either register names or register numbers.  The effect
5071     of this option is to mark the registers in the range from REG1 to
5072     REG2 as ``fixed'' so they won't be used by the compiler.  This is
5073     used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
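  /* For example, "-mfixed-range=f12-f15,f32-f127" would mark both of those
     ranges as fixed.  */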
5074
5075  i = strlen (const_str);
5076  str = (char *) alloca (i + 1);
5077  memcpy (str, const_str, i + 1);
5078
5079  while (1)
5080    {
5081      dash = strchr (str, '-');
5082      if (!dash)
5083	{
5084	  warning (0, "value of -mfixed-range must have form REG1-REG2");
5085	  return;
5086	}
5087      *dash = '\0';
5088
5089      comma = strchr (dash + 1, ',');
5090      if (comma)
5091	*comma = '\0';
5092
5093      first = decode_reg_name (str);
5094      if (first < 0)
5095	{
5096	  warning (0, "unknown register name: %s", str);
5097	  return;
5098	}
5099
5100      last = decode_reg_name (dash + 1);
5101      if (last < 0)
5102	{
5103	  warning (0, "unknown register name: %s", dash + 1);
5104	  return;
5105	}
5106
5107      *dash = '-';
5108
5109      if (first > last)
5110	{
5111	  warning (0, "%s-%s is an empty range", str, dash + 1);
5112	  return;
5113	}
5114
5115      for (i = first; i <= last; ++i)
5116	fixed_regs[i] = call_used_regs[i] = 1;
5117
5118      if (!comma)
5119	break;
5120
5121      *comma = ',';
5122      str = comma + 1;
5123    }
5124}
5125
5126/* Implement TARGET_HANDLE_OPTION.  */
5127
5128static bool
5129ia64_handle_option (size_t code, const char *arg, int value)
5130{
5131  switch (code)
5132    {
5133    case OPT_mfixed_range_:
5134      fix_range (arg);
5135      return true;
5136
5137    case OPT_mtls_size_:
5138      if (value != 14 && value != 22 && value != 64)
5139	error ("bad value %<%s%> for -mtls-size= switch", arg);
5140      return true;
5141
5142    case OPT_mtune_:
5143      {
5144	static struct pta
5145	  {
5146	    const char *name;		/* processor name or nickname.  */
5147	    enum processor_type processor;
5148	  }
5149	const processor_alias_table[] =
5150	  {
5151	    {"itanium", PROCESSOR_ITANIUM},
5152	    {"itanium1", PROCESSOR_ITANIUM},
5153	    {"merced", PROCESSOR_ITANIUM},
5154	    {"itanium2", PROCESSOR_ITANIUM2},
5155	    {"mckinley", PROCESSOR_ITANIUM2},
5156	  };
5157	int const pta_size = ARRAY_SIZE (processor_alias_table);
5158	int i;
5159
5160	for (i = 0; i < pta_size; i++)
5161	  if (!strcmp (arg, processor_alias_table[i].name))
5162	    {
5163	      ia64_tune = processor_alias_table[i].processor;
5164	      break;
5165	    }
5166	if (i == pta_size)
5167	  error ("bad value %<%s%> for -mtune= switch", arg);
5168	return true;
5169      }
5170
5171    default:
5172      return true;
5173    }
5174}
5175
5176/* Implement OVERRIDE_OPTIONS.  */
5177
5178void
5179ia64_override_options (void)
5180{
5181  if (TARGET_AUTO_PIC)
5182    target_flags |= MASK_CONST_GP;
5183
5184  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
5185    {
5186      warning (0, "not yet implemented: latency-optimized inline square root");
5187      TARGET_INLINE_SQRT = INL_MAX_THR;
5188    }
5189
5190  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
5191  flag_schedule_insns_after_reload = 0;
5192
5193  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;
5194
5195  init_machine_status = ia64_init_machine_status;
5196}
5197
5198static struct machine_function *
5199ia64_init_machine_status (void)
5200{
5201  return ggc_alloc_cleared (sizeof (struct machine_function));
5202}
5203
5204static enum attr_itanium_class ia64_safe_itanium_class (rtx);
5205static enum attr_type ia64_safe_type (rtx);
5206
5207static enum attr_itanium_class
5208ia64_safe_itanium_class (rtx insn)
5209{
5210  if (recog_memoized (insn) >= 0)
5211    return get_attr_itanium_class (insn);
5212  else
5213    return ITANIUM_CLASS_UNKNOWN;
5214}
5215
5216static enum attr_type
5217ia64_safe_type (rtx insn)
5218{
5219  if (recog_memoized (insn) >= 0)
5220    return get_attr_type (insn);
5221  else
5222    return TYPE_UNKNOWN;
5223}
5224
5225/* The following collection of routines emit instruction group stop bits as
5226   necessary to avoid dependencies.  */
5227
5228/* Need to track some additional registers as far as serialization is
5229   concerned so we can properly handle br.call and br.ret.  We could
5230   make these registers visible to gcc, but since these registers are
5231   never explicitly used in gcc generated code, it seems wasteful to
5232   do so (plus it would make the call and return patterns needlessly
5233   complex).  */
5234#define REG_RP		(BR_REG (0))
5235#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
5236/* This is used for volatile asms which may require a stop bit immediately
5237   before and after them.  */
5238#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
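/* Each of the 64 ar.unat bits is tracked as a separate register so that gr
   spills and restores to different stack slots are not treated as writes to
   the same register (see the UNSPEC_GR_SPILL/UNSPEC_GR_RESTORE handling in
   rtx_needs_barrier).  */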
5239#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
5240#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
5241
5242/* For each register, we keep track of how it has been written in the
5243   current instruction group.
5244
5245   If a register is written unconditionally (no qualifying predicate),
5246   WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
5247
5248   If a register is written if its qualifying predicate P is true, we
5249   set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
5250   may be written again by the complement of P (P^1) and when this happens,
5251   WRITE_COUNT gets set to 2.
5252
5253   The result of this is that whenever an insn attempts to write a register
5254   whose WRITE_COUNT is two, we need to issue an insn group barrier first.
5255
5256   If a predicate register is written by a floating-point insn, we set
5257   WRITTEN_BY_FP to true.
5258
5259   If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
5260   to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
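/* A rough example: within one group, (p6) mov r8 = 1 followed by
   (p7) mov r8 = 2 needs no stop bit, since the code treats p7 as the
   complement of p6; WRITE_COUNT ends up at 2, so any further write to r8
   in the same group would require a stop bit.  */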
5261
5262struct reg_write_state
5263{
5264  unsigned int write_count : 2;
5265  unsigned int first_pred : 16;
5266  unsigned int written_by_fp : 1;
5267  unsigned int written_by_and : 1;
5268  unsigned int written_by_or : 1;
5269};
5270
5271/* Cumulative info for the current instruction group.  */
5272struct reg_write_state rws_sum[NUM_REGS];
5273/* Info for the current instruction.  This gets copied to rws_sum after a
5274   stop bit is emitted.  */
5275struct reg_write_state rws_insn[NUM_REGS];
5276
5277/* Indicates whether this is the first instruction after a stop bit,
5278   in which case we don't need another stop bit.  Without this,
5279   ia64_variable_issue will die when scheduling an alloc.  */
5280static int first_instruction;
5281
5282/* Misc flags needed to compute RAW/WAW dependencies while we are traversing
5283   RTL for one instruction.  */
5284struct reg_flags
5285{
5286  unsigned int is_write : 1;	/* Is register being written?  */
5287  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
5288  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
5289  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
5290  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
5291  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
5292};
5293
5294static void rws_update (struct reg_write_state *, int, struct reg_flags, int);
5295static int rws_access_regno (int, struct reg_flags, int);
5296static int rws_access_reg (rtx, struct reg_flags, int);
5297static void update_set_flags (rtx, struct reg_flags *);
5298static int set_src_needs_barrier (rtx, struct reg_flags, int);
5299static int rtx_needs_barrier (rtx, struct reg_flags, int);
5300static void init_insn_group_barriers (void);
5301static int group_barrier_needed (rtx);
5302static int safe_group_barrier_needed (rtx);
5303
5304/* Update *RWS for REGNO, which is being written by the current instruction,
5305   with predicate PRED, and associated register flags in FLAGS.  */
5306
5307static void
5308rws_update (struct reg_write_state *rws, int regno, struct reg_flags flags, int pred)
5309{
5310  if (pred)
5311    rws[regno].write_count++;
5312  else
5313    rws[regno].write_count = 2;
5314  rws[regno].written_by_fp |= flags.is_fp;
5315  /* ??? Not tracking and/or across differing predicates.  */
5316  rws[regno].written_by_and = flags.is_and;
5317  rws[regno].written_by_or = flags.is_or;
5318  rws[regno].first_pred = pred;
5319}
5320
5321/* Handle an access to register REGNO of type FLAGS using predicate register
5322   PRED.  Update rws_insn and rws_sum arrays.  Return 1 if this access creates
5323   a dependency with an earlier instruction in the same group.  */
5324
5325static int
5326rws_access_regno (int regno, struct reg_flags flags, int pred)
5327{
5328  int need_barrier = 0;
5329
5330  gcc_assert (regno < NUM_REGS);
5331
5332  if (! PR_REGNO_P (regno))
5333    flags.is_and = flags.is_or = 0;
5334
5335  if (flags.is_write)
5336    {
5337      int write_count;
5338
5339      /* One insn writes same reg multiple times?  */
5340      gcc_assert (!rws_insn[regno].write_count);
5341
5342      /* Update info for current instruction.  */
5343      rws_update (rws_insn, regno, flags, pred);
5344      write_count = rws_sum[regno].write_count;
5345
5346      switch (write_count)
5347	{
5348	case 0:
5349	  /* The register has not been written yet.  */
5350	  rws_update (rws_sum, regno, flags, pred);
5351	  break;
5352
5353	case 1:
5354	  /* The register has been written via a predicate.  If this is
5355	     not a complementary predicate, then we need a barrier.  */
5356	  /* ??? This assumes that P and P+1 are always complementary
5357	     predicates for P even.  */
5358	  if (flags.is_and && rws_sum[regno].written_by_and)
5359	    ;
5360	  else if (flags.is_or && rws_sum[regno].written_by_or)
5361	    ;
5362	  else if ((rws_sum[regno].first_pred ^ 1) != pred)
5363	    need_barrier = 1;
5364	  rws_update (rws_sum, regno, flags, pred);
5365	  break;
5366
5367	case 2:
5368	  /* The register has been unconditionally written already.  We
5369	     need a barrier.  */
5370	  if (flags.is_and && rws_sum[regno].written_by_and)
5371	    ;
5372	  else if (flags.is_or && rws_sum[regno].written_by_or)
5373	    ;
5374	  else
5375	    need_barrier = 1;
5376	  rws_sum[regno].written_by_and = flags.is_and;
5377	  rws_sum[regno].written_by_or = flags.is_or;
5378	  break;
5379
5380	default:
5381	  gcc_unreachable ();
5382	}
5383    }
5384  else
5385    {
5386      if (flags.is_branch)
5387	{
5388	  /* Branches have several RAW exceptions that allow us to avoid
5389	     barriers.  */
5390
5391	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
5392	    /* RAW dependencies on branch regs are permissible as long
5393	       as the writer is a non-branch instruction.  Since we
5394	       never generate code that uses a branch register written
5395	       by a branch instruction, handling this case is
5396	       easy.  */
5397	    return 0;
5398
5399	  if (REGNO_REG_CLASS (regno) == PR_REGS
5400	      && ! rws_sum[regno].written_by_fp)
5401	    /* The predicates of a branch are available within the
5402	       same insn group as long as the predicate was written by
5403	       something other than a floating-point instruction.  */
5404	    return 0;
5405	}
5406
5407      if (flags.is_and && rws_sum[regno].written_by_and)
5408	return 0;
5409      if (flags.is_or && rws_sum[regno].written_by_or)
5410	return 0;
5411
5412      switch (rws_sum[regno].write_count)
5413	{
5414	case 0:
5415	  /* The register has not been written yet.  */
5416	  break;
5417
5418	case 1:
5419	  /* The register has been written via a predicate.  If this is
5420	     not a complementary predicate, then we need a barrier.  */
5421	  /* ??? This assumes that P and P+1 are always complementary
5422	     predicates for P even.  */
5423	  if ((rws_sum[regno].first_pred ^ 1) != pred)
5424	    need_barrier = 1;
5425	  break;
5426
5427	case 2:
5428	  /* The register has been unconditionally written already.  We
5429	     need a barrier.  */
5430	  need_barrier = 1;
5431	  break;
5432
5433	default:
5434	  gcc_unreachable ();
5435	}
5436    }
5437
5438  return need_barrier;
5439}
5440
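/* Like rws_access_regno, but apply the access to every hard register that
   REG occupies in its mode.  */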
5441static int
5442rws_access_reg (rtx reg, struct reg_flags flags, int pred)
5443{
5444  int regno = REGNO (reg);
5445  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
5446
5447  if (n == 1)
5448    return rws_access_regno (regno, flags, pred);
5449  else
5450    {
5451      int need_barrier = 0;
5452      while (--n >= 0)
5453	need_barrier |= rws_access_regno (regno + n, flags, pred);
5454      return need_barrier;
5455    }
5456}
5457
5458/* Examine X, which is a SET rtx, and update the flags stored in *PFLAGS
5459   to reflect what X does.  */
5460
5461static void
5462update_set_flags (rtx x, struct reg_flags *pflags)
5463{
5464  rtx src = SET_SRC (x);
5465
5466  switch (GET_CODE (src))
5467    {
5468    case CALL:
5469      return;
5470
5471    case IF_THEN_ELSE:
5472      /* There are four cases here:
5473	 (1) The destination is (pc), in which case this is a branch,
5474	 nothing here applies.
5475	 (2) The destination is ar.lc, in which case this is a
5476	 doloop_end_internal,
5477	 (3) The destination is an fp register, in which case this is
5478	 an fselect instruction.
5479	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
5480	 this is a check load.
5481	 In all cases, nothing we do in this function applies.  */
5482      return;
5483
5484    default:
5485      if (COMPARISON_P (src)
5486	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
5487	/* Set pflags->is_fp to 1 so that we know we're dealing
5488	   with a floating point comparison when processing the
5489	   destination of the SET.  */
5490	pflags->is_fp = 1;
5491
5492      /* Discover if this is a parallel comparison.  We only handle
5493	 and.orcm and or.andcm at present, since we must retain a
5494	 strict inverse on the predicate pair.  */
5495      else if (GET_CODE (src) == AND)
5496	pflags->is_and = 1;
5497      else if (GET_CODE (src) == IOR)
5498	pflags->is_or = 1;
5499
5500      break;
5501    }
5502}
5503
5504/* Subroutine of rtx_needs_barrier; this function determines whether the
5505   source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
5506   are as in rtx_needs_barrier.  */
5508
5509static int
5510set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
5511{
5512  int need_barrier = 0;
5513  rtx dst;
5514  rtx src = SET_SRC (x);
5515
5516  if (GET_CODE (src) == CALL)
5517    /* We don't need to worry about the result registers that
5518       get written by subroutine call.  */
5519    return rtx_needs_barrier (src, flags, pred);
5520  else if (SET_DEST (x) == pc_rtx)
5521    {
5522      /* X is a conditional branch.  */
5523      /* ??? This seems redundant, as the caller sets this bit for
5524	 all JUMP_INSNs.  */
5525      if (!ia64_spec_check_src_p (src))
5526	flags.is_branch = 1;
5527      return rtx_needs_barrier (src, flags, pred);
5528    }
5529
5530  if (ia64_spec_check_src_p (src))
5531    /* Avoid checking one register twice (in condition
5532       and in 'then' section) for ldc pattern.  */
5533    {
5534      gcc_assert (REG_P (XEXP (src, 2)));
5535      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
5536
5537      /* We process MEM below.  */
5538      src = XEXP (src, 1);
5539    }
5540
5541  need_barrier |= rtx_needs_barrier (src, flags, pred);
5542
5543  dst = SET_DEST (x);
5544  if (GET_CODE (dst) == ZERO_EXTRACT)
5545    {
5546      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
5547      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
5548    }
5549  return need_barrier;
5550}
5551
5552/* Handle an access to rtx X of type FLAGS using predicate register
5553   PRED.  Return 1 if this access creates a dependency with an earlier
5554   instruction in the same group.  */
5555
5556static int
5557rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
5558{
5559  int i, j;
5560  int is_complemented = 0;
5561  int need_barrier = 0;
5562  const char *format_ptr;
5563  struct reg_flags new_flags;
5564  rtx cond;
5565
5566  if (! x)
5567    return 0;
5568
5569  new_flags = flags;
5570
5571  switch (GET_CODE (x))
5572    {
5573    case SET:
5574      update_set_flags (x, &new_flags);
5575      need_barrier = set_src_needs_barrier (x, new_flags, pred);
5576      if (GET_CODE (SET_SRC (x)) != CALL)
5577	{
5578	  new_flags.is_write = 1;
5579	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
5580	}
5581      break;
5582
5583    case CALL:
5584      new_flags.is_write = 0;
5585      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5586
5587      /* Avoid multiple register writes, in case this is a pattern with
5588	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
5589      if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count)
5590	{
5591	  new_flags.is_write = 1;
5592	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
5593	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
5594	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5595	}
5596      break;
5597
5598    case COND_EXEC:
5599      /* X is a predicated instruction.  */
5600
5601      cond = COND_EXEC_TEST (x);
5602      gcc_assert (!pred);
5603      need_barrier = rtx_needs_barrier (cond, flags, 0);
5604
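      /* An EQ test means the body executes when the predicate register is
	 false, i.e. under the complementary predicate, which by the P/P+1
	 convention used above is PRED + 1.  */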
5605      if (GET_CODE (cond) == EQ)
5606	is_complemented = 1;
5607      cond = XEXP (cond, 0);
5608      gcc_assert (GET_CODE (cond) == REG
5609		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
5610      pred = REGNO (cond);
5611      if (is_complemented)
5612	++pred;
5613
5614      need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
5615      return need_barrier;
5616
5617    case CLOBBER:
5618    case USE:
5619      /* Clobber & use are for earlier compiler-phases only.  */
5620      break;
5621
5622    case ASM_OPERANDS:
5623    case ASM_INPUT:
5624      /* We always emit stop bits for traditional asms.  We emit stop bits
5625	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
5626      if (GET_CODE (x) != ASM_OPERANDS
5627	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
5628	{
5629	  /* Avoid writing the register multiple times if we have multiple
5630	     asm outputs.  This avoids a failure in rws_access_reg.  */
5631	  if (! rws_insn[REG_VOLATILE].write_count)
5632	    {
5633	      new_flags.is_write = 1;
5634	      rws_access_regno (REG_VOLATILE, new_flags, pred);
5635	    }
5636	  return 1;
5637	}
5638
5639      /* For all ASM_OPERANDS, we must traverse the vector of input operands.
5640	 We cannot just fall through here since then we would be confused
5641	 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate
5642	 traditional asms, unlike their normal usage.  */
5643
5644      for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
5645	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
5646	  need_barrier = 1;
5647      break;
5648
5649    case PARALLEL:
5650      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5651	{
5652	  rtx pat = XVECEXP (x, 0, i);
5653	  switch (GET_CODE (pat))
5654	    {
5655	    case SET:
5656	      update_set_flags (pat, &new_flags);
5657	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
5658	      break;
5659
5660	    case USE:
5661	    case CALL:
5662	    case ASM_OPERANDS:
5663	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
5664	      break;
5665
5666	    case CLOBBER:
5667	    case RETURN:
5668	      break;
5669
5670	    default:
5671	      gcc_unreachable ();
5672	    }
5673	}
5674      for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
5675	{
5676	  rtx pat = XVECEXP (x, 0, i);
5677	  if (GET_CODE (pat) == SET)
5678	    {
5679	      if (GET_CODE (SET_SRC (pat)) != CALL)
5680		{
5681		  new_flags.is_write = 1;
5682		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
5683						     pred);
5684		}
5685	    }
5686	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
5687	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
5688	}
5689      break;
5690
5691    case SUBREG:
5692      need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
5693      break;
5694    case REG:
5695      if (REGNO (x) == AR_UNAT_REGNUM)
5696	{
5697	  for (i = 0; i < 64; ++i)
5698	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
5699	}
5700      else
5701	need_barrier = rws_access_reg (x, flags, pred);
5702      break;
5703
5704    case MEM:
5705      /* Find the regs used in memory address computation.  */
5706      new_flags.is_write = 0;
5707      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5708      break;
5709
5710    case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
5711    case SYMBOL_REF:  case LABEL_REF:     case CONST:
5712      break;
5713
5714      /* Operators with side-effects.  */
5715    case POST_INC:    case POST_DEC:
5716      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5717
5718      new_flags.is_write = 0;
5719      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5720      new_flags.is_write = 1;
5721      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5722      break;
5723
5724    case POST_MODIFY:
5725      gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
5726
5727      new_flags.is_write = 0;
5728      need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
5729      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5730      new_flags.is_write = 1;
5731      need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
5732      break;
5733
5734      /* Handle common unary and binary ops for efficiency.  */
5735    case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
5736    case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
5737    case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
5738    case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
5739    case NE:       case EQ:      case GE:      case GT:        case LE:
5740    case LT:       case GEU:     case GTU:     case LEU:       case LTU:
5741      need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
5742      need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
5743      break;
5744
5745    case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
5746    case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
5747    case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
5748    case SQRT:     case FFS:		case POPCOUNT:
5749      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5750      break;
5751
5752    case VEC_SELECT:
5753      /* VEC_SELECT's second argument is a PARALLEL with integers that
5754	 describe the elements selected.  On ia64, those integers are
5755	 always constants.  Avoid walking the PARALLEL so that we don't
5756	 get confused with "normal" parallels and then die.  */
5757      need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
5758      break;
5759
5760    case UNSPEC:
5761      switch (XINT (x, 1))
5762	{
5763	case UNSPEC_LTOFF_DTPMOD:
5764	case UNSPEC_LTOFF_DTPREL:
5765	case UNSPEC_DTPREL:
5766	case UNSPEC_LTOFF_TPREL:
5767	case UNSPEC_TPREL:
5768	case UNSPEC_PRED_REL_MUTEX:
5769	case UNSPEC_PIC_CALL:
5770        case UNSPEC_MF:
5771        case UNSPEC_FETCHADD_ACQ:
5772	case UNSPEC_BSP_VALUE:
5773	case UNSPEC_FLUSHRS:
5774	case UNSPEC_BUNDLE_SELECTOR:
5775          break;
5776
5777	case UNSPEC_GR_SPILL:
5778	case UNSPEC_GR_RESTORE:
5779	  {
5780	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
5781	    HOST_WIDE_INT bit = (offset >> 3) & 63;
5782
5783	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5784	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
5785	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
5786					      new_flags, pred);
5787	    break;
5788	  }
5789
5790	case UNSPEC_FR_SPILL:
5791	case UNSPEC_FR_RESTORE:
5792	case UNSPEC_GETF_EXP:
5793	case UNSPEC_SETF_EXP:
5794        case UNSPEC_ADDP4:
5795	case UNSPEC_FR_SQRT_RECIP_APPROX:
5796	case UNSPEC_LDA:
5797	case UNSPEC_LDS:
5798	case UNSPEC_LDSA:
5799	case UNSPEC_CHKACLR:
5800        case UNSPEC_CHKS:
5801	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5802	  break;
5803
5804	case UNSPEC_FR_RECIP_APPROX:
5805	case UNSPEC_SHRP:
5806	case UNSPEC_COPYSIGN:
5807	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
5808	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5809	  break;
5810
5811        case UNSPEC_CMPXCHG_ACQ:
5812	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
5813	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
5814	  break;
5815
5816	default:
5817	  gcc_unreachable ();
5818	}
5819      break;
5820
5821    case UNSPEC_VOLATILE:
5822      switch (XINT (x, 1))
5823	{
5824	case UNSPECV_ALLOC:
5825	  /* Alloc must always be the first instruction of a group.
5826	     We force this by always returning true.  */
5827	  /* ??? We might get better scheduling if we explicitly check for
5828	     input/local/output register dependencies, and modify the
5829	     scheduler so that alloc is always reordered to the start of
5830	     the current group.  We could then eliminate all of the
5831	     first_instruction code.  */
5832	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
5833
5834	  new_flags.is_write = 1;
5835	  rws_access_regno (REG_AR_CFM, new_flags, pred);
5836	  return 1;
5837
5838	case UNSPECV_SET_BSP:
5839	  need_barrier = 1;
5840          break;
5841
5842	case UNSPECV_BLOCKAGE:
5843	case UNSPECV_INSN_GROUP_BARRIER:
5844	case UNSPECV_BREAK:
5845	case UNSPECV_PSAC_ALL:
5846	case UNSPECV_PSAC_NORMAL:
5847	  return 0;
5848
5849	default:
5850	  gcc_unreachable ();
5851	}
5852      break;
5853
5854    case RETURN:
5855      new_flags.is_write = 0;
5856      need_barrier  = rws_access_regno (REG_RP, flags, pred);
5857      need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
5858
5859      new_flags.is_write = 1;
5860      need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
5861      need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
5862      break;
5863
5864    default:
5865      format_ptr = GET_RTX_FORMAT (GET_CODE (x));
5866      for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5867	switch (format_ptr[i])
5868	  {
5869	  case '0':	/* unused field */
5870	  case 'i':	/* integer */
5871	  case 'n':	/* note */
5872	  case 'w':	/* wide integer */
5873	  case 's':	/* pointer to string */
5874	  case 'S':	/* optional pointer to string */
5875	    break;
5876
5877	  case 'e':
5878	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
5879	      need_barrier = 1;
5880	    break;
5881
5882	  case 'E':
5883	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
5884	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
5885		need_barrier = 1;
5886	    break;
5887
5888	  default:
5889	    gcc_unreachable ();
5890	  }
5891      break;
5892    }
5893  return need_barrier;
5894}
5895
5896/* Clear out the state for group_barrier_needed at the start of a
5897   sequence of insns.  */
5898
5899static void
5900init_insn_group_barriers (void)
5901{
5902  memset (rws_sum, 0, sizeof (rws_sum));
5903  first_instruction = 1;
5904}
5905
5906/* Given the current state, determine whether a group barrier (a stop bit) is
5907   necessary before INSN.  Return nonzero if so.  This modifies the state to
5908   include the effects of INSN as a side-effect.  */
5909
5910static int
5911group_barrier_needed (rtx insn)
5912{
5913  rtx pat;
5914  int need_barrier = 0;
5915  struct reg_flags flags;
5916
5917  memset (&flags, 0, sizeof (flags));
5918  switch (GET_CODE (insn))
5919    {
5920    case NOTE:
5921      break;
5922
5923    case BARRIER:
5924      /* A barrier doesn't imply an instruction group boundary.  */
5925      break;
5926
5927    case CODE_LABEL:
5928      memset (rws_insn, 0, sizeof (rws_insn));
5929      return 1;
5930
5931    case CALL_INSN:
5932      flags.is_branch = 1;
5933      flags.is_sibcall = SIBLING_CALL_P (insn);
5934      memset (rws_insn, 0, sizeof (rws_insn));
5935
5936      /* Don't bundle a call following another call.  */
5937      if ((pat = prev_active_insn (insn))
5938	  && GET_CODE (pat) == CALL_INSN)
5939	{
5940	  need_barrier = 1;
5941	  break;
5942	}
5943
5944      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
5945      break;
5946
5947    case JUMP_INSN:
5948      if (!ia64_spec_check_p (insn))
5949	flags.is_branch = 1;
5950
5951      /* Don't bundle a jump following a call.  */
5952      if ((pat = prev_active_insn (insn))
5953	  && GET_CODE (pat) == CALL_INSN)
5954	{
5955	  need_barrier = 1;
5956	  break;
5957	}
5958      /* FALLTHRU */
5959
5960    case INSN:
5961      if (GET_CODE (PATTERN (insn)) == USE
5962	  || GET_CODE (PATTERN (insn)) == CLOBBER)
5963	/* Don't care about USE and CLOBBER "insns"---those are used to
5964	   indicate to the optimizer that it shouldn't get rid of
5965	   certain operations.  */
5966	break;
5967
5968      pat = PATTERN (insn);
5969
5970      /* Ug.  Hack hacks hacked elsewhere.  */
5971      switch (recog_memoized (insn))
5972	{
5973	  /* We play dependency tricks with the epilogue in order
5974	     to get proper schedules.  Undo this for dv analysis.  */
5975	case CODE_FOR_epilogue_deallocate_stack:
5976	case CODE_FOR_prologue_allocate_stack:
5977	  pat = XVECEXP (pat, 0, 0);
5978	  break;
5979
5980	  /* The pattern we use for br.cloop confuses the code above.
5981	     The second element of the vector is representative.  */
5982	case CODE_FOR_doloop_end_internal:
5983	  pat = XVECEXP (pat, 0, 1);
5984	  break;
5985
5986	  /* Doesn't generate code.  */
5987	case CODE_FOR_pred_rel_mutex:
5988	case CODE_FOR_prologue_use:
5989	  return 0;
5990
5991	default:
5992	  break;
5993	}
5994
5995      memset (rws_insn, 0, sizeof (rws_insn));
5996      need_barrier = rtx_needs_barrier (pat, flags, 0);
5997
5998      /* Check to see if the previous instruction was a volatile
5999	 asm.  */
6000      if (! need_barrier)
6001	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
6002      break;
6003
6004    default:
6005      gcc_unreachable ();
6006    }
6007
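  /* The first real insn after a stop bit never needs another stop bit of
     its own; see the comment above first_instruction.  */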
6008  if (first_instruction && INSN_P (insn)
6009      && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
6010      && GET_CODE (PATTERN (insn)) != USE
6011      && GET_CODE (PATTERN (insn)) != CLOBBER)
6012    {
6013      need_barrier = 0;
6014      first_instruction = 0;
6015    }
6016
6017  return need_barrier;
6018}
6019
6020/* Like group_barrier_needed, but do not clobber the current state.  */
6021
6022static int
6023safe_group_barrier_needed (rtx insn)
6024{
6025  struct reg_write_state rws_saved[NUM_REGS];
6026  int saved_first_instruction;
6027  int t;
6028
6029  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
6030  saved_first_instruction = first_instruction;
6031
6032  t = group_barrier_needed (insn);
6033
6034  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
6035  first_instruction = saved_first_instruction;
6036
6037  return t;
6038}
6039
6040/* Scan the current function and insert stop bits as necessary to
6041   eliminate dependencies.  This function assumes that a final
6042   instruction scheduling pass has been run which has already
6043   inserted most of the necessary stop bits.  This function only
6044   inserts new ones at basic block boundaries, since these are
6045   invisible to the scheduler.  */
6046
6047static void
6048emit_insn_group_barriers (FILE *dump)
6049{
6050  rtx insn;
6051  rtx last_label = 0;
6052  int insns_since_last_label = 0;
6053
6054  init_insn_group_barriers ();
6055
6056  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6057    {
6058      if (GET_CODE (insn) == CODE_LABEL)
6059	{
6060	  if (insns_since_last_label)
6061	    last_label = insn;
6062	  insns_since_last_label = 0;
6063	}
6064      else if (GET_CODE (insn) == NOTE
6065	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
6066	{
6067	  if (insns_since_last_label)
6068	    last_label = insn;
6069	  insns_since_last_label = 0;
6070	}
6071      else if (GET_CODE (insn) == INSN
6072	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
6073	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
6074	{
6075	  init_insn_group_barriers ();
6076	  last_label = 0;
6077	}
6078      else if (INSN_P (insn))
6079	{
6080	  insns_since_last_label = 1;
6081
6082	  if (group_barrier_needed (insn))
6083	    {
6084	      if (last_label)
6085		{
6086		  if (dump)
6087		    fprintf (dump, "Emitting stop before label %d\n",
6088			     INSN_UID (last_label));
6089		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
6090		  insn = last_label;
6091
6092		  init_insn_group_barriers ();
6093		  last_label = 0;
6094		}
6095	    }
6096	}
6097    }
6098}
6099
6100/* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
6101   This function has to emit all necessary group barriers.  */
6102
6103static void
6104emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
6105{
6106  rtx insn;
6107
6108  init_insn_group_barriers ();
6109
6110  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6111    {
6112      if (GET_CODE (insn) == BARRIER)
6113	{
6114	  rtx last = prev_active_insn (insn);
6115
6116	  if (! last)
6117	    continue;
6118	  if (GET_CODE (last) == JUMP_INSN
6119	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
6120	    last = prev_active_insn (last);
6121	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
6122	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
6123
6124	  init_insn_group_barriers ();
6125	}
6126      else if (INSN_P (insn))
6127	{
6128	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
6129	    init_insn_group_barriers ();
6130	  else if (group_barrier_needed (insn))
6131	    {
6132	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
6133	      init_insn_group_barriers ();
6134	      group_barrier_needed (insn);
6135	    }
6136	}
6137    }
6138}
6139
6140
6141
6142/* Instruction scheduling support.  */
6143
6144#define NR_BUNDLES 10
6145
6146/* A list of names of all available bundles.  */
6147
6148static const char *bundle_name [NR_BUNDLES] =
6149{
6150  ".mii",
6151  ".mmi",
6152  ".mfi",
6153  ".mmf",
6154#if NR_BUNDLES == 10
6155  ".bbb",
6156  ".mbb",
6157#endif
6158  ".mib",
6159  ".mmb",
6160  ".mfb",
6161  ".mlx"
6162};
6163
6164/* Nonzero if we should insert stop bits into the schedule.  */
6165
6166int ia64_final_schedule = 0;
6167
6168/* Codes of the corresponding queried units: */
6169
6170static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
6171static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;
6172
6173static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
6174static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;
6175
6176static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;
6177
6178/* The following variable value is an insn group barrier.  */
6179
6180static rtx dfa_stop_insn;
6181
6182/* The following variable value is the last issued insn.  */
6183
6184static rtx last_scheduled_insn;
6185
6186/* The following variable value is size of the DFA state.  */
6187
6188static size_t dfa_state_size;
6189
6190/* The following variable value is pointer to a DFA state used as
6191   temporary variable.  */
6192
6193static state_t temp_dfa_state = NULL;
6194
6195/* The following variable value is DFA state after issuing the last
6196   insn.  */
6197
6198static state_t prev_cycle_state = NULL;
6199
6200/* The following array element values are TRUE if the corresponding
6201   insn requires a stop bit to be added before it.  */
6202
6203static char *stops_p = NULL;
6204
6205/* The following array element values are ZERO for non-speculative
6206   instructions and hold corresponding speculation check number for
6207   speculative instructions.  */
6208static int *spec_check_no = NULL;
6209
6210/* Size of spec_check_no array.  */
6211static int max_uid = 0;
6212
6213/* The following variable is used to set up the array mentioned above.  */
6214
6215static int stop_before_p = 0;
6216
6217/* The following variable value is length of the arrays `clocks' and
6218   `add_cycles'. */
6219
6220static int clocks_length;
6221
6222/* The following array element values are cycles on which the
6223   corresponding insn will be issued.  The array is used only for
6224   Itanium1.  */
6225
6226static int *clocks;
6227
6228/* The following array element values are the numbers of cycles that should
6229   be added to improve insn scheduling for MM_insns for Itanium1.  */
6230
6231static int *add_cycles;
6232
6233/* The following variable value is number of data speculations in progress.  */
6234static int pending_data_specs = 0;
6235
6236static rtx ia64_single_set (rtx);
6237static void ia64_emit_insn_before (rtx, rtx);
6238
6239/* Map a bundle number to its pseudo-op.  */
6240
6241const char *
6242get_bundle_name (int b)
6243{
6244  return bundle_name[b];
6245}
6246
6247
6248/* Return the maximum number of instructions a cpu can issue.  */
6249
6250static int
6251ia64_issue_rate (void)
6252{
6253  return 6;
6254}
6255
6256/* Helper function - like single_set, but look inside COND_EXEC.  */
6257
6258static rtx
6259ia64_single_set (rtx insn)
6260{
6261  rtx x = PATTERN (insn), ret;
6262  if (GET_CODE (x) == COND_EXEC)
6263    x = COND_EXEC_CODE (x);
6264  if (GET_CODE (x) == SET)
6265    return x;
6266
6267  /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here.
6268     Although they are not classical single sets, the second set is there just
6269     to protect the insn from being moved past FP-relative stack accesses.  */
6270  switch (recog_memoized (insn))
6271    {
6272    case CODE_FOR_prologue_allocate_stack:
6273    case CODE_FOR_epilogue_deallocate_stack:
6274      ret = XVECEXP (x, 0, 0);
6275      break;
6276
6277    default:
6278      ret = single_set_2 (insn, x);
6279      break;
6280    }
6281
6282  return ret;
6283}
6284
6285/* Adjust the cost of a scheduling dependency.
6286   Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
6287   COST is the current cost.  */
6288
6289static int
6290ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost)
6291{
6292  enum reg_note dep_type = (enum reg_note) dep_type1;
6293  enum attr_itanium_class dep_class;
6294  enum attr_itanium_class insn_class;
6295
6296  if (dep_type != REG_DEP_OUTPUT)
6297    return cost;
6298
6299  insn_class = ia64_safe_itanium_class (insn);
6300  dep_class = ia64_safe_itanium_class (dep_insn);
6301  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
6302      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
6303    return 0;
6304
6305  return cost;
6306}
6307
6308/* Like emit_insn_before, but skip cycle_display notes.
6309   ??? When cycle display notes are implemented, update this.  */
6310
6311static void
6312ia64_emit_insn_before (rtx insn, rtx before)
6313{
6314  emit_insn_before (insn, before);
6315}
6316
6317/* The following function marks insns that produce addresses for load
6318   and store insns.  Such insns will be placed into M slots because this
6319   decreases the latency for Itanium1 (see function
6320   `ia64_produce_address_p' and the DFA descriptions).  */
6321
6322static void
6323ia64_dependencies_evaluation_hook (rtx head, rtx tail)
6324{
6325  rtx insn, link, next, next_tail;
6326
6327  /* Before reload, which_alternative is not set, which means that
6328     ia64_safe_itanium_class will produce wrong results for (at least)
6329     move instructions.  */
6330  if (!reload_completed)
6331    return;
6332
6333  next_tail = NEXT_INSN (tail);
6334  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6335    if (INSN_P (insn))
6336      insn->call = 0;
6337  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6338    if (INSN_P (insn)
6339	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
6340      {
6341	for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
6342	  {
6343	    enum attr_itanium_class c;
6344
6345	    if (REG_NOTE_KIND (link) != REG_DEP_TRUE)
6346	      continue;
6347	    next = XEXP (link, 0);
6348	    c = ia64_safe_itanium_class (next);
6349	    if ((c == ITANIUM_CLASS_ST
6350		 || c == ITANIUM_CLASS_STF)
6351		&& ia64_st_address_bypass_p (insn, next))
6352	      break;
6353	    else if ((c == ITANIUM_CLASS_LD
6354		      || c == ITANIUM_CLASS_FLD
6355		      || c == ITANIUM_CLASS_FLDP)
6356		     && ia64_ld_address_bypass_p (insn, next))
6357	      break;
6358	  }
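	/* Reuse the insn's `call' bit to mark it as an address producer for
	   the benefit of `ia64_produce_address_p' and the DFA descriptions
	   (see the comment at the head of this function).  */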
6359	insn->call = link != 0;
6360      }
6361}
6362
6363/* We're beginning a new block.  Initialize data structures as necessary.  */
6364
6365static void
6366ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
6367		 int sched_verbose ATTRIBUTE_UNUSED,
6368		 int max_ready ATTRIBUTE_UNUSED)
6369{
6370#ifdef ENABLE_CHECKING
6371  rtx insn;
6372
6373  if (reload_completed)
6374    for (insn = NEXT_INSN (current_sched_info->prev_head);
6375	 insn != current_sched_info->next_tail;
6376	 insn = NEXT_INSN (insn))
6377      gcc_assert (!SCHED_GROUP_P (insn));
6378#endif
6379  last_scheduled_insn = NULL_RTX;
6380  init_insn_group_barriers ();
6381}
6382
6383/* We're beginning a scheduling pass.  Check assertion.  */
6384
6385static void
6386ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
6387                        int sched_verbose ATTRIBUTE_UNUSED,
6388                        int max_ready ATTRIBUTE_UNUSED)
6389{
6390  gcc_assert (!pending_data_specs);
6391}
6392
6393/* Scheduling pass is now finished.  Free/reset static variable.  */
6394static void
6395ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
6396			  int sched_verbose ATTRIBUTE_UNUSED)
6397{
6398  free (spec_check_no);
6399  spec_check_no = 0;
6400  max_uid = 0;
6401}
6402
6403/* We are about to begin issuing insns for this clock cycle.
6404   Override the default sort algorithm to better slot instructions.  */
6405
6406static int
6407ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready,
6408			int *pn_ready, int clock_var ATTRIBUTE_UNUSED,
6409			int reorder_type)
6410{
6411  int n_asms;
6412  int n_ready = *pn_ready;
6413  rtx *e_ready = ready + n_ready;
6414  rtx *insnp;
6415
6416  if (sched_verbose)
6417    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
6418
6419  if (reorder_type == 0)
6420    {
6421      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
6422      n_asms = 0;
6423      for (insnp = ready; insnp < e_ready; insnp++)
6424	if (insnp < e_ready)
6425	  {
6426	    rtx insn = *insnp;
6427	    enum attr_type t = ia64_safe_type (insn);
6428	    if (t == TYPE_UNKNOWN)
6429	      {
6430		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
6431		    || asm_noperands (PATTERN (insn)) >= 0)
6432		  {
6433		    rtx lowest = ready[n_asms];
6434		    ready[n_asms] = insn;
6435		    *insnp = lowest;
6436		    n_asms++;
6437		  }
6438		else
6439		  {
6440		    rtx highest = ready[n_ready - 1];
6441		    ready[n_ready - 1] = insn;
6442		    *insnp = highest;
6443		    return 1;
6444		  }
6445	      }
6446	  }
6447
6448      if (n_asms < n_ready)
6449	{
6450	  /* Some normal insns to process.  Skip the asms.  */
6451	  ready += n_asms;
6452	  n_ready -= n_asms;
6453	}
6454      else if (n_ready > 0)
6455	return 1;
6456    }
6457
6458  if (ia64_final_schedule)
6459    {
6460      int deleted = 0;
6461      int nr_need_stop = 0;
6462
6463      for (insnp = ready; insnp < e_ready; insnp++)
6464	if (safe_group_barrier_needed (*insnp))
6465	  nr_need_stop++;
6466
6467      if (reorder_type == 1 && n_ready == nr_need_stop)
6468	return 0;
6469      if (reorder_type == 0)
6470	return 1;
6471      insnp = e_ready;
6472      /* Move down everything that needs a stop bit, preserving
6473	 relative order.  */
6474      while (insnp-- > ready + deleted)
6475	while (insnp >= ready + deleted)
6476	  {
6477	    rtx insn = *insnp;
6478	    if (! safe_group_barrier_needed (insn))
6479	      break;
6480	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
6481	    *ready = insn;
6482	    deleted++;
6483	  }
6484      n_ready -= deleted;
6485      ready += deleted;
6486    }
6487
6488  return 1;
6489}
6490
6491/* We are about to begin issuing insns for this clock cycle.  Override
6492   the default sort algorithm to better slot instructions.  */
6493
6494static int
6495ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
6496		    int clock_var)
6497{
6498  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
6499				 pn_ready, clock_var, 0);
6500}
6501
6502/* Like ia64_sched_reorder, but called after issuing each insn.
6503   Override the default sort algorithm to better slot instructions.  */
6504
6505static int
6506ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
6507		     int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
6508		     int *pn_ready, int clock_var)
6509{
6510  if (ia64_tune == PROCESSOR_ITANIUM && reload_completed && last_scheduled_insn)
6511    clocks [INSN_UID (last_scheduled_insn)] = clock_var;
6512  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
6513				 clock_var, 1);
6514}
6515
6516/* We are about to issue INSN.  Return the number of insns left on the
6517   ready queue that can be issued this cycle.  */
6518
6519static int
6520ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
6521		     int sched_verbose ATTRIBUTE_UNUSED,
6522		     rtx insn ATTRIBUTE_UNUSED,
6523		     int can_issue_more ATTRIBUTE_UNUSED)
6524{
6525  if (current_sched_info->flags & DO_SPECULATION)
6526    /* Modulo scheduling does not extend h_i_d when emitting
6527       new instructions.  Deal with it.  */
6528    {
6529      if (DONE_SPEC (insn) & BEGIN_DATA)
6530	pending_data_specs++;
6531      if (CHECK_SPEC (insn) & BEGIN_DATA)
6532	pending_data_specs--;
6533    }
6534
6535  last_scheduled_insn = insn;
6536  memcpy (prev_cycle_state, curr_state, dfa_state_size);
6537  if (reload_completed)
6538    {
6539      int needed = group_barrier_needed (insn);
6540
6541      gcc_assert (!needed);
6542      if (GET_CODE (insn) == CALL_INSN)
6543	init_insn_group_barriers ();
6544      stops_p [INSN_UID (insn)] = stop_before_p;
6545      stop_before_p = 0;
6546    }
6547  return 1;
6548}
6549
6550/* We are choosing insn from the ready queue.  Return nonzero if INSN
6551   can be chosen.  */
6552
6553static int
6554ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn)
6555{
6556  gcc_assert (insn  && INSN_P (insn));
6557  return ((!reload_completed
6558	   || !safe_group_barrier_needed (insn))
6559	  && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn));
6560}
6561
6562/* We are choosing insn from the ready queue.  Return nonzero if INSN
6563   can be chosen.  */
6564
6565static bool
6566ia64_first_cycle_multipass_dfa_lookahead_guard_spec (rtx insn)
6567{
6568  gcc_assert (insn  && INSN_P (insn));
6569  /* The size of the ALAT is 32.  Since we perform conservative data
6570     speculation, we keep the ALAT half-empty.  */
6571  return (pending_data_specs < 16
6572	  || !(TODO_SPEC (insn) & BEGIN_DATA));
6573}
6574
6575/* The following variable value is pseudo-insn used by the DFA insn
6576   scheduler to change the DFA state when the simulated clock is
6577   increased.  */
6578
6579static rtx dfa_pre_cycle_insn;
6580
6581/* We are about to begin issuing INSN.  Return nonzero if we cannot
6582   issue it on the given cycle CLOCK, and set *SORT_P to zero if we should
6583   not sort the ready queue on the next clock start.  */
6584
6585static int
6586ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock,
6587		    int clock, int *sort_p)
6588{
6589  int setup_clocks_p = FALSE;
6590
6591  gcc_assert (insn && INSN_P (insn));
6592  if ((reload_completed && safe_group_barrier_needed (insn))
6593      || (last_scheduled_insn
6594	  && (GET_CODE (last_scheduled_insn) == CALL_INSN
6595	      || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6596	      || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)))
6597    {
6598      init_insn_group_barriers ();
6599      if (verbose && dump)
6600	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
6601		 last_clock == clock ? " + cycle advance" : "");
6602      stop_before_p = 1;
6603      if (last_clock == clock)
6604	{
6605	  state_transition (curr_state, dfa_stop_insn);
6606	  if (TARGET_EARLY_STOP_BITS)
6607	    *sort_p = (last_scheduled_insn == NULL_RTX
6608		       || GET_CODE (last_scheduled_insn) != CALL_INSN);
6609	  else
6610	    *sort_p = 0;
6611	  return 1;
6612	}
6613      else if (reload_completed)
6614	setup_clocks_p = TRUE;
6615      if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT
6616	  || asm_noperands (PATTERN (last_scheduled_insn)) >= 0)
6617	state_reset (curr_state);
6618      else
6619	{
6620	  memcpy (curr_state, prev_cycle_state, dfa_state_size);
6621	  state_transition (curr_state, dfa_stop_insn);
6622	  state_transition (curr_state, dfa_pre_cycle_insn);
6623	  state_transition (curr_state, NULL);
6624	}
6625    }
6626  else if (reload_completed)
6627    setup_clocks_p = TRUE;
6628  if (setup_clocks_p && ia64_tune == PROCESSOR_ITANIUM
6629      && GET_CODE (PATTERN (insn)) != ASM_INPUT
6630      && asm_noperands (PATTERN (insn)) < 0)
6631    {
6632      enum attr_itanium_class c = ia64_safe_itanium_class (insn);
6633
6634      if (c != ITANIUM_CLASS_MMMUL && c != ITANIUM_CLASS_MMSHF)
6635	{
6636	  rtx link;
6637	  int d = -1;
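	  /* Among the MMMUL/MMSHF insns this one truly depends on, find the
	     one issued most recently (fewer than 4 cycles ago) and record in
	     add_cycles how many extra cycles should be added for this insn on
	     Itanium1 (see the comment above add_cycles).  */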
6638
6639	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6640	    if (REG_NOTE_KIND (link) == 0)
6641	      {
6642		enum attr_itanium_class dep_class;
6643		rtx dep_insn = XEXP (link, 0);
6644
6645		dep_class = ia64_safe_itanium_class (dep_insn);
6646		if ((dep_class == ITANIUM_CLASS_MMMUL
6647		     || dep_class == ITANIUM_CLASS_MMSHF)
6648		    && last_clock - clocks [INSN_UID (dep_insn)] < 4
6649		    && (d < 0
6650			|| last_clock - clocks [INSN_UID (dep_insn)] < d))
6651		  d = last_clock - clocks [INSN_UID (dep_insn)];
6652	      }
6653	  if (d >= 0)
6654	    add_cycles [INSN_UID (insn)] = 3 - d;
6655	}
6656    }
6657  return 0;
6658}
6659
6660/* Implement targetm.sched.h_i_d_extended hook.
6661   Extend internal data structures.  */
6662static void
6663ia64_h_i_d_extended (void)
6664{
6665  if (current_sched_info->flags & DO_SPECULATION)
6666    {
6667      int new_max_uid = get_max_uid () + 1;
6668
6669      spec_check_no = xrecalloc (spec_check_no, new_max_uid,
6670				 max_uid, sizeof (*spec_check_no));
6671      max_uid = new_max_uid;
6672    }
6673
6674  if (stops_p != NULL)
6675    {
6676      int new_clocks_length = get_max_uid () + 1;
6677
6678      stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
6679
6680      if (ia64_tune == PROCESSOR_ITANIUM)
6681	{
6682	  clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
6683			      sizeof (int));
6684	  add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
6685				  sizeof (int));
6686	}
6687
6688      clocks_length = new_clocks_length;
6689    }
6690}
6691
6692/* Constants that help mapping 'enum machine_mode' to int.  */
6693enum SPEC_MODES
6694  {
6695    SPEC_MODE_INVALID = -1,
6696    SPEC_MODE_FIRST = 0,
6697    SPEC_MODE_FOR_EXTEND_FIRST = 1,
6698    SPEC_MODE_FOR_EXTEND_LAST = 3,
6699    SPEC_MODE_LAST = 8
6700  };
6701
6702/* Return index of the MODE.  */
6703static int
6704ia64_mode_to_int (enum machine_mode mode)
6705{
6706  switch (mode)
6707    {
6708    case BImode: return 0; /* SPEC_MODE_FIRST  */
6709    case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
6710    case HImode: return 2;
6711    case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
6712    case DImode: return 4;
6713    case SFmode: return 5;
6714    case DFmode: return 6;
6715    case XFmode: return 7;
6716    case TImode:
6717      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
6718	 mentioned in itanium[12].md.  Predicate fp_register_operand also
6719	 needs to be defined.  Bottom line: better disable for now.  */
6720      return SPEC_MODE_INVALID;
6721    default:     return SPEC_MODE_INVALID;
6722    }
6723}
6724
6725/* Provide information about speculation capabilities.  */
6726static void
6727ia64_set_sched_flags (spec_info_t spec_info)
6728{
6729  unsigned int *flags = &(current_sched_info->flags);
6730
6731  if (*flags & SCHED_RGN
6732      || *flags & SCHED_EBB)
6733    {
6734      int mask = 0;
6735
6736      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
6737	  || (mflag_sched_ar_data_spec && reload_completed))
6738	{
6739	  mask |= BEGIN_DATA;
6740
6741	  if ((mflag_sched_br_in_data_spec && !reload_completed)
6742	      || (mflag_sched_ar_in_data_spec && reload_completed))
6743	    mask |= BE_IN_DATA;
6744	}
6745
6746      if (mflag_sched_control_spec)
6747	{
6748	  mask |= BEGIN_CONTROL;
6749
6750	  if (mflag_sched_in_control_spec)
6751	    mask |= BE_IN_CONTROL;
6752	}
6753
6754      gcc_assert (*flags & USE_GLAT);
6755
6756      if (mask)
6757	{
6758	  *flags |= USE_DEPS_LIST | DETACH_LIFE_INFO | DO_SPECULATION;
6759
6760	  spec_info->mask = mask;
6761	  spec_info->flags = 0;
6762
6763	  if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns)
6764	    spec_info->flags |= PREFER_NON_DATA_SPEC;
6765
6766	  if ((mask & CONTROL_SPEC)
6767	      && mflag_sched_prefer_non_control_spec_insns)
6768	    spec_info->flags |= PREFER_NON_CONTROL_SPEC;
6769
6770	  if (mflag_sched_spec_verbose)
6771	    {
6772	      if (sched_verbose >= 1)
6773		spec_info->dump = sched_dump;
6774	      else
6775		spec_info->dump = stderr;
6776	    }
6777	  else
6778	    spec_info->dump = 0;
6779
6780	  if (mflag_sched_count_spec_in_critical_path)
6781	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
6782	}
6783    }
6784}
6785
6786/* Implement targetm.sched.speculate_insn hook.
6787   Check if the INSN can be TS speculative.
6788   If 'no' - return -1.
6789   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
6790   If current pattern of the INSN already provides TS speculation, return 0.  */
6791static int
6792ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
6793{
6794  rtx pat, reg, mem, mem_reg;
6795  int mode_no, gen_p = 1;
6796  bool extend_p;
6797
6798  gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
6799
6800  pat = PATTERN (insn);
6801
6802  if (GET_CODE (pat) == COND_EXEC)
6803    pat = COND_EXEC_CODE (pat);
6804
6805  /* This should be a SET ...  */
6806  if (GET_CODE (pat) != SET)
6807    return -1;
6808
6809  reg = SET_DEST (pat);
6810  /* ... to the general/fp register ...  */
6811  if (!REG_P (reg) || !(GR_REGNO_P (REGNO (reg)) || FP_REGNO_P (REGNO (reg))))
6812    return -1;
6813
6814  /* ... from the mem ...  */
6815  mem = SET_SRC (pat);
6816
6817  /* ... that can, possibly, be a zero_extend ...  */
6818  if (GET_CODE (mem) == ZERO_EXTEND)
6819    {
6820      mem = XEXP (mem, 0);
6821      extend_p = true;
6822    }
6823  else
6824    extend_p = false;
6825
6826  /* ... or a speculative load.  */
6827  if (GET_CODE (mem) == UNSPEC)
6828    {
6829      int code;
6830
6831      code = XINT (mem, 1);
6832      if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
6833	return -1;
6834
6835      if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
6836	  || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
6837	  || code == UNSPEC_LDSA)
6838	gen_p = 0;
6839
6840      mem = XVECEXP (mem, 0, 0);
6841      gcc_assert (MEM_P (mem));
6842    }
6843
6844  /* Source should be a mem ...  */
6845  if (!MEM_P (mem))
6846    return -1;
6847
6848  /* ... addressed by a register.  */
6849  mem_reg = XEXP (mem, 0);
6850  if (!REG_P (mem_reg))
6851    return -1;
6852
6853  /* We should use MEM's mode since REG's mode in the presence of ZERO_EXTEND
6854     will always be DImode.  */
6855  mode_no = ia64_mode_to_int (GET_MODE (mem));
6856
6857  if (mode_no == SPEC_MODE_INVALID
6858      || (extend_p
6859	  && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
6860	       && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
6861    return -1;
6862
6863  extract_insn_cached (insn);
6864  gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
6865
6866  *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
6867
6868  return gen_p;
6869}
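
/* For example, a pattern like
     (set (reg:DI r2) (mem:DI (reg:DI r3)))
   or its extending form
     (set (reg:DI r2) (zero_extend:DI (mem:SI (reg:DI r3))))
   passes the tests above, while a load whose address is not a plain
   register (e.g. a post-increment) is rejected with -1.  (The RTL here is
   schematic, not taken from an actual dump.)  */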
6870
6871enum
6872  {
6873    /* Offset to reach ZERO_EXTEND patterns.  */
6874    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1,
6875    /* Number of patterns for each speculation mode.  */
6876    SPEC_N = (SPEC_MODE_LAST
6877              + SPEC_MODE_FOR_EXTEND_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 2)
6878  };
6879
6880enum SPEC_GEN_LD_MAP
6881  {
6882    /* Offset to ld.a patterns.  */
6883    SPEC_GEN_A = 0 * SPEC_N,
6884    /* Offset to ld.s patterns.  */
6885    SPEC_GEN_S = 1 * SPEC_N,
6886    /* Offset to ld.sa patterns.  */
6887    SPEC_GEN_SA = 2 * SPEC_N,
6888    /* Offset to ld.sa patterns.  For these patterns the corresponding ld.c
6889       will mutate to chk.s.  */
6890    SPEC_GEN_SA_FOR_S = 3 * SPEC_N
6891  };
6892
6893/* These offsets are added to a load's index to reach (4 * SPEC_N), the chk.a checks.  */
6894enum SPEC_GEN_CHECK_OFFSET
6895  {
6896    SPEC_GEN_CHKA_FOR_A_OFFSET = 4 * SPEC_N - SPEC_GEN_A,
6897    SPEC_GEN_CHKA_FOR_SA_OFFSET = 4 * SPEC_N - SPEC_GEN_SA
6898  };
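
/* A worked example of the arithmetic above, assuming the indices returned
   by ia64_mode_to_int (so SPEC_MODE_LAST is 8, the TImode slot, and
   SPEC_MODE_FOR_EXTEND_FIRST/LAST are 1 and 3): SPEC_N is then 12, which
   matches the twelve generators per group in gen_load[] and gen_check[]
   below (nine modes plus three ZERO_EXTEND variants), and
   SPEC_GEN_EXTEND_OFFSET is 8, the distance from a mode's plain entry to
   its ZERO_EXTEND entry within a group.  */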
6899
6900/* If GEN_P is true, calculate the index of the needed speculation check and
6901   return the speculative pattern for INSN with speculation mode TS, machine
6902   mode MODE_NO, and a ZERO_EXTEND (if EXTEND_P is true).
6903   If GEN_P is false, just calculate the index of the needed speculation check.  */
6904static rtx
6905ia64_gen_spec_insn (rtx insn, ds_t ts, int mode_no, bool gen_p, bool extend_p)
6906{
6907  rtx pat, new_pat;
6908  int load_no;
6909  int shift = 0;
6910
6911  static rtx (* const gen_load[]) (rtx, rtx) = {
6912    gen_movbi_advanced,
6913    gen_movqi_advanced,
6914    gen_movhi_advanced,
6915    gen_movsi_advanced,
6916    gen_movdi_advanced,
6917    gen_movsf_advanced,
6918    gen_movdf_advanced,
6919    gen_movxf_advanced,
6920    gen_movti_advanced,
6921    gen_zero_extendqidi2_advanced,
6922    gen_zero_extendhidi2_advanced,
6923    gen_zero_extendsidi2_advanced,
6924
6925    gen_movbi_speculative,
6926    gen_movqi_speculative,
6927    gen_movhi_speculative,
6928    gen_movsi_speculative,
6929    gen_movdi_speculative,
6930    gen_movsf_speculative,
6931    gen_movdf_speculative,
6932    gen_movxf_speculative,
6933    gen_movti_speculative,
6934    gen_zero_extendqidi2_speculative,
6935    gen_zero_extendhidi2_speculative,
6936    gen_zero_extendsidi2_speculative,
6937
6938    gen_movbi_speculative_advanced,
6939    gen_movqi_speculative_advanced,
6940    gen_movhi_speculative_advanced,
6941    gen_movsi_speculative_advanced,
6942    gen_movdi_speculative_advanced,
6943    gen_movsf_speculative_advanced,
6944    gen_movdf_speculative_advanced,
6945    gen_movxf_speculative_advanced,
6946    gen_movti_speculative_advanced,
6947    gen_zero_extendqidi2_speculative_advanced,
6948    gen_zero_extendhidi2_speculative_advanced,
6949    gen_zero_extendsidi2_speculative_advanced,
6950
6951    gen_movbi_speculative_advanced,
6952    gen_movqi_speculative_advanced,
6953    gen_movhi_speculative_advanced,
6954    gen_movsi_speculative_advanced,
6955    gen_movdi_speculative_advanced,
6956    gen_movsf_speculative_advanced,
6957    gen_movdf_speculative_advanced,
6958    gen_movxf_speculative_advanced,
6959    gen_movti_speculative_advanced,
6960    gen_zero_extendqidi2_speculative_advanced,
6961    gen_zero_extendhidi2_speculative_advanced,
6962    gen_zero_extendsidi2_speculative_advanced
6963  };
6964
6965  load_no = extend_p ? mode_no + SPEC_GEN_EXTEND_OFFSET : mode_no;
6966
6967  if (ts & BEGIN_DATA)
6968    {
6969      /* We don't need recovery because even if this is ld.sa, the
6970	 ALAT entry will be allocated only if the NAT bit is set to zero.
6971	 So it is enough to use ld.c here.  */
6972
6973      if (ts & BEGIN_CONTROL)
6974	{
6975	  load_no += SPEC_GEN_SA;
6976
6977	  if (!mflag_sched_ldc)
6978	    shift = SPEC_GEN_CHKA_FOR_SA_OFFSET;
6979	}
6980      else
6981	{
6982	  load_no += SPEC_GEN_A;
6983
6984	  if (!mflag_sched_ldc)
6985	    shift = SPEC_GEN_CHKA_FOR_A_OFFSET;
6986	}
6987    }
6988  else if (ts & BEGIN_CONTROL)
6989    {
6990      /* ld.sa can be used instead of ld.s to avoid basic block splitting.  */
6991      if (!mflag_control_ldc)
6992	load_no += SPEC_GEN_S;
6993      else
6994	{
6995	  gcc_assert (mflag_sched_ldc);
6996	  load_no += SPEC_GEN_SA_FOR_S;
6997	}
6998    }
6999  else
7000    gcc_unreachable ();
7001
7002  /* Set the desired check index.  We add '1' because a zero element in this
7003     array means that the instruction with this uid is non-speculative.  */
7004  spec_check_no[INSN_UID (insn)] = load_no + shift + 1;
7005
7006  if (!gen_p)
7007    return 0;
7008
7009  new_pat = gen_load[load_no] (copy_rtx (recog_data.operand[0]),
7010			       copy_rtx (recog_data.operand[1]));
7011
7012  pat = PATTERN (insn);
7013  if (GET_CODE (pat) == COND_EXEC)
7014    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx
7015				 (COND_EXEC_TEST (pat)), new_pat);
7016
7017  return new_pat;
7018}
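
/* A sketch of the index arithmetic above (using SPEC_N == 12 as the tables
   suggest): a DImode load that is control speculated (TS == BEGIN_CONTROL)
   with mflag_control_ldc clear gets load_no = 4 + SPEC_GEN_S = 16, so
   gen_movdi_speculative emits the ld8.s form, and spec_check_no[] records
   17, which later selects gen_speculation_check_di (chk.s) in
   ia64_gen_check.  */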
7019
7020/* Offset to branchy checks.  */
7021enum { SPEC_GEN_CHECK_MUTATION_OFFSET = 5 * SPEC_N };
7022
7023/* Return nonzero if INSN needs a branchy recovery check.  */
7024static bool
7025ia64_needs_block_p (rtx insn)
7026{
7027  int check_no;
7028
7029  check_no = spec_check_no[INSN_UID(insn)] - 1;
7030  gcc_assert (0 <= check_no && check_no < SPEC_GEN_CHECK_MUTATION_OFFSET);
7031
7032  return ((SPEC_GEN_S <= check_no && check_no < SPEC_GEN_S + SPEC_N)
7033	  || (4 * SPEC_N <= check_no && check_no < 4 * SPEC_N + SPEC_N));
7034}
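
/* Put differently: the branchy checks are those recorded for ld.s loads
   (the SPEC_GEN_S range, whose check is chk.s) and the chk.a checks that
   live at offset 4 * SPEC_N; plain ld.c checks need no recovery block.  */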
7035
7036/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
7037   If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
7038   Otherwise, generate a simple check.  */
7039static rtx
7040ia64_gen_check (rtx insn, rtx label, bool mutate_p)
7041{
7042  rtx op1, pat, check_pat;
7043
7044  static rtx (* const gen_check[]) (rtx, rtx) = {
7045    gen_movbi_clr,
7046    gen_movqi_clr,
7047    gen_movhi_clr,
7048    gen_movsi_clr,
7049    gen_movdi_clr,
7050    gen_movsf_clr,
7051    gen_movdf_clr,
7052    gen_movxf_clr,
7053    gen_movti_clr,
7054    gen_zero_extendqidi2_clr,
7055    gen_zero_extendhidi2_clr,
7056    gen_zero_extendsidi2_clr,
7057
7058    gen_speculation_check_bi,
7059    gen_speculation_check_qi,
7060    gen_speculation_check_hi,
7061    gen_speculation_check_si,
7062    gen_speculation_check_di,
7063    gen_speculation_check_sf,
7064    gen_speculation_check_df,
7065    gen_speculation_check_xf,
7066    gen_speculation_check_ti,
7067    gen_speculation_check_di,
7068    gen_speculation_check_di,
7069    gen_speculation_check_di,
7070
7071    gen_movbi_clr,
7072    gen_movqi_clr,
7073    gen_movhi_clr,
7074    gen_movsi_clr,
7075    gen_movdi_clr,
7076    gen_movsf_clr,
7077    gen_movdf_clr,
7078    gen_movxf_clr,
7079    gen_movti_clr,
7080    gen_zero_extendqidi2_clr,
7081    gen_zero_extendhidi2_clr,
7082    gen_zero_extendsidi2_clr,
7083
7084    gen_movbi_clr,
7085    gen_movqi_clr,
7086    gen_movhi_clr,
7087    gen_movsi_clr,
7088    gen_movdi_clr,
7089    gen_movsf_clr,
7090    gen_movdf_clr,
7091    gen_movxf_clr,
7092    gen_movti_clr,
7093    gen_zero_extendqidi2_clr,
7094    gen_zero_extendhidi2_clr,
7095    gen_zero_extendsidi2_clr,
7096
7097    gen_advanced_load_check_clr_bi,
7098    gen_advanced_load_check_clr_qi,
7099    gen_advanced_load_check_clr_hi,
7100    gen_advanced_load_check_clr_si,
7101    gen_advanced_load_check_clr_di,
7102    gen_advanced_load_check_clr_sf,
7103    gen_advanced_load_check_clr_df,
7104    gen_advanced_load_check_clr_xf,
7105    gen_advanced_load_check_clr_ti,
7106    gen_advanced_load_check_clr_di,
7107    gen_advanced_load_check_clr_di,
7108    gen_advanced_load_check_clr_di,
7109
7110    /* The following checks are generated during mutation.  */
7111    gen_advanced_load_check_clr_bi,
7112    gen_advanced_load_check_clr_qi,
7113    gen_advanced_load_check_clr_hi,
7114    gen_advanced_load_check_clr_si,
7115    gen_advanced_load_check_clr_di,
7116    gen_advanced_load_check_clr_sf,
7117    gen_advanced_load_check_clr_df,
7118    gen_advanced_load_check_clr_xf,
7119    gen_advanced_load_check_clr_ti,
7120    gen_advanced_load_check_clr_di,
7121    gen_advanced_load_check_clr_di,
7122    gen_advanced_load_check_clr_di,
7123
7124    0,0,0,0,0,0,0,0,0,0,0,0,
7125
7126    gen_advanced_load_check_clr_bi,
7127    gen_advanced_load_check_clr_qi,
7128    gen_advanced_load_check_clr_hi,
7129    gen_advanced_load_check_clr_si,
7130    gen_advanced_load_check_clr_di,
7131    gen_advanced_load_check_clr_sf,
7132    gen_advanced_load_check_clr_df,
7133    gen_advanced_load_check_clr_xf,
7134    gen_advanced_load_check_clr_ti,
7135    gen_advanced_load_check_clr_di,
7136    gen_advanced_load_check_clr_di,
7137    gen_advanced_load_check_clr_di,
7138
7139    gen_speculation_check_bi,
7140    gen_speculation_check_qi,
7141    gen_speculation_check_hi,
7142    gen_speculation_check_si,
7143    gen_speculation_check_di,
7144    gen_speculation_check_sf,
7145    gen_speculation_check_df,
7146    gen_speculation_check_xf,
7147    gen_speculation_check_ti,
7148    gen_speculation_check_di,
7149    gen_speculation_check_di,
7150    gen_speculation_check_di
7151  };
7152
7153  extract_insn_cached (insn);
7154
7155  if (label)
7156    {
7157      gcc_assert (mutate_p || ia64_needs_block_p (insn));
7158      op1 = label;
7159    }
7160  else
7161    {
7162      gcc_assert (!mutate_p && !ia64_needs_block_p (insn));
7163      op1 = copy_rtx (recog_data.operand[1]);
7164    }
7165
7166  if (mutate_p)
7167    /* INSN is ld.c.
7168       Find the speculation check number by searching for the original
7169       speculative load in the RESOLVED_DEPS list of INSN.
7170       As long as patterns are unique for each instruction, this can be
7171       accomplished by matching ORIG_PAT fields.  */
7172    {
7173      rtx link;
7174      int check_no = 0;
7175      rtx orig_pat = ORIG_PAT (insn);
7176
7177      for (link = RESOLVED_DEPS (insn); link; link = XEXP (link, 1))
7178	{
7179	  rtx x = XEXP (link, 0);
7180
7181	  if (ORIG_PAT (x) == orig_pat)
7182	    check_no = spec_check_no[INSN_UID (x)];
7183	}
7184      gcc_assert (check_no);
7185
7186      spec_check_no[INSN_UID (insn)] = (check_no
7187					+ SPEC_GEN_CHECK_MUTATION_OFFSET);
7188    }
7189
7190  check_pat = (gen_check[spec_check_no[INSN_UID (insn)] - 1]
7191	       (copy_rtx (recog_data.operand[0]), op1));
7192
7193  pat = PATTERN (insn);
7194  if (GET_CODE (pat) == COND_EXEC)
7195    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
7196				   check_pat);
7197
7198  return check_pat;
7199}
7200
7201/* Return nonzero if X is a branchy recovery check.  */
7202static int
7203ia64_spec_check_p (rtx x)
7204{
7205  x = PATTERN (x);
7206  if (GET_CODE (x) == COND_EXEC)
7207    x = COND_EXEC_CODE (x);
7208  if (GET_CODE (x) == SET)
7209    return ia64_spec_check_src_p (SET_SRC (x));
7210  return 0;
7211}
7212
7213/* Return nonzero if SRC belongs to a recovery check.  */
7214static int
7215ia64_spec_check_src_p (rtx src)
7216{
7217  if (GET_CODE (src) == IF_THEN_ELSE)
7218    {
7219      rtx t;
7220
7221      t = XEXP (src, 0);
7222      if (GET_CODE (t) == NE)
7223	{
7224	  t = XEXP (t, 0);
7225
7226	  if (GET_CODE (t) == UNSPEC)
7227	    {
7228	      int code;
7229
7230	      code = XINT (t, 1);
7231
7232	      if (code == UNSPEC_CHKACLR
7233		  || code == UNSPEC_CHKS
7234		  || code == UNSPEC_LDCCLR)
7235		{
7236		  gcc_assert (code != 0);
7237		  return code;
7238		}
7239	    }
7240	}
7241    }
7242  return 0;
7243}
7244
7245
7246/* The following page contains abstract data `bundle states' which are
7247   used for bundling insns (inserting nops and template generation).  */
7248
7249/* The following describes state of insn bundling.  */
7250
7251struct bundle_state
7252{
7253  /* Unique bundle state number to identify them in the debugging
7254     output  */
7255  int unique_num;
7256  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
7257  /* number of nops before and after the insn  */
7258  short before_nops_num, after_nops_num;
7259  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
7260                   insn)  */
7261  int cost;     /* cost of the state in cycles */
7262  int accumulated_insns_num; /* number of all previous insns including
7263				nops.  L is considered as 2 insns */
7264  int branch_deviation; /* deviation of previous branches from 3rd slots  */
7265  struct bundle_state *next;  /* next state with the same insn_num  */
7266  struct bundle_state *originator; /* originator (previous insn state)  */
7267  /* All bundle states are in the following chain.  */
7268  struct bundle_state *allocated_states_chain;
7269  /* The DFA State after issuing the insn and the nops.  */
7270  state_t dfa_state;
7271};
7272
7273/* The following array maps an insn number to the corresponding bundle states.  */
7274
7275static struct bundle_state **index_to_bundle_states;
7276
7277/* The unique number of the next bundle state.  */
7278
7279static int bundle_states_num;
7280
7281/* All allocated bundle states are in the following chain.  */
7282
7283static struct bundle_state *allocated_bundle_states_chain;
7284
7285/* All allocated but not used bundle states are in the following
7286   chain.  */
7287
7288static struct bundle_state *free_bundle_state_chain;
7289
7290
7291/* The following function returns a free bundle state.  */
7292
7293static struct bundle_state *
7294get_free_bundle_state (void)
7295{
7296  struct bundle_state *result;
7297
7298  if (free_bundle_state_chain != NULL)
7299    {
7300      result = free_bundle_state_chain;
7301      free_bundle_state_chain = result->next;
7302    }
7303  else
7304    {
7305      result = xmalloc (sizeof (struct bundle_state));
7306      result->dfa_state = xmalloc (dfa_state_size);
7307      result->allocated_states_chain = allocated_bundle_states_chain;
7308      allocated_bundle_states_chain = result;
7309    }
7310  result->unique_num = bundle_states_num++;
7311  return result;
7312
7313}
7314
7315/* The following function frees the given bundle state.  */
7316
7317static void
7318free_bundle_state (struct bundle_state *state)
7319{
7320  state->next = free_bundle_state_chain;
7321  free_bundle_state_chain = state;
7322}
7323
7324/* Start work with abstract data `bundle states'.  */
7325
7326static void
7327initiate_bundle_states (void)
7328{
7329  bundle_states_num = 0;
7330  free_bundle_state_chain = NULL;
7331  allocated_bundle_states_chain = NULL;
7332}
7333
7334/* Finish work with abstract data `bundle states'.  */
7335
7336static void
7337finish_bundle_states (void)
7338{
7339  struct bundle_state *curr_state, *next_state;
7340
7341  for (curr_state = allocated_bundle_states_chain;
7342       curr_state != NULL;
7343       curr_state = next_state)
7344    {
7345      next_state = curr_state->allocated_states_chain;
7346      free (curr_state->dfa_state);
7347      free (curr_state);
7348    }
7349}
7350
7351/* Hash table of the bundle states.  The key is dfa_state and insn_num
7352   of the bundle states.  */
7353
7354static htab_t bundle_state_table;
7355
7356/* The function returns hash of BUNDLE_STATE.  */
7357
7358static unsigned
7359bundle_state_hash (const void *bundle_state)
7360{
7361  const struct bundle_state *state = (struct bundle_state *) bundle_state;
7362  unsigned result, i;
7363
7364  for (result = i = 0; i < dfa_state_size; i++)
7365    result += (((unsigned char *) state->dfa_state) [i]
7366	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
7367  return result + state->insn_num;
7368}
7369
7370/* The function returns nonzero if the bundle state keys are equal.  */
7371
7372static int
7373bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
7374{
7375  const struct bundle_state * state1 = (struct bundle_state *) bundle_state_1;
7376  const struct bundle_state * state2 = (struct bundle_state *) bundle_state_2;
7377
7378  return (state1->insn_num == state2->insn_num
7379	  && memcmp (state1->dfa_state, state2->dfa_state,
7380		     dfa_state_size) == 0);
7381}
7382
7383/* The function inserts the BUNDLE_STATE into the hash table.  The
7384   function returns nonzero if the bundle has been inserted into the
7385   table.  The table contains the best bundle state with the given key.  */
7386
7387static int
7388insert_bundle_state (struct bundle_state *bundle_state)
7389{
7390  void **entry_ptr;
7391
7392  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, 1);
7393  if (*entry_ptr == NULL)
7394    {
7395      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
7396      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
7397      *entry_ptr = (void *) bundle_state;
7398      return TRUE;
7399    }
7400  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
7401	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
7402	       && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
7403		   > bundle_state->accumulated_insns_num
7404		   || (((struct bundle_state *)
7405			*entry_ptr)->accumulated_insns_num
7406		       == bundle_state->accumulated_insns_num
7407		       && ((struct bundle_state *)
7408			   *entry_ptr)->branch_deviation
7409		       > bundle_state->branch_deviation))))
7410
7411    {
7412      struct bundle_state temp;
7413
7414      temp = *(struct bundle_state *) *entry_ptr;
7415      *(struct bundle_state *) *entry_ptr = *bundle_state;
7416      ((struct bundle_state *) *entry_ptr)->next = temp.next;
7417      *bundle_state = temp;
7418    }
7419  return FALSE;
7420}
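
/* A note on the replacement above: states sharing a (dfa_state, insn_num)
   key are compared lexicographically on (cost, accumulated_insns_num,
   branch_deviation).  When the new state wins, its contents are swapped
   into the existing table entry (which keeps the entry's chain link) and
   the old contents are returned in *BUNDLE_STATE, so the FALSE result
   still tells the caller to free that state.  */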
7421
7422/* Start work with the hash table.  */
7423
7424static void
7425initiate_bundle_state_table (void)
7426{
7427  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
7428				    (htab_del) 0);
7429}
7430
7431/* Finish work with the hash table.  */
7432
7433static void
7434finish_bundle_state_table (void)
7435{
7436  htab_delete (bundle_state_table);
7437}
7438
7439
7440
7441/* The following variable is an insn `nop' used to check bundle states
7442   with different numbers of inserted nops.  */
7443
7444static rtx ia64_nop;
7445
7446/* The following function tries to issue NOPS_NUM nops for the current
7447   state without advancing the processor cycle.  If it fails, the
7448   function returns FALSE and frees the current state.  */
7449
7450static int
7451try_issue_nops (struct bundle_state *curr_state, int nops_num)
7452{
7453  int i;
7454
7455  for (i = 0; i < nops_num; i++)
7456    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
7457      {
7458	free_bundle_state (curr_state);
7459	return FALSE;
7460      }
7461  return TRUE;
7462}
7463
7464/* The following function tries to issue INSN for the current
7465   state without advancing the processor cycle.  If it fails, the
7466   function returns FALSE and frees the current state.  */
7467
7468static int
7469try_issue_insn (struct bundle_state *curr_state, rtx insn)
7470{
7471  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
7472    {
7473      free_bundle_state (curr_state);
7474      return FALSE;
7475    }
7476  return TRUE;
7477}
7478
7479/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
7480   starting with ORIGINATOR without advancing the processor cycle.  If
7481   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
7482   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole bundle.
7483   If it was successful, the function creates a new bundle state and
7484   inserts it into the hash table and into `index_to_bundle_states'.  */
7485
7486static void
7487issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
7488		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
7489{
7490  struct bundle_state *curr_state;
7491
7492  curr_state = get_free_bundle_state ();
7493  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
7494  curr_state->insn = insn;
7495  curr_state->insn_num = originator->insn_num + 1;
7496  curr_state->cost = originator->cost;
7497  curr_state->originator = originator;
7498  curr_state->before_nops_num = before_nops_num;
7499  curr_state->after_nops_num = 0;
7500  curr_state->accumulated_insns_num
7501    = originator->accumulated_insns_num + before_nops_num;
7502  curr_state->branch_deviation = originator->branch_deviation;
7503  gcc_assert (insn);
7504  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
7505    {
7506      gcc_assert (GET_MODE (insn) != TImode);
7507      if (!try_issue_nops (curr_state, before_nops_num))
7508	return;
7509      if (!try_issue_insn (curr_state, insn))
7510	return;
7511      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
7512      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
7513	  && curr_state->accumulated_insns_num % 3 != 0)
7514	{
7515	  free_bundle_state (curr_state);
7516	  return;
7517	}
7518    }
7519  else if (GET_MODE (insn) != TImode)
7520    {
7521      if (!try_issue_nops (curr_state, before_nops_num))
7522	return;
7523      if (!try_issue_insn (curr_state, insn))
7524	return;
7525      curr_state->accumulated_insns_num++;
7526      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
7527		  && asm_noperands (PATTERN (insn)) < 0);
7528
7529      if (ia64_safe_type (insn) == TYPE_L)
7530	curr_state->accumulated_insns_num++;
7531    }
7532  else
7533    {
7534      /* If this is an insn that must be first in a group, then don't allow
7535	 nops to be emitted before it.  Currently, alloc is the only such
7536	 supported instruction.  */
7537      /* ??? The bundling automatons should handle this for us, but they do
7538	 not yet have support for the first_insn attribute.  */
7539      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
7540	{
7541	  free_bundle_state (curr_state);
7542	  return;
7543	}
7544
7545      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
7546      state_transition (curr_state->dfa_state, NULL);
7547      curr_state->cost++;
7548      if (!try_issue_nops (curr_state, before_nops_num))
7549	return;
7550      if (!try_issue_insn (curr_state, insn))
7551	return;
7552      curr_state->accumulated_insns_num++;
7553      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
7554	  || asm_noperands (PATTERN (insn)) >= 0)
7555	{
7556	  /* Finish bundle containing asm insn.  */
7557	  curr_state->after_nops_num
7558	    = 3 - curr_state->accumulated_insns_num % 3;
7559	  curr_state->accumulated_insns_num
7560	    += 3 - curr_state->accumulated_insns_num % 3;
7561	}
7562      else if (ia64_safe_type (insn) == TYPE_L)
7563	curr_state->accumulated_insns_num++;
7564    }
7565  if (ia64_safe_type (insn) == TYPE_B)
7566    curr_state->branch_deviation
7567      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
7568  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
7569    {
7570      if (!only_bundle_end_p && insert_bundle_state (curr_state))
7571	{
7572	  state_t dfa_state;
7573	  struct bundle_state *curr_state1;
7574	  struct bundle_state *allocated_states_chain;
7575
7576	  curr_state1 = get_free_bundle_state ();
7577	  dfa_state = curr_state1->dfa_state;
7578	  allocated_states_chain = curr_state1->allocated_states_chain;
7579	  *curr_state1 = *curr_state;
7580	  curr_state1->dfa_state = dfa_state;
7581	  curr_state1->allocated_states_chain = allocated_states_chain;
7582	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
7583		  dfa_state_size);
7584	  curr_state = curr_state1;
7585	}
7586      if (!try_issue_nops (curr_state,
7587			   3 - curr_state->accumulated_insns_num % 3))
7588	return;
7589      curr_state->after_nops_num
7590	= 3 - curr_state->accumulated_insns_num % 3;
7591      curr_state->accumulated_insns_num
7592	+= 3 - curr_state->accumulated_insns_num % 3;
7593    }
7594  if (!insert_bundle_state (curr_state))
7595    free_bundle_state (curr_state);
7596  return;
7597}
7598
7599/* The following function returns the position in the two-bundle window
7600   for the given STATE.  */
7601
7602static int
7603get_max_pos (state_t state)
7604{
7605  if (cpu_unit_reservation_p (state, pos_6))
7606    return 6;
7607  else if (cpu_unit_reservation_p (state, pos_5))
7608    return 5;
7609  else if (cpu_unit_reservation_p (state, pos_4))
7610    return 4;
7611  else if (cpu_unit_reservation_p (state, pos_3))
7612    return 3;
7613  else if (cpu_unit_reservation_p (state, pos_2))
7614    return 2;
7615  else if (cpu_unit_reservation_p (state, pos_1))
7616    return 1;
7617  else
7618    return 0;
7619}
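
/* Informally, the value returned above is the number of slots already
   filled in the current two-bundle window: 0 for an empty window, 3 when
   the first bundle is full, 6 when both bundles are full.  */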
7620
7621/* The function returns the code of a possible template for the given
7622   position and state.  The function should be called only with position
7623   values of 3 or 6.  We avoid generating F NOPs by putting templates
7624   containing F insns at the end of the template search, because of an
7625   undocumented anomaly in McKinley-derived cores which can
7626   cause stalls if an F-unit insn (including a NOP) is issued within a
7627   six-cycle window after reading certain application registers (such
7628   as ar.bsp).  Furthermore, power considerations also argue against
7629   the use of F-unit instructions unless they're really needed.  */
7630
7631static int
7632get_template (state_t state, int pos)
7633{
7634  switch (pos)
7635    {
7636    case 3:
7637      if (cpu_unit_reservation_p (state, _0mmi_))
7638	return 1;
7639      else if (cpu_unit_reservation_p (state, _0mii_))
7640	return 0;
7641      else if (cpu_unit_reservation_p (state, _0mmb_))
7642	return 7;
7643      else if (cpu_unit_reservation_p (state, _0mib_))
7644	return 6;
7645      else if (cpu_unit_reservation_p (state, _0mbb_))
7646	return 5;
7647      else if (cpu_unit_reservation_p (state, _0bbb_))
7648	return 4;
7649      else if (cpu_unit_reservation_p (state, _0mmf_))
7650	return 3;
7651      else if (cpu_unit_reservation_p (state, _0mfi_))
7652	return 2;
7653      else if (cpu_unit_reservation_p (state, _0mfb_))
7654	return 8;
7655      else if (cpu_unit_reservation_p (state, _0mlx_))
7656	return 9;
7657      else
7658	gcc_unreachable ();
7659    case 6:
7660      if (cpu_unit_reservation_p (state, _1mmi_))
7661	return 1;
7662      else if (cpu_unit_reservation_p (state, _1mii_))
7663	return 0;
7664      else if (cpu_unit_reservation_p (state, _1mmb_))
7665	return 7;
7666      else if (cpu_unit_reservation_p (state, _1mib_))
7667	return 6;
7668      else if (cpu_unit_reservation_p (state, _1mbb_))
7669	return 5;
7670      else if (cpu_unit_reservation_p (state, _1bbb_))
7671	return 4;
7672      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
7673	return 3;
7674      else if (cpu_unit_reservation_p (state, _1mfi_))
7675	return 2;
7676      else if (cpu_unit_reservation_p (state, _1mfb_))
7677	return 8;
7678      else if (cpu_unit_reservation_p (state, _1mlx_))
7679	return 9;
7680      else
7681	gcc_unreachable ();
7682    default:
7683      gcc_unreachable ();
7684    }
7685}
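
/* The codes returned above follow the bundle selector numbering used in
   this file: 0 = .mii, 1 = .mmi, 2 = .mfi, 3 = .mmf, 4 = .bbb, 5 = .mbb,
   6 = .mib, 7 = .mmb, 8 = .mfb, 9 = .mlx; compare the uses of const0_rtx
   (MII), const2_rtx (MFI) and template 9 (MLX) in bundling () below.  */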
7686
7687/* The following function returns the first insn important for insn
7688   bundling, starting with INSN and before TAIL.  */
7689
7690static rtx
7691get_next_important_insn (rtx insn, rtx tail)
7692{
7693  for (; insn && insn != tail; insn = NEXT_INSN (insn))
7694    if (INSN_P (insn)
7695	&& ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7696	&& GET_CODE (PATTERN (insn)) != USE
7697	&& GET_CODE (PATTERN (insn)) != CLOBBER)
7698      return insn;
7699  return NULL_RTX;
7700}
7701
7702/* Add a bundle selector TEMPLATE0 before INSN.  */
7703
7704static void
7705ia64_add_bundle_selector_before (int template0, rtx insn)
7706{
7707  rtx b = gen_bundle_selector (GEN_INT (template0));
7708
7709  ia64_emit_insn_before (b, insn);
7710#if NR_BUNDLES == 10
7711  if ((template0 == 4 || template0 == 5)
7712      && (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
7713    {
7714      int i;
7715      rtx note = NULL_RTX;
7716
7717      /* In .mbb and .bbb bundles, check whether a CALL_INSN is in the
7718	 first or second slot.  If it is and has a REG_EH_REGION note, copy
7719	 the note to the following nops, as br.call sets rp to the address of
7720	 the following bundle and therefore an EH region end must be on a
7721	 bundle boundary.  */
7722      insn = PREV_INSN (insn);
7723      for (i = 0; i < 3; i++)
7724	{
7725	  do
7726	    insn = next_active_insn (insn);
7727	  while (GET_CODE (insn) == INSN
7728		 && get_attr_empty (insn) == EMPTY_YES);
7729	  if (GET_CODE (insn) == CALL_INSN)
7730	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
7731	  else if (note)
7732	    {
7733	      int code;
7734
7735	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
7736			  || code == CODE_FOR_nop_b);
7737	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
7738		note = NULL_RTX;
7739	      else
7740		REG_NOTES (insn)
7741		  = gen_rtx_EXPR_LIST (REG_EH_REGION, XEXP (note, 0),
7742				       REG_NOTES (insn));
7743	    }
7744	}
7745    }
7746#endif
7747}
7748
7749/* The following function does insn bundling.  Bundling means
7750   inserting templates and nop insns to fit insn groups into permitted
7751   templates.  Instruction scheduling uses an NDFA (non-deterministic
7752   finite automaton) encoding information about the templates and the
7753   inserted nops.  The nondeterminism of the automaton permits following
7754   all possible insn sequences very quickly.
7755
7756   Unfortunately it is not possible to get information about inserting
7757   nop insns and used templates from the automaton states.  The
7758   automaton only says that we can issue an insn, possibly inserting
7759   some nops before it and using some template.  Therefore insn
7760   bundling in this function is implemented by using a DFA
7761   (deterministic finite automaton).  We follow all possible insn
7762   sequences by inserting 0-2 nops (that is what the NDFA describes for
7763   insn scheduling) before/after each insn being bundled.  We know the
7764   start of simulated processor cycle from insn scheduling (insn
7765   starting a new cycle has TImode).
7766
7767   A simple implementation of insn bundling would create an enormous
7768   number of possible insn sequences satisfying the information about new
7769   cycle ticks taken from the insn scheduling.  To make the algorithm
7770   practical we use dynamic programming.  Each decision (about
7771   inserting nops and implicitly about previous decisions) is described
7772   by the structure bundle_state (see above).  If we generate the same
7773   bundle state (the key is the automaton state after issuing the insns
7774   and nops for it), we reuse the already generated one.  As a
7775   consequence we reject some decisions which cannot improve the solution
7776   and reduce the memory used by the algorithm.
7777
7778   When we reach the end of the EBB (extended basic block), we choose the
7779   best sequence and then, moving back through the EBB, insert templates
7780   for the best alternative.  The templates are taken by querying the
7781   automaton state for each insn in the chosen bundle states.
7782
7783   So the algorithm makes two (forward and backward) passes through
7784   the EBB.  There is an additional forward pass through the EBB for the
7785   Itanium1 processor.  This pass inserts more nops to make the dependency
7786   between a producer insn and MMMUL/MMSHF at least 4 cycles long.  */
7787
7788static void
7789bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
7790{
7791  struct bundle_state *curr_state, *next_state, *best_state;
7792  rtx insn, next_insn;
7793  int insn_num;
7794  int i, bundle_end_p, only_bundle_end_p, asm_p;
7795  int pos = 0, max_pos, template0, template1;
7796  rtx b;
7797  rtx nop;
7798  enum attr_type type;
7799
7800  insn_num = 0;
7801  /* Count insns in the EBB.  */
7802  for (insn = NEXT_INSN (prev_head_insn);
7803       insn && insn != tail;
7804       insn = NEXT_INSN (insn))
7805    if (INSN_P (insn))
7806      insn_num++;
7807  if (insn_num == 0)
7808    return;
7809  bundling_p = 1;
7810  dfa_clean_insn_cache ();
7811  initiate_bundle_state_table ();
7812  index_to_bundle_states = xmalloc ((insn_num + 2)
7813				    * sizeof (struct bundle_state *));
7814  /* First (forward) pass -- generation of bundle states.  */
7815  curr_state = get_free_bundle_state ();
7816  curr_state->insn = NULL;
7817  curr_state->before_nops_num = 0;
7818  curr_state->after_nops_num = 0;
7819  curr_state->insn_num = 0;
7820  curr_state->cost = 0;
7821  curr_state->accumulated_insns_num = 0;
7822  curr_state->branch_deviation = 0;
7823  curr_state->next = NULL;
7824  curr_state->originator = NULL;
7825  state_reset (curr_state->dfa_state);
7826  index_to_bundle_states [0] = curr_state;
7827  insn_num = 0;
7828  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
7829  for (insn = NEXT_INSN (prev_head_insn);
7830       insn != tail;
7831       insn = NEXT_INSN (insn))
7832    if (INSN_P (insn)
7833	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
7834	    || GET_CODE (PATTERN (insn)) == USE
7835	    || GET_CODE (PATTERN (insn)) == CLOBBER)
7836	&& GET_MODE (insn) == TImode)
7837      {
7838	PUT_MODE (insn, VOIDmode);
7839	for (next_insn = NEXT_INSN (insn);
7840	     next_insn != tail;
7841	     next_insn = NEXT_INSN (next_insn))
7842	  if (INSN_P (next_insn)
7843	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
7844	      && GET_CODE (PATTERN (next_insn)) != USE
7845	      && GET_CODE (PATTERN (next_insn)) != CLOBBER)
7846	    {
7847	      PUT_MODE (next_insn, TImode);
7848	      break;
7849	    }
7850      }
7851  /* Forward pass: generation of bundle states.  */
7852  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
7853       insn != NULL_RTX;
7854       insn = next_insn)
7855    {
7856      gcc_assert (INSN_P (insn)
7857		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
7858		  && GET_CODE (PATTERN (insn)) != USE
7859		  && GET_CODE (PATTERN (insn)) != CLOBBER);
7860      type = ia64_safe_type (insn);
7861      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
7862      insn_num++;
7863      index_to_bundle_states [insn_num] = NULL;
7864      for (curr_state = index_to_bundle_states [insn_num - 1];
7865	   curr_state != NULL;
7866	   curr_state = next_state)
7867	{
7868	  pos = curr_state->accumulated_insns_num % 3;
7869	  next_state = curr_state->next;
7870	  /* We must fill up the current bundle in order to start a
7871	     subsequent asm insn in a new bundle.  An asm insn is always
7872	     placed in a separate bundle.  */
7873	  only_bundle_end_p
7874	    = (next_insn != NULL_RTX
7875	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
7876	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
7877	  /* We may fill up the current bundle if it is the cycle end
7878	     without a group barrier.  */
7879	  bundle_end_p
7880	    = (only_bundle_end_p || next_insn == NULL_RTX
7881	       || (GET_MODE (next_insn) == TImode
7882		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
7883	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
7884	      || type == TYPE_S
7885	      /* We need to insert 2 nops for cases like M_MII.  To
7886		 guarantee issuing all insns on the same cycle for
7887		 Itanium 1, we need to issue 2 nops after the first M
7888		 insn (MnnMII where n is a nop insn).  */
7889	      || ((type == TYPE_M || type == TYPE_A)
7890		  && ia64_tune == PROCESSOR_ITANIUM
7891		  && !bundle_end_p && pos == 1))
7892	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
7893				 only_bundle_end_p);
7894	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
7895			       only_bundle_end_p);
7896	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
7897			       only_bundle_end_p);
7898	}
7899      gcc_assert (index_to_bundle_states [insn_num]);
7900      for (curr_state = index_to_bundle_states [insn_num];
7901	   curr_state != NULL;
7902	   curr_state = curr_state->next)
7903	if (verbose >= 2 && dump)
7904	  {
7905	    /* This structure is taken from generated code of the
7906	       pipeline hazard recognizer (see file insn-attrtab.c).
7907	       Please don't forget to change the structure if a new
7908	       automaton is added to .md file.  */
7909	    struct DFA_chip
7910	    {
7911	      unsigned short one_automaton_state;
7912	      unsigned short oneb_automaton_state;
7913	      unsigned short two_automaton_state;
7914	      unsigned short twob_automaton_state;
7915	    };
7916
7917	    fprintf
7918	      (dump,
7919	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7920	       curr_state->unique_num,
7921	       (curr_state->originator == NULL
7922		? -1 : curr_state->originator->unique_num),
7923	       curr_state->cost,
7924	       curr_state->before_nops_num, curr_state->after_nops_num,
7925	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
7926	       (ia64_tune == PROCESSOR_ITANIUM
7927		? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7928		: ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7929	       INSN_UID (insn));
7930	  }
7931    }
7932
7933  /* We should find a solution because the 2nd insn scheduling has
7934     found one.  */
7935  gcc_assert (index_to_bundle_states [insn_num]);
7936  /* Find a state corresponding to the best insn sequence.  */
7937  best_state = NULL;
7938  for (curr_state = index_to_bundle_states [insn_num];
7939       curr_state != NULL;
7940       curr_state = curr_state->next)
7941    /* We are only looking at the states with a fully filled up last
7942       bundle.  First we prefer insn sequences with minimal cost,
7943       then with minimal inserted nops, and finally with branch insns
7944       placed in the 3rd slots.  */
7945    if (curr_state->accumulated_insns_num % 3 == 0
7946	&& (best_state == NULL || best_state->cost > curr_state->cost
7947	    || (best_state->cost == curr_state->cost
7948		&& (curr_state->accumulated_insns_num
7949		    < best_state->accumulated_insns_num
7950		    || (curr_state->accumulated_insns_num
7951			== best_state->accumulated_insns_num
7952			&& curr_state->branch_deviation
7953			< best_state->branch_deviation)))))
7954      best_state = curr_state;
7955  /* Second (backward) pass: adding nops and templates.  */
7956  insn_num = best_state->before_nops_num;
7957  template0 = template1 = -1;
7958  for (curr_state = best_state;
7959       curr_state->originator != NULL;
7960       curr_state = curr_state->originator)
7961    {
7962      insn = curr_state->insn;
7963      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
7964	       || asm_noperands (PATTERN (insn)) >= 0);
7965      insn_num++;
7966      if (verbose >= 2 && dump)
7967	{
7968	  struct DFA_chip
7969	  {
7970	    unsigned short one_automaton_state;
7971	    unsigned short oneb_automaton_state;
7972	    unsigned short two_automaton_state;
7973	    unsigned short twob_automaton_state;
7974	  };
7975
7976	  fprintf
7977	    (dump,
7978	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, state %d) for %d\n",
7979	     curr_state->unique_num,
7980	     (curr_state->originator == NULL
7981	      ? -1 : curr_state->originator->unique_num),
7982	     curr_state->cost,
7983	     curr_state->before_nops_num, curr_state->after_nops_num,
7984	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
7985	     (ia64_tune == PROCESSOR_ITANIUM
7986	      ? ((struct DFA_chip *) curr_state->dfa_state)->oneb_automaton_state
7987	      : ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state),
7988	     INSN_UID (insn));
7989	}
7990      /* Find the position in the current bundle window.  The window can
7991	 contain at most two bundles.  A two-bundle window means that
7992	 the processor will make two bundle rotations.  */
7993      max_pos = get_max_pos (curr_state->dfa_state);
7994      if (max_pos == 6
7995	  /* The following (negative template number) means that the
7996	     processor did one bundle rotation.  */
7997	  || (max_pos == 3 && template0 < 0))
7998	{
7999	  /* We are at the end of the window -- find template(s) for
8000	     its bundle(s).  */
8001	  pos = max_pos;
8002	  if (max_pos == 3)
8003	    template0 = get_template (curr_state->dfa_state, 3);
8004	  else
8005	    {
8006	      template1 = get_template (curr_state->dfa_state, 3);
8007	      template0 = get_template (curr_state->dfa_state, 6);
8008	    }
8009	}
8010      if (max_pos > 3 && template1 < 0)
8011	/* It may happen when we have a stop inside a bundle.  */
8012	{
8013	  gcc_assert (pos <= 3);
8014	  template1 = get_template (curr_state->dfa_state, 3);
8015	  pos += 3;
8016	}
8017      if (!asm_p)
8018	/* Emit nops after the current insn.  */
8019	for (i = 0; i < curr_state->after_nops_num; i++)
8020	  {
8021	    nop = gen_nop ();
8022	    emit_insn_after (nop, insn);
8023	    pos--;
8024	    gcc_assert (pos >= 0);
8025	    if (pos % 3 == 0)
8026	      {
8027		/* We are at the start of a bundle: emit the template
8028		   (it should be defined).  */
8029		gcc_assert (template0 >= 0);
8030		ia64_add_bundle_selector_before (template0, nop);
8031		/* If we have a two-bundle window, we make one bundle
8032		   rotation.  Otherwise template0 will be undefined
8033		   (a negative value).  */
8034		template0 = template1;
8035		template1 = -1;
8036	      }
8037	  }
8038      /* Move the position backward in the window.  A group barrier has
8039	 no slot.  An asm insn takes a whole bundle.  */
8040      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8041	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8042	  && asm_noperands (PATTERN (insn)) < 0)
8043	pos--;
8044      /* Long insn takes 2 slots.  */
8045      if (ia64_safe_type (insn) == TYPE_L)
8046	pos--;
8047      gcc_assert (pos >= 0);
8048      if (pos % 3 == 0
8049	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
8050	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
8051	  && asm_noperands (PATTERN (insn)) < 0)
8052	{
8053	  /* The current insn is at the bundle start: emit the
8054	     template.  */
8055	  gcc_assert (template0 >= 0);
8056	  ia64_add_bundle_selector_before (template0, insn);
8057	  b = PREV_INSN (insn);
8058	  insn = b;
8059	  /* See comment above in analogous place for emitting nops
8060	     after the insn.  */
8061	  template0 = template1;
8062	  template1 = -1;
8063	}
8064      /* Emit nops before the current insn.  */
8065      for (i = 0; i < curr_state->before_nops_num; i++)
8066	{
8067	  nop = gen_nop ();
8068	  ia64_emit_insn_before (nop, insn);
8069	  nop = PREV_INSN (insn);
8070	  insn = nop;
8071	  pos--;
8072	  gcc_assert (pos >= 0);
8073	  if (pos % 3 == 0)
8074	    {
8075	      /* See comment above in analogous place for emitting nops
8076		 after the insn.  */
8077	      gcc_assert (template0 >= 0);
8078	      ia64_add_bundle_selector_before (template0, insn);
8079	      b = PREV_INSN (insn);
8080	      insn = b;
8081	      template0 = template1;
8082	      template1 = -1;
8083	    }
8084	}
8085    }
8086  if (ia64_tune == PROCESSOR_ITANIUM)
8087    /* Insert additional cycles for MM-insns (MMMUL and MMSHF).
8088       Itanium1 has a strange design: if the distance between an insn
8089       and a dependent MM-insn is less than 4 cycles then we get an
8090       additional 6-cycle stall.  So we make the distance equal to 4
8091       cycles if it is less.  */
8092    for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
8093	 insn != NULL_RTX;
8094	 insn = next_insn)
8095      {
8096	gcc_assert (INSN_P (insn)
8097		    && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
8098		    && GET_CODE (PATTERN (insn)) != USE
8099		    && GET_CODE (PATTERN (insn)) != CLOBBER);
8100	next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
8101	if (INSN_UID (insn) < clocks_length && add_cycles [INSN_UID (insn)])
8102	  /* We found an MM-insn which needs additional cycles.  */
8103	  {
8104	    rtx last;
8105	    int i, j, n;
8106	    int pred_stop_p;
8107
8108	    /* Now we are searching for the template of the bundle in
8109	       which the MM-insn is placed and for the position of the
8110	       insn in the bundle (0, 1, 2).  We also check whether
8111	       there is a stop before the insn.  */
8112	    last = prev_active_insn (insn);
8113	    pred_stop_p = recog_memoized (last) == CODE_FOR_insn_group_barrier;
8114	    if (pred_stop_p)
8115	      last = prev_active_insn (last);
8116	    n = 0;
8117	    for (;; last = prev_active_insn (last))
8118	      if (recog_memoized (last) == CODE_FOR_bundle_selector)
8119		{
8120		  template0 = XINT (XVECEXP (PATTERN (last), 0, 0), 0);
8121		  if (template0 == 9)
8122		    /* The insn is in an MLX bundle.  Change the template
8123		       to MFI because we will add nops before the
8124		       insn.  This simplifies the subsequent code a lot.  */
8125		    PATTERN (last)
8126		      = gen_bundle_selector (const2_rtx); /* -> MFI */
8127		  break;
8128		}
8129	      else if (recog_memoized (last) != CODE_FOR_insn_group_barrier
8130		       && (ia64_safe_itanium_class (last)
8131			   != ITANIUM_CLASS_IGNORE))
8132		n++;
8133	    /* Some correctness checks: the stop is not at the
8134	       bundle start, there are no more than 3 insns in the bundle,
8135	       and the MM-insn is not at the start of a bundle with
8136	       template MLX.  */
8137	    gcc_assert ((!pred_stop_p || n)
8138			&& n <= 2
8139			&& (template0 != 9 || !n));
8140	    /* Put nops after the insn in the bundle.  */
8141	    for (j = 3 - n; j > 0; j --)
8142	      ia64_emit_insn_before (gen_nop (), insn);
8143	    /* This takes into account that we will add N more nops
8144	       before the insn later -- see the code below.  */
8145	    add_cycles [INSN_UID (insn)]--;
8146	    if (!pred_stop_p || add_cycles [INSN_UID (insn)])
8147	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8148				     insn);
8149	    if (pred_stop_p)
8150	      add_cycles [INSN_UID (insn)]--;
8151	    for (i = add_cycles [INSN_UID (insn)]; i > 0; i--)
8152	      {
8153		/* Insert "MII;" template.  */
8154		ia64_emit_insn_before (gen_bundle_selector (const0_rtx),
8155				       insn);
8156		ia64_emit_insn_before (gen_nop (), insn);
8157		ia64_emit_insn_before (gen_nop (), insn);
8158		if (i > 1)
8159		  {
8160		    /* To decrease code size, we use "MI;I;"
8161		       template.  */
8162		    ia64_emit_insn_before
8163		      (gen_insn_group_barrier (GEN_INT (3)), insn);
8164		    i--;
8165		  }
8166		ia64_emit_insn_before (gen_nop (), insn);
8167		ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8168				       insn);
8169	      }
8170	    /* Put the MM-insn in the same slot of a bundle with the
8171	       same template as the original one.  */
8172	    ia64_add_bundle_selector_before (template0, insn);
8173	    /* To put the insn in the same slot, add the necessary number
8174	       of nops.  */
8175	    for (j = n; j > 0; j --)
8176	      ia64_emit_insn_before (gen_nop (), insn);
8177	    /* Put the stop if the original bundle had it.  */
8178	    if (pred_stop_p)
8179	      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8180				     insn);
8181	  }
8182      }
8183  free (index_to_bundle_states);
8184  finish_bundle_state_table ();
8185  bundling_p = 0;
8186  dfa_clean_insn_cache ();
8187}
8188
8189/* The following function is called at the end of scheduling BB or
8190   EBB.  After reload, it inserts stop bits and does insn bundling.  */
8191
8192static void
8193ia64_sched_finish (FILE *dump, int sched_verbose)
8194{
8195  if (sched_verbose)
8196    fprintf (dump, "// Finishing schedule.\n");
8197  if (!reload_completed)
8198    return;
8199  if (reload_completed)
8200    {
8201      final_emit_insn_group_barriers (dump);
8202      bundling (dump, sched_verbose, current_sched_info->prev_head,
8203		current_sched_info->next_tail);
8204      if (sched_verbose && dump)
8205	fprintf (dump, "//    finishing %d-%d\n",
8206		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
8207		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
8208
8209      return;
8210    }
8211}
8212
8213/* The following function inserts stop bits in scheduled BB or EBB.  */
8214
8215static void
8216final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
8217{
8218  rtx insn;
8219  int need_barrier_p = 0;
8220  rtx prev_insn = NULL_RTX;
8221
8222  init_insn_group_barriers ();
8223
8224  for (insn = NEXT_INSN (current_sched_info->prev_head);
8225       insn != current_sched_info->next_tail;
8226       insn = NEXT_INSN (insn))
8227    {
8228      if (GET_CODE (insn) == BARRIER)
8229	{
8230	  rtx last = prev_active_insn (insn);
8231
8232	  if (! last)
8233	    continue;
8234	  if (GET_CODE (last) == JUMP_INSN
8235	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
8236	    last = prev_active_insn (last);
8237	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
8238	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);
8239
8240	  init_insn_group_barriers ();
8241	  need_barrier_p = 0;
8242	  prev_insn = NULL_RTX;
8243	}
8244      else if (INSN_P (insn))
8245	{
8246	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
8247	    {
8248	      init_insn_group_barriers ();
8249	      need_barrier_p = 0;
8250	      prev_insn = NULL_RTX;
8251	    }
8252	  else if (need_barrier_p || group_barrier_needed (insn))
8253	    {
8254	      if (TARGET_EARLY_STOP_BITS)
8255		{
8256		  rtx last;
8257
8258		  for (last = insn;
8259		       last != current_sched_info->prev_head;
8260		       last = PREV_INSN (last))
8261		    if (INSN_P (last) && GET_MODE (last) == TImode
8262			&& stops_p [INSN_UID (last)])
8263		      break;
8264		  if (last == current_sched_info->prev_head)
8265		    last = insn;
8266		  last = prev_active_insn (last);
8267		  if (last
8268		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
8269		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
8270				     last);
8271		  init_insn_group_barriers ();
8272		  for (last = NEXT_INSN (last);
8273		       last != insn;
8274		       last = NEXT_INSN (last))
8275		    if (INSN_P (last))
8276		      group_barrier_needed (last);
8277		}
8278	      else
8279		{
8280		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
8281				    insn);
8282		  init_insn_group_barriers ();
8283		}
8284	      group_barrier_needed (insn);
8285	      prev_insn = NULL_RTX;
8286	    }
8287	  else if (recog_memoized (insn) >= 0)
8288	    prev_insn = insn;
8289	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
8290			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
8291			    || asm_noperands (PATTERN (insn)) >= 0);
8292	}
8293    }
8294}
8295
8296
8297
8298/* The following function returns how many insns the DFA insn scheduler
8299   should look ahead during first-cycle multipass scheduling.  */
8300
8301static int
8302ia64_first_cycle_multipass_dfa_lookahead (void)
8303{
8304  return (reload_completed ? 6 : 4);
8305}
8306
8307/* The following function initializes the variables `dfa_pre_cycle_insn' and `dfa_stop_insn'.  */
8308
8309static void
8310ia64_init_dfa_pre_cycle_insn (void)
8311{
8312  if (temp_dfa_state == NULL)
8313    {
8314      dfa_state_size = state_size ();
8315      temp_dfa_state = xmalloc (dfa_state_size);
8316      prev_cycle_state = xmalloc (dfa_state_size);
8317    }
8318  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
8319  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
8320  recog_memoized (dfa_pre_cycle_insn);
8321  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
8322  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
8323  recog_memoized (dfa_stop_insn);
8324}
8325
8326/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
8327   used by the DFA insn scheduler.  */
8328
8329static rtx
8330ia64_dfa_pre_cycle_insn (void)
8331{
8332  return dfa_pre_cycle_insn;
8333}
8334
8335/* The following function returns TRUE if PRODUCER (of type ilog or
8336   ld) produces the address for CONSUMER (of type st or stf).  */
8337
8338int
8339ia64_st_address_bypass_p (rtx producer, rtx consumer)
8340{
8341  rtx dest, reg, mem;
8342
8343  gcc_assert (producer && consumer);
8344  dest = ia64_single_set (producer);
8345  gcc_assert (dest);
8346  reg = SET_DEST (dest);
8347  gcc_assert (reg);
8348  if (GET_CODE (reg) == SUBREG)
8349    reg = SUBREG_REG (reg);
8350  gcc_assert (GET_CODE (reg) == REG);
8351
8352  dest = ia64_single_set (consumer);
8353  gcc_assert (dest);
8354  mem = SET_DEST (dest);
8355  gcc_assert (mem && GET_CODE (mem) == MEM);
8356  return reg_mentioned_p (reg, mem);
8357}
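
/* Schematically: if the producer is
     (set (reg:DI r14) (plus:DI (reg:DI r32) (reg:DI r33)))
   and the consumer is
     (set (mem:DI (reg:DI r14)) (reg:DI r35))
   then r14 is mentioned in the consumer's address and the bypass applies.
   (Illustrative RTL only.)  */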
8358
8359/* The following function returns TRUE if PRODUCER (of type ilog or
8360   ld) produces the address for CONSUMER (of type ld or fld).  */
8361
8362int
8363ia64_ld_address_bypass_p (rtx producer, rtx consumer)
8364{
8365  rtx dest, src, reg, mem;
8366
8367  gcc_assert (producer && consumer);
8368  dest = ia64_single_set (producer);
8369  gcc_assert (dest);
8370  reg = SET_DEST (dest);
8371  gcc_assert (reg);
8372  if (GET_CODE (reg) == SUBREG)
8373    reg = SUBREG_REG (reg);
8374  gcc_assert (GET_CODE (reg) == REG);
8375
8376  src = ia64_single_set (consumer);
8377  gcc_assert (src);
8378  mem = SET_SRC (src);
8379  gcc_assert (mem);
8380
8381  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
8382    mem = XVECEXP (mem, 0, 0);
8383  else if (GET_CODE (mem) == IF_THEN_ELSE)
8384    /* ??? Is this bypass necessary for ld.c?  */
8385    {
8386      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
8387      mem = XEXP (mem, 1);
8388    }
8389
8390  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
8391    mem = XEXP (mem, 0);
8392
8393  if (GET_CODE (mem) == UNSPEC)
8394    {
8395      int c = XINT (mem, 1);
8396
8397      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDSA);
8398      mem = XVECEXP (mem, 0, 0);
8399    }
8400
8401  /* Note that LO_SUM is used for GOT loads.  */
8402  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);
8403
8404  return reg_mentioned_p (reg, mem);
8405}
8406
8407/* The following function returns TRUE if INSN produces an address for a
8408   load/store insn.  We will place such insns into the M slot because it
8409   decreases their latency.  */
8410
8411int
8412ia64_produce_address_p (rtx insn)
8413{
8414  return insn->call;
8415}
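
/* Note that this reuses the CALL flag bit of the insn rtx as a plain
   marker rather than indicating a real call; it is expected to be set
   during dependence evaluation, earlier in this file, for insns whose
   result feeds the address of a memory access, and merely reported
   here.  */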
8416
8417
8418/* Emit pseudo-ops for the assembler to describe predicate relations.
8419   At present this assumes that we only consider predicate pairs to
8420   be mutex, and that the assembler can deduce proper values from
8421   straight-line code.  */
8422
8423static void
8424emit_predicate_relation_info (void)
8425{
8426  basic_block bb;
8427
8428  FOR_EACH_BB_REVERSE (bb)
8429    {
8430      int r;
8431      rtx head = BB_HEAD (bb);
8432
8433      /* We only need such notes at code labels.  */
8434      if (GET_CODE (head) != CODE_LABEL)
8435	continue;
8436      if (GET_CODE (NEXT_INSN (head)) == NOTE
8437	  && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK)
8438	head = NEXT_INSN (head);
8439
8440      /* Skip p0, which may be thought to be live due to (reg:DI p0)
8441	 grabbing the entire block of predicate registers.  */
8442      for (r = PR_REG (2); r < PR_REG (64); r += 2)
8443	if (REGNO_REG_SET_P (bb->il.rtl->global_live_at_start, r))
8444	  {
8445	    rtx p = gen_rtx_REG (BImode, r);
8446	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
8447	    if (head == BB_END (bb))
8448	      BB_END (bb) = n;
8449	    head = n;
8450	  }
8451    }
8452
8453  /* Look for conditional calls that do not return, and protect predicate
8454     relations around them.  Otherwise the assembler will assume the call
8455     returns, and complain about uses of call-clobbered predicates after
8456     the call.  */
8457  FOR_EACH_BB_REVERSE (bb)
8458    {
8459      rtx insn = BB_HEAD (bb);
8460
8461      while (1)
8462	{
8463	  if (GET_CODE (insn) == CALL_INSN
8464	      && GET_CODE (PATTERN (insn)) == COND_EXEC
8465	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
8466	    {
8467	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
8468	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
8469	      if (BB_HEAD (bb) == insn)
8470		BB_HEAD (bb) = b;
8471	      if (BB_END (bb) == insn)
8472		BB_END (bb) = a;
8473	    }
8474
8475	  if (insn == BB_END (bb))
8476	    break;
8477	  insn = NEXT_INSN (insn);
8478	}
8479    }
8480}
8481
8482/* Perform machine dependent operations on the rtl chain INSNS.  */
8483
8484static void
8485ia64_reorg (void)
8486{
8487  /* We are freeing block_for_insn in the toplev to keep compatibility
8488     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
8489  compute_bb_for_insn ();
8490
8491  /* If optimizing, we'll have split before scheduling.  */
8492  if (optimize == 0)
8493    split_all_insns (0);
8494
8495  /* ??? update_life_info_in_dirty_blocks fails to terminate during
8496     non-optimizing bootstrap.  */
8497  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES);
8498
8499  if (optimize && ia64_flag_schedule_insns2)
8500    {
8501      timevar_push (TV_SCHED2);
8502      ia64_final_schedule = 1;
8503
8504      initiate_bundle_states ();
8505      ia64_nop = make_insn_raw (gen_nop ());
8506      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
8507      recog_memoized (ia64_nop);
8508      clocks_length = get_max_uid () + 1;
8509      stops_p = xcalloc (1, clocks_length);
8510      if (ia64_tune == PROCESSOR_ITANIUM)
8511	{
8512	  clocks = xcalloc (clocks_length, sizeof (int));
8513	  add_cycles = xcalloc (clocks_length, sizeof (int));
8514	}
8515      if (ia64_tune == PROCESSOR_ITANIUM2)
8516	{
8517	  pos_1 = get_cpu_unit_code ("2_1");
8518	  pos_2 = get_cpu_unit_code ("2_2");
8519	  pos_3 = get_cpu_unit_code ("2_3");
8520	  pos_4 = get_cpu_unit_code ("2_4");
8521	  pos_5 = get_cpu_unit_code ("2_5");
8522	  pos_6 = get_cpu_unit_code ("2_6");
8523	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
8524	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
8525	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
8526	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
8527	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
8528	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
8529	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
8530	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
8531	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
8532	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
8533	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
8534	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
8535	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
8536	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
8537	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
8538	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
8539	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
8540	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
8541	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
8542	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
8543	}
8544      else
8545	{
8546	  pos_1 = get_cpu_unit_code ("1_1");
8547	  pos_2 = get_cpu_unit_code ("1_2");
8548	  pos_3 = get_cpu_unit_code ("1_3");
8549	  pos_4 = get_cpu_unit_code ("1_4");
8550	  pos_5 = get_cpu_unit_code ("1_5");
8551	  pos_6 = get_cpu_unit_code ("1_6");
8552	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
8553	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
8554	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
8555	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
8556	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
8557	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
8558	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
8559	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
8560	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
8561	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
8562	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
8563	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
8564	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
8565	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
8566	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
8567	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
8568	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
8569	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
8570	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
8571	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
8572	}
8573      schedule_ebbs ();
8574      finish_bundle_states ();
8575      if (ia64_tune == PROCESSOR_ITANIUM)
8576	{
8577	  free (add_cycles);
8578	  free (clocks);
8579	}
8580      free (stops_p);
8581      stops_p = NULL;
8582      emit_insn_group_barriers (dump_file);
8583
8584      ia64_final_schedule = 0;
8585      timevar_pop (TV_SCHED2);
8586    }
8587  else
8588    emit_all_insn_group_barriers (dump_file);
8589
8590  /* A call must not be the last instruction in a function, so that the
8591     return address stays within the function and unwinding works
8592     properly.  Note that IA-64 differs from dwarf2 on this point.  */
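  /* Sketch of the emitted tail (assembly mnemonics are an assumption based
     on the insn names used below):

	br.call.sptk.many b0 = foo#
	;;		// stop bit, added only if one was not already there
	break.f 0	// dummy insn so the call is not last in the function
	;;

     keeping the return address inside the function for the unwinder.  */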
8593  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
8594    {
8595      rtx insn;
8596      int saw_stop = 0;
8597
8598      insn = get_last_insn ();
8599      if (! INSN_P (insn))
8600        insn = prev_active_insn (insn);
8601      /* Skip over insns that expand to nothing.  */
8602      while (GET_CODE (insn) == INSN && get_attr_empty (insn) == EMPTY_YES)
8603        {
8604	  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
8605	      && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
8606	    saw_stop = 1;
8607	  insn = prev_active_insn (insn);
8608	}
8609      if (GET_CODE (insn) == CALL_INSN)
8610	{
8611	  if (! saw_stop)
8612	    emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8613	  emit_insn (gen_break_f ());
8614	  emit_insn (gen_insn_group_barrier (GEN_INT (3)));
8615	}
8616    }
8617
8618  emit_predicate_relation_info ();
8619
8620  if (ia64_flag_var_tracking)
8621    {
8622      timevar_push (TV_VAR_TRACKING);
8623      variable_tracking_main ();
8624      timevar_pop (TV_VAR_TRACKING);
8625    }
8626}
8627
8628/* Return true if REGNO is used by the epilogue.  */
8629
8630int
8631ia64_epilogue_uses (int regno)
8632{
8633  switch (regno)
8634    {
8635    case R_GR (1):
8636      /* With a call to a function in another module, we will write a new
8637	 value to "gp".  After returning from such a call, we need to make
8638	 sure the function restores the original gp-value, even if the
8639	 function itself does not use the gp anymore.  */
8640      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);
8641
8642    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
8643    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
8644      /* For functions defined with the syscall_linkage attribute, all
8645	 input registers are marked as live at all function exits.  This
8646	 prevents the register allocator from using the input registers,
8647	 which in turn makes it possible to restart a system call after
8648	 an interrupt without having to save/restore the input registers.
8649	 This also prevents kernel data from leaking to application code.  */
8650      return lookup_attribute ("syscall_linkage",
8651	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;
8652
8653    case R_BR (0):
8654      /* Conditional return patterns can't represent the use of `b0' as
8655         the return address, so we force the value live this way.  */
8656      return 1;
8657
8658    case AR_PFS_REGNUM:
8659      /* Likewise for ar.pfs, which is used by br.ret.  */
8660      return 1;
8661
8662    default:
8663      return 0;
8664    }
8665}
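
/* Illustrative sketch of the syscall_linkage case above (hypothetical
   declaration, not from the original source):

	long sys_entry (long a0, long a1) __attribute__ ((syscall_linkage));

   With that attribute, in0-in7 are treated as live at every exit of the
   function's definition, which is what the IN_REG cases implement.  */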
8666
8667/* Return true if REGNO is used by the frame unwinder.  */
8668
8669int
8670ia64_eh_uses (int regno)
8671{
8672  if (! reload_completed)
8673    return 0;
8674
8675  if (current_frame_info.reg_save_b0
8676      && regno == current_frame_info.reg_save_b0)
8677    return 1;
8678  if (current_frame_info.reg_save_pr
8679      && regno == current_frame_info.reg_save_pr)
8680    return 1;
8681  if (current_frame_info.reg_save_ar_pfs
8682      && regno == current_frame_info.reg_save_ar_pfs)
8683    return 1;
8684  if (current_frame_info.reg_save_ar_unat
8685      && regno == current_frame_info.reg_save_ar_unat)
8686    return 1;
8687  if (current_frame_info.reg_save_ar_lc
8688      && regno == current_frame_info.reg_save_ar_lc)
8689    return 1;
8690
8691  return 0;
8692}
8693
8694/* Return true if EXP goes in small data/bss.  */
8695
8696/* ??? We could also support our own long data here, generating movl/add/ld8
8697   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
8698   code faster because there is one less load.  This would also cover
8699   incomplete types, which can't go in sdata/sbss.  */
8700
8701static bool
8702ia64_in_small_data_p (tree exp)
8703{
8704  if (TARGET_NO_SDATA)
8705    return false;
8706
8707  /* We want to merge strings, so we never consider them small data.  */
8708  if (TREE_CODE (exp) == STRING_CST)
8709    return false;
8710
8711  /* Functions are never small data.  */
8712  if (TREE_CODE (exp) == FUNCTION_DECL)
8713    return false;
8714
8715  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
8716    {
8717      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
8718
8719      if (strcmp (section, ".sdata") == 0
8720	  || strncmp (section, ".sdata.", 7) == 0
8721	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
8722	  || strcmp (section, ".sbss") == 0
8723	  || strncmp (section, ".sbss.", 6) == 0
8724	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
8725	return true;
8726    }
8727  else
8728    {
8729      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
8730
8731      /* If this is an incomplete type with size 0, then we can't put it
8732	 in sdata because it might be too big when completed.  */
8733      if (size > 0 && size <= ia64_section_threshold)
8734	return true;
8735    }
8736
8737  return false;
8738}
8739
8740/* Output assembly directives for prologue regions.  */
8741
8742/* The current basic block number.  */
8743
8744static bool last_block;
8745
8746/* True if we need a copy_state command at the start of the next block.  */
8747
8748static bool need_copy_state;
8749
8750#ifndef MAX_ARTIFICIAL_LABEL_BYTES
8751# define MAX_ARTIFICIAL_LABEL_BYTES 30
8752#endif
8753
8754/* Emit a debugging label after a call-frame-related insn.  We'd
8755   rather output the label right away, but we'd have to output it
8756   after, not before, the instruction, and the instruction has not
8757   been output yet.  So we emit the label after the insn, delete it to
8758   avoid introducing basic blocks, and mark it as preserved, so that
8759   it is still output, since it is referenced in debug info.  */
8760
8761static const char *
8762ia64_emit_deleted_label_after_insn (rtx insn)
8763{
8764  char label[MAX_ARTIFICIAL_LABEL_BYTES];
8765  rtx lb = gen_label_rtx ();
8766  rtx label_insn = emit_label_after (lb, insn);
8767
8768  LABEL_PRESERVE_P (lb) = 1;
8769
8770  delete_insn (label_insn);
8771
8772  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));
8773
8774  return xstrdup (label);
8775}
8776
8777/* Define the CFA after INSN with the steady-state definition.  */
8778
8779static void
8780ia64_dwarf2out_def_steady_cfa (rtx insn)
8781{
8782  rtx fp = frame_pointer_needed
8783    ? hard_frame_pointer_rtx
8784    : stack_pointer_rtx;
8785
8786  dwarf2out_def_cfa
8787    (ia64_emit_deleted_label_after_insn (insn),
8788     REGNO (fp),
8789     ia64_initial_elimination_offset
8790     (REGNO (arg_pointer_rtx), REGNO (fp))
8791     + ARG_POINTER_CFA_OFFSET (current_function_decl));
8792}
8793
8794/* The generic dwarf2 frame debug info generator does not define a
8795   separate region for the very end of the epilogue, so refrain from
8796   doing so in the IA64-specific code as well.  */
8797
8798#define IA64_CHANGE_CFA_IN_EPILOGUE 0
8799
8800/* The function emits unwind directives for the start of an epilogue.  */
8801
8802static void
8803process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
8804{
8805  /* If this isn't the last block of the function, then we need to label the
8806     current state, and copy it back in at the start of the next block.  */
8807
8808  if (!last_block)
8809    {
8810      if (unwind)
8811	fprintf (asm_out_file, "\t.label_state %d\n",
8812		 ++cfun->machine->state_num);
8813      need_copy_state = true;
8814    }
8815
8816  if (unwind)
8817    fprintf (asm_out_file, "\t.restore sp\n");
8818  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
8819    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
8820		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
8821}
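
/* Illustrative sketch of the directives produced for an epilogue that is
   not in the last block (the state number is arbitrary; only the directive
   names come from this file):

	.label_state 1
	.restore sp
	...
	.body
	.copy_state 1

   The .body/.copy_state pair is emitted later by
   process_for_unwind_directive when the next basic block starts.  */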
8822
8823/* This function processes a SET pattern, looking for the specific forms
8824   that require emitting an assembly directive for unwinding.  */
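/* Illustrative mapping (register numbers are hypothetical; the directive
   names are the ones emitted below):

	alloc r40 = ar.pfs, ...   ->  .save ar.pfs, r40
	adds  r12 = -160, r12     ->  .fframe 160
	mov   r42 = b0            ->  .save rp, r42

   Anything else stored to memory is handled by the .savesp/.savepsp and
   .save.g/.save.b/.save.f cases further down.  */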
8825
8826static int
8827process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
8828{
8829  rtx src = SET_SRC (pat);
8830  rtx dest = SET_DEST (pat);
8831  int src_regno, dest_regno;
8832
8833  /* Look for the ALLOC insn.  */
8834  if (GET_CODE (src) == UNSPEC_VOLATILE
8835      && XINT (src, 1) == UNSPECV_ALLOC
8836      && GET_CODE (dest) == REG)
8837    {
8838      dest_regno = REGNO (dest);
8839
8840      /* If this is the final destination for ar.pfs, then this must
8841	 be the alloc in the prologue.  */
8842      if (dest_regno == current_frame_info.reg_save_ar_pfs)
8843	{
8844	  if (unwind)
8845	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
8846		     ia64_dbx_register_number (dest_regno));
8847	}
8848      else
8849	{
8850	  /* This must be an alloc before a sibcall.  We must drop the
8851	     old frame info.  The easiest way to drop the old frame
8852	     info is to ensure we had a ".restore sp" directive
8853	     followed by a new prologue.  If the procedure doesn't
8854	     have a memory-stack frame, we'll issue a dummy ".restore
8855	     sp" now.  */
8856	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
8857	    /* If we haven't done process_epilogue () yet, do it now.  */
8858	    process_epilogue (asm_out_file, insn, unwind, frame);
8859	  if (unwind)
8860	    fprintf (asm_out_file, "\t.prologue\n");
8861	}
8862      return 1;
8863    }
8864
8865  /* Look for SP = ....  */
8866  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
8867    {
8868      if (GET_CODE (src) == PLUS)
8869        {
8870	  rtx op0 = XEXP (src, 0);
8871	  rtx op1 = XEXP (src, 1);
8872
8873	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
8874
8875	  if (INTVAL (op1) < 0)
8876	    {
8877	      gcc_assert (!frame_pointer_needed);
8878	      if (unwind)
8879		fprintf (asm_out_file, "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n",
8880			 -INTVAL (op1));
8881	      if (frame)
8882		ia64_dwarf2out_def_steady_cfa (insn);
8883	    }
8884	  else
8885	    process_epilogue (asm_out_file, insn, unwind, frame);
8886	}
8887      else
8888	{
8889	  gcc_assert (GET_CODE (src) == REG
8890		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
8891	  process_epilogue (asm_out_file, insn, unwind, frame);
8892	}
8893
8894      return 1;
8895    }
8896
8897  /* Register move we need to look at.  */
8898  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
8899    {
8900      src_regno = REGNO (src);
8901      dest_regno = REGNO (dest);
8902
8903      switch (src_regno)
8904	{
8905	case BR_REG (0):
8906	  /* Saving return address pointer.  */
8907	  gcc_assert (dest_regno == current_frame_info.reg_save_b0);
8908	  if (unwind)
8909	    fprintf (asm_out_file, "\t.save rp, r%d\n",
8910		     ia64_dbx_register_number (dest_regno));
8911	  return 1;
8912
8913	case PR_REG (0):
8914	  gcc_assert (dest_regno == current_frame_info.reg_save_pr);
8915	  if (unwind)
8916	    fprintf (asm_out_file, "\t.save pr, r%d\n",
8917		     ia64_dbx_register_number (dest_regno));
8918	  return 1;
8919
8920	case AR_UNAT_REGNUM:
8921	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_unat);
8922	  if (unwind)
8923	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
8924		     ia64_dbx_register_number (dest_regno));
8925	  return 1;
8926
8927	case AR_LC_REGNUM:
8928	  gcc_assert (dest_regno == current_frame_info.reg_save_ar_lc);
8929	  if (unwind)
8930	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
8931		     ia64_dbx_register_number (dest_regno));
8932	  return 1;
8933
8934	case STACK_POINTER_REGNUM:
8935	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
8936		      && frame_pointer_needed);
8937	  if (unwind)
8938	    fprintf (asm_out_file, "\t.vframe r%d\n",
8939		     ia64_dbx_register_number (dest_regno));
8940	  if (frame)
8941	    ia64_dwarf2out_def_steady_cfa (insn);
8942	  return 1;
8943
8944	default:
8945	  /* Everything else should indicate being stored to memory.  */
8946	  gcc_unreachable ();
8947	}
8948    }
8949
8950  /* Memory store we need to look at.  */
8951  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
8952    {
8953      long off;
8954      rtx base;
8955      const char *saveop;
8956
8957      if (GET_CODE (XEXP (dest, 0)) == REG)
8958	{
8959	  base = XEXP (dest, 0);
8960	  off = 0;
8961	}
8962      else
8963	{
8964	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
8965		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
8966	  base = XEXP (XEXP (dest, 0), 0);
8967	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
8968	}
8969
8970      if (base == hard_frame_pointer_rtx)
8971	{
8972	  saveop = ".savepsp";
8973	  off = - off;
8974	}
8975      else
8976	{
8977	  gcc_assert (base == stack_pointer_rtx);
8978	  saveop = ".savesp";
8979	}
8980
8981      src_regno = REGNO (src);
8982      switch (src_regno)
8983	{
8984	case BR_REG (0):
8985	  gcc_assert (!current_frame_info.reg_save_b0);
8986	  if (unwind)
8987	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
8988	  return 1;
8989
8990	case PR_REG (0):
8991	  gcc_assert (!current_frame_info.reg_save_pr);
8992	  if (unwind)
8993	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
8994	  return 1;
8995
8996	case AR_LC_REGNUM:
8997	  gcc_assert (!current_frame_info.reg_save_ar_lc);
8998	  if (unwind)
8999	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
9000	  return 1;
9001
9002	case AR_PFS_REGNUM:
9003	  gcc_assert (!current_frame_info.reg_save_ar_pfs);
9004	  if (unwind)
9005	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
9006	  return 1;
9007
9008	case AR_UNAT_REGNUM:
9009	  gcc_assert (!current_frame_info.reg_save_ar_unat);
9010	  if (unwind)
9011	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
9012	  return 1;
9013
9014	case GR_REG (4):
9015	case GR_REG (5):
9016	case GR_REG (6):
9017	case GR_REG (7):
9018	  if (unwind)
9019	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
9020		     1 << (src_regno - GR_REG (4)));
9021	  return 1;
9022
9023	case BR_REG (1):
9024	case BR_REG (2):
9025	case BR_REG (3):
9026	case BR_REG (4):
9027	case BR_REG (5):
9028	  if (unwind)
9029	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
9030		     1 << (src_regno - BR_REG (1)));
9031	  return 1;
9032
9033	case FR_REG (2):
9034	case FR_REG (3):
9035	case FR_REG (4):
9036	case FR_REG (5):
9037	  if (unwind)
9038	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
9039		     1 << (src_regno - FR_REG (2)));
9040	  return 1;
9041
9042	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
9043	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
9044	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
9045	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
9046	  if (unwind)
9047	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
9048		     1 << (src_regno - FR_REG (12)));
9049	  return 1;
9050
9051	default:
9052	  return 0;
9053	}
9054    }
9055
9056  return 0;
9057}
9058
9059
9060/* This function looks at a single insn and emits any directives
9061   required to unwind this insn.  */
9062void
9063process_for_unwind_directive (FILE *asm_out_file, rtx insn)
9064{
9065  bool unwind = (flag_unwind_tables
9066		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
9067  bool frame = dwarf2out_do_frame ();
9068
9069  if (unwind || frame)
9070    {
9071      rtx pat;
9072
9073      if (GET_CODE (insn) == NOTE
9074	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
9075	{
9076	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;
9077
9078	  /* Restore unwind state from immediately before the epilogue.  */
9079	  if (need_copy_state)
9080	    {
9081	      if (unwind)
9082		{
9083		  fprintf (asm_out_file, "\t.body\n");
9084		  fprintf (asm_out_file, "\t.copy_state %d\n",
9085			   cfun->machine->state_num);
9086		}
9087	      if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
9088		ia64_dwarf2out_def_steady_cfa (insn);
9089	      need_copy_state = false;
9090	    }
9091	}
9092
9093      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
9094	return;
9095
9096      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
9097      if (pat)
9098	pat = XEXP (pat, 0);
9099      else
9100	pat = PATTERN (insn);
9101
9102      switch (GET_CODE (pat))
9103        {
9104	case SET:
9105	  process_set (asm_out_file, pat, insn, unwind, frame);
9106	  break;
9107
9108	case PARALLEL:
9109	  {
9110	    int par_index;
9111	    int limit = XVECLEN (pat, 0);
9112	    for (par_index = 0; par_index < limit; par_index++)
9113	      {
9114		rtx x = XVECEXP (pat, 0, par_index);
9115		if (GET_CODE (x) == SET)
9116		  process_set (asm_out_file, x, insn, unwind, frame);
9117	      }
9118	    break;
9119	  }
9120
9121	default:
9122	  gcc_unreachable ();
9123	}
9124    }
9125}
9126
9127
9128enum ia64_builtins
9129{
9130  IA64_BUILTIN_BSP,
9131  IA64_BUILTIN_FLUSHRS
9132};
9133
9134void
9135ia64_init_builtins (void)
9136{
9137  tree fpreg_type;
9138  tree float80_type;
9139
9140  /* The __fpreg type.  */
9141  fpreg_type = make_node (REAL_TYPE);
9142  TYPE_PRECISION (fpreg_type) = 82;
9143  layout_type (fpreg_type);
9144  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");
9145
9146  /* The __float80 type.  */
9147  float80_type = make_node (REAL_TYPE);
9148  TYPE_PRECISION (float80_type) = 80;
9149  layout_type (float80_type);
9150  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
9151
9152  /* The __float128 type.  */
9153  if (!TARGET_HPUX)
9154    {
9155      tree float128_type = make_node (REAL_TYPE);
9156      TYPE_PRECISION (float128_type) = 128;
9157      layout_type (float128_type);
9158      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
9159    }
9160  else
9161    /* Under HPUX, this is a synonym for "long double".  */
9162    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
9163					       "__float128");
9164
9165#define def_builtin(name, type, code)					\
9166  lang_hooks.builtin_function ((name), (type), (code), BUILT_IN_MD,	\
9167			       NULL, NULL_TREE)
9168
9169  def_builtin ("__builtin_ia64_bsp",
9170	       build_function_type (ptr_type_node, void_list_node),
9171	       IA64_BUILTIN_BSP);
9172
9173  def_builtin ("__builtin_ia64_flushrs",
9174	       build_function_type (void_type_node, void_list_node),
9175	       IA64_BUILTIN_FLUSHRS);
9176
9177#undef def_builtin
9178}
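
/* Illustrative use of the builtins registered above (a sketch of user
   code, not part of this file; names and signatures come from the
   def_builtin calls):

	void *bsp = __builtin_ia64_bsp ();	// current backing store pointer
	__builtin_ia64_flushrs ();		// flush the register stack

   Both expand through ia64_expand_builtin below.  */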
9179
9180rtx
9181ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9182		     enum machine_mode mode ATTRIBUTE_UNUSED,
9183		     int ignore ATTRIBUTE_UNUSED)
9184{
9185  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9186  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9187
9188  switch (fcode)
9189    {
9190    case IA64_BUILTIN_BSP:
9191      if (! target || ! register_operand (target, DImode))
9192	target = gen_reg_rtx (DImode);
9193      emit_insn (gen_bsp_value (target));
9194#ifdef POINTERS_EXTEND_UNSIGNED
9195      target = convert_memory_address (ptr_mode, target);
9196#endif
9197      return target;
9198
9199    case IA64_BUILTIN_FLUSHRS:
9200      emit_insn (gen_flushrs ());
9201      return const0_rtx;
9202
9203    default:
9204      break;
9205    }
9206
9207  return NULL_RTX;
9208}
9209
9210/* On HP-UX IA64, aggregate parameters are passed in the
9211   most significant bits of the stack slot.  */
9212
9213enum direction
9214ia64_hpux_function_arg_padding (enum machine_mode mode, tree type)
9215{
9216   /* Exception to normal case for structures/unions/etc.  */
9217
9218   if (type && AGGREGATE_TYPE_P (type)
9219       && int_size_in_bytes (type) < UNITS_PER_WORD)
9220     return upward;
9221
9222   /* Fall back to the default.  */
9223   return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
9224}
9225
9226/* Linked list of all external functions that are to be emitted by GCC.
9227   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
9228   order to avoid putting out names that are never really used.  */
9229
9230struct extern_func_list GTY(())
9231{
9232  struct extern_func_list *next;
9233  tree decl;
9234};
9235
9236static GTY(()) struct extern_func_list *extern_func_head;
9237
9238static void
9239ia64_hpux_add_extern_decl (tree decl)
9240{
9241  struct extern_func_list *p = ggc_alloc (sizeof (struct extern_func_list));
9242
9243  p->decl = decl;
9244  p->next = extern_func_head;
9245  extern_func_head = p;
9246}
9247
9248/* Print out the list of used global functions.  */
9249
9250static void
9251ia64_hpux_file_end (void)
9252{
9253  struct extern_func_list *p;
9254
9255  for (p = extern_func_head; p; p = p->next)
9256    {
9257      tree decl = p->decl;
9258      tree id = DECL_ASSEMBLER_NAME (decl);
9259
9260      gcc_assert (id);
9261
9262      if (!TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (id))
9263        {
9264	  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
9265
9266	  TREE_ASM_WRITTEN (decl) = 1;
9267	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
9268	  fputs (TYPE_ASM_OP, asm_out_file);
9269	  assemble_name (asm_out_file, name);
9270	  fprintf (asm_out_file, "," TYPE_OPERAND_FMT "\n", "function");
9271        }
9272    }
9273
9274  extern_func_head = 0;
9275}
9276
9277/* Set SImode div/mod functions, since init_integral_libfuncs only
9278   initializes modes of word_mode and larger.  Rename the TFmode libfuncs
9279   using the HP-UX conventions.  __divtf3 is used for XFmode; we need to
9280   keep it for backward compatibility.  */
9281
9282static void
9283ia64_init_libfuncs (void)
9284{
9285  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
9286  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
9287  set_optab_libfunc (smod_optab, SImode, "__modsi3");
9288  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
9289
9290  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
9291  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
9292  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
9293  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
9294  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");
9295
9296  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
9297  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
9298  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
9299  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
9300  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
9301  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");
9302
9303  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
9304  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
9305  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
9306  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
9307  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");
9308
9309  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
9310  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
9311  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
9312  /* HP-UX 11.23 libc does not have a function for unsigned
9313     SImode-to-TFmode conversion.  */
9314  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
9315}
9316
9317/* Rename all the TFmode libfuncs using the HPUX conventions.  */
9318
9319static void
9320ia64_hpux_init_libfuncs (void)
9321{
9322  ia64_init_libfuncs ();
9323
9324  /* The HP SI millicode division and mod functions expect DI arguments.
9325     By turning them off completely we avoid using both libgcc and the
9326     non-standard millicode routines and use the HP DI millicode routines
9327     instead.  */
9328
9329  set_optab_libfunc (sdiv_optab, SImode, 0);
9330  set_optab_libfunc (udiv_optab, SImode, 0);
9331  set_optab_libfunc (smod_optab, SImode, 0);
9332  set_optab_libfunc (umod_optab, SImode, 0);
9333
9334  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
9335  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
9336  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
9337  set_optab_libfunc (umod_optab, DImode, "__milli_remU");
9338
9339  /* HP-UX libc has TF min/max/abs routines in it.  */
9340  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
9341  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
9342  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
9343
9344  /* ia64_expand_compare uses this.  */
9345  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");
9346
9347  /* These should never be used.  */
9348  set_optab_libfunc (eq_optab, TFmode, 0);
9349  set_optab_libfunc (ne_optab, TFmode, 0);
9350  set_optab_libfunc (gt_optab, TFmode, 0);
9351  set_optab_libfunc (ge_optab, TFmode, 0);
9352  set_optab_libfunc (lt_optab, TFmode, 0);
9353  set_optab_libfunc (le_optab, TFmode, 0);
9354}
9355
9356/* Rename the division and modulus functions in VMS.  */
9357
9358static void
9359ia64_vms_init_libfuncs (void)
9360{
9361  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9362  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9363  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9364  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9365  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9366  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9367  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9368  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9369}
9370
9371/* Rename the TFmode libfuncs available from soft-fp in glibc using
9372   the HPUX conventions.  */
9373
9374static void
9375ia64_sysv4_init_libfuncs (void)
9376{
9377  ia64_init_libfuncs ();
9378
9379  /* These functions are not part of the HPUX TFmode interface.  We
9380     use them instead of _U_Qfcmp, which doesn't work the way we
9381     expect.  */
9382  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
9383  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
9384  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
9385  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
9386  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
9387  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
9388
9389  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
9390     glibc doesn't have them.  */
9391}
9392
9393/* For HPUX, it is illegal to have relocations in shared segments.  */
9394
9395static int
9396ia64_hpux_reloc_rw_mask (void)
9397{
9398  return 3;
9399}
9400
9401/* For others, relax this so that relocations to local data go in
9402   read-only segments, but we still cannot allow global relocations
9403   in read-only segments.  */
9404
9405static int
9406ia64_reloc_rw_mask (void)
9407{
9408  return flag_pic ? 3 : 2;
9409}
9410
9411/* Return the section to use for X.  The only special thing we do here
9412   is to honor small data.  */
9413
9414static section *
9415ia64_select_rtx_section (enum machine_mode mode, rtx x,
9416			 unsigned HOST_WIDE_INT align)
9417{
9418  if (GET_MODE_SIZE (mode) > 0
9419      && GET_MODE_SIZE (mode) <= ia64_section_threshold
9420      && !TARGET_NO_SDATA)
9421    return sdata_section;
9422  else
9423    return default_elf_select_rtx_section (mode, x, align);
9424}
9425
9426static unsigned int
9427ia64_section_type_flags (tree decl, const char *name, int reloc)
9428{
9429  unsigned int flags = 0;
9430
9431  if (strcmp (name, ".sdata") == 0
9432      || strncmp (name, ".sdata.", 7) == 0
9433      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9434      || strncmp (name, ".sdata2.", 8) == 0
9435      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
9436      || strcmp (name, ".sbss") == 0
9437      || strncmp (name, ".sbss.", 6) == 0
9438      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9439    flags = SECTION_SMALL;
9440
9441  flags |= default_section_type_flags (decl, name, reloc);
9442  return flags;
9443}
9444
9445/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
9446   structure type and that the address of that type should be passed
9447   in out0, rather than in r8.  */
9448
9449static bool
9450ia64_struct_retval_addr_is_first_parm_p (tree fntype)
9451{
9452  tree ret_type = TREE_TYPE (fntype);
9453
9454  /* The Itanium C++ ABI requires that out0, rather than r8, be used
9455     as the structure return address parameter, if the return value
9456     type has a non-trivial copy constructor or destructor.  It is not
9457     clear if this same convention should be used for other
9458     programming languages.  Until G++ 3.4, we incorrectly used r8 for
9459     these return values.  */
9460  return (abi_version_at_least (2)
9461	  && ret_type
9462	  && TYPE_MODE (ret_type) == BLKmode
9463	  && TREE_ADDRESSABLE (ret_type)
9464	  && strcmp (lang_hooks.name, "GNU C++") == 0);
9465}
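
/* Illustrative sketch (hypothetical C++ type, not from this file): for a
   return type with a non-trivial destructor, e.g.

	struct S { ~S (); };
	S f ();

   the address of the return slot is passed in out0 rather than r8, as the
   comment above describes.  */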
9466
9467/* Output the assembler code for a thunk function.  THUNK_DECL is the
9468   declaration for the thunk function itself, FUNCTION is the decl for
9469   the target function.  DELTA is an immediate constant offset to be
9470   added to THIS.  If VCALL_OFFSET is nonzero, the word at
9471   *(*this + vcall_offset) should be added to THIS.  */
9472
9473static void
9474ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9475		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9476		      tree function)
9477{
9478  rtx this, insn, funexp;
9479  unsigned int this_parmno;
9480  unsigned int this_regno;
9481
9482  reload_completed = 1;
9483  epilogue_completed = 1;
9484  no_new_pseudos = 1;
9485  reset_block_changes ();
9486
9487  /* Set things up as ia64_expand_prologue might.  */
9488  last_scratch_gr_reg = 15;
9489
9490  memset (&current_frame_info, 0, sizeof (current_frame_info));
9491  current_frame_info.spill_cfa_off = -16;
9492  current_frame_info.n_input_regs = 1;
9493  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
9494
9495  /* Mark the end of the (empty) prologue.  */
9496  emit_note (NOTE_INSN_PROLOGUE_END);
9497
9498  /* Figure out whether "this" will be the first parameter (the
9499     typical case) or the second parameter (as happens when the
9500     virtual function returns certain class objects).  */
9501  this_parmno
9502    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
9503       ? 1 : 0);
9504  this_regno = IN_REG (this_parmno);
9505  if (!TARGET_REG_NAMES)
9506    reg_names[this_regno] = ia64_reg_numbers[this_parmno];
9507
9508  this = gen_rtx_REG (Pmode, this_regno);
9509  if (TARGET_ILP32)
9510    {
9511      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
9512      REG_POINTER (tmp) = 1;
9513      if (delta && CONST_OK_FOR_I (delta))
9514	{
9515	  emit_insn (gen_ptr_extend_plus_imm (this, tmp, GEN_INT (delta)));
9516	  delta = 0;
9517	}
9518      else
9519	emit_insn (gen_ptr_extend (this, tmp));
9520    }
9521
9522  /* Apply the constant offset, if required.  */
9523  if (delta)
9524    {
9525      rtx delta_rtx = GEN_INT (delta);
9526
9527      if (!CONST_OK_FOR_I (delta))
9528	{
9529	  rtx tmp = gen_rtx_REG (Pmode, 2);
9530	  emit_move_insn (tmp, delta_rtx);
9531	  delta_rtx = tmp;
9532	}
9533      emit_insn (gen_adddi3 (this, this, delta_rtx));
9534    }
9535
9536  /* Apply the offset from the vtable, if required.  */
9537  if (vcall_offset)
9538    {
9539      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
9540      rtx tmp = gen_rtx_REG (Pmode, 2);
9541
9542      if (TARGET_ILP32)
9543	{
9544	  rtx t = gen_rtx_REG (ptr_mode, 2);
9545	  REG_POINTER (t) = 1;
9546	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
9547	  if (CONST_OK_FOR_I (vcall_offset))
9548	    {
9549	      emit_insn (gen_ptr_extend_plus_imm (tmp, t,
9550						  vcall_offset_rtx));
9551	      vcall_offset = 0;
9552	    }
9553	  else
9554	    emit_insn (gen_ptr_extend (tmp, t));
9555	}
9556      else
9557	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));
9558
9559      if (vcall_offset)
9560	{
9561	  if (!CONST_OK_FOR_J (vcall_offset))
9562	    {
9563	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
9564	      emit_move_insn (tmp2, vcall_offset_rtx);
9565	      vcall_offset_rtx = tmp2;
9566	    }
9567	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
9568	}
9569
9570      if (TARGET_ILP32)
9571	emit_move_insn (gen_rtx_REG (ptr_mode, 2),
9572			gen_rtx_MEM (ptr_mode, tmp));
9573      else
9574	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
9575
9576      emit_insn (gen_adddi3 (this, this, tmp));
9577    }
9578
9579  /* Generate a tail call to the target function.  */
9580  if (! TREE_USED (function))
9581    {
9582      assemble_external (function);
9583      TREE_USED (function) = 1;
9584    }
9585  funexp = XEXP (DECL_RTL (function), 0);
9586  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
9587  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
9588  insn = get_last_insn ();
9589  SIBLING_CALL_P (insn) = 1;
9590
9591  /* Code generation for calls relies on splitting.  */
9592  reload_completed = 1;
9593  epilogue_completed = 1;
9594  try_split (PATTERN (insn), insn, 0);
9595
9596  emit_barrier ();
9597
9598  /* Run just enough of rest_of_compilation to get the insns emitted.
9599     There's not really enough bulk here to make other passes such as
9600     instruction scheduling worthwhile.  Note that use_thunk calls
9601     assemble_start_function and assemble_end_function.  */
9602
9603  insn_locators_initialize ();
9604  emit_all_insn_group_barriers (NULL);
9605  insn = get_insns ();
9606  shorten_branches (insn);
9607  final_start_function (insn, file, 1);
9608  final (insn, file, 1);
9609  final_end_function ();
9610
9611  reload_completed = 0;
9612  epilogue_completed = 0;
9613  no_new_pseudos = 0;
9614}
9615
9616/* Worker function for TARGET_STRUCT_VALUE_RTX.  */
9617
9618static rtx
9619ia64_struct_value_rtx (tree fntype,
9620		       int incoming ATTRIBUTE_UNUSED)
9621{
9622  if (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))
9623    return NULL_RTX;
9624  return gen_rtx_REG (Pmode, GR_REG (8));
9625}
9626
9627static bool
9628ia64_scalar_mode_supported_p (enum machine_mode mode)
9629{
9630  switch (mode)
9631    {
9632    case QImode:
9633    case HImode:
9634    case SImode:
9635    case DImode:
9636    case TImode:
9637      return true;
9638
9639    case SFmode:
9640    case DFmode:
9641    case XFmode:
9642    case RFmode:
9643      return true;
9644
9645    case TFmode:
9646      return TARGET_HPUX;
9647
9648    default:
9649      return false;
9650    }
9651}
9652
9653static bool
9654ia64_vector_mode_supported_p (enum machine_mode mode)
9655{
9656  switch (mode)
9657    {
9658    case V8QImode:
9659    case V4HImode:
9660    case V2SImode:
9661      return true;
9662
9663    case V2SFmode:
9664      return true;
9665
9666    default:
9667      return false;
9668    }
9669}
9670
9671/* Implement the FUNCTION_PROFILER macro.  */
9672
9673void
9674ia64_output_function_profiler (FILE *file, int labelno)
9675{
9676  bool indirect_call;
9677
9678  /* If the function needs a static chain and the static chain
9679     register is r15, we use an indirect call so as to bypass
9680     the PLT stub in case the executable is dynamically linked,
9681     because the stub clobbers r15 as per 5.3.6 of the psABI.
9682     We don't need to do that in non canonical PIC mode.  */
9683     We don't need to do that in non-canonical PIC mode.
9684  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
9685    {
9686      gcc_assert (STATIC_CHAIN_REGNUM == 15);
9687      indirect_call = true;
9688    }
9689  else
9690    indirect_call = false;
9691
9692  if (TARGET_GNU_AS)
9693    fputs ("\t.prologue 4, r40\n", file);
9694  else
9695    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
9696  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);
9697
9698  if (NO_PROFILE_COUNTERS)
9699    fputs ("\tmov out3 = r0\n", file);
9700  else
9701    {
9702      char buf[20];
9703      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9704
9705      if (TARGET_AUTO_PIC)
9706	fputs ("\tmovl out3 = @gprel(", file);
9707      else
9708	fputs ("\taddl out3 = @ltoff(", file);
9709      assemble_name (file, buf);
9710      if (TARGET_AUTO_PIC)
9711	fputs (")\n", file);
9712      else
9713	fputs ("), r1\n", file);
9714    }
9715
9716  if (indirect_call)
9717    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
9718  fputs ("\t;;\n", file);
9719
9720  fputs ("\t.save rp, r42\n", file);
9721  fputs ("\tmov out2 = b0\n", file);
9722  if (indirect_call)
9723    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
9724  fputs ("\t.body\n", file);
9725  fputs ("\tmov out1 = r1\n", file);
9726  if (indirect_call)
9727    {
9728      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
9729      fputs ("\tmov b6 = r16\n", file);
9730      fputs ("\tld8 r1 = [r14]\n", file);
9731      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
9732    }
9733  else
9734    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
9735}
9736
9737static GTY(()) rtx mcount_func_rtx;
9738static rtx
9739gen_mcount_func_rtx (void)
9740{
9741  if (!mcount_func_rtx)
9742    mcount_func_rtx = init_one_libfunc ("_mcount");
9743  return mcount_func_rtx;
9744}
9745
9746void
9747ia64_profile_hook (int labelno)
9748{
9749  rtx label, ip;
9750
9751  if (NO_PROFILE_COUNTERS)
9752    label = const0_rtx;
9753  else
9754    {
9755      char buf[30];
9756      const char *label_name;
9757      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
9758      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
9759      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
9760      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
9761    }
9762  ip = gen_reg_rtx (Pmode);
9763  emit_insn (gen_ip_value (ip));
9764  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
9765                     VOIDmode, 3,
9766		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
9767		     ip, Pmode,
9768		     label, Pmode);
9769}
9770
9771/* Return the mangling of TYPE if it is an extended fundamental type.  */
9772
9773static const char *
9774ia64_mangle_fundamental_type (tree type)
9775{
9776  /* On HP-UX, "long double" is mangled as "e" so __float128 is
9777     mangled as "e".  */
9778  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
9779    return "g";
9780  /* On HP-UX, "e" is not available as a mangling of __float80 so use
9781     an extended mangling.  Elsewhere, "e" is available since long
9782     double is 80 bits.  */
9783  if (TYPE_MODE (type) == XFmode)
9784    return TARGET_HPUX ? "u9__float80" : "e";
9785  if (TYPE_MODE (type) == RFmode)
9786    return "u7__fpreg";
9787  return NULL;
9788}
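
/* Examples of the manglings chosen above (read off the code; the type
   names are the ones registered in ia64_init_builtins):

	__float128  ->  "g"		(non-HP-UX, TFmode)
	__float80   ->  "u9__float80"	(HP-UX)      or  "e"  (elsewhere)
	__fpreg     ->  "u7__fpreg"	(RFmode)  */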
9789
9790/* Return the diagnostic message string if conversion from FROMTYPE to
9791   TOTYPE is not allowed, NULL otherwise.  */
9792static const char *
9793ia64_invalid_conversion (tree fromtype, tree totype)
9794{
9795  /* Reject nontrivial conversion to or from __fpreg.  */
9796  if (TYPE_MODE (fromtype) == RFmode
9797      && TYPE_MODE (totype) != RFmode
9798      && TYPE_MODE (totype) != VOIDmode)
9799    return N_("invalid conversion from %<__fpreg%>");
9800  if (TYPE_MODE (totype) == RFmode
9801      && TYPE_MODE (fromtype) != RFmode)
9802    return N_("invalid conversion to %<__fpreg%>");
9803  return NULL;
9804}
9805
9806/* Return the diagnostic message string if the unary operation OP is
9807   not permitted on TYPE, NULL otherwise.  */
9808static const char *
9809ia64_invalid_unary_op (int op, tree type)
9810{
9811  /* Reject operations on __fpreg other than unary + or &.  */
9812  if (TYPE_MODE (type) == RFmode
9813      && op != CONVERT_EXPR
9814      && op != ADDR_EXPR)
9815    return N_("invalid operation on %<__fpreg%>");
9816  return NULL;
9817}
9818
9819/* Return the diagnostic message string if the binary operation OP is
9820   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
9821static const char *
9822ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, tree type1, tree type2)
9823{
9824  /* Reject operations on __fpreg.  */
9825  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
9826    return N_("invalid operation on %<__fpreg%>");
9827  return NULL;
9828}
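
/* Illustrative user code (a sketch, not part of this file) triggering the
   diagnostics above:

	__fpreg f;
	double d = f;		// invalid conversion from __fpreg
	__fpreg g = f + f;	// invalid operation on __fpreg
	__fpreg *p = &f;	// OK: unary & is permitted  */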
9829
9830/* Implement overriding of the optimization options.  */
9831void
9832ia64_optimization_options (int level ATTRIBUTE_UNUSED,
9833                           int size ATTRIBUTE_UNUSED)
9834{
9835  /* Let the scheduler form additional regions.  */
9836  set_param_value ("max-sched-extend-regions-iters", 2);
9837}
9838
9839#include "gt-ia64.h"
9840