/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999-2015 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
                  David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "ggc.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "tm_p.h"
#include "hash-table.h"
#include "langhooks.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };
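
/* Illustrative note (added; not from the original source): these arrays
   name the stacked register file.  For a function whose prologue issues

       alloc r35 = ar.pfs, 2, 1, 1, 0

   (two inputs, one local, one output), the mapping would be in0 = r32,
   in1 = r33, loc0 = r34 and out0 = r35; the rNN and in/loc/out
   spellings above are alternative names for the same stacked
   registers.  */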

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
   reg_fp,
   reg_save_b0,
   reg_save_pr,
   reg_save_ar_pfs,
   reg_save_ar_unat,
   reg_save_ar_lc,
   reg_save_gp,
   number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;     /* size of the stack frame, not including
                                   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;  /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;     /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;            /* mask of saved registers.  */
  unsigned int gr_used_mask;    /* mask of registers in use as gr spill
                                   registers or long-term scratches.  */
  int n_spilled;                /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers.  */
  int n_input_regs;             /* number of input registers used.  */
  int n_local_regs;             /* number of local registers used.  */
  int n_output_regs;            /* number of output registers used.  */
  int n_rotate_regs;            /* number of rotating registers used.  */

  char need_regstk;             /* true if a .regstk directive needed.  */
  char initialized;             /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
                                   tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
                                const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
                              const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
                                       machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
                                       const_tree, bool);
static unsigned int ia64_function_arg_boundary (machine_mode,
                                                const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
                                    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
                                  bool);
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
                                 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (machine_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
                                             bool, bool);

static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                              const unsigned char *sel);

#define MAX_VECT_LEN 8

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);


/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "syscall_linkage", 0, 0, false, true, true, NULL, false },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute,
    false },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute, false },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute, false },
  { NULL,              0, 0, false, false, false, NULL, false }
};
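
/* Illustrative example (added; an assumed usage, not from this file):
   how the "model" attribute declared in the table above is spelled in
   user code.  Its argument is a bare identifier, which is why
   ia64_attribute_takes_identifier_p answers true for it further down.  */
#if 0
static int counter __attribute__ ((model (small)));  /* small address area */
#endif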

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ia64_libgcc_floating_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

struct gcc_target targetm = TARGET_INITIALIZER;

/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  if (is_attribute_p ("model", attr_id))
    return true;
#if TARGET_ABI_OPEN_VMS
  if (is_attribute_p ("common_object", attr_id))
    return true;
#endif
  return false;
}

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}
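
/* Illustrative note (added; the exact asm idiom is an assumption, not
   from this file): an object in the small address area can have its
   address materialized with a single 22-bit immediate add against r0,
   e.g.

       addl r14 = small_var, r0

   which is why the area is limited to (-2MB, 2MB).  */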

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
               name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "an address area attribute cannot be specified for "
                    "local variables");
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
                "address area attribute cannot be specified for "
                "functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    {
      error ("%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}
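
/* Illustrative sketch (added; a hypothetical declaration, not from this
   file): the common_object attribute handled above would appear on a
   declaration interfacing with DEC Ada as, e.g.,

       int blk __attribute__ ((common_object ("SHARED$BLK")));

   naming the overlaid linker area the variable is stored in.  */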

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);
  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      const char *name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
        name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
        name = TREE_STRING_POINTER (id);
      else
        abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u",
           size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and must be either 0 or 0.0
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG
      || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
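
/* Worked example (added; illustrative only): for rop = 0xff0 and
   rshift = 4, op >>= shift leaves 0xff, and exact_log2 (0xff + 1) = 8,
   so the dep.z field is 8 bits wide.  A mask such as 0xf0f fails:
   0xf0f >> 4 = 0xf0, and 0xf1 is not a power of two, so exact_log2
   returns -1.  */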

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict
      && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    return true;
  else if (!strict
           && (GENERAL_REGNO_P (REGNO (reg))
               || !HARD_REGISTER_P (reg)))
    return true;
  else
    return false;
}

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
      || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
          && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
    return true;

  return false;
}

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) == PLUS
      && rtx_equal_p (reg, XEXP (disp, 0))
      && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
          || (CONST_INT_P (XEXP (disp, 1))
              && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
    return true;

  return false;
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
                           rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;
  else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx)
    return true;
  else if (GET_CODE (x) == POST_MODIFY
           && ia64_legitimate_address_reg (XEXP (x, 0), strict)
           && XEXP (x, 0) != arg_pointer_rtx
           && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
    return true;
  else
    return false;
}
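
/* Illustrative summary (added; the asm forms are assumptions) of the
   address shapes accepted above, in RTL terms:

       (reg r14)                                    plain register
       (post_inc (reg r14))                         e.g. ld8 r2 = [r14], 8
       (post_modify (reg r14)
                    (plus (reg r14) (const_int 40)))
                                                    e.g. ld8 r2 = [r14], 40

   where a constant post-increment must fit the signed 9-bit range
   -256..255, or may itself be a register.  */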

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, mode)
              || function_operand (op, mode))
            return true;
          if (aligned_offset_symbol_operand (op, mode))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
        return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  if (mode == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
         symbols:
          - @gprel does not require dynamic linker
          - and does not use .sdata section
         https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
        }
    }

  return true;
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
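
/* Illustrative note (added; the asm form is an assumption, not from
   this file): register 13 is the IA-64 thread-pointer register, so the
   local-exec case below can expand to a single thread-pointer-relative
   add such as

       addl r14 = @tprel(x), r13

   via the add_tprel pattern.  */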

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
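
/* Worked example (added; illustrative only) of the 14-bit addend split
   used above and again in ia64_expand_move below: for
   addend = 0x12345678,

       addend_lo = ((0x12345678 & 0x3fff) ^ 0x2000) - 0x2000 = 0x1678
       addend_hi = 0x12345678 - 0x1678 = 0x12344000

   so addend_lo is the sign-extended low 14 bits (always in the range
   -8192..8191) and addend_hi is a multiple of 0x4000 that can be
   folded into the relocated symbol.  */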

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (mode, sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx_insn *insn, *first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}
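
/* Illustrative sketch (added; register numbers are hypothetical): after
   ia64_emit_cond_move, a plain move such as

       (set (reg r14) (reg r15))

   has been rewritten into a predicated instruction,

       (cond_exec (ne (reg:BI p6) (const_int 0))
                  (set (reg r14) (reg r15)))

   corresponding to the "(p6) mov r14 = r15" form of the architecture.  */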

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base,
                                    plus_constant (Pmode, base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
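
/* Illustrative example (added; register numbers are hypothetical):
   splitting (set (reg:TI r16) (mem:TI (reg r14))) with the code above
   gives, when the pointer survives,

       (set (reg:DI r16) (mem:DI (post_inc (reg r14))))   ;; r14 += 8
       (set (reg:DI r17) (mem:DI (post_dec (reg r14))))   ;; r14 -= 8

   leaving r14 unchanged.  When DEAD is true, the second access is a
   plain (mem:DI (reg r14)), which already points at the high word
   after the post-increment.  */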

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
                             REGNO (operands[0])+2,
                             base, 0))
        dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}
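
/* Illustrative example (added; not from this file) of the construct the
   comment above describes: a union that overlays a long double with an
   integer quantity, e.g.

       union { long double x; __int128 i; } u;

   can present the compiler with (subreg:XF (reg:TI)), which the
   function above resolves by spilling the TImode register to memory.  */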

/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          rtx op1 = operands[1];

          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          else
            op1 = gen_rtx_REG (TImode, REGNO (op1));

          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
          return true;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          /* Don't word-swap when reading in the constant.  */
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
                                           0, mode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
                                           0, mode));
          return true;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
         If op0 is a register, then we spill op1, so that we now have a
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
         to force the spill.  */
      if (register_operand (operands[0], mode))
        {
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
          op1 = gen_rtx_SUBREG (mode, op1, 0);
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
        }

      else
        {
          rtx in[2];

          gcc_assert (GET_CODE (operands[0]) == MEM);

          /* Don't word-swap when writing out the value.  */
          /* Don't word-swap when writing out the value.  */
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
          return true;
        }
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
        {
          rtx memt, memx, in = operands[1];
          if (CONSTANT_P (in))
            in = validize_mem (force_const_mem (mode, in));
          if (GET_CODE (in) == MEM)
            memt = adjust_address (in, TImode, 0);
          else
            {
              memt = assign_stack_temp (TImode, 16);
              memx = adjust_address (memt, mode, 0);
              emit_move_insn (memx, in);
            }
          emit_move_insn (op0, memt);
          return true;
        }

      if (!ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}

/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on NaNs as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given a NaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             a NaN operand.  */
        case LT:  magic = QCMP_LT           | QCMP_INV;   ncode = NE; break;
        case LE:  magic = QCMP_LT | QCMP_EQ | QCMP_INV;   ncode = NE; break;
        case GT:  magic = QCMP_GT           | QCMP_INV;   ncode = NE; break;
        case GE:  magic = QCMP_GT | QCMP_EQ | QCMP_INV;   ncode = NE; break;
          /* Unordered relational operators do not raise FP_INVALID
             when given a NaN operand.  */
        case UNLT: magic = QCMP_LT           | QCMP_UNORD; ncode = NE; break;
        case UNLE: magic = QCMP_LT | QCMP_EQ | QCMP_UNORD; ncode = NE; break;
        case UNGT: magic = QCMP_GT           | QCMP_UNORD; ncode = NE; break;
        case UNGE: magic = QCMP_GT | QCMP_EQ | QCMP_UNORD; ncode = NE; break;
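          /* Worked example (illustrative): a C-level "a < b" on TFmode
             thus becomes, in effect,
                 _U_Qfcmp (a, b, QCMP_LT | QCMP_INV) != 0
             with the NE test against zero materialized below.  */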
          /* Not supported.  */
        case UNEQ:
        case LTGT:
        default: gcc_unreachable ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     *op0, TFmode, *op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}

/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
        {
        case V2SImode:
          {
            rtx t1, t2, mask;

            /* Subtract (-(INT MAX) - 1) from both operands to make
               them signed.  */
            mask = gen_int_mode (0x80000000, SImode);
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
            mask = force_reg (mode, mask);
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t1, op0, mask));
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t2, op1, mask));
            op0 = t1;
            op1 = t2;
            code = GT;
          }
          break;

        case V8QImode:
        case V4HImode:
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, op0, op1)));

          code = EQ;
          op0 = x;
          op1 = CONST0_RTX (mode);
          negate = !negate;
          break;

        default:
          gcc_unreachable ();
        }
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}
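/* The conditional-move expansion below open-codes a vector select as a
   mask merge,

       dest = (cmp & op_true) | (~cmp & op_false)

   and when either arm is the zero vector the merge collapses to a
   single AND (optionally through a NOT of the mask).  */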
/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}

/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
  if (mode == V8QImode && (code == UMIN || code == UMAX))
    return false;
  if (mode == V4HImode && (code == SMIN || code == SMAX))
    return false;

  /* This combination can be implemented with only saturating subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));

      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else implemented via vector comparisons.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  switch (code)
    {
    case UMIN:
      code = LTU;
      break;
    case UMAX:
      code = GTU;
      break;
    case SMIN:
      code = LT;
      break;
    case SMAX:
      code = GT;
      break;
    default:
      gcc_unreachable ();
    }
  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}

/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;
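  /* Worked example (V8QImode, nelt == 8): the low-half interleave built
     below selects elements { 0, 8, 1, 9, 2, 10, 3, 11 } from the
     concatenation of the two operands, and the high-half interleave
     selects { 4, 12, 5, 13, 6, 14, 7, 15 }.  */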
  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

/* Return a vector of the sign-extension of VEC.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);

  if (unsignedp)
    return zero;
  else
    {
      rtx sign = gen_reg_rtx (mode);
      bool neg;

      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
      gcc_assert (!neg);

      return sign;
    }
}

/* Emit an integral vector unpack operation.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}

/* Emit an integral vector widening sum operation.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  machine_mode wmode;
  rtx l, h, t, sign;

  sign = ia64_unpack_sign (operands[1], unsignedp);

  wmode = GET_MODE (operands[0]);
  l = gen_reg_rtx (wmode);
  h = gen_reg_rtx (wmode);

  ia64_unpack_assemble (l, operands[1], sign, false);
  ia64_unpack_assemble (h, operands[1], sign, true);

  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
  if (t != operands[0])
    emit_move_insn (operands[0], t);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
        insn = gen_sibcall_nogp (addr);
      else if (! retval)
        insn = gen_call_nogp (addr, b0);
      else
        insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
        insn = gen_sibcall_gp (addr);
      else if (! retval)
        insn = gen_call_gp (addr, b0);
      else
        insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
}

static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] == 0)
    emitted_frame_related_regs[r] = current_frame_info.r[r];
  else
    gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
}

static int
get_reg (enum ia64_frame_regs r)
{
  reg_emitted (r);
  return current_frame_info.r[r];
}

static bool
is_emitted (int regno)
{
  unsigned int r;

  for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
    if (emitted_frame_related_regs[r] == regno)
      return true;
  return false;
}

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    {
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
    }
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      offset = (current_frame_info.spill_cfa_off
                + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
                                 pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
                 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
         we can legitimately change the global lifetime of the GP
         (in the form of killing where previously live).  This is
         because a call through a descriptor doesn't use the previous
         value of the GP, while a direct call does, and we do not
         commit to either form until the split here.

         That said, this means that we lack precise life info for
         whether ADDR is dead after this call.  This is not terribly
         important, since we can fix things up essentially for free
         with the POST_DEC below, but it's nice to not use it when we
         can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
                      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
                                            REGNO (addr)))
                     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
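      /* Background: an IA-64 function pointer designates a two-word
         descriptor, the code entry point followed by the callee's GP
         value; the two loads below pick those words apart.  */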
      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
         revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
        tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
        tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

        cmp_reg = mem;
      label:
        old_reg = cmp_reg;
        new_reg = cmp_reg op val;
        cmp_reg = compare-and-swap(mem, old_reg, new_reg)
        if (cmp_reg != old_reg)
          goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
                       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      if (code == MINUS)
        val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      switch (model)
        {
        case MEMMODEL_ACQ_REL:
        case MEMMODEL_SEQ_CST:
        case MEMMODEL_SYNC_SEQ_CST:
          emit_insn (gen_memory_barrier ());
          /* FALLTHRU */
        case MEMMODEL_RELAXED:
        case MEMMODEL_ACQUIRE:
        case MEMMODEL_SYNC_ACQUIRE:
        case MEMMODEL_CONSUME:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_acq_si;
          else
            icode = CODE_FOR_fetchadd_acq_di;
          break;
        case MEMMODEL_RELEASE:
        case MEMMODEL_SYNC_RELEASE:
          if (mode == SImode)
            icode = CODE_FOR_fetchadd_rel_si;
          else
            icode = CODE_FOR_fetchadd_rel_di;
          break;

        default:
          gcc_unreachable ();
        }

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      if (new_dst)
        {
          new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
                                         true, OPTAB_WIDEN);
          if (new_reg != new_dst)
            emit_move_insn (new_dst, new_reg);
        }
      return;
    }
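  /* Everything else goes through the compare-and-swap loop sketched in
     the comment above; fetchadd itself only handles the small immediate
     increments accepted by fetchadd_operand.  */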
  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
              || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
                                     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
                                   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_SYNC_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_SYNC_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
    case MEMMODEL_SYNC_SEQ_CST:
      switch (mode)
        {
        case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
        case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
        case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
        case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}

/* Begin the assembly file.  */

static void
ia64_file_start (void)
{
  default_file_start ();
  emit_safe_across_calls ();
}

void
emit_safe_across_calls (void)
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
        rs++;
      if (rs >= 64)
        break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
        continue;
      if (out_state == 0)
        {
          fputs ("\t.pred.safe_across_calls ", asm_out_file);
          out_state = 1;
        }
      else
        fputc (',', asm_out_file);
      if (re == rs + 1)
        fprintf (asm_out_file, "p%u", rs);
      else
        fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', asm_out_file);
}
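/* For reference, the directive emitted above typically looks like

       .pred.safe_across_calls p1-p5,p16-p63

   though the exact ranges depend on which predicates are call-used.  */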
/* Globalize a declaration.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  current_frame_info.gr_used_mask
   contains bits in GR0 to GR31 that have already been allocated by this
   routine.  TRY_LOCALS is true if we should attempt to locate a local
   regnum.  */

static int
find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  int regno;

  if (emitted_frame_related_regs[r] != 0)
    {
      regno = emitted_frame_related_regs[r];
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
          && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
        current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
               && regno >= GR_REG (1) && regno <= GR_REG (31))
        current_frame_info.gr_used_mask |= 1 << regno;

      return regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (crtl->is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
        if (! df_regs_ever_live_p (regno)
            && call_used_regs[regno]
            && ! fixed_regs[regno]
            && ! global_regs[regno]
            && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
            && ! is_emitted (regno))
          {
            current_frame_info.gr_used_mask |= 1 << regno;
            return regno;
          }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
         that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
         reg_name switching code in ia64_expand_prologue.  */
      while (regno < (80 - frame_pointer_needed))
        if (! is_emitted (LOC_REG (regno++)))
          {
            current_frame_info.n_local_regs = regno;
            return LOC_REG (regno - 1);
          }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg (void)
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
          && ! fixed_regs[regno]
          && ! global_regs[regno]
          && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
        {
          last_scratch_gr_reg = regno;
          return regno;
        }
    }
  /* There must be _something_ available.  */
  gcc_unreachable ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)];
      for (i = 0; i < n; ++i)
        current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Compute the register save masks, offsets and total frame size for the
   current function, filling in current_frame_info.  SIZE is the number
   of bytes of space needed for local variables.  */

static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int min_regno;
  int max_regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
        if (df_regs_ever_live_p (regno))
          break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (crtl->profile)
    i = MAX (i, 1);
#endif
  current_frame_info.n_output_regs = i;
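  /* Example (illustrative): a function whose body touches in0-in1,
     loc0-loc2 and out0 ends up with n_input_regs == 2, n_local_regs == 3
     and n_output_regs == 1, modulo the varargs/syscall_linkage and
     profiling adjustments above.  */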
  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 16;
        n_spilled += 1;
        spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
        spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
      {
        SET_HARD_REG_BIT (mask, regno);
        spill_size += 8;
        n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
         to be free, even if regs_ever_live is already set, because this is
         HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
         as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
        {
          current_frame_info.r[reg_fp] = LOC_REG (79);
          current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
        }
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
         if this function doesn't return, as EH depends on this to be
         able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
        {
          extra_spill_size += 8;
          n_spilled += 1;
        }

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
         registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
        = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
        {
          SET_HARD_REG_BIT (mask, GR_REG (1));
          spill_size += 8;
          n_spilled += 1;
        }
    }
  else
    {
      if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
        {
          SET_HARD_REG_BIT (mask, BR_REG (0));
          extra_spill_size += 8;
          n_spilled += 1;
        }

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
        {
          SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
          current_frame_info.r[reg_save_ar_pfs]
            = find_gr_spill (reg_save_ar_pfs, 1);
          if (current_frame_info.r[reg_save_ar_pfs] == 0)
            {
              extra_spill_size += 8;
              n_spilled += 1;
            }
        }
    }
If all three did 2882 happen to be allocated hard regs, and are consecutive, rearrange them 2883 into the preferred order now. 2884 2885 If we have already emitted code for any of those registers, 2886 then it's already too late to change. */ 2887 min_regno = MIN (current_frame_info.r[reg_fp], 2888 MIN (current_frame_info.r[reg_save_b0], 2889 current_frame_info.r[reg_save_ar_pfs])); 2890 max_regno = MAX (current_frame_info.r[reg_fp], 2891 MAX (current_frame_info.r[reg_save_b0], 2892 current_frame_info.r[reg_save_ar_pfs])); 2893 if (min_regno > 0 2894 && min_regno + 2 == max_regno 2895 && (current_frame_info.r[reg_fp] == min_regno + 1 2896 || current_frame_info.r[reg_save_b0] == min_regno + 1 2897 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1) 2898 && (emitted_frame_related_regs[reg_save_b0] == 0 2899 || emitted_frame_related_regs[reg_save_b0] == min_regno) 2900 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0 2901 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1) 2902 && (emitted_frame_related_regs[reg_fp] == 0 2903 || emitted_frame_related_regs[reg_fp] == min_regno + 2)) 2904 { 2905 current_frame_info.r[reg_save_b0] = min_regno; 2906 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1; 2907 current_frame_info.r[reg_fp] = min_regno + 2; 2908 } 2909 2910 /* See if we need to store the predicate register block. */ 2911 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2912 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2913 break; 2914 if (regno <= PR_REG (63)) 2915 { 2916 SET_HARD_REG_BIT (mask, PR_REG (0)); 2917 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1); 2918 if (current_frame_info.r[reg_save_pr] == 0) 2919 { 2920 extra_spill_size += 8; 2921 n_spilled += 1; 2922 } 2923 2924 /* ??? Mark them all as used so that register renaming and such 2925 are free to use them. */ 2926 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2927 df_set_regs_ever_live (regno, true); 2928 } 2929 2930 /* If we're forced to use st8.spill, we're forced to save and restore 2931 ar.unat as well. The check for existing liveness allows inline asm 2932 to touch ar.unat. */ 2933 if (spilled_gr_p || cfun->machine->n_varargs 2934 || df_regs_ever_live_p (AR_UNAT_REGNUM)) 2935 { 2936 df_set_regs_ever_live (AR_UNAT_REGNUM, true); 2937 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); 2938 current_frame_info.r[reg_save_ar_unat] 2939 = find_gr_spill (reg_save_ar_unat, spill_size == 0); 2940 if (current_frame_info.r[reg_save_ar_unat] == 0) 2941 { 2942 extra_spill_size += 8; 2943 n_spilled += 1; 2944 } 2945 } 2946 2947 if (df_regs_ever_live_p (AR_LC_REGNUM)) 2948 { 2949 SET_HARD_REG_BIT (mask, AR_LC_REGNUM); 2950 current_frame_info.r[reg_save_ar_lc] 2951 = find_gr_spill (reg_save_ar_lc, spill_size == 0); 2952 if (current_frame_info.r[reg_save_ar_lc] == 0) 2953 { 2954 extra_spill_size += 8; 2955 n_spilled += 1; 2956 } 2957 } 2958 2959 /* If we have an odd number of words of pretend arguments written to 2960 the stack, then the FR save area will be unaligned. We round the 2961 size of this area up to keep things 16 byte aligned. 
  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
                + crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}

/* Worker function for TARGET_CAN_ELIMINATE.  */

bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == BR_REG (0) ? crtl->is_leaf : true);
}

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = -current_frame_info.total_size;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = 0;
          if (!crtl->is_leaf || cfun->calls_alloca)
            offset += 16 + crtl->outgoing_args_size;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg, in
         which case we store through the 16 byte save area.  */
      switch (to)
        {
        case HARD_FRAME_POINTER_REGNUM:
          offset = 16 - crtl->args.pretend_args_size;
          break;

        case STACK_POINTER_REGNUM:
          offset = (current_frame_info.total_size
                    + 16 - crtl->args.pretend_args_size);
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
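/* Worked example (numbers illustrative): in a non-leaf function with
   total_size == 128, 32 bytes of outgoing args and no pretend args,
   eliminating the soft frame pointer to the stack pointer yields
   16 + 32 = 48, while eliminating the arg pointer to the stack pointer
   yields 128 + 16 = 144, matching the cases above.  */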
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx_insn *init_after;         /* point at which to emit initializations */
  rtx init_reg[2];              /* initial base register */
  rtx iter_reg[2];              /* the iterator registers */
  rtx *prev_addr[2];            /* address of last memory use */
  rtx_insn *prev_insn[2];       /* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];    /* last offset */
  int n_iter;                   /* number of iterators in use */
  int next_iter;                /* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;

static void
setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers (void)
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (satisfies_constraint_N (disp_rtx))
        {
          *spill_fill_data.prev_addr[iter]
            = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
                                   gen_rtx_PLUS (DImode,
                                                 spill_fill_data.iter_reg[iter],
                                                 disp_rtx));
          add_reg_note (spill_fill_data.prev_insn[iter],
                        REG_INC, spill_fill_data.iter_reg[iter]);
        }
      else
        {
          /* ??? Could use register post_modify for loads.  */
          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }
          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.iter_reg[iter], disp_rtx));
        }
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
           && spill_fill_data.init_reg[iter] == stack_pointer_rtx
           && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      rtx seq;
      rtx_insn *insn;

      if (disp == 0)
        seq = gen_movdi (spill_fill_data.iter_reg[iter],
                         spill_fill_data.init_reg[iter]);
      else
        {
          start_sequence ();

          if (!satisfies_constraint_I (disp_rtx))
            {
              rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
              emit_move_insn (tmp, disp_rtx);
              disp_rtx = tmp;
            }

          emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
                                 spill_fill_data.init_reg[iter],
                                 disp_rtx));

          seq = get_insns ();
          end_sequence ();
        }

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
        insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
        {
          rtx_insn *first = get_insns ();
          if (first)
            insn = emit_insn_before (seq, first);
          else
            insn = emit_insn (seq);
        }
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}

static void
do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
          rtx frame_reg)
{
  int iter = spill_fill_data.next_iter;
  rtx mem;
  rtx_insn *insn;

  mem = spill_restore_mem (reg, cfa_off);
  insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;

  if (frame_reg)
    {
      rtx base;
      HOST_WIDE_INT off;

      RTX_FRAME_RELATED_P (insn) = 1;

      /* Don't even pretend that the unwind code can intuit its way
         through a pair of interleaved post_modify iterators.  Just
         provide the correct answer.  */

      if (frame_pointer_needed)
        {
          base = hard_frame_pointer_rtx;
          off = - cfa_off;
        }
      else
        {
          base = stack_pointer_rtx;
          off = current_frame_info.total_size - cfa_off;
        }

      add_reg_note (insn, REG_CFA_OFFSET,
                    gen_rtx_SET (VOIDmode,
                                 gen_rtx_MEM (GET_MODE (reg),
                                              plus_constant (Pmode,
                                                             base, off)),
                                 frame_reg));
    }
}

static void
do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  rtx_insn *insn;

  insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
                                GEN_INT (cfa_off)));
  spill_fill_data.prev_insn[iter] = insn;
}
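/* Usage note on MOVE_FN (a sketch, not original commentary): calling
   do_spill (gen_movdi_x, reg, cfa_off, NULL_RTX) emits a plain 8-byte
   move into the slot returned by spill_restore_mem, while the
   fr_spill/fr_restore wrappers below emit the stf.spill/ldf.fill forms
   that preserve FP register contents (including NaTVal) exactly.  */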
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   use a consistent function interface.  */

static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}

static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}

static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* See Table 6.2 of the IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   is the size of the backing store.  ??? This clobbers r2 and r3.  */

static void
ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                             int bs_size)
{
  rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
  rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));

  /* On the IA-64 there is a second stack in memory, namely the Backing Store
     of the Register Stack Engine.  We also need to probe it after checking
     that the 2 stacks don't overlap.  */
  emit_insn (gen_bsp_value (r3));
  emit_move_insn (r2, GEN_INT (-(first + size)));

  /* Compare current value of BSP and SP registers.  */
  emit_insn (gen_rtx_SET (VOIDmode, p6,
                          gen_rtx_fmt_ee (LTU, BImode,
                                          r3, stack_pointer_rtx)));

  /* Compute the address of the probe for the Backing Store (which grows
     towards higher addresses).  We probe only at the first offset of
     the next page because some OSes (e.g. Linux/ia64) only extend the
     backing store when this specific address is hit (but generate a SEGV
     on other addresses).  Page size is the worst case (4KB).  The reserve
     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
     Also compute the address of the last probe for the memory stack
     (which grows towards lower addresses).  */
  emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
  emit_insn (gen_rtx_SET (VOIDmode, r2,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

  /* Compare them and raise SEGV if the former has topped the latter.  */
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_SET (VOIDmode, p6,
                                             gen_rtx_fmt_ee (GEU, BImode,
                                                             r3, r2))));
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
                                                const0_rtx),
                          const0_rtx));
  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
                                gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
                                gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
                                                 GEN_INT (11))));

  /* Probe the Backing Store if necessary.  */
  if (bs_size > 0)
    emit_stack_probe (r3);

  /* Probe the memory stack if necessary.  */
  if (size == 0)
    ;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  else if (size <= PROBE_INTERVAL)
    emit_stack_probe (r2);
  /* The run-time loop is made up of 8 insns in the generic case while this
     compile-time loop is made up of 5+2*(n-2) insns for n intervals.  */
  else if (size <= 4 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
      emit_insn (gen_rtx_SET (VOIDmode, r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
      emit_stack_probe (r2);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_insn (gen_rtx_SET (VOIDmode, r2,
                                  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
          emit_stack_probe (r2);
        }

      emit_insn (gen_rtx_SET (VOIDmode, r2,
                              plus_constant (Pmode, r2,
                                             (i - PROBE_INTERVAL) - size)));
      emit_stack_probe (r2);
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;

      emit_move_insn (r2, GEN_INT (-first));

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (VOIDmode, r2,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > (1 << 21))
        {
          emit_move_insn (r3, GEN_INT (-rounded_size));
          emit_insn (gen_rtx_SET (VOIDmode, r3, gen_rtx_PLUS (Pmode, r2, r3)));
        }
      else
        emit_insn (gen_rtx_SET (VOIDmode, r3,
                                gen_rtx_PLUS (Pmode, r2,
                                              GEN_INT (-rounded_size))));

      /* Step 3: the loop

           while (TEST_ADDR != LAST_ADDR)
             {
               TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
               probe at TEST_ADDR
             }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_probe_stack_range (r2, r2, r3));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      /* TEMP = SIZE - ROUNDED_SIZE.  */
      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, r2,
                                  plus_constant (Pmode, r2,
                                                 rounded_size - size)));
          emit_stack_probe (r2);
        }
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
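  /* The loop emitted below is, schematically (predicate and register
     names depend on the actual operands):

        .LPSRL0:
             cmp.eq p6, p7 = r2, r3
        (p6) br.cond.dpnt .LPSRE0
             addl r2 = -PROBE_INTERVAL, r2
             ;;
             probe.w.fault r2, 0
             br .LPSRL0
        .LPSRE0:
  */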
  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  xops[2] = gen_rtx_REG (BImode, PR_REG (6));
  output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
  fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [REGNO (xops[2])]);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addl %0 = %1, %0", xops);
  fputs ("\t;;\n", asm_out_file);

  /* Probe at TEST_ADDR and branch.  */
  output_asm_insn ("probe.w.fault %0, 0", xops);
  fprintf (asm_out_file, "\tbr ");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of the
   instructions in output_function_prologue(), since it allows the scheduler
   to intermix instructions with the saves of the caller saved registers.  In
   some cases, it might be necessary to emit a barrier instruction as the last
   insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.

   The register save area is laid out like so:
   cfa+16
        [ varargs spill area ]
        [ fr register spill area ]
        [ br register spill area ]
        [ ar register spill area ]
        [ pr register spill area ]
        [ gr register spill area ]  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */

void
ia64_expand_prologue (void)
{
  rtx_insn *insn;
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
                                        + current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
                                         size - STACK_CHECK_PROTECT,
                                         bs_size);
          else if (size + bs_size > STACK_CHECK_PROTECT)
            ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
        }
      else if (size + bs_size > 0)
        ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
    }

  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
               "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
        fprintf (dump_file, "%s = %d\n", #a, current_frame_info.r[a])
      PRINTREG(reg_fp);
      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
#undef PRINTREG
    }
  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
        if ((e->flags & EDGE_FAKE) == 0
            && (e->flags & EDGE_FALLTHRU) != 0)
          break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
        reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
        reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
        reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
        = reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
         need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      if (current_frame_info.r[reg_save_ar_pfs])
        {
          regno = current_frame_info.r[reg_save_ar_pfs];
          reg_emitted (reg_save_ar_pfs);
        }
      else
        regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
                                   GEN_INT (current_frame_info.n_input_regs),
                                   GEN_INT (current_frame_info.n_local_regs),
                                   GEN_INT (current_frame_info.n_output_regs),
                                   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
        {
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode,
                                     ar_pfs_save_reg,
                                     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
        }
    }
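  /* For instance (operands purely illustrative), a frame with 2 inputs,
     3 locals, 1 output and no rotating registers yields

         alloc r34 = ar.pfs, 2, 3, 1, 0

     with r34 standing in for the ar.pfs save register chosen above.  */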
*/ 3650 3651 n_varargs = cfun->machine->n_varargs; 3652 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 3653 stack_pointer_rtx, 0); 3654 3655 if (frame_pointer_needed) 3656 { 3657 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3658 RTX_FRAME_RELATED_P (insn) = 1; 3659 3660 /* Force the unwind info to recognize this as defining a new CFA, 3661 rather than some temp register setup. */ 3662 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX); 3663 } 3664 3665 if (current_frame_info.total_size != 0) 3666 { 3667 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 3668 rtx offset; 3669 3670 if (satisfies_constraint_I (frame_size_rtx)) 3671 offset = frame_size_rtx; 3672 else 3673 { 3674 regno = next_scratch_gr_reg (); 3675 offset = gen_rtx_REG (DImode, regno); 3676 emit_move_insn (offset, frame_size_rtx); 3677 } 3678 3679 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 3680 stack_pointer_rtx, offset)); 3681 3682 if (! frame_pointer_needed) 3683 { 3684 RTX_FRAME_RELATED_P (insn) = 1; 3685 add_reg_note (insn, REG_CFA_ADJUST_CFA, 3686 gen_rtx_SET (VOIDmode, 3687 stack_pointer_rtx, 3688 gen_rtx_PLUS (DImode, 3689 stack_pointer_rtx, 3690 frame_size_rtx))); 3691 } 3692 3693 /* ??? At this point we must generate a magic insn that appears to 3694 modify the stack pointer, the frame pointer, and all spill 3695 iterators. This would allow the most scheduling freedom. For 3696 now, just hard stop. */ 3697 emit_insn (gen_blockage ()); 3698 } 3699 3700 /* Must copy out ar.unat before doing any integer spills. */ 3701 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3702 { 3703 if (current_frame_info.r[reg_save_ar_unat]) 3704 { 3705 ar_unat_save_reg 3706 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3707 reg_emitted (reg_save_ar_unat); 3708 } 3709 else 3710 { 3711 alt_regno = next_scratch_gr_reg (); 3712 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3713 current_frame_info.gr_used_mask |= 1 << alt_regno; 3714 } 3715 3716 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3717 insn = emit_move_insn (ar_unat_save_reg, reg); 3718 if (current_frame_info.r[reg_save_ar_unat]) 3719 { 3720 RTX_FRAME_RELATED_P (insn) = 1; 3721 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3722 } 3723 3724 /* Even if we're not going to generate an epilogue, we still 3725 need to save the register so that EH works. */ 3726 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat]) 3727 emit_insn (gen_prologue_use (ar_unat_save_reg)); 3728 } 3729 else 3730 ar_unat_save_reg = NULL_RTX; 3731 3732 /* Spill all varargs registers. Do this before spilling any GR registers, 3733 since we want the UNAT bits for the GR registers to override the UNAT 3734 bits from varargs, which we don't care about. */ 3735 3736 cfa_off = -16; 3737 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 3738 { 3739 reg = gen_rtx_REG (DImode, regno); 3740 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 3741 } 3742 3743 /* Locate the bottom of the register save area. */ 3744 cfa_off = (current_frame_info.spill_cfa_off 3745 + current_frame_info.spill_size 3746 + current_frame_info.extra_spill_size); 3747 3748 /* Save the predicate register block either in a register or in memory. 
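   All 64 predicate registers are read and written as a single 64-bit
   unit (a "mov rN = pr" at the assembly level), so the DImode move of
   PR_REG (0) below captures the whole block at once; which of the two
   paths is taken depends on whether find_gr_spill reserved a save
   register in current_frame_info.r[reg_save_pr].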
*/ 3749 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3750 { 3751 reg = gen_rtx_REG (DImode, PR_REG (0)); 3752 if (current_frame_info.r[reg_save_pr] != 0) 3753 { 3754 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3755 reg_emitted (reg_save_pr); 3756 insn = emit_move_insn (alt_reg, reg); 3757 3758 /* ??? Denote pr spill/fill by a DImode move that modifies all 3759 64 hard registers. */ 3760 RTX_FRAME_RELATED_P (insn) = 1; 3761 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3762 3763 /* Even if we're not going to generate an epilogue, we still 3764 need to save the register so that EH works. */ 3765 if (! epilogue_p) 3766 emit_insn (gen_prologue_use (alt_reg)); 3767 } 3768 else 3769 { 3770 alt_regno = next_scratch_gr_reg (); 3771 alt_reg = gen_rtx_REG (DImode, alt_regno); 3772 insn = emit_move_insn (alt_reg, reg); 3773 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3774 cfa_off -= 8; 3775 } 3776 } 3777 3778 /* Handle AR regs in numerical order. All of them get special handling. */ 3779 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 3780 && current_frame_info.r[reg_save_ar_unat] == 0) 3781 { 3782 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3783 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 3784 cfa_off -= 8; 3785 } 3786 3787 /* The alloc insn already copied ar.pfs into a general register. The 3788 only thing we have to do now is copy that register to a stack slot 3789 if we'd not allocated a local register for the job. */ 3790 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 3791 && current_frame_info.r[reg_save_ar_pfs] == 0) 3792 { 3793 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3794 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 3795 cfa_off -= 8; 3796 } 3797 3798 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 3799 { 3800 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 3801 if (current_frame_info.r[reg_save_ar_lc] != 0) 3802 { 3803 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 3804 reg_emitted (reg_save_ar_lc); 3805 insn = emit_move_insn (alt_reg, reg); 3806 RTX_FRAME_RELATED_P (insn) = 1; 3807 add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX); 3808 3809 /* Even if we're not going to generate an epilogue, we still 3810 need to save the register so that EH works. */ 3811 if (! epilogue_p) 3812 emit_insn (gen_prologue_use (alt_reg)); 3813 } 3814 else 3815 { 3816 alt_regno = next_scratch_gr_reg (); 3817 alt_reg = gen_rtx_REG (DImode, alt_regno); 3818 emit_move_insn (alt_reg, reg); 3819 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3820 cfa_off -= 8; 3821 } 3822 } 3823 3824 /* Save the return pointer. */ 3825 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3826 { 3827 reg = gen_rtx_REG (DImode, BR_REG (0)); 3828 if (current_frame_info.r[reg_save_b0] != 0) 3829 { 3830 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3831 reg_emitted (reg_save_b0); 3832 insn = emit_move_insn (alt_reg, reg); 3833 RTX_FRAME_RELATED_P (insn) = 1; 3834 add_reg_note (insn, REG_CFA_REGISTER, 3835 gen_rtx_SET (VOIDmode, alt_reg, pc_rtx)); 3836 3837 /* Even if we're not going to generate an epilogue, we still 3838 need to save the register so that EH works. */ 3839 if (! 
epilogue_p)
3840 emit_insn (gen_prologue_use (alt_reg));
3841 }
3842 else
3843 {
3844 alt_regno = next_scratch_gr_reg ();
3845 alt_reg = gen_rtx_REG (DImode, alt_regno);
3846 emit_move_insn (alt_reg, reg);
3847 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3848 cfa_off -= 8;
3849 }
3850 }
3851
3852 if (current_frame_info.r[reg_save_gp])
3853 {
3854 reg_emitted (reg_save_gp);
3855 insn = emit_move_insn (gen_rtx_REG (DImode,
3856 current_frame_info.r[reg_save_gp]),
3857 pic_offset_table_rtx);
3858 }
3859
3860 /* We should now be at the base of the gr/br/fr spill area. */
3861 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
3862 + current_frame_info.spill_size));
3863
3864 /* Spill all general registers. */
3865 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
3866 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3867 {
3868 reg = gen_rtx_REG (DImode, regno);
3869 do_spill (gen_gr_spill, reg, cfa_off, reg);
3870 cfa_off -= 8;
3871 }
3872
3873 /* Spill the rest of the BR registers. */
3874 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
3875 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3876 {
3877 alt_regno = next_scratch_gr_reg ();
3878 alt_reg = gen_rtx_REG (DImode, alt_regno);
3879 reg = gen_rtx_REG (DImode, regno);
3880 emit_move_insn (alt_reg, reg);
3881 do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
3882 cfa_off -= 8;
3883 }
3884
3885 /* Align the frame and spill all FR registers. */
3886 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
3887 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
3888 {
3889 gcc_assert (!(cfa_off & 15));
3890 reg = gen_rtx_REG (XFmode, regno);
3891 do_spill (gen_fr_spill_x, reg, cfa_off, reg);
3892 cfa_off -= 16;
3893 }
3894
3895 gcc_assert (cfa_off == current_frame_info.spill_cfa_off);
3896
3897 finish_spill_pointers ();
3898}
3899
3900/* Output the textual info surrounding the prologue. */
3901
3902void
3903ia64_start_function (FILE *file, const char *fnname,
3904 tree decl ATTRIBUTE_UNUSED)
3905{
3906#if TARGET_ABI_OPEN_VMS
3907 vms_start_function (fnname);
3908#endif
3909
3910 fputs ("\t.proc ", file);
3911 assemble_name (file, fnname);
3912 fputc ('\n', file);
3913 ASM_OUTPUT_LABEL (file, fnname);
3914}
3915
3916/* Called after register allocation to add any instructions needed for the
3917 epilogue. Using an epilogue insn is favored compared to putting all of the
3918 instructions in output_function_epilogue(), since it allows the scheduler
3919 to intermix instructions with the restores of the caller saved registers.
3920 In some cases, it might be necessary to emit a barrier instruction as the
3921 last insn to prevent such scheduling. */
3922
3923void
3924ia64_expand_epilogue (int sibcall_p)
3925{
3926 rtx_insn *insn;
3927 rtx reg, alt_reg, ar_unat_save_reg;
3928 int regno, alt_regno, cfa_off;
3929
3930 ia64_compute_frame_size (get_frame_size ());
3931
3932 /* If there is a frame pointer, then we use it instead of the stack
3933 pointer, so that the stack pointer does not need to be valid when
3934 the epilogue starts. See EXIT_IGNORE_STACK. */
3935 if (frame_pointer_needed)
3936 setup_spill_pointers (current_frame_info.n_spilled,
3937 hard_frame_pointer_rtx, 0);
3938 else
3939 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
3940 current_frame_info.total_size);
3941
3942 if (current_frame_info.total_size != 0)
3943 {
3944 /* ??? At this point we must generate a magic insn that appears to
3945 modify the spill iterators and the frame pointer.
This would 3946 allow the most scheduling freedom. For now, just hard stop. */ 3947 emit_insn (gen_blockage ()); 3948 } 3949 3950 /* Locate the bottom of the register save area. */ 3951 cfa_off = (current_frame_info.spill_cfa_off 3952 + current_frame_info.spill_size 3953 + current_frame_info.extra_spill_size); 3954 3955 /* Restore the predicate registers. */ 3956 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3957 { 3958 if (current_frame_info.r[reg_save_pr] != 0) 3959 { 3960 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3961 reg_emitted (reg_save_pr); 3962 } 3963 else 3964 { 3965 alt_regno = next_scratch_gr_reg (); 3966 alt_reg = gen_rtx_REG (DImode, alt_regno); 3967 do_restore (gen_movdi_x, alt_reg, cfa_off); 3968 cfa_off -= 8; 3969 } 3970 reg = gen_rtx_REG (DImode, PR_REG (0)); 3971 emit_move_insn (reg, alt_reg); 3972 } 3973 3974 /* Restore the application registers. */ 3975 3976 /* Load the saved unat from the stack, but do not restore it until 3977 after the GRs have been restored. */ 3978 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3979 { 3980 if (current_frame_info.r[reg_save_ar_unat] != 0) 3981 { 3982 ar_unat_save_reg 3983 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3984 reg_emitted (reg_save_ar_unat); 3985 } 3986 else 3987 { 3988 alt_regno = next_scratch_gr_reg (); 3989 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3990 current_frame_info.gr_used_mask |= 1 << alt_regno; 3991 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 3992 cfa_off -= 8; 3993 } 3994 } 3995 else 3996 ar_unat_save_reg = NULL_RTX; 3997 3998 if (current_frame_info.r[reg_save_ar_pfs] != 0) 3999 { 4000 reg_emitted (reg_save_ar_pfs); 4001 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]); 4002 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 4003 emit_move_insn (reg, alt_reg); 4004 } 4005 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 4006 { 4007 alt_regno = next_scratch_gr_reg (); 4008 alt_reg = gen_rtx_REG (DImode, alt_regno); 4009 do_restore (gen_movdi_x, alt_reg, cfa_off); 4010 cfa_off -= 8; 4011 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 4012 emit_move_insn (reg, alt_reg); 4013 } 4014 4015 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 4016 { 4017 if (current_frame_info.r[reg_save_ar_lc] != 0) 4018 { 4019 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 4020 reg_emitted (reg_save_ar_lc); 4021 } 4022 else 4023 { 4024 alt_regno = next_scratch_gr_reg (); 4025 alt_reg = gen_rtx_REG (DImode, alt_regno); 4026 do_restore (gen_movdi_x, alt_reg, cfa_off); 4027 cfa_off -= 8; 4028 } 4029 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 4030 emit_move_insn (reg, alt_reg); 4031 } 4032 4033 /* Restore the return pointer. */ 4034 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 4035 { 4036 if (current_frame_info.r[reg_save_b0] != 0) 4037 { 4038 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 4039 reg_emitted (reg_save_b0); 4040 } 4041 else 4042 { 4043 alt_regno = next_scratch_gr_reg (); 4044 alt_reg = gen_rtx_REG (DImode, alt_regno); 4045 do_restore (gen_movdi_x, alt_reg, cfa_off); 4046 cfa_off -= 8; 4047 } 4048 reg = gen_rtx_REG (DImode, BR_REG (0)); 4049 emit_move_insn (reg, alt_reg); 4050 } 4051 4052 /* We should now be at the base of the gr/br/fr spill area. 
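   As in the prologue, the assert below cross-checks the running
   cfa_off against the frame layout computed by ia64_compute_frame_size;
   the GP slot, if one was saved, is then skipped rather than reloaded.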
*/ 4053 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off 4054 + current_frame_info.spill_size)); 4055 4056 /* The GP may be stored on the stack in the prologue, but it's 4057 never restored in the epilogue. Skip the stack slot. */ 4058 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 4059 cfa_off -= 8; 4060 4061 /* Restore all general registers. */ 4062 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 4063 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4064 { 4065 reg = gen_rtx_REG (DImode, regno); 4066 do_restore (gen_gr_restore, reg, cfa_off); 4067 cfa_off -= 8; 4068 } 4069 4070 /* Restore the branch registers. */ 4071 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 4072 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4073 { 4074 alt_regno = next_scratch_gr_reg (); 4075 alt_reg = gen_rtx_REG (DImode, alt_regno); 4076 do_restore (gen_movdi_x, alt_reg, cfa_off); 4077 cfa_off -= 8; 4078 reg = gen_rtx_REG (DImode, regno); 4079 emit_move_insn (reg, alt_reg); 4080 } 4081 4082 /* Restore floating point registers. */ 4083 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 4084 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4085 { 4086 gcc_assert (!(cfa_off & 15)); 4087 reg = gen_rtx_REG (XFmode, regno); 4088 do_restore (gen_fr_restore_x, reg, cfa_off); 4089 cfa_off -= 16; 4090 } 4091 4092 /* Restore ar.unat for real. */ 4093 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 4094 { 4095 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 4096 emit_move_insn (reg, ar_unat_save_reg); 4097 } 4098 4099 gcc_assert (cfa_off == current_frame_info.spill_cfa_off); 4100 4101 finish_spill_pointers (); 4102 4103 if (current_frame_info.total_size 4104 || cfun->machine->ia64_eh_epilogue_sp 4105 || frame_pointer_needed) 4106 { 4107 /* ??? At this point we must generate a magic insn that appears to 4108 modify the spill iterators, the stack pointer, and the frame 4109 pointer. This would allow the most scheduling freedom. For now, 4110 just hard stop. */ 4111 emit_insn (gen_blockage ()); 4112 } 4113 4114 if (cfun->machine->ia64_eh_epilogue_sp) 4115 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 4116 else if (frame_pointer_needed) 4117 { 4118 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 4119 RTX_FRAME_RELATED_P (insn) = 1; 4120 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); 4121 } 4122 else if (current_frame_info.total_size) 4123 { 4124 rtx offset, frame_size_rtx; 4125 4126 frame_size_rtx = GEN_INT (current_frame_info.total_size); 4127 if (satisfies_constraint_I (frame_size_rtx)) 4128 offset = frame_size_rtx; 4129 else 4130 { 4131 regno = next_scratch_gr_reg (); 4132 offset = gen_rtx_REG (DImode, regno); 4133 emit_move_insn (offset, frame_size_rtx); 4134 } 4135 4136 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 4137 offset)); 4138 4139 RTX_FRAME_RELATED_P (insn) = 1; 4140 add_reg_note (insn, REG_CFA_ADJUST_CFA, 4141 gen_rtx_SET (VOIDmode, 4142 stack_pointer_rtx, 4143 gen_rtx_PLUS (DImode, 4144 stack_pointer_rtx, 4145 frame_size_rtx))); 4146 } 4147 4148 if (cfun->machine->ia64_eh_epilogue_bsp) 4149 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 4150 4151 if (! sibcall_p) 4152 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 4153 else 4154 { 4155 int fp = GR_REG (2); 4156 /* We need a throw away register here, r0 and r1 are reserved, 4157 so r2 is the first available call clobbered register. 
If 4158 there was a frame_pointer register, we may have swapped the 4159 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make 4160 sure we're using the string "r2" when emitting the register 4161 name for the assembler. */ 4162 if (current_frame_info.r[reg_fp] 4163 && current_frame_info.r[reg_fp] == GR_REG (2)) 4164 fp = HARD_FRAME_POINTER_REGNUM; 4165 4166 /* We must emit an alloc to force the input registers to become output 4167 registers. Otherwise, if the callee tries to pass its parameters 4168 through to another call without an intervening alloc, then these 4169 values get lost. */ 4170 /* ??? We don't need to preserve all input registers. We only need to 4171 preserve those input registers used as arguments to the sibling call. 4172 It is unclear how to compute that number here. */ 4173 if (current_frame_info.n_input_regs != 0) 4174 { 4175 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); 4176 4177 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 4178 const0_rtx, const0_rtx, 4179 n_inputs, const0_rtx)); 4180 RTX_FRAME_RELATED_P (insn) = 1; 4181 4182 /* ??? We need to mark the alloc as frame-related so that it gets 4183 passed into ia64_asm_unwind_emit for ia64-specific unwinding. 4184 But there's nothing dwarf2 related to be done wrt the register 4185 windows. If we do nothing, dwarf2out will abort on the UNSPEC; 4186 the empty parallel means dwarf2out will not see anything. */ 4187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 4188 gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0))); 4189 } 4190 } 4191} 4192 4193/* Return 1 if br.ret can do all the work required to return from a 4194 function. */ 4195 4196int 4197ia64_direct_return (void) 4198{ 4199 if (reload_completed && ! frame_pointer_needed) 4200 { 4201 ia64_compute_frame_size (get_frame_size ()); 4202 4203 return (current_frame_info.total_size == 0 4204 && current_frame_info.n_spilled == 0 4205 && current_frame_info.r[reg_save_b0] == 0 4206 && current_frame_info.r[reg_save_pr] == 0 4207 && current_frame_info.r[reg_save_ar_pfs] == 0 4208 && current_frame_info.r[reg_save_ar_unat] == 0 4209 && current_frame_info.r[reg_save_ar_lc] == 0); 4210 } 4211 return 0; 4212} 4213 4214/* Return the magic cookie that we use to hold the return address 4215 during early compilation. */ 4216 4217rtx 4218ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) 4219{ 4220 if (count != 0) 4221 return NULL; 4222 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); 4223} 4224 4225/* Split this value after reload, now that we know where the return 4226 address is saved. */ 4227 4228void 4229ia64_split_return_addr_rtx (rtx dest) 4230{ 4231 rtx src; 4232 4233 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 4234 { 4235 if (current_frame_info.r[reg_save_b0] != 0) 4236 { 4237 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 4238 reg_emitted (reg_save_b0); 4239 } 4240 else 4241 { 4242 HOST_WIDE_INT off; 4243 unsigned int regno; 4244 rtx off_r; 4245 4246 /* Compute offset from CFA for BR0. */ 4247 /* ??? Must be kept in sync with ia64_expand_prologue. */ 4248 off = (current_frame_info.spill_cfa_off 4249 + current_frame_info.spill_size); 4250 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 4251 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 4252 off -= 8; 4253 4254 /* Convert CFA offset to a register based offset. 
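   With a frame pointer the hard FP still holds the CFA (it was copied
   from the stack pointer before the stack adjustment), so OFF applies
   directly; otherwise the stack pointer sits total_size bytes below
   the CFA and we compensate below.  As a worked example with
   hypothetical numbers: off == -32 and total_size == 48 yield the
   address sp + 16.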
*/ 4255 if (frame_pointer_needed) 4256 src = hard_frame_pointer_rtx; 4257 else 4258 { 4259 src = stack_pointer_rtx; 4260 off += current_frame_info.total_size; 4261 } 4262 4263 /* Load address into scratch register. */ 4264 off_r = GEN_INT (off); 4265 if (satisfies_constraint_I (off_r)) 4266 emit_insn (gen_adddi3 (dest, src, off_r)); 4267 else 4268 { 4269 emit_move_insn (dest, off_r); 4270 emit_insn (gen_adddi3 (dest, src, dest)); 4271 } 4272 4273 src = gen_rtx_MEM (Pmode, dest); 4274 } 4275 } 4276 else 4277 src = gen_rtx_REG (DImode, BR_REG (0)); 4278 4279 emit_move_insn (dest, src); 4280} 4281 4282int 4283ia64_hard_regno_rename_ok (int from, int to) 4284{ 4285 /* Don't clobber any of the registers we reserved for the prologue. */ 4286 unsigned int r; 4287 4288 for (r = reg_fp; r <= reg_save_ar_lc; r++) 4289 if (to == current_frame_info.r[r] 4290 || from == current_frame_info.r[r] 4291 || to == emitted_frame_related_regs[r] 4292 || from == emitted_frame_related_regs[r]) 4293 return 0; 4294 4295 /* Don't use output registers outside the register frame. */ 4296 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 4297 return 0; 4298 4299 /* Retain even/oddness on predicate register pairs. */ 4300 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 4301 return (from & 1) == (to & 1); 4302 4303 return 1; 4304} 4305 4306/* Target hook for assembling integer objects. Handle word-sized 4307 aligned objects and detect the cases when @fptr is needed. */ 4308 4309static bool 4310ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) 4311{ 4312 if (size == POINTER_SIZE / BITS_PER_UNIT 4313 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 4314 && GET_CODE (x) == SYMBOL_REF 4315 && SYMBOL_REF_FUNCTION_P (x)) 4316 { 4317 static const char * const directive[2][2] = { 4318 /* 64-bit pointer */ /* 32-bit pointer */ 4319 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ 4320 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ 4321 }; 4322 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); 4323 output_addr_const (asm_out_file, x); 4324 fputs (")\n", asm_out_file); 4325 return true; 4326 } 4327 return default_assemble_integer (x, size, aligned_p); 4328} 4329 4330/* Emit the function prologue. */ 4331 4332static void 4333ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4334{ 4335 int mask, grsave, grsave_prev; 4336 4337 if (current_frame_info.need_regstk) 4338 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 4339 current_frame_info.n_input_regs, 4340 current_frame_info.n_local_regs, 4341 current_frame_info.n_output_regs, 4342 current_frame_info.n_rotate_regs); 4343 4344 if (ia64_except_unwind_info (&global_options) != UI_TARGET) 4345 return; 4346 4347 /* Emit the .prologue directive. 
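   MASK is a bit vector describing which of the standard save registers
   were placed in consecutive general registers starting at GRSAVE:
   8 = b0, 4 = ar.pfs, 2 = the frame pointer, 1 = the predicates.
   For illustration (hypothetical values): a mask of 12 (8 | 4) tells
   the unwinder that b0 was saved in GRSAVE and ar.pfs in GRSAVE + 1.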
*/ 4348 4349 mask = 0; 4350 grsave = grsave_prev = 0; 4351 if (current_frame_info.r[reg_save_b0] != 0) 4352 { 4353 mask |= 8; 4354 grsave = grsave_prev = current_frame_info.r[reg_save_b0]; 4355 } 4356 if (current_frame_info.r[reg_save_ar_pfs] != 0 4357 && (grsave_prev == 0 4358 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1)) 4359 { 4360 mask |= 4; 4361 if (grsave_prev == 0) 4362 grsave = current_frame_info.r[reg_save_ar_pfs]; 4363 grsave_prev = current_frame_info.r[reg_save_ar_pfs]; 4364 } 4365 if (current_frame_info.r[reg_fp] != 0 4366 && (grsave_prev == 0 4367 || current_frame_info.r[reg_fp] == grsave_prev + 1)) 4368 { 4369 mask |= 2; 4370 if (grsave_prev == 0) 4371 grsave = HARD_FRAME_POINTER_REGNUM; 4372 grsave_prev = current_frame_info.r[reg_fp]; 4373 } 4374 if (current_frame_info.r[reg_save_pr] != 0 4375 && (grsave_prev == 0 4376 || current_frame_info.r[reg_save_pr] == grsave_prev + 1)) 4377 { 4378 mask |= 1; 4379 if (grsave_prev == 0) 4380 grsave = current_frame_info.r[reg_save_pr]; 4381 } 4382 4383 if (mask && TARGET_GNU_AS) 4384 fprintf (file, "\t.prologue %d, %d\n", mask, 4385 ia64_dbx_register_number (grsave)); 4386 else 4387 fputs ("\t.prologue\n", file); 4388 4389 /* Emit a .spill directive, if necessary, to relocate the base of 4390 the register spill area. */ 4391 if (current_frame_info.spill_cfa_off != -16) 4392 fprintf (file, "\t.spill %ld\n", 4393 (long) (current_frame_info.spill_cfa_off 4394 + current_frame_info.spill_size)); 4395} 4396 4397/* Emit the .body directive at the scheduled end of the prologue. */ 4398 4399static void 4400ia64_output_function_end_prologue (FILE *file) 4401{ 4402 if (ia64_except_unwind_info (&global_options) != UI_TARGET) 4403 return; 4404 4405 fputs ("\t.body\n", file); 4406} 4407 4408/* Emit the function epilogue. */ 4409 4410static void 4411ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 4412 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 4413{ 4414 int i; 4415 4416 if (current_frame_info.r[reg_fp]) 4417 { 4418 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 4419 reg_names[HARD_FRAME_POINTER_REGNUM] 4420 = reg_names[current_frame_info.r[reg_fp]]; 4421 reg_names[current_frame_info.r[reg_fp]] = tmp; 4422 reg_emitted (reg_fp); 4423 } 4424 if (! TARGET_REG_NAMES) 4425 { 4426 for (i = 0; i < current_frame_info.n_input_regs; i++) 4427 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 4428 for (i = 0; i < current_frame_info.n_local_regs; i++) 4429 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 4430 for (i = 0; i < current_frame_info.n_output_regs; i++) 4431 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 4432 } 4433 4434 current_frame_info.initialized = 0; 4435} 4436 4437int 4438ia64_dbx_register_number (int regno) 4439{ 4440 /* In ia64_expand_prologue we quite literally renamed the frame pointer 4441 from its home at loc79 to something inside the register frame. We 4442 must perform the same renumbering here for the debug info. 
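   The swap below is symmetric, so a query for HARD_FRAME_POINTER_REGNUM
   reports the register that actually holds the frame pointer, and a
   query for that register reports loc79's number; either way the debug
   info stays consistent with the emitted code.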
*/ 4443 if (current_frame_info.r[reg_fp]) 4444 { 4445 if (regno == HARD_FRAME_POINTER_REGNUM) 4446 regno = current_frame_info.r[reg_fp]; 4447 else if (regno == current_frame_info.r[reg_fp]) 4448 regno = HARD_FRAME_POINTER_REGNUM; 4449 } 4450 4451 if (IN_REGNO_P (regno)) 4452 return 32 + regno - IN_REG (0); 4453 else if (LOC_REGNO_P (regno)) 4454 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 4455 else if (OUT_REGNO_P (regno)) 4456 return (32 + current_frame_info.n_input_regs 4457 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 4458 else 4459 return regno; 4460} 4461 4462/* Implement TARGET_TRAMPOLINE_INIT. 4463 4464 The trampoline should set the static chain pointer to value placed 4465 into the trampoline and should branch to the specified routine. 4466 To make the normal indirect-subroutine calling convention work, 4467 the trampoline must look like a function descriptor; the first 4468 word being the target address and the second being the target's 4469 global pointer. 4470 4471 We abuse the concept of a global pointer by arranging for it 4472 to point to the data we need to load. The complete trampoline 4473 has the following form: 4474 4475 +-------------------+ \ 4476 TRAMP: | __ia64_trampoline | | 4477 +-------------------+ > fake function descriptor 4478 | TRAMP+16 | | 4479 +-------------------+ / 4480 | target descriptor | 4481 +-------------------+ 4482 | static link | 4483 +-------------------+ 4484*/ 4485 4486static void 4487ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain) 4488{ 4489 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 4490 rtx addr, addr_reg, tramp, eight = GEN_INT (8); 4491 4492 /* The Intel assembler requires that the global __ia64_trampoline symbol 4493 be declared explicitly */ 4494 if (!TARGET_GNU_AS) 4495 { 4496 static bool declared_ia64_trampoline = false; 4497 4498 if (!declared_ia64_trampoline) 4499 { 4500 declared_ia64_trampoline = true; 4501 (*targetm.asm_out.globalize_label) (asm_out_file, 4502 "__ia64_trampoline"); 4503 } 4504 } 4505 4506 /* Make sure addresses are Pmode even if we are in ILP32 mode. */ 4507 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0)); 4508 fnaddr = convert_memory_address (Pmode, fnaddr); 4509 static_chain = convert_memory_address (Pmode, static_chain); 4510 4511 /* Load up our iterator. */ 4512 addr_reg = copy_to_reg (addr); 4513 m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0); 4514 4515 /* The first two words are the fake descriptor: 4516 __ia64_trampoline, ADDR+16. */ 4517 tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline"); 4518 if (TARGET_ABI_OPEN_VMS) 4519 { 4520 /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity 4521 in the Macro-32 compiler) and changed the semantics of the LTOFF22 4522 relocation against function symbols to make it identical to the 4523 LTOFF_FPTR22 relocation. Emit the latter directly to stay within 4524 strict ELF and dereference to get the bare code address. 
*/
4525 rtx reg = gen_reg_rtx (Pmode);
4526 SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
4527 emit_move_insn (reg, tramp);
4528 emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
4529 tramp = reg;
4530 }
4531 emit_move_insn (m_tramp, tramp);
4532 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4533 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4534
4535 emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
4536 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4537 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4538
4539 /* The third word is the target descriptor. */
4540 emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
4541 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
4542 m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);
4543
4544 /* The fourth word is the static chain. */
4545 emit_move_insn (m_tramp, static_chain);
4546}
4547
4548/* Do any needed setup for a variadic function. CUM has not been updated
4549 for the last named argument, which has type TYPE and mode MODE.
4550
4551 We generate the actual spill instructions during prologue generation. */
4552
4553static void
4554ia64_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4555 tree type, int * pretend_size,
4556 int second_time ATTRIBUTE_UNUSED)
4557{
4558 CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
4559
4560 /* Skip the current argument. */
4561 ia64_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, 1);
4562
4563 if (next_cum.words < MAX_ARGUMENT_SLOTS)
4564 {
4565 int n = MAX_ARGUMENT_SLOTS - next_cum.words;
4566 *pretend_size = n * UNITS_PER_WORD;
4567 cfun->machine->n_varargs = n;
4568 }
4569}
4570
4571/* Check whether TYPE is a homogeneous floating point aggregate. If
4572 it is, return the mode of the floating point type that appears
4573 in all leaves. If it is not, return VOIDmode.
4574
4575 An aggregate is a homogeneous floating point aggregate if all
4576 fields/elements in it have the same floating point type (e.g.,
4577 SFmode). 128-bit quad-precision floats are excluded.
4578
4579 Variable sized aggregates should never arrive here, since we should
4580 have already decided to pass them by reference. Top-level zero-sized
4581 aggregates are excluded because our parallels crash the middle-end. */
4582
4583static machine_mode
4584hfa_element_mode (const_tree type, bool nested)
4585{
4586 machine_mode element_mode = VOIDmode;
4587 machine_mode mode;
4588 enum tree_code code = TREE_CODE (type);
4589 int know_element_mode = 0;
4590 tree t;
4591
4592 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
4593 return VOIDmode;
4594
4595 switch (code)
4596 {
4597 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE:
4598 case BOOLEAN_TYPE: case POINTER_TYPE:
4599 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE:
4600 case LANG_TYPE: case FUNCTION_TYPE:
4601 return VOIDmode;
4602
4603 /* Fortran complex types are supposed to be HFAs, so we need to handle
4604 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex
4605 types though. */
4606 case COMPLEX_TYPE:
4607 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
4608 && TYPE_MODE (type) != TCmode)
4609 return GET_MODE_INNER (TYPE_MODE (type));
4610 else
4611 return VOIDmode;
4612
4613 case REAL_TYPE:
4614 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
4615 mode if this is contained within an aggregate.
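   For example (illustration only), struct { double d[4]; } yields
   DFmode here via the ARRAY_TYPE recursion, whereas a bare double
   parameter is not an HFA at all, and 128-bit TFmode elements are
   rejected even when nested.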
*/ 4616 if (nested && TYPE_MODE (type) != TFmode) 4617 return TYPE_MODE (type); 4618 else 4619 return VOIDmode; 4620 4621 case ARRAY_TYPE: 4622 return hfa_element_mode (TREE_TYPE (type), 1); 4623 4624 case RECORD_TYPE: 4625 case UNION_TYPE: 4626 case QUAL_UNION_TYPE: 4627 for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t)) 4628 { 4629 if (TREE_CODE (t) != FIELD_DECL) 4630 continue; 4631 4632 mode = hfa_element_mode (TREE_TYPE (t), 1); 4633 if (know_element_mode) 4634 { 4635 if (mode != element_mode) 4636 return VOIDmode; 4637 } 4638 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 4639 return VOIDmode; 4640 else 4641 { 4642 know_element_mode = 1; 4643 element_mode = mode; 4644 } 4645 } 4646 return element_mode; 4647 4648 default: 4649 /* If we reach here, we probably have some front-end specific type 4650 that the backend doesn't know about. This can happen via the 4651 aggregate_value_p call in init_function_start. All we can do is 4652 ignore unknown tree types. */ 4653 return VOIDmode; 4654 } 4655 4656 return VOIDmode; 4657} 4658 4659/* Return the number of words required to hold a quantity of TYPE and MODE 4660 when passed as an argument. */ 4661static int 4662ia64_function_arg_words (const_tree type, machine_mode mode) 4663{ 4664 int words; 4665 4666 if (mode == BLKmode) 4667 words = int_size_in_bytes (type); 4668 else 4669 words = GET_MODE_SIZE (mode); 4670 4671 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ 4672} 4673 4674/* Return the number of registers that should be skipped so the current 4675 argument (described by TYPE and WORDS) will be properly aligned. 4676 4677 Integer and float arguments larger than 8 bytes start at the next 4678 even boundary. Aggregates larger than 8 bytes start at the next 4679 even boundary if the aggregate has 16 byte alignment. Note that 4680 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment 4681 but are still to be aligned in registers. 4682 4683 ??? The ABI does not specify how to handle aggregates with 4684 alignment from 9 to 15 bytes, or greater than 16. We handle them 4685 all as if they had 16 byte alignment. Such aggregates can occur 4686 only if gcc extensions are used. */ 4687static int 4688ia64_function_arg_offset (const CUMULATIVE_ARGS *cum, 4689 const_tree type, int words) 4690{ 4691 /* No registers are skipped on VMS. */ 4692 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0) 4693 return 0; 4694 4695 if (type 4696 && TREE_CODE (type) != INTEGER_TYPE 4697 && TREE_CODE (type) != REAL_TYPE) 4698 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; 4699 else 4700 return words > 1; 4701} 4702 4703/* Return rtx for register where argument is passed, or zero if it is passed 4704 on the stack. */ 4705/* ??? 128-bit quad-precision floats are always passed in general 4706 registers. */ 4707 4708static rtx 4709ia64_function_arg_1 (cumulative_args_t cum_v, machine_mode mode, 4710 const_tree type, bool named, bool incoming) 4711{ 4712 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4713 4714 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 4715 int words = ia64_function_arg_words (type, mode); 4716 int offset = ia64_function_arg_offset (cum, type, words); 4717 machine_mode hfa_mode = VOIDmode; 4718 4719 /* For OPEN VMS, emit the instruction setting up the argument register here, 4720 when we know this will be together with the other arguments setup related 4721 insns. This is not the conceptually best place to do this, but this is 4722 the easiest as we have convenient access to cumulative args info. 
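   The value built below packs cum->words into the low byte and one
   3-bit ivms_arg_type code per argument slot at bits 8 + 3 * i; the
   result is loaded into r25, the register the OpenVMS calling
   standard reserves for argument information (AI).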
*/ 4723 4724 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node 4725 && named == 1) 4726 { 4727 unsigned HOST_WIDE_INT regval = cum->words; 4728 int i; 4729 4730 for (i = 0; i < 8; i++) 4731 regval |= ((int) cum->atypes[i]) << (i * 3 + 8); 4732 4733 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)), 4734 GEN_INT (regval)); 4735 } 4736 4737 /* If all argument slots are used, then it must go on the stack. */ 4738 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 4739 return 0; 4740 4741 /* On OpenVMS argument is either in Rn or Fn. */ 4742 if (TARGET_ABI_OPEN_VMS) 4743 { 4744 if (FLOAT_MODE_P (mode)) 4745 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words); 4746 else 4747 return gen_rtx_REG (mode, basereg + cum->words); 4748 } 4749 4750 /* Check for and handle homogeneous FP aggregates. */ 4751 if (type) 4752 hfa_mode = hfa_element_mode (type, 0); 4753 4754 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 4755 and unprototyped hfas are passed specially. */ 4756 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 4757 { 4758 rtx loc[16]; 4759 int i = 0; 4760 int fp_regs = cum->fp_regs; 4761 int int_regs = cum->words + offset; 4762 int hfa_size = GET_MODE_SIZE (hfa_mode); 4763 int byte_size; 4764 int args_byte_size; 4765 4766 /* If prototyped, pass it in FR regs then GR regs. 4767 If not prototyped, pass it in both FR and GR regs. 4768 4769 If this is an SFmode aggregate, then it is possible to run out of 4770 FR regs while GR regs are still left. In that case, we pass the 4771 remaining part in the GR regs. */ 4772 4773 /* Fill the FP regs. We do this always. We stop if we reach the end 4774 of the argument, the last FP register, or the last argument slot. */ 4775 4776 byte_size = ((mode == BLKmode) 4777 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4778 args_byte_size = int_regs * UNITS_PER_WORD; 4779 offset = 0; 4780 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 4781 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 4782 { 4783 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4784 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 4785 + fp_regs)), 4786 GEN_INT (offset)); 4787 offset += hfa_size; 4788 args_byte_size += hfa_size; 4789 fp_regs++; 4790 } 4791 4792 /* If no prototype, then the whole thing must go in GR regs. */ 4793 if (! cum->prototype) 4794 offset = 0; 4795 /* If this is an SFmode aggregate, then we might have some left over 4796 that needs to go in GR regs. */ 4797 else if (byte_size != offset) 4798 int_regs += offset / UNITS_PER_WORD; 4799 4800 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ 4801 4802 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 4803 { 4804 machine_mode gr_mode = DImode; 4805 unsigned int gr_size; 4806 4807 /* If we have an odd 4 byte hunk because we ran out of FR regs, 4808 then this goes in a GR reg left adjusted/little endian, right 4809 adjusted/big endian. */ 4810 /* ??? Currently this is handled wrong, because 4-byte hunks are 4811 always right adjusted/little endian. */ 4812 if (offset & 0x4) 4813 gr_mode = SImode; 4814 /* If we have an even 4 byte hunk because the aggregate is a 4815 multiple of 4 bytes in size, then this goes in a GR reg right 4816 adjusted/little endian. 
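   In other words, the test below catches a final 4-byte remainder
   that starts on an 8-byte boundary (aggregate size a multiple of 4
   but not of 8); hunks starting at an odd word half were already
   caught by the preceding test.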
*/
4817 else if (byte_size - offset == 4)
4818 gr_mode = SImode;
4819
4820 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
4821 gen_rtx_REG (gr_mode, (basereg
4822 + int_regs)),
4823 GEN_INT (offset));
4824
4825 gr_size = GET_MODE_SIZE (gr_mode);
4826 offset += gr_size;
4827 if (gr_size == UNITS_PER_WORD
4828 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
4829 int_regs++;
4830 else if (gr_size > UNITS_PER_WORD)
4831 int_regs += gr_size / UNITS_PER_WORD;
4832 }
4833 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
4834 }
4835
4836 /* Integral values and aggregates go in general registers. If we have run
4837 out of FR registers, then FP values must also go in general registers.
4838 This can happen when we have an SFmode HFA. */
4839 else if (mode == TFmode || mode == TCmode
4840 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
4841 {
4842 int byte_size = ((mode == BLKmode)
4843 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4844 if (BYTES_BIG_ENDIAN
4845 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4846 && byte_size < UNITS_PER_WORD
4847 && byte_size > 0)
4848 {
4849 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4850 gen_rtx_REG (DImode,
4851 (basereg + cum->words
4852 + offset)),
4853 const0_rtx);
4854 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4855 }
4856 else
4857 return gen_rtx_REG (mode, basereg + cum->words + offset);
4858
4859 }
4860
4861 /* If there is a prototype, then FP values go in an FR register when
4862 named, and in a GR register when unnamed. */
4863 else if (cum->prototype)
4864 {
4865 if (named)
4866 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
4867 /* In big-endian mode, an anonymous SFmode value must be represented
4868 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
4869 the value into the high half of the general register. */
4870 else if (BYTES_BIG_ENDIAN && mode == SFmode)
4871 return gen_rtx_PARALLEL (mode,
4872 gen_rtvec (1,
4873 gen_rtx_EXPR_LIST (VOIDmode,
4874 gen_rtx_REG (DImode, basereg + cum->words + offset),
4875 const0_rtx)));
4876 else
4877 return gen_rtx_REG (mode, basereg + cum->words + offset);
4878 }
4879 /* If there is no prototype, then FP values go in both FR and GR
4880 registers. */
4881 else
4882 {
4883 /* See comment above. */
4884 machine_mode inner_mode =
4885 (BYTES_BIG_ENDIAN && mode == SFmode) ? DImode : mode;
4886
4887 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
4888 gen_rtx_REG (mode, (FR_ARG_FIRST
4889 + cum->fp_regs)),
4890 const0_rtx);
4891 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4892 gen_rtx_REG (inner_mode,
4893 (basereg + cum->words
4894 + offset)),
4895 const0_rtx);
4896
4897 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
4898 }
4899}
4900
4901/* Implement TARGET_FUNCTION_ARG target hook. */
4902
4903static rtx
4904ia64_function_arg (cumulative_args_t cum, machine_mode mode,
4905 const_tree type, bool named)
4906{
4907 return ia64_function_arg_1 (cum, mode, type, named, false);
4908}
4909
4910/* Implement TARGET_FUNCTION_INCOMING_ARG target hook. */
4911
4912static rtx
4913ia64_function_incoming_arg (cumulative_args_t cum,
4914 machine_mode mode,
4915 const_tree type, bool named)
4916{
4917 return ia64_function_arg_1 (cum, mode, type, named, true);
4918}
4919
4920/* Return the number of bytes, at the beginning of the argument, that must
4921 be put in registers. 0 if the argument is entirely in registers or
4922 entirely in memory.
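   As a worked example with hypothetical numbers, taking
   MAX_ARGUMENT_SLOTS as 8 and UNITS_PER_WORD as 8: a 4-word argument
   arriving at slot 6 with no alignment padding has only 2 slots left
   in registers, so we return (8 - 6 - 0) * 8 == 16 bytes.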
*/ 4923 4924static int 4925ia64_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode, 4926 tree type, bool named ATTRIBUTE_UNUSED) 4927{ 4928 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4929 4930 int words = ia64_function_arg_words (type, mode); 4931 int offset = ia64_function_arg_offset (cum, type, words); 4932 4933 /* If all argument slots are used, then it must go on the stack. */ 4934 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 4935 return 0; 4936 4937 /* It doesn't matter whether the argument goes in FR or GR regs. If 4938 it fits within the 8 argument slots, then it goes entirely in 4939 registers. If it extends past the last argument slot, then the rest 4940 goes on the stack. */ 4941 4942 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) 4943 return 0; 4944 4945 return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD; 4946} 4947 4948/* Return ivms_arg_type based on machine_mode. */ 4949 4950static enum ivms_arg_type 4951ia64_arg_type (machine_mode mode) 4952{ 4953 switch (mode) 4954 { 4955 case SFmode: 4956 return FS; 4957 case DFmode: 4958 return FT; 4959 default: 4960 return I64; 4961 } 4962} 4963 4964/* Update CUM to point after this argument. This is patterned after 4965 ia64_function_arg. */ 4966 4967static void 4968ia64_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 4969 const_tree type, bool named) 4970{ 4971 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 4972 int words = ia64_function_arg_words (type, mode); 4973 int offset = ia64_function_arg_offset (cum, type, words); 4974 machine_mode hfa_mode = VOIDmode; 4975 4976 /* If all arg slots are already full, then there is nothing to do. */ 4977 if (cum->words >= MAX_ARGUMENT_SLOTS) 4978 { 4979 cum->words += words + offset; 4980 return; 4981 } 4982 4983 cum->atypes[cum->words] = ia64_arg_type (mode); 4984 cum->words += words + offset; 4985 4986 /* On OpenVMS argument is either in Rn or Fn. */ 4987 if (TARGET_ABI_OPEN_VMS) 4988 { 4989 cum->int_regs = cum->words; 4990 cum->fp_regs = cum->words; 4991 return; 4992 } 4993 4994 /* Check for and handle homogeneous FP aggregates. */ 4995 if (type) 4996 hfa_mode = hfa_element_mode (type, 0); 4997 4998 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 4999 and unprototyped hfas are passed specially. */ 5000 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 5001 { 5002 int fp_regs = cum->fp_regs; 5003 /* This is the original value of cum->words + offset. */ 5004 int int_regs = cum->words - words; 5005 int hfa_size = GET_MODE_SIZE (hfa_mode); 5006 int byte_size; 5007 int args_byte_size; 5008 5009 /* If prototyped, pass it in FR regs then GR regs. 5010 If not prototyped, pass it in both FR and GR regs. 5011 5012 If this is an SFmode aggregate, then it is possible to run out of 5013 FR regs while GR regs are still left. In that case, we pass the 5014 remaining part in the GR regs. */ 5015 5016 /* Fill the FP regs. We do this always. We stop if we reach the end 5017 of the argument, the last FP register, or the last argument slot. */ 5018 5019 byte_size = ((mode == BLKmode) 5020 ? 
int_size_in_bytes (type) : GET_MODE_SIZE (mode));
5021 args_byte_size = int_regs * UNITS_PER_WORD;
5022 offset = 0;
5023 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
5024 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
5025 {
5026 offset += hfa_size;
5027 args_byte_size += hfa_size;
5028 fp_regs++;
5029 }
5030
5031 cum->fp_regs = fp_regs;
5032 }
5033
5034 /* Integral values and aggregates go in general registers. So do TFmode
5035 FP values. If we have run out of FR registers, then other FP values must
5036 also go in general registers. This can happen when we have an SFmode HFA. */
5037 else if (mode == TFmode || mode == TCmode
5038 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS))
5039 cum->int_regs = cum->words;
5040
5041 /* If there is a prototype, then FP values go in an FR register when
5042 named, and in a GR register when unnamed. */
5043 else if (cum->prototype)
5044 {
5045 if (! named)
5046 cum->int_regs = cum->words;
5047 else
5048 /* ??? Complex types should not reach here. */
5049 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5050 }
5051 /* If there is no prototype, then FP values go in both FR and GR
5052 registers. */
5053 else
5054 {
5055 /* ??? Complex types should not reach here. */
5056 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
5057 cum->int_regs = cum->words;
5058 }
5059}
5060
5061/* Arguments with alignment larger than 8 bytes start at the next even
5062 boundary. On ILP32 HPUX, TFmode arguments start on the next even boundary
5063 even though their normal alignment is 8 bytes. See ia64_function_arg. */
5064
5065static unsigned int
5066ia64_function_arg_boundary (machine_mode mode, const_tree type)
5067{
5068 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
5069 return PARM_BOUNDARY * 2;
5070
5071 if (type)
5072 {
5073 if (TYPE_ALIGN (type) > PARM_BOUNDARY)
5074 return PARM_BOUNDARY * 2;
5075 else
5076 return PARM_BOUNDARY;
5077 }
5078
5079 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
5080 return PARM_BOUNDARY * 2;
5081 else
5082 return PARM_BOUNDARY;
5083}
5084
5085/* True if it is OK to do sibling call optimization for the specified
5086 call expression EXP. DECL will be the called function, or NULL if
5087 this is an indirect call. */
5088static bool
5089ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5090{
5091 /* We can't perform a sibcall if the current function has the syscall_linkage
5092 attribute. */
5093 if (lookup_attribute ("syscall_linkage",
5094 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
5095 return false;
5096
5097 /* We must always return with our current GP. This means we can
5098 only sibcall to functions defined in the current module unless
5099 TARGET_CONST_GP is set to true. */
5100 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
5101}
5102
5103
5104/* Implement va_arg. */
5105
5106static tree
5107ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
5108 gimple_seq *post_p)
5109{
5110 /* Variable sized types are passed by reference. */
5111 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
5112 {
5113 tree ptrtype = build_pointer_type (type);
5114 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
5115 return build_va_arg_indirect_ref (addr);
5116 }
5117
5118 /* Aggregate arguments with alignment larger than 8 bytes start at
5119 the next even boundary.
Integer and floating point arguments
5120 do so if they are larger than 8 bytes, whether or not they are
5121 also aligned larger than 8 bytes. */
5122 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
5123 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
5124 {
5125 tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
5126 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5127 build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
5128 gimplify_assign (unshare_expr (valist), t, pre_p);
5129 }
5130
5131 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5132}
5133
5134/* Return 1 if the function return value is returned in memory. Return 0
5135 if it is in a register. */
5136
5137static bool
5138ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
5139{
5140 machine_mode mode;
5141 machine_mode hfa_mode;
5142 HOST_WIDE_INT byte_size;
5143
5144 mode = TYPE_MODE (valtype);
5145 byte_size = GET_MODE_SIZE (mode);
5146 if (mode == BLKmode)
5147 {
5148 byte_size = int_size_in_bytes (valtype);
5149 if (byte_size < 0)
5150 return true;
5151 }
5152
5153 /* HFAs with up to 8 elements are returned in the FP argument registers. */
5154
5155 hfa_mode = hfa_element_mode (valtype, 0);
5156 if (hfa_mode != VOIDmode)
5157 {
5158 int hfa_size = GET_MODE_SIZE (hfa_mode);
5159
5160 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
5161 return true;
5162 else
5163 return false;
5164 }
5165 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
5166 return true;
5167 else
5168 return false;
5169}
5170
5171/* Return the rtx for the register that holds the function return value. */
5172
5173static rtx
5174ia64_function_value (const_tree valtype,
5175 const_tree fn_decl_or_type,
5176 bool outgoing ATTRIBUTE_UNUSED)
5177{
5178 machine_mode mode;
5179 machine_mode hfa_mode;
5180 int unsignedp;
5181 const_tree func = fn_decl_or_type;
5182
5183 if (fn_decl_or_type
5184 && !DECL_P (fn_decl_or_type))
5185 func = NULL;
5186
5187 mode = TYPE_MODE (valtype);
5188 hfa_mode = hfa_element_mode (valtype, 0);
5189
5190 if (hfa_mode != VOIDmode)
5191 {
5192 rtx loc[8];
5193 int i;
5194 int hfa_size;
5195 int byte_size;
5196 int offset;
5197
5198 hfa_size = GET_MODE_SIZE (hfa_mode);
5199 byte_size = ((mode == BLKmode)
5200 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
5201 offset = 0;
5202 for (i = 0; offset < byte_size; i++)
5203 {
5204 loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
5205 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
5206 GEN_INT (offset));
5207 offset += hfa_size;
5208 }
5209 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
5210 }
5211 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
5212 return gen_rtx_REG (mode, FR_ARG_FIRST);
5213 else
5214 {
5215 bool need_parallel = false;
5216
5217 /* In big-endian mode, we need to manage the layout of aggregates
5218 in the registers so that we get the bits properly aligned in
5219 the highpart of the registers. */
5220 if (BYTES_BIG_ENDIAN
5221 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
5222 need_parallel = true;
5223
5224 /* Something like struct S { long double x; char a[0]; } is not an
5225 HFA structure, and therefore doesn't go in fp registers. But
5226 the middle-end will give it XFmode anyway, and XFmode values
5227 don't normally fit in integer registers. So we need to smuggle
5228 the value inside a parallel.
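   RFmode and XCmode values have the same problem, which is why the
   test below covers all three modes.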
*/ 5229 else if (mode == XFmode || mode == XCmode || mode == RFmode) 5230 need_parallel = true; 5231 5232 if (need_parallel) 5233 { 5234 rtx loc[8]; 5235 int offset; 5236 int bytesize; 5237 int i; 5238 5239 offset = 0; 5240 bytesize = int_size_in_bytes (valtype); 5241 /* An empty PARALLEL is invalid here, but the return value 5242 doesn't matter for empty structs. */ 5243 if (bytesize == 0) 5244 return gen_rtx_REG (mode, GR_RET_FIRST); 5245 for (i = 0; offset < bytesize; i++) 5246 { 5247 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 5248 gen_rtx_REG (DImode, 5249 GR_RET_FIRST + i), 5250 GEN_INT (offset)); 5251 offset += UNITS_PER_WORD; 5252 } 5253 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 5254 } 5255 5256 mode = promote_function_mode (valtype, mode, &unsignedp, 5257 func ? TREE_TYPE (func) : NULL_TREE, 5258 true); 5259 5260 return gen_rtx_REG (mode, GR_RET_FIRST); 5261 } 5262} 5263 5264/* Worker function for TARGET_LIBCALL_VALUE. */ 5265 5266static rtx 5267ia64_libcall_value (machine_mode mode, 5268 const_rtx fun ATTRIBUTE_UNUSED) 5269{ 5270 return gen_rtx_REG (mode, 5271 (((GET_MODE_CLASS (mode) == MODE_FLOAT 5272 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 5273 && (mode) != TFmode) 5274 ? FR_RET_FIRST : GR_RET_FIRST)); 5275} 5276 5277/* Worker function for FUNCTION_VALUE_REGNO_P. */ 5278 5279static bool 5280ia64_function_value_regno_p (const unsigned int regno) 5281{ 5282 return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST) 5283 || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST)); 5284} 5285 5286/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 5287 We need to emit DTP-relative relocations. */ 5288 5289static void 5290ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) 5291{ 5292 gcc_assert (size == 4 || size == 8); 5293 if (size == 4) 5294 fputs ("\tdata4.ua\t@dtprel(", file); 5295 else 5296 fputs ("\tdata8.ua\t@dtprel(", file); 5297 output_addr_const (file, x); 5298 fputs (")", file); 5299} 5300 5301/* Print a memory address as an operand to reference that memory location. */ 5302 5303/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps 5304 also call this from ia64_print_operand for memory addresses. */ 5305 5306static void 5307ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, 5308 rtx address ATTRIBUTE_UNUSED) 5309{ 5310} 5311 5312/* Print an operand to an assembler instruction. 5313 C Swap and print a comparison operator. 5314 D Print an FP comparison operator. 5315 E Print 32 - constant, for SImode shifts as extract. 5316 e Print 64 - constant, for DImode rotates. 5317 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 5318 a floating point register emitted normally. 5319 G A floating point constant. 5320 I Invert a predicate register by adding 1. 5321 J Select the proper predicate register for a condition. 5322 j Select the inverse predicate register for a condition. 5323 O Append .acq for volatile load. 5324 P Postincrement of a MEM. 5325 Q Append .rel for volatile store. 5326 R Print .s .d or nothing for a single, double or no truncation. 5327 S Shift amount for shladd instruction. 5328 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 5329 for Intel assembler. 5330 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 5331 for Intel assembler. 5332 X A pair of floating point registers. 5333 r Print register name, or constant 0 as r0. HP compatibility for 5334 Linux kernel. 5335 v Print vector constant value as an 8-byte integer value. 
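   In addition, two punctuation codes are recognized (see
   ia64_print_operand_punct_valid_p below):
   + Emit a branch hint completer (.sptk, .dptk, .dpnt or .spnt)
   derived from the insn's REG_BR_PROB note.
   , Emit the "(pN) " qualifying-predicate prefix for the current
   insn, if it is predicated.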
*/ 5336 5337static void 5338ia64_print_operand (FILE * file, rtx x, int code) 5339{ 5340 const char *str; 5341 5342 switch (code) 5343 { 5344 case 0: 5345 /* Handled below. */ 5346 break; 5347 5348 case 'C': 5349 { 5350 enum rtx_code c = swap_condition (GET_CODE (x)); 5351 fputs (GET_RTX_NAME (c), file); 5352 return; 5353 } 5354 5355 case 'D': 5356 switch (GET_CODE (x)) 5357 { 5358 case NE: 5359 str = "neq"; 5360 break; 5361 case UNORDERED: 5362 str = "unord"; 5363 break; 5364 case ORDERED: 5365 str = "ord"; 5366 break; 5367 case UNLT: 5368 str = "nge"; 5369 break; 5370 case UNLE: 5371 str = "ngt"; 5372 break; 5373 case UNGT: 5374 str = "nle"; 5375 break; 5376 case UNGE: 5377 str = "nlt"; 5378 break; 5379 case UNEQ: 5380 case LTGT: 5381 gcc_unreachable (); 5382 default: 5383 str = GET_RTX_NAME (GET_CODE (x)); 5384 break; 5385 } 5386 fputs (str, file); 5387 return; 5388 5389 case 'E': 5390 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 5391 return; 5392 5393 case 'e': 5394 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 5395 return; 5396 5397 case 'F': 5398 if (x == CONST0_RTX (GET_MODE (x))) 5399 str = reg_names [FR_REG (0)]; 5400 else if (x == CONST1_RTX (GET_MODE (x))) 5401 str = reg_names [FR_REG (1)]; 5402 else 5403 { 5404 gcc_assert (GET_CODE (x) == REG); 5405 str = reg_names [REGNO (x)]; 5406 } 5407 fputs (str, file); 5408 return; 5409 5410 case 'G': 5411 { 5412 long val[4]; 5413 REAL_VALUE_TYPE rv; 5414 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 5415 real_to_target (val, &rv, GET_MODE (x)); 5416 if (GET_MODE (x) == SFmode) 5417 fprintf (file, "0x%08lx", val[0] & 0xffffffff); 5418 else if (GET_MODE (x) == DFmode) 5419 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1]) 5420 & 0xffffffff, 5421 (WORDS_BIG_ENDIAN ? val[1] : val[0]) 5422 & 0xffffffff); 5423 else 5424 output_operand_lossage ("invalid %%G mode"); 5425 } 5426 return; 5427 5428 case 'I': 5429 fputs (reg_names [REGNO (x) + 1], file); 5430 return; 5431 5432 case 'J': 5433 case 'j': 5434 { 5435 unsigned int regno = REGNO (XEXP (x, 0)); 5436 if (GET_CODE (x) == EQ) 5437 regno += 1; 5438 if (code == 'j') 5439 regno ^= 1; 5440 fputs (reg_names [regno], file); 5441 } 5442 return; 5443 5444 case 'O': 5445 if (MEM_VOLATILE_P (x)) 5446 fputs(".acq", file); 5447 return; 5448 5449 case 'P': 5450 { 5451 HOST_WIDE_INT value; 5452 5453 switch (GET_CODE (XEXP (x, 0))) 5454 { 5455 default: 5456 return; 5457 5458 case POST_MODIFY: 5459 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 5460 if (GET_CODE (x) == CONST_INT) 5461 value = INTVAL (x); 5462 else 5463 { 5464 gcc_assert (GET_CODE (x) == REG); 5465 fprintf (file, ", %s", reg_names[REGNO (x)]); 5466 return; 5467 } 5468 break; 5469 5470 case POST_INC: 5471 value = GET_MODE_SIZE (GET_MODE (x)); 5472 break; 5473 5474 case POST_DEC: 5475 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 5476 break; 5477 } 5478 5479 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); 5480 return; 5481 } 5482 5483 case 'Q': 5484 if (MEM_VOLATILE_P (x)) 5485 fputs(".rel", file); 5486 return; 5487 5488 case 'R': 5489 if (x == CONST0_RTX (GET_MODE (x))) 5490 fputs(".s", file); 5491 else if (x == CONST1_RTX (GET_MODE (x))) 5492 fputs(".d", file); 5493 else if (x == CONST2_RTX (GET_MODE (x))) 5494 ; 5495 else 5496 output_operand_lossage ("invalid %%R value"); 5497 return; 5498 5499 case 'S': 5500 fprintf (file, "%d", exact_log2 (INTVAL (x))); 5501 return; 5502 5503 case 'T': 5504 if (! 
TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 5505 { 5506 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 5507 return; 5508 } 5509 break; 5510 5511 case 'U': 5512 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 5513 { 5514 const char *prefix = "0x"; 5515 if (INTVAL (x) & 0x80000000) 5516 { 5517 fprintf (file, "0xffffffff"); 5518 prefix = ""; 5519 } 5520 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 5521 return; 5522 } 5523 break; 5524 5525 case 'X': 5526 { 5527 unsigned int regno = REGNO (x); 5528 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); 5529 } 5530 return; 5531 5532 case 'r': 5533 /* If this operand is the constant zero, write it as register zero. 5534 Any register, zero, or CONST_INT value is OK here. */ 5535 if (GET_CODE (x) == REG) 5536 fputs (reg_names[REGNO (x)], file); 5537 else if (x == CONST0_RTX (GET_MODE (x))) 5538 fputs ("r0", file); 5539 else if (GET_CODE (x) == CONST_INT) 5540 output_addr_const (file, x); 5541 else 5542 output_operand_lossage ("invalid %%r value"); 5543 return; 5544 5545 case 'v': 5546 gcc_assert (GET_CODE (x) == CONST_VECTOR); 5547 x = simplify_subreg (DImode, x, GET_MODE (x), 0); 5548 break; 5549 5550 case '+': 5551 { 5552 const char *which; 5553 5554 /* For conditional branches, returns or calls, substitute 5555 sptk, dptk, dpnt, or spnt for %s. */ 5556 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 5557 if (x) 5558 { 5559 int pred_val = XINT (x, 0); 5560 5561 /* Guess top and bottom 10% statically predicted. */ 5562 if (pred_val < REG_BR_PROB_BASE / 50 5563 && br_prob_note_reliable_p (x)) 5564 which = ".spnt"; 5565 else if (pred_val < REG_BR_PROB_BASE / 2) 5566 which = ".dpnt"; 5567 else if (pred_val < REG_BR_PROB_BASE / 100 * 98 5568 || !br_prob_note_reliable_p (x)) 5569 which = ".dptk"; 5570 else 5571 which = ".sptk"; 5572 } 5573 else if (CALL_P (current_output_insn)) 5574 which = ".sptk"; 5575 else 5576 which = ".dptk"; 5577 5578 fputs (which, file); 5579 return; 5580 } 5581 5582 case ',': 5583 x = current_insn_predicate; 5584 if (x) 5585 { 5586 unsigned int regno = REGNO (XEXP (x, 0)); 5587 if (GET_CODE (x) == EQ) 5588 regno += 1; 5589 fprintf (file, "(%s) ", reg_names [regno]); 5590 } 5591 return; 5592 5593 default: 5594 output_operand_lossage ("ia64_print_operand: unknown code"); 5595 return; 5596 } 5597 5598 switch (GET_CODE (x)) 5599 { 5600 /* This happens for the spill/restore instructions. */ 5601 case POST_INC: 5602 case POST_DEC: 5603 case POST_MODIFY: 5604 x = XEXP (x, 0); 5605 /* ... fall through ... */ 5606 5607 case REG: 5608 fputs (reg_names [REGNO (x)], file); 5609 break; 5610 5611 case MEM: 5612 { 5613 rtx addr = XEXP (x, 0); 5614 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) 5615 addr = XEXP (addr, 0); 5616 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 5617 break; 5618 } 5619 5620 default: 5621 output_addr_const (file, x); 5622 break; 5623 } 5624 5625 return; 5626} 5627 5628/* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 5629 5630static bool 5631ia64_print_operand_punct_valid_p (unsigned char code) 5632{ 5633 return (code == '+' || code == ','); 5634} 5635 5636/* Compute a (partial) cost for rtx X. Return true if the complete 5637 cost has been computed, and false if subexpressions should be 5638 scanned. In either case, *TOTAL contains the cost result. */ 5639/* ??? This is incomplete. 
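   Still, as a worked illustration of the weights below (assuming the
   usual IA-64 constraint meanings: I is a 14-bit signed add immediate,
   J a 22-bit signed addl immediate): a CONST_INT added to a register is
   free when it fits the 14-bit add form, almost free when only the
   22-bit form fits, and a full insn otherwise, while a SYMBOL_REF is
   charged three insns, roughly the cost of materializing an address.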
*/ 5640 5641static bool 5642ia64_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, 5643 int *total, bool speed ATTRIBUTE_UNUSED) 5644{ 5645 switch (code) 5646 { 5647 case CONST_INT: 5648 switch (outer_code) 5649 { 5650 case SET: 5651 *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1); 5652 return true; 5653 case PLUS: 5654 if (satisfies_constraint_I (x)) 5655 *total = 0; 5656 else if (satisfies_constraint_J (x)) 5657 *total = 1; 5658 else 5659 *total = COSTS_N_INSNS (1); 5660 return true; 5661 default: 5662 if (satisfies_constraint_K (x) || satisfies_constraint_L (x)) 5663 *total = 0; 5664 else 5665 *total = COSTS_N_INSNS (1); 5666 return true; 5667 } 5668 5669 case CONST_DOUBLE: 5670 *total = COSTS_N_INSNS (1); 5671 return true; 5672 5673 case CONST: 5674 case SYMBOL_REF: 5675 case LABEL_REF: 5676 *total = COSTS_N_INSNS (3); 5677 return true; 5678 5679 case FMA: 5680 *total = COSTS_N_INSNS (4); 5681 return true; 5682 5683 case MULT: 5684 /* For multiplies wider than HImode, we have to go to the FPU, 5685 which normally involves copies. Plus there's the latency 5686 of the multiply itself, and the latency of the instructions to 5687 transfer integer regs to FP regs. */ 5688 if (FLOAT_MODE_P (GET_MODE (x))) 5689 *total = COSTS_N_INSNS (4); 5690 else if (GET_MODE_SIZE (GET_MODE (x)) > 2) 5691 *total = COSTS_N_INSNS (10); 5692 else 5693 *total = COSTS_N_INSNS (2); 5694 return true; 5695 5696 case PLUS: 5697 case MINUS: 5698 if (FLOAT_MODE_P (GET_MODE (x))) 5699 { 5700 *total = COSTS_N_INSNS (4); 5701 return true; 5702 } 5703 /* FALLTHRU */ 5704 5705 case ASHIFT: 5706 case ASHIFTRT: 5707 case LSHIFTRT: 5708 *total = COSTS_N_INSNS (1); 5709 return true; 5710 5711 case DIV: 5712 case UDIV: 5713 case MOD: 5714 case UMOD: 5715 /* We make divide expensive, so that divide-by-constant will be 5716 optimized to a multiply. */ 5717 *total = COSTS_N_INSNS (60); 5718 return true; 5719 5720 default: 5721 return false; 5722 } 5723} 5724 5725/* Calculate the cost of moving data from a register in class FROM to 5726 one in class TO, using MODE. */ 5727 5728static int 5729ia64_register_move_cost (machine_mode mode, reg_class_t from, 5730 reg_class_t to) 5731{ 5732 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 5733 if (to == ADDL_REGS) 5734 to = GR_REGS; 5735 if (from == ADDL_REGS) 5736 from = GR_REGS; 5737 5738 /* All costs are symmetric, so reduce cases by putting the 5739 lower number class as the destination. */ 5740 if (from < to) 5741 { 5742 reg_class_t tmp = to; 5743 to = from, from = tmp; 5744 } 5745 5746 /* Moving from FR<->GR in XFmode must be more expensive than 2, 5747 so that we get secondary memory reloads. Between FR_REGS, 5748 we have to make this at least as expensive as memory_move_cost 5749 to avoid spectacularly poor register class preferencing. */ 5750 if (mode == XFmode || mode == RFmode) 5751 { 5752 if (to != GR_REGS || from != GR_REGS) 5753 return memory_move_cost (mode, to, false); 5754 else 5755 return 3; 5756 } 5757 5758 switch (to) 5759 { 5760 case PR_REGS: 5761 /* Moving between PR registers takes two insns. */ 5762 if (from == PR_REGS) 5763 return 3; 5764 /* Moving between PR and anything but GR is impossible. */ 5765 if (from != GR_REGS) 5766 return memory_move_cost (mode, to, false); 5767 break; 5768 5769 case BR_REGS: 5770 /* Moving between BR and anything but GR is impossible. 
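	 (Illustratively: a gr<->br copy gets the default cost of 2
	 returned at the bottom of this function, while pricing br<->fr
	 as a full memory round trip is what steers reload toward an
	 intermediate general register.)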
*/ 5771 if (from != GR_REGS && from != GR_AND_BR_REGS) 5772 return memory_move_cost (mode, to, false); 5773 break; 5774 5775 case AR_I_REGS: 5776 case AR_M_REGS: 5777 /* Moving between AR and anything but GR is impossible. */ 5778 if (from != GR_REGS) 5779 return memory_move_cost (mode, to, false); 5780 break; 5781 5782 case GR_REGS: 5783 case FR_REGS: 5784 case FP_REGS: 5785 case GR_AND_FR_REGS: 5786 case GR_AND_BR_REGS: 5787 case ALL_REGS: 5788 break; 5789 5790 default: 5791 gcc_unreachable (); 5792 } 5793 5794 return 2; 5795} 5796 5797/* Calculate the cost of moving data of MODE from a register to or from 5798 memory. */ 5799 5800static int 5801ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 5802 reg_class_t rclass, 5803 bool in ATTRIBUTE_UNUSED) 5804{ 5805 if (rclass == GENERAL_REGS 5806 || rclass == FR_REGS 5807 || rclass == FP_REGS 5808 || rclass == GR_AND_FR_REGS) 5809 return 4; 5810 else 5811 return 10; 5812} 5813 5814/* Implement TARGET_PREFERRED_RELOAD_CLASS. Place additional restrictions 5815 on RCLASS to use when copying X into that class. */ 5816 5817static reg_class_t 5818ia64_preferred_reload_class (rtx x, reg_class_t rclass) 5819{ 5820 switch (rclass) 5821 { 5822 case FR_REGS: 5823 case FP_REGS: 5824 /* Don't allow volatile mem reloads into floating point registers. 5825 This is defined to force reload to choose the r/m case instead 5826 of the f/f case when reloading (set (reg fX) (mem/v)). */ 5827 if (MEM_P (x) && MEM_VOLATILE_P (x)) 5828 return NO_REGS; 5829 5830 /* Force all unrecognized constants into the constant pool. */ 5831 if (CONSTANT_P (x)) 5832 return NO_REGS; 5833 break; 5834 5835 case AR_M_REGS: 5836 case AR_I_REGS: 5837 if (!OBJECT_P (x)) 5838 return NO_REGS; 5839 break; 5840 5841 default: 5842 break; 5843 } 5844 5845 return rclass; 5846} 5847 5848/* This function returns the register class required for a secondary 5849 register when copying between one of the registers in RCLASS, and X, 5850 using MODE. A return value of NO_REGS means that no secondary register 5851 is required. */ 5852 5853enum reg_class 5854ia64_secondary_reload_class (enum reg_class rclass, 5855 machine_mode mode ATTRIBUTE_UNUSED, rtx x) 5856{ 5857 int regno = -1; 5858 5859 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 5860 regno = true_regnum (x); 5861 5862 switch (rclass) 5863 { 5864 case BR_REGS: 5865 case AR_M_REGS: 5866 case AR_I_REGS: 5867 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 5868 interaction. We end up with two pseudos with overlapping lifetimes 5869 both of which are equiv to the same constant, and both which need 5870 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 5871 changes depending on the path length, which means the qty_first_reg 5872 check in make_regs_eqv can give different answers at different times. 5873 At some point I'll probably need a reload_indi pattern to handle 5874 this. 5875 5876 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 5877 wound up with a FP register from GR_AND_FR_REGS. Extend that to all 5878 non-general registers for good measure. */ 5879 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 5880 return GR_REGS; 5881 5882 /* This is needed if a pseudo used as a call_operand gets spilled to a 5883 stack slot. */ 5884 if (GET_CODE (x) == MEM) 5885 return GR_REGS; 5886 break; 5887 5888 case FR_REGS: 5889 case FP_REGS: 5890 /* Need to go through general registers to get to other class regs. */ 5891 if (regno >= 0 && ! 
(FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
	return GR_REGS;

      /* This can happen when a paradoxical subreg is an operand to the
	 muldi3 pattern.  */
      /* ??? This shouldn't be necessary after instruction scheduling is
	 enabled, because paradoxical subregs are not accepted by
	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
	 stop the paradoxical subreg stupidity in the *_operand functions
	 in recog.c.  */
      if (GET_CODE (x) == MEM
	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	      || GET_MODE (x) == QImode))
	return GR_REGS;

      /* This can happen because of the ior/and/etc patterns that accept FP
	 registers as operands.  If the third operand is a constant, then it
	 needs to be reloaded into an FP register.  */
      if (GET_CODE (x) == CONST_INT)
	return GR_REGS;

      /* This can happen because of register elimination in a muldi3 insn.
	 E.g. `26107 * (unsigned long)&u'.  */
      if (GET_CODE (x) == PLUS)
	return GR_REGS;
      break;

    case PR_REGS:
      /* ??? This happens if we cse/gcse a BImode value across a call,
	 and the function has a nonlocal goto.  This is because global
	 does not allocate call crossing pseudos to hard registers when
	 crtl->has_nonlocal_goto is true.  This is relatively
	 common for C++ programs that use exceptions.  To reproduce,
	 return NO_REGS and compile libstdc++.  */
      if (GET_CODE (x) == MEM)
	return GR_REGS;

      /* This can happen when we take a BImode subreg of a DImode value,
	 and that DImode value winds up in some non-GR register.  */
      if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
	return GR_REGS;
      break;

    default:
      break;
    }

  return NO_REGS;
}


/* Implement targetm.unspec_may_trap_p hook.  */
static int
ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_LDA:
    case UNSPEC_LDS:
    case UNSPEC_LDSA:
    case UNSPEC_LDCCLR:
    case UNSPEC_CHKACLR:
    case UNSPEC_CHKS:
      /* These unspecs are just wrappers.  */
      return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
    }

  return default_unspec_may_trap_p (x, flags);
}


/* Parse the -mfixed-range= option string.  */

static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2}, where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  This is
     used, e.g., to ensure that kernel mode code doesn't use f32-f127.
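
     As a usage sketch (the option syntax is inferred from the parsing
     below, which splits each range on '-' and separates ranges with
     ','):

       -mfixed-range=f32-f127
       -mfixed-range=f12-f15,f32-f127

     Every register in each named range ends up marked both fixed and
     call-used.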
*/

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';

      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}

/* Implement TARGET_OPTION_OVERRIDE.  */

static void
ia64_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) ia64_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }
      }

  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA-based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization,
     so it is enabled only when optimizing for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = (global_options_set.x_g_switch_value
			    ? g_switch_value
			    : IA64_DEFAULT_GVALUE);

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)
    align_loops = 32;
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  ia64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
ia64_override_options_after_change (void)
{
  if (optimize >= 3
      && !global_options_set.x_flag_selective_scheduling
      && !global_options_set.x_flag_selective_scheduling2)
    {
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
    }
  if (mflag_sched_control_spec == 2)
    {
      /* Control speculation is on by default for the selective scheduler,
	 but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
    }
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
    {
      /* FIXME: remove this when we'd implement breaking autoinsns as
	 a transformation.  */
      flag_auto_inc_dec = 0;
    }
}

/* Initialize the record of emitted frame related registers.
*/ 6105 6106void ia64_init_expanders (void) 6107{ 6108 memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs)); 6109} 6110 6111static struct machine_function * 6112ia64_init_machine_status (void) 6113{ 6114 return ggc_cleared_alloc<machine_function> (); 6115} 6116 6117static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *); 6118static enum attr_type ia64_safe_type (rtx_insn *); 6119 6120static enum attr_itanium_class 6121ia64_safe_itanium_class (rtx_insn *insn) 6122{ 6123 if (recog_memoized (insn) >= 0) 6124 return get_attr_itanium_class (insn); 6125 else if (DEBUG_INSN_P (insn)) 6126 return ITANIUM_CLASS_IGNORE; 6127 else 6128 return ITANIUM_CLASS_UNKNOWN; 6129} 6130 6131static enum attr_type 6132ia64_safe_type (rtx_insn *insn) 6133{ 6134 if (recog_memoized (insn) >= 0) 6135 return get_attr_type (insn); 6136 else 6137 return TYPE_UNKNOWN; 6138} 6139 6140/* The following collection of routines emit instruction group stop bits as 6141 necessary to avoid dependencies. */ 6142 6143/* Need to track some additional registers as far as serialization is 6144 concerned so we can properly handle br.call and br.ret. We could 6145 make these registers visible to gcc, but since these registers are 6146 never explicitly used in gcc generated code, it seems wasteful to 6147 do so (plus it would make the call and return patterns needlessly 6148 complex). */ 6149#define REG_RP (BR_REG (0)) 6150#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 6151/* This is used for volatile asms which may require a stop bit immediately 6152 before and after them. */ 6153#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 6154#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 6155#define NUM_REGS (AR_UNAT_BIT_0 + 64) 6156 6157/* For each register, we keep track of how it has been written in the 6158 current instruction group. 6159 6160 If a register is written unconditionally (no qualifying predicate), 6161 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 6162 6163 If a register is written if its qualifying predicate P is true, we 6164 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register 6165 may be written again by the complement of P (P^1) and when this happens, 6166 WRITE_COUNT gets set to 2. 6167 6168 The result of this is that whenever an insn attempts to write a register 6169 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 6170 6171 If a predicate register is written by a floating-point insn, we set 6172 WRITTEN_BY_FP to true. 6173 6174 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 6175 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 6176 6177#if GCC_VERSION >= 4000 6178#define RWS_FIELD_TYPE __extension__ unsigned short 6179#else 6180#define RWS_FIELD_TYPE unsigned int 6181#endif 6182struct reg_write_state 6183{ 6184 RWS_FIELD_TYPE write_count : 2; 6185 RWS_FIELD_TYPE first_pred : 10; 6186 RWS_FIELD_TYPE written_by_fp : 1; 6187 RWS_FIELD_TYPE written_by_and : 1; 6188 RWS_FIELD_TYPE written_by_or : 1; 6189}; 6190 6191/* Cumulative info for the current instruction group. */ 6192struct reg_write_state rws_sum[NUM_REGS]; 6193#ifdef ENABLE_CHECKING 6194/* Bitmap whether a register has been written in the current insn. 
*/ 6195HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1) 6196 / HOST_BITS_PER_WIDEST_FAST_INT]; 6197 6198static inline void 6199rws_insn_set (int regno) 6200{ 6201 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno)); 6202 SET_HARD_REG_BIT (rws_insn, regno); 6203} 6204 6205static inline int 6206rws_insn_test (int regno) 6207{ 6208 return TEST_HARD_REG_BIT (rws_insn, regno); 6209} 6210#else 6211/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */ 6212unsigned char rws_insn[2]; 6213 6214static inline void 6215rws_insn_set (int regno) 6216{ 6217 if (regno == REG_AR_CFM) 6218 rws_insn[0] = 1; 6219 else if (regno == REG_VOLATILE) 6220 rws_insn[1] = 1; 6221} 6222 6223static inline int 6224rws_insn_test (int regno) 6225{ 6226 if (regno == REG_AR_CFM) 6227 return rws_insn[0]; 6228 if (regno == REG_VOLATILE) 6229 return rws_insn[1]; 6230 return 0; 6231} 6232#endif 6233 6234/* Indicates whether this is the first instruction after a stop bit, 6235 in which case we don't need another stop bit. Without this, 6236 ia64_variable_issue will die when scheduling an alloc. */ 6237static int first_instruction; 6238 6239/* Misc flags needed to compute RAW/WAW dependencies while we are traversing 6240 RTL for one instruction. */ 6241struct reg_flags 6242{ 6243 unsigned int is_write : 1; /* Is register being written? */ 6244 unsigned int is_fp : 1; /* Is register used as part of an fp op? */ 6245 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 6246 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 6247 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 6248 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ 6249}; 6250 6251static void rws_update (int, struct reg_flags, int); 6252static int rws_access_regno (int, struct reg_flags, int); 6253static int rws_access_reg (rtx, struct reg_flags, int); 6254static void update_set_flags (rtx, struct reg_flags *); 6255static int set_src_needs_barrier (rtx, struct reg_flags, int); 6256static int rtx_needs_barrier (rtx, struct reg_flags, int); 6257static void init_insn_group_barriers (void); 6258static int group_barrier_needed (rtx_insn *); 6259static int safe_group_barrier_needed (rtx_insn *); 6260static int in_safe_group_barrier; 6261 6262/* Update *RWS for REGNO, which is being written by the current instruction, 6263 with predicate PRED, and associated register flags in FLAGS. */ 6264 6265static void 6266rws_update (int regno, struct reg_flags flags, int pred) 6267{ 6268 if (pred) 6269 rws_sum[regno].write_count++; 6270 else 6271 rws_sum[regno].write_count = 2; 6272 rws_sum[regno].written_by_fp |= flags.is_fp; 6273 /* ??? Not tracking and/or across differing predicates. */ 6274 rws_sum[regno].written_by_and = flags.is_and; 6275 rws_sum[regno].written_by_or = flags.is_or; 6276 rws_sum[regno].first_pred = pred; 6277} 6278 6279/* Handle an access to register REGNO of type FLAGS using predicate register 6280 PRED. Update rws_sum array. Return 1 if this access creates 6281 a dependency with an earlier instruction in the same group. */ 6282 6283static int 6284rws_access_regno (int regno, struct reg_flags flags, int pred) 6285{ 6286 int need_barrier = 0; 6287 6288 gcc_assert (regno < NUM_REGS); 6289 6290 if (! 
PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like an unconditional write and do not try to check
	     for a complementary pred reg in the earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow us to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}

static int
rws_access_reg (rtx reg, struct reg_flags flags, int pred)
{
  int regno = REGNO (reg);
  int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));

  if (n == 1)
    return rws_access_regno (regno, flags, pred);
  else
    {
      int need_barrier = 0;
      while (--n >= 0)
	need_barrier |= rws_access_regno (regno + n, flags, pred);
      return need_barrier;
    }
}

/* Examine X, which is a SET rtx, and update the register flags stored
   in *PFLAGS to reflect the kind of value being assigned.
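
   For example (an illustrative SET, not taken from a real dump), given

     (set (reg:BI p6) (and:BI (ne:BI (reg:DI r14) (const_int 0))
			      (reg:BI p7)))

   the AND source sets PFLAGS->is_and, marking this as an and.orcm-style
   parallel compare so that rws_access_regno can tolerate the write from
   the complementary predicate without demanding a stop bit.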
*/ 6415 6416static void 6417update_set_flags (rtx x, struct reg_flags *pflags) 6418{ 6419 rtx src = SET_SRC (x); 6420 6421 switch (GET_CODE (src)) 6422 { 6423 case CALL: 6424 return; 6425 6426 case IF_THEN_ELSE: 6427 /* There are four cases here: 6428 (1) The destination is (pc), in which case this is a branch, 6429 nothing here applies. 6430 (2) The destination is ar.lc, in which case this is a 6431 doloop_end_internal, 6432 (3) The destination is an fp register, in which case this is 6433 an fselect instruction. 6434 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case 6435 this is a check load. 6436 In all cases, nothing we do in this function applies. */ 6437 return; 6438 6439 default: 6440 if (COMPARISON_P (src) 6441 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) 6442 /* Set pflags->is_fp to 1 so that we know we're dealing 6443 with a floating point comparison when processing the 6444 destination of the SET. */ 6445 pflags->is_fp = 1; 6446 6447 /* Discover if this is a parallel comparison. We only handle 6448 and.orcm and or.andcm at present, since we must retain a 6449 strict inverse on the predicate pair. */ 6450 else if (GET_CODE (src) == AND) 6451 pflags->is_and = 1; 6452 else if (GET_CODE (src) == IOR) 6453 pflags->is_or = 1; 6454 6455 break; 6456 } 6457} 6458 6459/* Subroutine of rtx_needs_barrier; this function determines whether the 6460 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 6461 are as in rtx_needs_barrier. COND is an rtx that holds the condition 6462 for this insn. */ 6463 6464static int 6465set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) 6466{ 6467 int need_barrier = 0; 6468 rtx dst; 6469 rtx src = SET_SRC (x); 6470 6471 if (GET_CODE (src) == CALL) 6472 /* We don't need to worry about the result registers that 6473 get written by subroutine call. */ 6474 return rtx_needs_barrier (src, flags, pred); 6475 else if (SET_DEST (x) == pc_rtx) 6476 { 6477 /* X is a conditional branch. */ 6478 /* ??? This seems redundant, as the caller sets this bit for 6479 all JUMP_INSNs. */ 6480 if (!ia64_spec_check_src_p (src)) 6481 flags.is_branch = 1; 6482 return rtx_needs_barrier (src, flags, pred); 6483 } 6484 6485 if (ia64_spec_check_src_p (src)) 6486 /* Avoid checking one register twice (in condition 6487 and in 'then' section) for ldc pattern. */ 6488 { 6489 gcc_assert (REG_P (XEXP (src, 2))); 6490 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); 6491 6492 /* We process MEM below. */ 6493 src = XEXP (src, 1); 6494 } 6495 6496 need_barrier |= rtx_needs_barrier (src, flags, pred); 6497 6498 dst = SET_DEST (x); 6499 if (GET_CODE (dst) == ZERO_EXTRACT) 6500 { 6501 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 6502 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 6503 } 6504 return need_barrier; 6505} 6506 6507/* Handle an access to rtx X of type FLAGS using predicate register 6508 PRED. Return 1 if this access creates a dependency with an earlier 6509 instruction in the same group. */ 6510 6511static int 6512rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) 6513{ 6514 int i, j; 6515 int is_complemented = 0; 6516 int need_barrier = 0; 6517 const char *format_ptr; 6518 struct reg_flags new_flags; 6519 rtx cond; 6520 6521 if (! 
x) 6522 return 0; 6523 6524 new_flags = flags; 6525 6526 switch (GET_CODE (x)) 6527 { 6528 case SET: 6529 update_set_flags (x, &new_flags); 6530 need_barrier = set_src_needs_barrier (x, new_flags, pred); 6531 if (GET_CODE (SET_SRC (x)) != CALL) 6532 { 6533 new_flags.is_write = 1; 6534 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 6535 } 6536 break; 6537 6538 case CALL: 6539 new_flags.is_write = 0; 6540 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 6541 6542 /* Avoid multiple register writes, in case this is a pattern with 6543 multiple CALL rtx. This avoids a failure in rws_access_reg. */ 6544 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM)) 6545 { 6546 new_flags.is_write = 1; 6547 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 6548 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 6549 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 6550 } 6551 break; 6552 6553 case COND_EXEC: 6554 /* X is a predicated instruction. */ 6555 6556 cond = COND_EXEC_TEST (x); 6557 gcc_assert (!pred); 6558 need_barrier = rtx_needs_barrier (cond, flags, 0); 6559 6560 if (GET_CODE (cond) == EQ) 6561 is_complemented = 1; 6562 cond = XEXP (cond, 0); 6563 gcc_assert (GET_CODE (cond) == REG 6564 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); 6565 pred = REGNO (cond); 6566 if (is_complemented) 6567 ++pred; 6568 6569 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 6570 return need_barrier; 6571 6572 case CLOBBER: 6573 case USE: 6574 /* Clobber & use are for earlier compiler-phases only. */ 6575 break; 6576 6577 case ASM_OPERANDS: 6578 case ASM_INPUT: 6579 /* We always emit stop bits for traditional asms. We emit stop bits 6580 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 6581 if (GET_CODE (x) != ASM_OPERANDS 6582 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 6583 { 6584 /* Avoid writing the register multiple times if we have multiple 6585 asm outputs. This avoids a failure in rws_access_reg. */ 6586 if (! rws_insn_test (REG_VOLATILE)) 6587 { 6588 new_flags.is_write = 1; 6589 rws_access_regno (REG_VOLATILE, new_flags, pred); 6590 } 6591 return 1; 6592 } 6593 6594 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 6595 We cannot just fall through here since then we would be confused 6596 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate 6597 traditional asms unlike their normal usage. 
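   (Concretely, the only asms that reach the loop below are extended
   asms that are either not volatile or compiled without
   TARGET_VOL_ASM_STOP, and for those just the input operands need the
   usual dependency walk.)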
*/ 6598 6599 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 6600 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 6601 need_barrier = 1; 6602 break; 6603 6604 case PARALLEL: 6605 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6606 { 6607 rtx pat = XVECEXP (x, 0, i); 6608 switch (GET_CODE (pat)) 6609 { 6610 case SET: 6611 update_set_flags (pat, &new_flags); 6612 need_barrier |= set_src_needs_barrier (pat, new_flags, pred); 6613 break; 6614 6615 case USE: 6616 case CALL: 6617 case ASM_OPERANDS: 6618 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6619 break; 6620 6621 case CLOBBER: 6622 if (REG_P (XEXP (pat, 0)) 6623 && extract_asm_operands (x) != NULL_RTX 6624 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM) 6625 { 6626 new_flags.is_write = 1; 6627 need_barrier |= rtx_needs_barrier (XEXP (pat, 0), 6628 new_flags, pred); 6629 new_flags = flags; 6630 } 6631 break; 6632 6633 case RETURN: 6634 break; 6635 6636 default: 6637 gcc_unreachable (); 6638 } 6639 } 6640 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6641 { 6642 rtx pat = XVECEXP (x, 0, i); 6643 if (GET_CODE (pat) == SET) 6644 { 6645 if (GET_CODE (SET_SRC (pat)) != CALL) 6646 { 6647 new_flags.is_write = 1; 6648 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 6649 pred); 6650 } 6651 } 6652 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 6653 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6654 } 6655 break; 6656 6657 case SUBREG: 6658 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); 6659 break; 6660 case REG: 6661 if (REGNO (x) == AR_UNAT_REGNUM) 6662 { 6663 for (i = 0; i < 64; ++i) 6664 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 6665 } 6666 else 6667 need_barrier = rws_access_reg (x, flags, pred); 6668 break; 6669 6670 case MEM: 6671 /* Find the regs used in memory address computation. */ 6672 new_flags.is_write = 0; 6673 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6674 break; 6675 6676 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: 6677 case SYMBOL_REF: case LABEL_REF: case CONST: 6678 break; 6679 6680 /* Operators with side-effects. */ 6681 case POST_INC: case POST_DEC: 6682 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6683 6684 new_flags.is_write = 0; 6685 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6686 new_flags.is_write = 1; 6687 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6688 break; 6689 6690 case POST_MODIFY: 6691 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6692 6693 new_flags.is_write = 0; 6694 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6695 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6696 new_flags.is_write = 1; 6697 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6698 break; 6699 6700 /* Handle common unary and binary ops for efficiency. 
*/ 6701 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 6702 case MOD: case UDIV: case UMOD: case AND: case IOR: 6703 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 6704 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 6705 case NE: case EQ: case GE: case GT: case LE: 6706 case LT: case GEU: case GTU: case LEU: case LTU: 6707 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6708 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6709 break; 6710 6711 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 6712 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 6713 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 6714 case SQRT: case FFS: case POPCOUNT: 6715 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6716 break; 6717 6718 case VEC_SELECT: 6719 /* VEC_SELECT's second argument is a PARALLEL with integers that 6720 describe the elements selected. On ia64, those integers are 6721 always constants. Avoid walking the PARALLEL so that we don't 6722 get confused with "normal" parallels and then die. */ 6723 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6724 break; 6725 6726 case UNSPEC: 6727 switch (XINT (x, 1)) 6728 { 6729 case UNSPEC_LTOFF_DTPMOD: 6730 case UNSPEC_LTOFF_DTPREL: 6731 case UNSPEC_DTPREL: 6732 case UNSPEC_LTOFF_TPREL: 6733 case UNSPEC_TPREL: 6734 case UNSPEC_PRED_REL_MUTEX: 6735 case UNSPEC_PIC_CALL: 6736 case UNSPEC_MF: 6737 case UNSPEC_FETCHADD_ACQ: 6738 case UNSPEC_FETCHADD_REL: 6739 case UNSPEC_BSP_VALUE: 6740 case UNSPEC_FLUSHRS: 6741 case UNSPEC_BUNDLE_SELECTOR: 6742 break; 6743 6744 case UNSPEC_GR_SPILL: 6745 case UNSPEC_GR_RESTORE: 6746 { 6747 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 6748 HOST_WIDE_INT bit = (offset >> 3) & 63; 6749 6750 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6751 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); 6752 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 6753 new_flags, pred); 6754 break; 6755 } 6756 6757 case UNSPEC_FR_SPILL: 6758 case UNSPEC_FR_RESTORE: 6759 case UNSPEC_GETF_EXP: 6760 case UNSPEC_SETF_EXP: 6761 case UNSPEC_ADDP4: 6762 case UNSPEC_FR_SQRT_RECIP_APPROX: 6763 case UNSPEC_FR_SQRT_RECIP_APPROX_RES: 6764 case UNSPEC_LDA: 6765 case UNSPEC_LDS: 6766 case UNSPEC_LDS_A: 6767 case UNSPEC_LDSA: 6768 case UNSPEC_CHKACLR: 6769 case UNSPEC_CHKS: 6770 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6771 break; 6772 6773 case UNSPEC_FR_RECIP_APPROX: 6774 case UNSPEC_SHRP: 6775 case UNSPEC_COPYSIGN: 6776 case UNSPEC_FR_RECIP_APPROX_RES: 6777 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6778 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6779 break; 6780 6781 case UNSPEC_CMPXCHG_ACQ: 6782 case UNSPEC_CMPXCHG_REL: 6783 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6784 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 6785 break; 6786 6787 default: 6788 gcc_unreachable (); 6789 } 6790 break; 6791 6792 case UNSPEC_VOLATILE: 6793 switch (XINT (x, 1)) 6794 { 6795 case UNSPECV_ALLOC: 6796 /* Alloc must always be the first instruction of a group. 6797 We force this by always returning true. */ 6798 /* ??? We might get better scheduling if we explicitly check for 6799 input/local/output register dependencies, and modify the 6800 scheduler so that alloc is always reordered to the start of 6801 the current group. We could then eliminate all of the 6802 first_instruction code. 
*/ 6803 rws_access_regno (AR_PFS_REGNUM, flags, pred); 6804 6805 new_flags.is_write = 1; 6806 rws_access_regno (REG_AR_CFM, new_flags, pred); 6807 return 1; 6808 6809 case UNSPECV_SET_BSP: 6810 case UNSPECV_PROBE_STACK_RANGE: 6811 need_barrier = 1; 6812 break; 6813 6814 case UNSPECV_BLOCKAGE: 6815 case UNSPECV_INSN_GROUP_BARRIER: 6816 case UNSPECV_BREAK: 6817 case UNSPECV_PSAC_ALL: 6818 case UNSPECV_PSAC_NORMAL: 6819 return 0; 6820 6821 case UNSPECV_PROBE_STACK_ADDRESS: 6822 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6823 break; 6824 6825 default: 6826 gcc_unreachable (); 6827 } 6828 break; 6829 6830 case RETURN: 6831 new_flags.is_write = 0; 6832 need_barrier = rws_access_regno (REG_RP, flags, pred); 6833 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 6834 6835 new_flags.is_write = 1; 6836 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 6837 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 6838 break; 6839 6840 default: 6841 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 6842 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 6843 switch (format_ptr[i]) 6844 { 6845 case '0': /* unused field */ 6846 case 'i': /* integer */ 6847 case 'n': /* note */ 6848 case 'w': /* wide integer */ 6849 case 's': /* pointer to string */ 6850 case 'S': /* optional pointer to string */ 6851 break; 6852 6853 case 'e': 6854 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 6855 need_barrier = 1; 6856 break; 6857 6858 case 'E': 6859 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 6860 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 6861 need_barrier = 1; 6862 break; 6863 6864 default: 6865 gcc_unreachable (); 6866 } 6867 break; 6868 } 6869 return need_barrier; 6870} 6871 6872/* Clear out the state for group_barrier_needed at the start of a 6873 sequence of insns. */ 6874 6875static void 6876init_insn_group_barriers (void) 6877{ 6878 memset (rws_sum, 0, sizeof (rws_sum)); 6879 first_instruction = 1; 6880} 6881 6882/* Given the current state, determine whether a group barrier (a stop bit) is 6883 necessary before INSN. Return nonzero if so. This modifies the state to 6884 include the effects of INSN as a side-effect. */ 6885 6886static int 6887group_barrier_needed (rtx_insn *insn) 6888{ 6889 rtx pat; 6890 int need_barrier = 0; 6891 struct reg_flags flags; 6892 6893 memset (&flags, 0, sizeof (flags)); 6894 switch (GET_CODE (insn)) 6895 { 6896 case NOTE: 6897 case DEBUG_INSN: 6898 break; 6899 6900 case BARRIER: 6901 /* A barrier doesn't imply an instruction group boundary. */ 6902 break; 6903 6904 case CODE_LABEL: 6905 memset (rws_insn, 0, sizeof (rws_insn)); 6906 return 1; 6907 6908 case CALL_INSN: 6909 flags.is_branch = 1; 6910 flags.is_sibcall = SIBLING_CALL_P (insn); 6911 memset (rws_insn, 0, sizeof (rws_insn)); 6912 6913 /* Don't bundle a call following another call. */ 6914 if ((pat = prev_active_insn (insn)) && CALL_P (pat)) 6915 { 6916 need_barrier = 1; 6917 break; 6918 } 6919 6920 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 6921 break; 6922 6923 case JUMP_INSN: 6924 if (!ia64_spec_check_p (insn)) 6925 flags.is_branch = 1; 6926 6927 /* Don't bundle a jump following a call. 
*/ 6928 if ((pat = prev_active_insn (insn)) && CALL_P (pat)) 6929 { 6930 need_barrier = 1; 6931 break; 6932 } 6933 /* FALLTHRU */ 6934 6935 case INSN: 6936 if (GET_CODE (PATTERN (insn)) == USE 6937 || GET_CODE (PATTERN (insn)) == CLOBBER) 6938 /* Don't care about USE and CLOBBER "insns"---those are used to 6939 indicate to the optimizer that it shouldn't get rid of 6940 certain operations. */ 6941 break; 6942 6943 pat = PATTERN (insn); 6944 6945 /* Ug. Hack hacks hacked elsewhere. */ 6946 switch (recog_memoized (insn)) 6947 { 6948 /* We play dependency tricks with the epilogue in order 6949 to get proper schedules. Undo this for dv analysis. */ 6950 case CODE_FOR_epilogue_deallocate_stack: 6951 case CODE_FOR_prologue_allocate_stack: 6952 pat = XVECEXP (pat, 0, 0); 6953 break; 6954 6955 /* The pattern we use for br.cloop confuses the code above. 6956 The second element of the vector is representative. */ 6957 case CODE_FOR_doloop_end_internal: 6958 pat = XVECEXP (pat, 0, 1); 6959 break; 6960 6961 /* Doesn't generate code. */ 6962 case CODE_FOR_pred_rel_mutex: 6963 case CODE_FOR_prologue_use: 6964 return 0; 6965 6966 default: 6967 break; 6968 } 6969 6970 memset (rws_insn, 0, sizeof (rws_insn)); 6971 need_barrier = rtx_needs_barrier (pat, flags, 0); 6972 6973 /* Check to see if the previous instruction was a volatile 6974 asm. */ 6975 if (! need_barrier) 6976 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 6977 6978 break; 6979 6980 default: 6981 gcc_unreachable (); 6982 } 6983 6984 if (first_instruction && important_for_bundling_p (insn)) 6985 { 6986 need_barrier = 0; 6987 first_instruction = 0; 6988 } 6989 6990 return need_barrier; 6991} 6992 6993/* Like group_barrier_needed, but do not clobber the current state. */ 6994 6995static int 6996safe_group_barrier_needed (rtx_insn *insn) 6997{ 6998 int saved_first_instruction; 6999 int t; 7000 7001 saved_first_instruction = first_instruction; 7002 in_safe_group_barrier = 1; 7003 7004 t = group_barrier_needed (insn); 7005 7006 first_instruction = saved_first_instruction; 7007 in_safe_group_barrier = 0; 7008 7009 return t; 7010} 7011 7012/* Scan the current function and insert stop bits as necessary to 7013 eliminate dependencies. This function assumes that a final 7014 instruction scheduling pass has been run which has already 7015 inserted most of the necessary stop bits. This function only 7016 inserts new ones at basic block boundaries, since these are 7017 invisible to the scheduler. 
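
   As an illustrative fragment of the resulting assembly (label and
   registers hypothetical), a stop bit re-emitted ahead of a label
   would look like:

	add r14 = r15, r16
	;;			// stop bit inserted before the label
   .L2:
	ld8 r17 = [r14]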
*/ 7018 7019static void 7020emit_insn_group_barriers (FILE *dump) 7021{ 7022 rtx_insn *insn; 7023 rtx_insn *last_label = 0; 7024 int insns_since_last_label = 0; 7025 7026 init_insn_group_barriers (); 7027 7028 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7029 { 7030 if (LABEL_P (insn)) 7031 { 7032 if (insns_since_last_label) 7033 last_label = insn; 7034 insns_since_last_label = 0; 7035 } 7036 else if (NOTE_P (insn) 7037 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK) 7038 { 7039 if (insns_since_last_label) 7040 last_label = insn; 7041 insns_since_last_label = 0; 7042 } 7043 else if (NONJUMP_INSN_P (insn) 7044 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 7045 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 7046 { 7047 init_insn_group_barriers (); 7048 last_label = 0; 7049 } 7050 else if (NONDEBUG_INSN_P (insn)) 7051 { 7052 insns_since_last_label = 1; 7053 7054 if (group_barrier_needed (insn)) 7055 { 7056 if (last_label) 7057 { 7058 if (dump) 7059 fprintf (dump, "Emitting stop before label %d\n", 7060 INSN_UID (last_label)); 7061 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 7062 insn = last_label; 7063 7064 init_insn_group_barriers (); 7065 last_label = 0; 7066 } 7067 } 7068 } 7069 } 7070} 7071 7072/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 7073 This function has to emit all necessary group barriers. */ 7074 7075static void 7076emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) 7077{ 7078 rtx_insn *insn; 7079 7080 init_insn_group_barriers (); 7081 7082 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7083 { 7084 if (BARRIER_P (insn)) 7085 { 7086 rtx_insn *last = prev_active_insn (insn); 7087 7088 if (! last) 7089 continue; 7090 if (JUMP_TABLE_DATA_P (last)) 7091 last = prev_active_insn (last); 7092 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 7093 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 7094 7095 init_insn_group_barriers (); 7096 } 7097 else if (NONDEBUG_INSN_P (insn)) 7098 { 7099 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 7100 init_insn_group_barriers (); 7101 else if (group_barrier_needed (insn)) 7102 { 7103 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 7104 init_insn_group_barriers (); 7105 group_barrier_needed (insn); 7106 } 7107 } 7108 } 7109} 7110 7111 7112 7113/* Instruction scheduling support. */ 7114 7115#define NR_BUNDLES 10 7116 7117/* A list of names of all available bundles. */ 7118 7119static const char *bundle_name [NR_BUNDLES] = 7120{ 7121 ".mii", 7122 ".mmi", 7123 ".mfi", 7124 ".mmf", 7125#if NR_BUNDLES == 10 7126 ".bbb", 7127 ".mbb", 7128#endif 7129 ".mib", 7130 ".mmb", 7131 ".mfb", 7132 ".mlx" 7133}; 7134 7135/* Nonzero if we should insert stop bits into the schedule. */ 7136 7137int ia64_final_schedule = 0; 7138 7139/* Codes of the corresponding queried units: */ 7140 7141static int _0mii_, _0mmi_, _0mfi_, _0mmf_; 7142static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; 7143 7144static int _1mii_, _1mmi_, _1mfi_, _1mmf_; 7145static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; 7146 7147static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; 7148 7149/* The following variable value is an insn group barrier. */ 7150 7151static rtx_insn *dfa_stop_insn; 7152 7153/* The following variable value is the last issued insn. */ 7154 7155static rtx_insn *last_scheduled_insn; 7156 7157/* The following variable value is pointer to a DFA state used as 7158 temporary variable. 
*/ 7159 7160static state_t temp_dfa_state = NULL; 7161 7162/* The following variable value is DFA state after issuing the last 7163 insn. */ 7164 7165static state_t prev_cycle_state = NULL; 7166 7167/* The following array element values are TRUE if the corresponding 7168 insn requires to add stop bits before it. */ 7169 7170static char *stops_p = NULL; 7171 7172/* The following variable is used to set up the mentioned above array. */ 7173 7174static int stop_before_p = 0; 7175 7176/* The following variable value is length of the arrays `clocks' and 7177 `add_cycles'. */ 7178 7179static int clocks_length; 7180 7181/* The following variable value is number of data speculations in progress. */ 7182static int pending_data_specs = 0; 7183 7184/* Number of memory references on current and three future processor cycles. */ 7185static char mem_ops_in_group[4]; 7186 7187/* Number of current processor cycle (from scheduler's point of view). */ 7188static int current_cycle; 7189 7190static rtx ia64_single_set (rtx_insn *); 7191static void ia64_emit_insn_before (rtx, rtx); 7192 7193/* Map a bundle number to its pseudo-op. */ 7194 7195const char * 7196get_bundle_name (int b) 7197{ 7198 return bundle_name[b]; 7199} 7200 7201 7202/* Return the maximum number of instructions a cpu can issue. */ 7203 7204static int 7205ia64_issue_rate (void) 7206{ 7207 return 6; 7208} 7209 7210/* Helper function - like single_set, but look inside COND_EXEC. */ 7211 7212static rtx 7213ia64_single_set (rtx_insn *insn) 7214{ 7215 rtx x = PATTERN (insn), ret; 7216 if (GET_CODE (x) == COND_EXEC) 7217 x = COND_EXEC_CODE (x); 7218 if (GET_CODE (x) == SET) 7219 return x; 7220 7221 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. 7222 Although they are not classical single set, the second set is there just 7223 to protect it from moving past FP-relative stack accesses. */ 7224 switch (recog_memoized (insn)) 7225 { 7226 case CODE_FOR_prologue_allocate_stack: 7227 case CODE_FOR_prologue_allocate_stack_pr: 7228 case CODE_FOR_epilogue_deallocate_stack: 7229 case CODE_FOR_epilogue_deallocate_stack_pr: 7230 ret = XVECEXP (x, 0, 0); 7231 break; 7232 7233 default: 7234 ret = single_set_2 (insn, x); 7235 break; 7236 } 7237 7238 return ret; 7239} 7240 7241/* Adjust the cost of a scheduling dependency. 7242 Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN. 7243 COST is the current cost, DW is dependency weakness. */ 7244static int 7245ia64_adjust_cost_2 (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn, 7246 int cost, dw_t dw) 7247{ 7248 enum reg_note dep_type = (enum reg_note) dep_type1; 7249 enum attr_itanium_class dep_class; 7250 enum attr_itanium_class insn_class; 7251 7252 insn_class = ia64_safe_itanium_class (insn); 7253 dep_class = ia64_safe_itanium_class (dep_insn); 7254 7255 /* Treat true memory dependencies separately. Ignore apparent true 7256 dependence between store and call (call has a MEM inside a SYMBOL_REF). */ 7257 if (dep_type == REG_DEP_TRUE 7258 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF) 7259 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL)) 7260 return 0; 7261 7262 if (dw == MIN_DEP_WEAK) 7263 /* Store and load are likely to alias, use higher cost to avoid stall. */ 7264 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST); 7265 else if (dw > MIN_DEP_WEAK) 7266 { 7267 /* Store and load are less likely to alias. 
*/
      if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
	/* Assume there will be no cache conflict for floating-point data.
	   For integer data, L1 conflict penalty is huge (17 cycles), so we
	   never assume it will not cause a conflict.  */
	return 0;
      else
	return cost;
    }

  if (dep_type != REG_DEP_OUTPUT)
    return cost;

  if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
      || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
    return 0;

  return cost;
}

/* Like emit_insn_before, but skip cycle_display notes.
   ??? When cycle display notes are implemented, update this.  */

static void
ia64_emit_insn_before (rtx insn, rtx before)
{
  emit_insn_before (insn, before);
}

/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because that
   decreases latency on Itanium 1 (see function `ia64_produce_address_p'
   and the DFA descriptions).  */

static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}

/* We're beginning a new block.  Initialize data structures as necessary.  */

static void
ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		 int sched_verbose ATTRIBUTE_UNUSED,
		 int max_ready ATTRIBUTE_UNUSED)
{
#ifdef ENABLE_CHECKING
  rtx_insn *insn;

  if (!sel_sched_p () && reload_completed)
    for (insn = NEXT_INSN (current_sched_info->prev_head);
	 insn != current_sched_info->next_tail;
	 insn = NEXT_INSN (insn))
      gcc_assert (!SCHED_GROUP_P (insn));
#endif
  last_scheduled_insn = NULL;
  init_insn_group_barriers ();

  current_cycle = 0;
  memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
}

/* We're beginning a scheduling pass.  Check assertion.  */

static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int sched_verbose ATTRIBUTE_UNUSED,
			int max_ready ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Scheduling pass is now finished.
   Free/reset the static state.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  gcc_assert (pending_data_specs == 0);
}

/* Return TRUE if INSN is a load (either normal or speculative, but not a
   speculation check), FALSE otherwise.  */
static bool
is_load_p (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  return
    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
     && get_attr_check_load (insn) == CHECK_LOAD_NO);
}

/* If INSN is a memory reference, memoize it in the MEM_OPS_IN_GROUP global
   array (taking into account the 3-cycle cache reference postponing for
   stores: Intel Itanium 2 Reference Manual for Software Development and
   Optimization, 6.7.3.1).  */
static void
record_memory_reference (rtx_insn *insn)
{
  enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);

  switch (insn_class)
    {
    case ITANIUM_CLASS_FLD:
    case ITANIUM_CLASS_LD:
      mem_ops_in_group[current_cycle % 4]++;
      break;
    case ITANIUM_CLASS_STF:
    case ITANIUM_CLASS_ST:
      mem_ops_in_group[(current_cycle + 3) % 4]++;
      break;
    default:
      break;
    }
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
			int *pn_ready, int clock_var,
			int reorder_type)
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx_insn **e_ready = ready + n_ready;
  rtx_insn **insnp;

  if (sched_verbose)
    fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);

  if (reorder_type == 0)
    {
      /* First, move all USEs, CLOBBERs and other crud out of the way.  */
      n_asms = 0;
      for (insnp = ready; insnp < e_ready; insnp++)
	if (insnp < e_ready)
	  {
	    rtx_insn *insn = *insnp;
	    enum attr_type t = ia64_safe_type (insn);
	    if (t == TYPE_UNKNOWN)
	      {
		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
		    || asm_noperands (PATTERN (insn)) >= 0)
		  {
		    rtx_insn *lowest = ready[n_asms];
		    ready[n_asms] = insn;
		    *insnp = lowest;
		    n_asms++;
		  }
		else
		  {
		    rtx_insn *highest = ready[n_ready - 1];
		    ready[n_ready - 1] = insn;
		    *insnp = highest;
		    return 1;
		  }
	      }
	  }

      if (n_asms < n_ready)
	{
	  /* Some normal insns to process.  Skip the asms.  */
	  ready += n_asms;
	  n_ready -= n_asms;
	}
      else if (n_ready > 0)
	return 1;
    }

  if (ia64_final_schedule)
    {
      int deleted = 0;
      int nr_need_stop = 0;

      for (insnp = ready; insnp < e_ready; insnp++)
	if (safe_group_barrier_needed (*insnp))
	  nr_need_stop++;

      if (reorder_type == 1 && n_ready == nr_need_stop)
	return 0;
      if (reorder_type == 0)
	return 1;
      insnp = e_ready;
      /* Move down everything that needs a stop bit, preserving
	 relative order.  */
      while (insnp-- > ready + deleted)
	while (insnp >= ready + deleted)
	  {
	    rtx_insn *insn = *insnp;
	    if (! safe_group_barrier_needed (insn))
safe_group_barrier_needed (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    deleted++;
	  }
      n_ready -= deleted;
      ready += deleted;
    }

  current_cycle = clock_var;
  if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
    {
      int moved = 0;

      insnp = e_ready;
      /* Move down loads/stores, preserving relative order.  */
      while (insnp-- > ready + moved)
	while (insnp >= ready + moved)
	  {
	    rtx_insn *insn = *insnp;
	    if (! is_load_p (insn))
	      break;
	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
	    *ready = insn;
	    moved++;
	  }
      n_ready -= moved;
      ready += moved;
    }

  return 1;
}

/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}

/* Like ia64_sched_reorder, but called after issuing each insn.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
		     int *pn_ready, int clock_var)
{
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
				 clock_var, 1);
}

/* We are about to issue INSN.  Return the number of insns left on the
   ready queue that can be issued this cycle.  */

static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d if we don't have to.  */
    {
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}

/* We are choosing an insn from the ready queue.  Return zero if INSN
   can be chosen.  */

static int
ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
{
  gcc_assert (insn && INSN_P (insn));

  /* The size of the ALAT is 32.  Since we perform conservative
     data speculation, we keep the ALAT half-empty.  */
  if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
    return ready_index == 0 ?
-1 : 1;

  if (ready_index == 0)
    return 0;

  if ((!reload_completed
       || !safe_group_barrier_needed (insn))
      && (!mflag_sched_mem_insns_hard_limit
	  || !is_load_p (insn)
	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
    return 0;

  return 1;
}

/* The following variable value is a pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;

/* Returns 1 when a meaningful insn was scheduled between the last group
   barrier and LAST.  */
static int
scheduled_good_insn (rtx_insn *last)
{
  if (last && recog_memoized (last) >= 0)
    return 1;

  for ( ;
       last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
       && !stops_p[INSN_UID (last)];
       last = PREV_INSN (last))
    /* We could hit a NOTE_INSN_DELETED here which is actually outside
       the ebb we're scheduling.  */
    if (INSN_P (last) && recog_memoized (last) >= 0)
      return 1;

  return 0;
}

/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on the given cycle CLOCK; set *SORT_P to zero if the ready
   queue should not be sorted on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
	      || last_scheduled_insn);

  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}

/* Implement targetm.sched.h_i_d_extended hook.
   Extend internal data structures.  */
static void
ia64_h_i_d_extended (void)
{
  if (stops_p != NULL)
    {
      int new_clocks_length = get_max_uid () * 3 / 2;
      stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
      clocks_length = new_clocks_length;
    }
}


/* This structure describes the data used by the backend to guide scheduling.
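   It mirrors the file-level scheduling state kept in the globals of the
   same names (prev_cycle_state, last_scheduled_insn, rws_sum, rws_insn,
   pending_data_specs, current_cycle, mem_ops_in_group).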
7732 When the current scheduling point is switched, this data should be saved 7733 and restored later, if the scheduler returns to this point. */ 7734struct _ia64_sched_context 7735{ 7736 state_t prev_cycle_state; 7737 rtx_insn *last_scheduled_insn; 7738 struct reg_write_state rws_sum[NUM_REGS]; 7739 struct reg_write_state rws_insn[NUM_REGS]; 7740 int first_instruction; 7741 int pending_data_specs; 7742 int current_cycle; 7743 char mem_ops_in_group[4]; 7744}; 7745typedef struct _ia64_sched_context *ia64_sched_context_t; 7746 7747/* Allocates a scheduling context. */ 7748static void * 7749ia64_alloc_sched_context (void) 7750{ 7751 return xmalloc (sizeof (struct _ia64_sched_context)); 7752} 7753 7754/* Initializes the _SC context with clean data, if CLEAN_P, and from 7755 the global context otherwise. */ 7756static void 7757ia64_init_sched_context (void *_sc, bool clean_p) 7758{ 7759 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7760 7761 sc->prev_cycle_state = xmalloc (dfa_state_size); 7762 if (clean_p) 7763 { 7764 state_reset (sc->prev_cycle_state); 7765 sc->last_scheduled_insn = NULL; 7766 memset (sc->rws_sum, 0, sizeof (rws_sum)); 7767 memset (sc->rws_insn, 0, sizeof (rws_insn)); 7768 sc->first_instruction = 1; 7769 sc->pending_data_specs = 0; 7770 sc->current_cycle = 0; 7771 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group)); 7772 } 7773 else 7774 { 7775 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size); 7776 sc->last_scheduled_insn = last_scheduled_insn; 7777 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum)); 7778 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn)); 7779 sc->first_instruction = first_instruction; 7780 sc->pending_data_specs = pending_data_specs; 7781 sc->current_cycle = current_cycle; 7782 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group)); 7783 } 7784} 7785 7786/* Sets the global scheduling context to the one pointed to by _SC. */ 7787static void 7788ia64_set_sched_context (void *_sc) 7789{ 7790 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7791 7792 gcc_assert (sc != NULL); 7793 7794 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size); 7795 last_scheduled_insn = sc->last_scheduled_insn; 7796 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum)); 7797 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn)); 7798 first_instruction = sc->first_instruction; 7799 pending_data_specs = sc->pending_data_specs; 7800 current_cycle = sc->current_cycle; 7801 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group)); 7802} 7803 7804/* Clears the data in the _SC scheduling context. */ 7805static void 7806ia64_clear_sched_context (void *_sc) 7807{ 7808 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7809 7810 free (sc->prev_cycle_state); 7811 sc->prev_cycle_state = NULL; 7812} 7813 7814/* Frees the _SC scheduling context. */ 7815static void 7816ia64_free_sched_context (void *_sc) 7817{ 7818 gcc_assert (_sc != NULL); 7819 7820 free (_sc); 7821} 7822 7823typedef rtx (* gen_func_t) (rtx, rtx); 7824 7825/* Return a function that will generate a load of mode MODE_NO 7826 with speculation types TS. 
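   For illustration, from the gen_ld_* tables below: with TS == BEGIN_DATA
   and MODE_NO == 4 (DImode, see ia64_mode_to_int) the result is
   gen_movdi_advanced, i.e. an ld8.a advanced load; with
   TS == (BEGIN_DATA | BEGIN_CONTROL) it is
   gen_movdi_speculative_advanced, i.e. an ld8.sa.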
*/ 7827static gen_func_t 7828get_spec_load_gen_function (ds_t ts, int mode_no) 7829{ 7830 static gen_func_t gen_ld_[] = { 7831 gen_movbi, 7832 gen_movqi_internal, 7833 gen_movhi_internal, 7834 gen_movsi_internal, 7835 gen_movdi_internal, 7836 gen_movsf_internal, 7837 gen_movdf_internal, 7838 gen_movxf_internal, 7839 gen_movti_internal, 7840 gen_zero_extendqidi2, 7841 gen_zero_extendhidi2, 7842 gen_zero_extendsidi2, 7843 }; 7844 7845 static gen_func_t gen_ld_a[] = { 7846 gen_movbi_advanced, 7847 gen_movqi_advanced, 7848 gen_movhi_advanced, 7849 gen_movsi_advanced, 7850 gen_movdi_advanced, 7851 gen_movsf_advanced, 7852 gen_movdf_advanced, 7853 gen_movxf_advanced, 7854 gen_movti_advanced, 7855 gen_zero_extendqidi2_advanced, 7856 gen_zero_extendhidi2_advanced, 7857 gen_zero_extendsidi2_advanced, 7858 }; 7859 static gen_func_t gen_ld_s[] = { 7860 gen_movbi_speculative, 7861 gen_movqi_speculative, 7862 gen_movhi_speculative, 7863 gen_movsi_speculative, 7864 gen_movdi_speculative, 7865 gen_movsf_speculative, 7866 gen_movdf_speculative, 7867 gen_movxf_speculative, 7868 gen_movti_speculative, 7869 gen_zero_extendqidi2_speculative, 7870 gen_zero_extendhidi2_speculative, 7871 gen_zero_extendsidi2_speculative, 7872 }; 7873 static gen_func_t gen_ld_sa[] = { 7874 gen_movbi_speculative_advanced, 7875 gen_movqi_speculative_advanced, 7876 gen_movhi_speculative_advanced, 7877 gen_movsi_speculative_advanced, 7878 gen_movdi_speculative_advanced, 7879 gen_movsf_speculative_advanced, 7880 gen_movdf_speculative_advanced, 7881 gen_movxf_speculative_advanced, 7882 gen_movti_speculative_advanced, 7883 gen_zero_extendqidi2_speculative_advanced, 7884 gen_zero_extendhidi2_speculative_advanced, 7885 gen_zero_extendsidi2_speculative_advanced, 7886 }; 7887 static gen_func_t gen_ld_s_a[] = { 7888 gen_movbi_speculative_a, 7889 gen_movqi_speculative_a, 7890 gen_movhi_speculative_a, 7891 gen_movsi_speculative_a, 7892 gen_movdi_speculative_a, 7893 gen_movsf_speculative_a, 7894 gen_movdf_speculative_a, 7895 gen_movxf_speculative_a, 7896 gen_movti_speculative_a, 7897 gen_zero_extendqidi2_speculative_a, 7898 gen_zero_extendhidi2_speculative_a, 7899 gen_zero_extendsidi2_speculative_a, 7900 }; 7901 7902 gen_func_t *gen_ld; 7903 7904 if (ts & BEGIN_DATA) 7905 { 7906 if (ts & BEGIN_CONTROL) 7907 gen_ld = gen_ld_sa; 7908 else 7909 gen_ld = gen_ld_a; 7910 } 7911 else if (ts & BEGIN_CONTROL) 7912 { 7913 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL) 7914 || ia64_needs_block_p (ts)) 7915 gen_ld = gen_ld_s; 7916 else 7917 gen_ld = gen_ld_s_a; 7918 } 7919 else if (ts == 0) 7920 gen_ld = gen_ld_; 7921 else 7922 gcc_unreachable (); 7923 7924 return gen_ld[mode_no]; 7925} 7926 7927/* Constants that help mapping 'machine_mode' to int. */ 7928enum SPEC_MODES 7929 { 7930 SPEC_MODE_INVALID = -1, 7931 SPEC_MODE_FIRST = 0, 7932 SPEC_MODE_FOR_EXTEND_FIRST = 1, 7933 SPEC_MODE_FOR_EXTEND_LAST = 3, 7934 SPEC_MODE_LAST = 8 7935 }; 7936 7937enum 7938 { 7939 /* Offset to reach ZERO_EXTEND patterns. */ 7940 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 7941 }; 7942 7943/* Return index of the MODE. 
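   E.g. SImode maps to 3.  A SImode load zero-extended to DImode is
   remapped by get_mode_no_for_insn to 3 + SPEC_GEN_EXTEND_OFFSET == 11,
   which selects the gen_zero_extendsidi2_* entries of the tables above.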
*/ 7944static int 7945ia64_mode_to_int (machine_mode mode) 7946{ 7947 switch (mode) 7948 { 7949 case BImode: return 0; /* SPEC_MODE_FIRST */ 7950 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ 7951 case HImode: return 2; 7952 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ 7953 case DImode: return 4; 7954 case SFmode: return 5; 7955 case DFmode: return 6; 7956 case XFmode: return 7; 7957 case TImode: 7958 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not 7959 mentioned in itanium[12].md. Predicate fp_register_operand also 7960 needs to be defined. Bottom line: better disable for now. */ 7961 return SPEC_MODE_INVALID; 7962 default: return SPEC_MODE_INVALID; 7963 } 7964} 7965 7966/* Provide information about speculation capabilities. */ 7967static void 7968ia64_set_sched_flags (spec_info_t spec_info) 7969{ 7970 unsigned int *flags = &(current_sched_info->flags); 7971 7972 if (*flags & SCHED_RGN 7973 || *flags & SCHED_EBB 7974 || *flags & SEL_SCHED) 7975 { 7976 int mask = 0; 7977 7978 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) 7979 || (mflag_sched_ar_data_spec && reload_completed)) 7980 { 7981 mask |= BEGIN_DATA; 7982 7983 if (!sel_sched_p () 7984 && ((mflag_sched_br_in_data_spec && !reload_completed) 7985 || (mflag_sched_ar_in_data_spec && reload_completed))) 7986 mask |= BE_IN_DATA; 7987 } 7988 7989 if (mflag_sched_control_spec 7990 && (!sel_sched_p () 7991 || reload_completed)) 7992 { 7993 mask |= BEGIN_CONTROL; 7994 7995 if (!sel_sched_p () && mflag_sched_in_control_spec) 7996 mask |= BE_IN_CONTROL; 7997 } 7998 7999 spec_info->mask = mask; 8000 8001 if (mask) 8002 { 8003 *flags |= USE_DEPS_LIST | DO_SPECULATION; 8004 8005 if (mask & BE_IN_SPEC) 8006 *flags |= NEW_BBS; 8007 8008 spec_info->flags = 0; 8009 8010 if ((mask & CONTROL_SPEC) 8011 && sel_sched_p () && mflag_sel_sched_dont_check_control_spec) 8012 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL; 8013 8014 if (sched_verbose >= 1) 8015 spec_info->dump = sched_dump; 8016 else 8017 spec_info->dump = 0; 8018 8019 if (mflag_sched_count_spec_in_critical_path) 8020 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; 8021 } 8022 } 8023 else 8024 spec_info->mask = 0; 8025} 8026 8027/* If INSN is an appropriate load return its mode. 8028 Return -1 otherwise. */ 8029static int 8030get_mode_no_for_insn (rtx_insn *insn) 8031{ 8032 rtx reg, mem, mode_rtx; 8033 int mode_no; 8034 bool extend_p; 8035 8036 extract_insn_cached (insn); 8037 8038 /* We use WHICH_ALTERNATIVE only after reload. This will 8039 guarantee that reload won't touch a speculative insn. */ 8040 8041 if (recog_data.n_operands != 2) 8042 return -1; 8043 8044 reg = recog_data.operand[0]; 8045 mem = recog_data.operand[1]; 8046 8047 /* We should use MEM's mode since REG's mode in presence of 8048 ZERO_EXTEND will always be DImode. */ 8049 if (get_attr_speculable1 (insn) == SPECULABLE1_YES) 8050 /* Process non-speculative ld. */ 8051 { 8052 if (!reload_completed) 8053 { 8054 /* Do not speculate into regs like ar.lc. 
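   (such application registers are rejected by the AR_REGNO_P test
   below).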
*/ 8055 if (!REG_P (reg) || AR_REGNO_P (REGNO (reg))) 8056 return -1; 8057 8058 if (!MEM_P (mem)) 8059 return -1; 8060 8061 { 8062 rtx mem_reg = XEXP (mem, 0); 8063 8064 if (!REG_P (mem_reg)) 8065 return -1; 8066 } 8067 8068 mode_rtx = mem; 8069 } 8070 else if (get_attr_speculable2 (insn) == SPECULABLE2_YES) 8071 { 8072 gcc_assert (REG_P (reg) && MEM_P (mem)); 8073 mode_rtx = mem; 8074 } 8075 else 8076 return -1; 8077 } 8078 else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES 8079 || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES 8080 || get_attr_check_load (insn) == CHECK_LOAD_YES) 8081 /* Process speculative ld or ld.c. */ 8082 { 8083 gcc_assert (REG_P (reg) && MEM_P (mem)); 8084 mode_rtx = mem; 8085 } 8086 else 8087 { 8088 enum attr_itanium_class attr_class = get_attr_itanium_class (insn); 8089 8090 if (attr_class == ITANIUM_CLASS_CHK_A 8091 || attr_class == ITANIUM_CLASS_CHK_S_I 8092 || attr_class == ITANIUM_CLASS_CHK_S_F) 8093 /* Process chk. */ 8094 mode_rtx = reg; 8095 else 8096 return -1; 8097 } 8098 8099 mode_no = ia64_mode_to_int (GET_MODE (mode_rtx)); 8100 8101 if (mode_no == SPEC_MODE_INVALID) 8102 return -1; 8103 8104 extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx)); 8105 8106 if (extend_p) 8107 { 8108 if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no 8109 && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)) 8110 return -1; 8111 8112 mode_no += SPEC_GEN_EXTEND_OFFSET; 8113 } 8114 8115 return mode_no; 8116} 8117 8118/* If X is an unspec part of a speculative load, return its code. 8119 Return -1 otherwise. */ 8120static int 8121get_spec_unspec_code (const_rtx x) 8122{ 8123 if (GET_CODE (x) != UNSPEC) 8124 return -1; 8125 8126 { 8127 int code; 8128 8129 code = XINT (x, 1); 8130 8131 switch (code) 8132 { 8133 case UNSPEC_LDA: 8134 case UNSPEC_LDS: 8135 case UNSPEC_LDS_A: 8136 case UNSPEC_LDSA: 8137 return code; 8138 8139 default: 8140 return -1; 8141 } 8142 } 8143} 8144 8145/* Implement skip_rtx_p hook. */ 8146static bool 8147ia64_skip_rtx_p (const_rtx x) 8148{ 8149 return get_spec_unspec_code (x) != -1; 8150} 8151 8152/* If INSN is a speculative load, return its UNSPEC code. 8153 Return -1 otherwise. */ 8154static int 8155get_insn_spec_code (const_rtx insn) 8156{ 8157 rtx pat, reg, mem; 8158 8159 pat = PATTERN (insn); 8160 8161 if (GET_CODE (pat) == COND_EXEC) 8162 pat = COND_EXEC_CODE (pat); 8163 8164 if (GET_CODE (pat) != SET) 8165 return -1; 8166 8167 reg = SET_DEST (pat); 8168 if (!REG_P (reg)) 8169 return -1; 8170 8171 mem = SET_SRC (pat); 8172 if (GET_CODE (mem) == ZERO_EXTEND) 8173 mem = XEXP (mem, 0); 8174 8175 return get_spec_unspec_code (mem); 8176} 8177 8178/* If INSN is a speculative load, return a ds with the speculation types. 8179 Otherwise [if INSN is a normal instruction] return 0. */ 8180static ds_t 8181ia64_get_insn_spec_ds (rtx_insn *insn) 8182{ 8183 int code = get_insn_spec_code (insn); 8184 8185 switch (code) 8186 { 8187 case UNSPEC_LDA: 8188 return BEGIN_DATA; 8189 8190 case UNSPEC_LDS: 8191 case UNSPEC_LDS_A: 8192 return BEGIN_CONTROL; 8193 8194 case UNSPEC_LDSA: 8195 return BEGIN_DATA | BEGIN_CONTROL; 8196 8197 default: 8198 return 0; 8199 } 8200} 8201 8202/* If INSN is a speculative load return a ds with the speculation types that 8203 will be checked. 8204 Otherwise [if INSN is a normal instruction] return 0. 
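   For example, per the switch below, an UNSPEC_LDS load (ld.s) is
   checked for control speculation only, while an UNSPEC_LDSA load
   (ld.sa) is checked for both data and control speculation.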
 */
static ds_t
ia64_get_insn_checked_ds (rtx_insn *insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* Return a speculative load pattern for INSN with speculation type TS
   and machine mode index MODE_NO.  The new pattern reuses INSN's
   operands and, for a predicated insn, its COND_EXEC condition.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}

/* Return a function that will generate a check for speculation TS with
   mode MODE_NO.
   If a simple check is needed, pass true for SIMPLE_CHECK_P.
   If a clearing check is needed, pass true for CLEARING_CHECK_P.
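   E.g. for a data-speculative DImode load (TS & BEGIN_DATA) with
   SIMPLE_CHECK_P and CLEARING_CHECK_P both true, the result is
   gen_movdi_clr (an ld8.c.clr check); without SIMPLE_CHECK_P it is
   gen_advanced_load_check_clr_di (a chk.a.clr).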
*/ 8300static gen_func_t 8301get_spec_check_gen_function (ds_t ts, int mode_no, 8302 bool simple_check_p, bool clearing_check_p) 8303{ 8304 static gen_func_t gen_ld_c_clr[] = { 8305 gen_movbi_clr, 8306 gen_movqi_clr, 8307 gen_movhi_clr, 8308 gen_movsi_clr, 8309 gen_movdi_clr, 8310 gen_movsf_clr, 8311 gen_movdf_clr, 8312 gen_movxf_clr, 8313 gen_movti_clr, 8314 gen_zero_extendqidi2_clr, 8315 gen_zero_extendhidi2_clr, 8316 gen_zero_extendsidi2_clr, 8317 }; 8318 static gen_func_t gen_ld_c_nc[] = { 8319 gen_movbi_nc, 8320 gen_movqi_nc, 8321 gen_movhi_nc, 8322 gen_movsi_nc, 8323 gen_movdi_nc, 8324 gen_movsf_nc, 8325 gen_movdf_nc, 8326 gen_movxf_nc, 8327 gen_movti_nc, 8328 gen_zero_extendqidi2_nc, 8329 gen_zero_extendhidi2_nc, 8330 gen_zero_extendsidi2_nc, 8331 }; 8332 static gen_func_t gen_chk_a_clr[] = { 8333 gen_advanced_load_check_clr_bi, 8334 gen_advanced_load_check_clr_qi, 8335 gen_advanced_load_check_clr_hi, 8336 gen_advanced_load_check_clr_si, 8337 gen_advanced_load_check_clr_di, 8338 gen_advanced_load_check_clr_sf, 8339 gen_advanced_load_check_clr_df, 8340 gen_advanced_load_check_clr_xf, 8341 gen_advanced_load_check_clr_ti, 8342 gen_advanced_load_check_clr_di, 8343 gen_advanced_load_check_clr_di, 8344 gen_advanced_load_check_clr_di, 8345 }; 8346 static gen_func_t gen_chk_a_nc[] = { 8347 gen_advanced_load_check_nc_bi, 8348 gen_advanced_load_check_nc_qi, 8349 gen_advanced_load_check_nc_hi, 8350 gen_advanced_load_check_nc_si, 8351 gen_advanced_load_check_nc_di, 8352 gen_advanced_load_check_nc_sf, 8353 gen_advanced_load_check_nc_df, 8354 gen_advanced_load_check_nc_xf, 8355 gen_advanced_load_check_nc_ti, 8356 gen_advanced_load_check_nc_di, 8357 gen_advanced_load_check_nc_di, 8358 gen_advanced_load_check_nc_di, 8359 }; 8360 static gen_func_t gen_chk_s[] = { 8361 gen_speculation_check_bi, 8362 gen_speculation_check_qi, 8363 gen_speculation_check_hi, 8364 gen_speculation_check_si, 8365 gen_speculation_check_di, 8366 gen_speculation_check_sf, 8367 gen_speculation_check_df, 8368 gen_speculation_check_xf, 8369 gen_speculation_check_ti, 8370 gen_speculation_check_di, 8371 gen_speculation_check_di, 8372 gen_speculation_check_di, 8373 }; 8374 8375 gen_func_t *gen_check; 8376 8377 if (ts & BEGIN_DATA) 8378 { 8379 /* We don't need recovery because even if this is ld.sa 8380 ALAT entry will be allocated only if NAT bit is set to zero. 8381 So it is enough to use ld.c here. */ 8382 8383 if (simple_check_p) 8384 { 8385 gcc_assert (mflag_sched_spec_ldc); 8386 8387 if (clearing_check_p) 8388 gen_check = gen_ld_c_clr; 8389 else 8390 gen_check = gen_ld_c_nc; 8391 } 8392 else 8393 { 8394 if (clearing_check_p) 8395 gen_check = gen_chk_a_clr; 8396 else 8397 gen_check = gen_chk_a_nc; 8398 } 8399 } 8400 else if (ts & BEGIN_CONTROL) 8401 { 8402 if (simple_check_p) 8403 /* We might want to use ld.sa -> ld.c instead of 8404 ld.s -> chk.s. */ 8405 { 8406 gcc_assert (!ia64_needs_block_p (ts)); 8407 8408 if (clearing_check_p) 8409 gen_check = gen_ld_c_clr; 8410 else 8411 gen_check = gen_ld_c_nc; 8412 } 8413 else 8414 { 8415 gen_check = gen_chk_s; 8416 } 8417 } 8418 else 8419 gcc_unreachable (); 8420 8421 gcc_assert (mode_no >= 0); 8422 return gen_check[mode_no]; 8423} 8424 8425/* Return nonzero, if INSN needs branchy recovery check. 
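   (A branchy, or block, check is a chk.a/chk.s insn that branches to
   recovery code; the inline ld.c forms, enabled by the
   mflag_sched_spec_ldc flags tested below, avoid the branch.)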
 */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}

/* Generate (or regenerate) a recovery check for INSN.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}

/* Return nonzero if X is a branchy recovery check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero if SRC belongs to a recovery check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}


/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx_insn *insn;	/* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num;		/* insn number (0 - for initial state, 1 - for the 1st
			   insn)  */
  int cost;		/* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns  */
  int branch_deviation;	/* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};

/* The following maps an insn number to the corresponding bundle
   state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.
*/ 8549 8550static int bundle_states_num; 8551 8552/* All allocated bundle states are in the following chain. */ 8553 8554static struct bundle_state *allocated_bundle_states_chain; 8555 8556/* All allocated but not used bundle states are in the following 8557 chain. */ 8558 8559static struct bundle_state *free_bundle_state_chain; 8560 8561 8562/* The following function returns a free bundle state. */ 8563 8564static struct bundle_state * 8565get_free_bundle_state (void) 8566{ 8567 struct bundle_state *result; 8568 8569 if (free_bundle_state_chain != NULL) 8570 { 8571 result = free_bundle_state_chain; 8572 free_bundle_state_chain = result->next; 8573 } 8574 else 8575 { 8576 result = XNEW (struct bundle_state); 8577 result->dfa_state = xmalloc (dfa_state_size); 8578 result->allocated_states_chain = allocated_bundle_states_chain; 8579 allocated_bundle_states_chain = result; 8580 } 8581 result->unique_num = bundle_states_num++; 8582 return result; 8583 8584} 8585 8586/* The following function frees given bundle state. */ 8587 8588static void 8589free_bundle_state (struct bundle_state *state) 8590{ 8591 state->next = free_bundle_state_chain; 8592 free_bundle_state_chain = state; 8593} 8594 8595/* Start work with abstract data `bundle states'. */ 8596 8597static void 8598initiate_bundle_states (void) 8599{ 8600 bundle_states_num = 0; 8601 free_bundle_state_chain = NULL; 8602 allocated_bundle_states_chain = NULL; 8603} 8604 8605/* Finish work with abstract data `bundle states'. */ 8606 8607static void 8608finish_bundle_states (void) 8609{ 8610 struct bundle_state *curr_state, *next_state; 8611 8612 for (curr_state = allocated_bundle_states_chain; 8613 curr_state != NULL; 8614 curr_state = next_state) 8615 { 8616 next_state = curr_state->allocated_states_chain; 8617 free (curr_state->dfa_state); 8618 free (curr_state); 8619 } 8620} 8621 8622/* Hashtable helpers. */ 8623 8624struct bundle_state_hasher : typed_noop_remove <bundle_state> 8625{ 8626 typedef bundle_state value_type; 8627 typedef bundle_state compare_type; 8628 static inline hashval_t hash (const value_type *); 8629 static inline bool equal (const value_type *, const compare_type *); 8630}; 8631 8632/* The function returns hash of BUNDLE_STATE. */ 8633 8634inline hashval_t 8635bundle_state_hasher::hash (const value_type *state) 8636{ 8637 unsigned result, i; 8638 8639 for (result = i = 0; i < dfa_state_size; i++) 8640 result += (((unsigned char *) state->dfa_state) [i] 8641 << ((i % CHAR_BIT) * 3 + CHAR_BIT)); 8642 return result + state->insn_num; 8643} 8644 8645/* The function returns nonzero if the bundle state keys are equal. */ 8646 8647inline bool 8648bundle_state_hasher::equal (const value_type *state1, 8649 const compare_type *state2) 8650{ 8651 return (state1->insn_num == state2->insn_num 8652 && memcmp (state1->dfa_state, state2->dfa_state, 8653 dfa_state_size) == 0); 8654} 8655 8656/* Hash table of the bundle states. The key is dfa_state and insn_num 8657 of the bundle states. */ 8658 8659static hash_table<bundle_state_hasher> *bundle_state_table; 8660 8661/* The function inserts the BUNDLE_STATE into the hash table. The 8662 function returns nonzero if the bundle has been inserted into the 8663 table. The table contains the best bundle state with given key. 
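   Here "best" is decided lexicographically by the comparison below:
   lower cost first, then fewer accumulated insns (i.e. fewer nops),
   then smaller branch deviation, then fewer middle-of-bundle stop
   bits.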
 */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}

/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  delete bundle_state_table;
  bundle_state_table = NULL;
}



/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}

/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also (or only, if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it is successful, the function creates a new bundle state
   and inserts it into the hash table and into `index_to_bundle_states'.
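   For illustration (an ordinary non-L, non-asm insn): if ORIGINATOR has
   accumulated_insns_num == 4 and BEFORE_NOPS_NUM == 1, the new state
   accumulates 4 + 1 + 1 == 6 insns; since 6 % 3 == 0 its last bundle is
   already full and no trailing nops are added for TRY_BUNDLE_END_P.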
*/ 8765 8766static void 8767issue_nops_and_insn (struct bundle_state *originator, int before_nops_num, 8768 rtx_insn *insn, int try_bundle_end_p, 8769 int only_bundle_end_p) 8770{ 8771 struct bundle_state *curr_state; 8772 8773 curr_state = get_free_bundle_state (); 8774 memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size); 8775 curr_state->insn = insn; 8776 curr_state->insn_num = originator->insn_num + 1; 8777 curr_state->cost = originator->cost; 8778 curr_state->originator = originator; 8779 curr_state->before_nops_num = before_nops_num; 8780 curr_state->after_nops_num = 0; 8781 curr_state->accumulated_insns_num 8782 = originator->accumulated_insns_num + before_nops_num; 8783 curr_state->branch_deviation = originator->branch_deviation; 8784 curr_state->middle_bundle_stops = originator->middle_bundle_stops; 8785 gcc_assert (insn); 8786 if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier) 8787 { 8788 gcc_assert (GET_MODE (insn) != TImode); 8789 if (!try_issue_nops (curr_state, before_nops_num)) 8790 return; 8791 if (!try_issue_insn (curr_state, insn)) 8792 return; 8793 memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size); 8794 if (curr_state->accumulated_insns_num % 3 != 0) 8795 curr_state->middle_bundle_stops++; 8796 if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0 8797 && curr_state->accumulated_insns_num % 3 != 0) 8798 { 8799 free_bundle_state (curr_state); 8800 return; 8801 } 8802 } 8803 else if (GET_MODE (insn) != TImode) 8804 { 8805 if (!try_issue_nops (curr_state, before_nops_num)) 8806 return; 8807 if (!try_issue_insn (curr_state, insn)) 8808 return; 8809 curr_state->accumulated_insns_num++; 8810 gcc_assert (!unknown_for_bundling_p (insn)); 8811 8812 if (ia64_safe_type (insn) == TYPE_L) 8813 curr_state->accumulated_insns_num++; 8814 } 8815 else 8816 { 8817 /* If this is an insn that must be first in a group, then don't allow 8818 nops to be emitted before it. Currently, alloc is the only such 8819 supported instruction. */ 8820 /* ??? The bundling automatons should handle this for us, but they do 8821 not yet have support for the first_insn attribute. */ 8822 if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES) 8823 { 8824 free_bundle_state (curr_state); 8825 return; 8826 } 8827 8828 state_transition (curr_state->dfa_state, dfa_pre_cycle_insn); 8829 state_transition (curr_state->dfa_state, NULL); 8830 curr_state->cost++; 8831 if (!try_issue_nops (curr_state, before_nops_num)) 8832 return; 8833 if (!try_issue_insn (curr_state, insn)) 8834 return; 8835 curr_state->accumulated_insns_num++; 8836 if (unknown_for_bundling_p (insn)) 8837 { 8838 /* Finish bundle containing asm insn. 
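   (pad with 3 - n % 3 nop slots, where n is accumulated_insns_num, so
   the asm ends at a bundle boundary).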
 */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}

/* The following function returns the position in the two-bundle window
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}

/* The function returns the code of a possible template for the given
   position and state.  It should be called only with position values
   of 3 or 6.  We avoid generating F NOPs by putting templates
   containing F insns at the end of the template search, because of an
   undocumented anomaly in McKinley-derived cores which can cause
   stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against
   the use of F-unit instructions unless they're really needed.
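   The returned codes follow the bundle selector numbering used
   elsewhere in this file, as the reserved unit names below indicate:
   0 .mii, 1 .mmi, 2 .mfi, 3 .mmf, 4 .bbb, 5 .mbb, 6 .mib, 7 .mmb,
   8 .mfb, 9 .mlx.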
*/ 8912 8913static int 8914get_template (state_t state, int pos) 8915{ 8916 switch (pos) 8917 { 8918 case 3: 8919 if (cpu_unit_reservation_p (state, _0mmi_)) 8920 return 1; 8921 else if (cpu_unit_reservation_p (state, _0mii_)) 8922 return 0; 8923 else if (cpu_unit_reservation_p (state, _0mmb_)) 8924 return 7; 8925 else if (cpu_unit_reservation_p (state, _0mib_)) 8926 return 6; 8927 else if (cpu_unit_reservation_p (state, _0mbb_)) 8928 return 5; 8929 else if (cpu_unit_reservation_p (state, _0bbb_)) 8930 return 4; 8931 else if (cpu_unit_reservation_p (state, _0mmf_)) 8932 return 3; 8933 else if (cpu_unit_reservation_p (state, _0mfi_)) 8934 return 2; 8935 else if (cpu_unit_reservation_p (state, _0mfb_)) 8936 return 8; 8937 else if (cpu_unit_reservation_p (state, _0mlx_)) 8938 return 9; 8939 else 8940 gcc_unreachable (); 8941 case 6: 8942 if (cpu_unit_reservation_p (state, _1mmi_)) 8943 return 1; 8944 else if (cpu_unit_reservation_p (state, _1mii_)) 8945 return 0; 8946 else if (cpu_unit_reservation_p (state, _1mmb_)) 8947 return 7; 8948 else if (cpu_unit_reservation_p (state, _1mib_)) 8949 return 6; 8950 else if (cpu_unit_reservation_p (state, _1mbb_)) 8951 return 5; 8952 else if (cpu_unit_reservation_p (state, _1bbb_)) 8953 return 4; 8954 else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_)) 8955 return 3; 8956 else if (cpu_unit_reservation_p (state, _1mfi_)) 8957 return 2; 8958 else if (cpu_unit_reservation_p (state, _1mfb_)) 8959 return 8; 8960 else if (cpu_unit_reservation_p (state, _1mlx_)) 8961 return 9; 8962 else 8963 gcc_unreachable (); 8964 default: 8965 gcc_unreachable (); 8966 } 8967} 8968 8969/* True when INSN is important for bundling. */ 8970 8971static bool 8972important_for_bundling_p (rtx_insn *insn) 8973{ 8974 return (INSN_P (insn) 8975 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE 8976 && GET_CODE (PATTERN (insn)) != USE 8977 && GET_CODE (PATTERN (insn)) != CLOBBER); 8978} 8979 8980/* The following function returns an insn important for insn bundling 8981 followed by INSN and before TAIL. */ 8982 8983static rtx_insn * 8984get_next_important_insn (rtx_insn *insn, rtx_insn *tail) 8985{ 8986 for (; insn && insn != tail; insn = NEXT_INSN (insn)) 8987 if (important_for_bundling_p (insn)) 8988 return insn; 8989 return NULL; 8990} 8991 8992/* True when INSN is unknown, but important, for bundling. */ 8993 8994static bool 8995unknown_for_bundling_p (rtx_insn *insn) 8996{ 8997 return (INSN_P (insn) 8998 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN 8999 && GET_CODE (PATTERN (insn)) != USE 9000 && GET_CODE (PATTERN (insn)) != CLOBBER); 9001} 9002 9003/* Add a bundle selector TEMPLATE0 before INSN. */ 9004 9005static void 9006ia64_add_bundle_selector_before (int template0, rtx_insn *insn) 9007{ 9008 rtx b = gen_bundle_selector (GEN_INT (template0)); 9009 9010 ia64_emit_insn_before (b, insn); 9011#if NR_BUNDLES == 10 9012 if ((template0 == 4 || template0 == 5) 9013 && ia64_except_unwind_info (&global_options) == UI_TARGET) 9014 { 9015 int i; 9016 rtx note = NULL_RTX; 9017 9018 /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the 9019 first or second slot. If it is and has REG_EH_NOTE set, copy it 9020 to following nops, as br.call sets rp to the address of following 9021 bundle and therefore an EH region end must be on a bundle 9022 boundary. 
 */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}

/* The following function does insn bundling.  Bundling means
   inserting templates and nop insns to fit insn groups into permitted
   templates.  Instruction scheduling uses an NDFA (non-deterministic
   finite automaton) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automaton permits following
   all possible insn sequences very fast.

   Unfortunately it is not possible to get information about inserting
   nop insns and used templates from the automaton states.  The
   automaton only says that we can issue an insn possibly inserting
   some nops before it and using some template.  Therefore insn
   bundling in this function is implemented by using a DFA
   (deterministic finite automaton).  We follow all possible insn
   sequences by inserting 0-2 nops (that is what the NDFA describes for
   insn scheduling) before/after each insn being bundled.  We know the
   start of a simulated processor cycle from insn scheduling (an insn
   starting a new cycle has TImode).

   A simple implementation of insn bundling would create an enormous
   number of possible insn sequences satisfying information about new
   cycle ticks taken from the insn scheduling.  To make the algorithm
   practical we use dynamic programming.  Each decision (about
   inserting nops and implicitly about previous decisions) is described
   by the structure bundle_state (see above).  If we generate the same
   bundle state (the key is the automaton state after issuing the insns
   and nops for it), we reuse the already generated one.  As a
   consequence we reject some decisions that cannot improve the
   solution and reduce the memory used by the algorithm.

   When we reach the end of the EBB (extended basic block), we choose
   the best sequence and then, moving back through the EBB, insert
   templates for the best alternative.  The templates are taken by
   querying the automaton state for each insn in the chosen bundle
   states.

   So the algorithm makes two (forward and backward) passes through
   the EBB.  */

static void
bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx_insn *insn, *next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx_insn *b;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  bundling_p = 1;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.
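     Each important insn is tried with 0 or 1 preceding nops, and
     additionally with 2 for F, B, L and S type insns -- see the
     issue_nops_and_insn calls below.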
*/ 9110 curr_state = get_free_bundle_state (); 9111 curr_state->insn = NULL; 9112 curr_state->before_nops_num = 0; 9113 curr_state->after_nops_num = 0; 9114 curr_state->insn_num = 0; 9115 curr_state->cost = 0; 9116 curr_state->accumulated_insns_num = 0; 9117 curr_state->branch_deviation = 0; 9118 curr_state->middle_bundle_stops = 0; 9119 curr_state->next = NULL; 9120 curr_state->originator = NULL; 9121 state_reset (curr_state->dfa_state); 9122 index_to_bundle_states [0] = curr_state; 9123 insn_num = 0; 9124 /* Shift cycle mark if it is put on insn which could be ignored. */ 9125 for (insn = NEXT_INSN (prev_head_insn); 9126 insn != tail; 9127 insn = NEXT_INSN (insn)) 9128 if (INSN_P (insn) 9129 && !important_for_bundling_p (insn) 9130 && GET_MODE (insn) == TImode) 9131 { 9132 PUT_MODE (insn, VOIDmode); 9133 for (next_insn = NEXT_INSN (insn); 9134 next_insn != tail; 9135 next_insn = NEXT_INSN (next_insn)) 9136 if (important_for_bundling_p (next_insn) 9137 && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier) 9138 { 9139 PUT_MODE (next_insn, TImode); 9140 break; 9141 } 9142 } 9143 /* Forward pass: generation of bundle states. */ 9144 for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail); 9145 insn != NULL_RTX; 9146 insn = next_insn) 9147 { 9148 gcc_assert (important_for_bundling_p (insn)); 9149 type = ia64_safe_type (insn); 9150 next_insn = get_next_important_insn (NEXT_INSN (insn), tail); 9151 insn_num++; 9152 index_to_bundle_states [insn_num] = NULL; 9153 for (curr_state = index_to_bundle_states [insn_num - 1]; 9154 curr_state != NULL; 9155 curr_state = next_state) 9156 { 9157 pos = curr_state->accumulated_insns_num % 3; 9158 next_state = curr_state->next; 9159 /* We must fill up the current bundle in order to start a 9160 subsequent asm insn in a new bundle. Asm insn is always 9161 placed in a separate bundle. */ 9162 only_bundle_end_p 9163 = (next_insn != NULL_RTX 9164 && INSN_CODE (insn) == CODE_FOR_insn_group_barrier 9165 && unknown_for_bundling_p (next_insn)); 9166 /* We may fill up the current bundle if it is the cycle end 9167 without a group barrier. */ 9168 bundle_end_p 9169 = (only_bundle_end_p || next_insn == NULL_RTX 9170 || (GET_MODE (next_insn) == TImode 9171 && INSN_CODE (insn) != CODE_FOR_insn_group_barrier)); 9172 if (type == TYPE_F || type == TYPE_B || type == TYPE_L 9173 || type == TYPE_S) 9174 issue_nops_and_insn (curr_state, 2, insn, bundle_end_p, 9175 only_bundle_end_p); 9176 issue_nops_and_insn (curr_state, 1, insn, bundle_end_p, 9177 only_bundle_end_p); 9178 issue_nops_and_insn (curr_state, 0, insn, bundle_end_p, 9179 only_bundle_end_p); 9180 } 9181 gcc_assert (index_to_bundle_states [insn_num]); 9182 for (curr_state = index_to_bundle_states [insn_num]; 9183 curr_state != NULL; 9184 curr_state = curr_state->next) 9185 if (verbose >= 2 && dump) 9186 { 9187 /* This structure is taken from generated code of the 9188 pipeline hazard recognizer (see file insn-attrtab.c). 9189 Please don't forget to change the structure if a new 9190 automaton is added to .md file. */ 9191 struct DFA_chip 9192 { 9193 unsigned short one_automaton_state; 9194 unsigned short oneb_automaton_state; 9195 unsigned short two_automaton_state; 9196 unsigned short twob_automaton_state; 9197 }; 9198 9199 fprintf 9200 (dump, 9201 "// Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n", 9202 curr_state->unique_num, 9203 (curr_state->originator == NULL 9204 ? 
-1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We look only at the states whose last bundle is fully filled.
       Of those we prefer insn sequences with minimal cost, then with
       minimal inserted nops, and finally with branch insns placed in
       the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = unknown_for_bundling_p (insn);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "// Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have the stop inside a bundle.
 */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    rtx nop_pat = gen_nop ();
	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two-bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  A group barrier has
	 no slot.  An asm insn takes a whole bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	pos--;
      /* A long insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && !unknown_for_bundling_p (insn))
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  rtx nop_pat = gen_nop ();
	  ia64_emit_insn_before (nop_pat, insn);
	  rtx_insn *nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

#ifdef ENABLE_CHECKING
  {
    /* Assert right calculation of middle_bundle_stops.
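       We recount the stop bits that fall strictly inside bundles and
       check that the count matches the one accumulated in best_state.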
*/ 9372 int num = best_state->middle_bundle_stops; 9373 bool start_bundle = true, end_bundle = false; 9374 9375 for (insn = NEXT_INSN (prev_head_insn); 9376 insn && insn != tail; 9377 insn = NEXT_INSN (insn)) 9378 { 9379 if (!INSN_P (insn)) 9380 continue; 9381 if (recog_memoized (insn) == CODE_FOR_bundle_selector) 9382 start_bundle = true; 9383 else 9384 { 9385 rtx_insn *next_insn; 9386 9387 for (next_insn = NEXT_INSN (insn); 9388 next_insn && next_insn != tail; 9389 next_insn = NEXT_INSN (next_insn)) 9390 if (INSN_P (next_insn) 9391 && (ia64_safe_itanium_class (next_insn) 9392 != ITANIUM_CLASS_IGNORE 9393 || recog_memoized (next_insn) 9394 == CODE_FOR_bundle_selector) 9395 && GET_CODE (PATTERN (next_insn)) != USE 9396 && GET_CODE (PATTERN (next_insn)) != CLOBBER) 9397 break; 9398 9399 end_bundle = next_insn == NULL_RTX 9400 || next_insn == tail 9401 || (INSN_P (next_insn) 9402 && recog_memoized (next_insn) 9403 == CODE_FOR_bundle_selector); 9404 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier 9405 && !start_bundle && !end_bundle 9406 && next_insn 9407 && !unknown_for_bundling_p (next_insn)) 9408 num--; 9409 9410 start_bundle = false; 9411 } 9412 } 9413 9414 gcc_assert (num == 0); 9415 } 9416#endif 9417 9418 free (index_to_bundle_states); 9419 finish_bundle_state_table (); 9420 bundling_p = 0; 9421 dfa_clean_insn_cache (); 9422} 9423 9424/* The following function is called at the end of scheduling BB or 9425 EBB. After reload, it inserts stop bits and does insn bundling. */ 9426 9427static void 9428ia64_sched_finish (FILE *dump, int sched_verbose) 9429{ 9430 if (sched_verbose) 9431 fprintf (dump, "// Finishing schedule.\n"); 9432 if (!reload_completed) 9433 return; 9434 if (reload_completed) 9435 { 9436 final_emit_insn_group_barriers (dump); 9437 bundling (dump, sched_verbose, current_sched_info->prev_head, 9438 current_sched_info->next_tail); 9439 if (sched_verbose && dump) 9440 fprintf (dump, "// finishing %d-%d\n", 9441 INSN_UID (NEXT_INSN (current_sched_info->prev_head)), 9442 INSN_UID (PREV_INSN (current_sched_info->next_tail))); 9443 9444 return; 9445 } 9446} 9447 9448/* The following function inserts stop bits in scheduled BB or EBB. */ 9449 9450static void 9451final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) 9452{ 9453 rtx_insn *insn; 9454 int need_barrier_p = 0; 9455 int seen_good_insn = 0; 9456 9457 init_insn_group_barriers (); 9458 9459 for (insn = NEXT_INSN (current_sched_info->prev_head); 9460 insn != current_sched_info->next_tail; 9461 insn = NEXT_INSN (insn)) 9462 { 9463 if (BARRIER_P (insn)) 9464 { 9465 rtx_insn *last = prev_active_insn (insn); 9466 9467 if (! 
last) 9468 continue; 9469 if (JUMP_TABLE_DATA_P (last)) 9470 last = prev_active_insn (last); 9471 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 9472 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 9473 9474 init_insn_group_barriers (); 9475 seen_good_insn = 0; 9476 need_barrier_p = 0; 9477 } 9478 else if (NONDEBUG_INSN_P (insn)) 9479 { 9480 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 9481 { 9482 init_insn_group_barriers (); 9483 seen_good_insn = 0; 9484 need_barrier_p = 0; 9485 } 9486 else if (need_barrier_p || group_barrier_needed (insn) 9487 || (mflag_sched_stop_bits_after_every_cycle 9488 && GET_MODE (insn) == TImode 9489 && seen_good_insn)) 9490 { 9491 if (TARGET_EARLY_STOP_BITS) 9492 { 9493 rtx_insn *last; 9494 9495 for (last = insn; 9496 last != current_sched_info->prev_head; 9497 last = PREV_INSN (last)) 9498 if (INSN_P (last) && GET_MODE (last) == TImode 9499 && stops_p [INSN_UID (last)]) 9500 break; 9501 if (last == current_sched_info->prev_head) 9502 last = insn; 9503 last = prev_active_insn (last); 9504 if (last 9505 && recog_memoized (last) != CODE_FOR_insn_group_barrier) 9506 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), 9507 last); 9508 init_insn_group_barriers (); 9509 for (last = NEXT_INSN (last); 9510 last != insn; 9511 last = NEXT_INSN (last)) 9512 if (INSN_P (last)) 9513 { 9514 group_barrier_needed (last); 9515 if (recog_memoized (last) >= 0 9516 && important_for_bundling_p (last)) 9517 seen_good_insn = 1; 9518 } 9519 } 9520 else 9521 { 9522 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), 9523 insn); 9524 init_insn_group_barriers (); 9525 seen_good_insn = 0; 9526 } 9527 group_barrier_needed (insn); 9528 if (recog_memoized (insn) >= 0 9529 && important_for_bundling_p (insn)) 9530 seen_good_insn = 1; 9531 } 9532 else if (recog_memoized (insn) >= 0 9533 && important_for_bundling_p (insn)) 9534 seen_good_insn = 1; 9535 need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn)); 9536 } 9537 } 9538} 9539 9540 9541 9542/* If the following function returns TRUE, we will use the DFA 9543 insn scheduler. */ 9544 9545static int 9546ia64_first_cycle_multipass_dfa_lookahead (void) 9547{ 9548 return (reload_completed ? 6 : 4); 9549} 9550 9551/* The following function initiates variable `dfa_pre_cycle_insn'. */ 9552 9553static void 9554ia64_init_dfa_pre_cycle_insn (void) 9555{ 9556 if (temp_dfa_state == NULL) 9557 { 9558 dfa_state_size = state_size (); 9559 temp_dfa_state = xmalloc (dfa_state_size); 9560 prev_cycle_state = xmalloc (dfa_state_size); 9561 } 9562 dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ()); 9563 SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX; 9564 recog_memoized (dfa_pre_cycle_insn); 9565 dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3))); 9566 SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX; 9567 recog_memoized (dfa_stop_insn); 9568} 9569 9570/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN 9571 used by the DFA insn scheduler. */ 9572 9573static rtx 9574ia64_dfa_pre_cycle_insn (void) 9575{ 9576 return dfa_pre_cycle_insn; 9577} 9578 9579/* The following function returns TRUE if PRODUCER (of type ilog or 9580 ld) produces address for CONSUMER (of type st or stf). 
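   As a hand-written sketch (registers chosen arbitrarily):

	adds r14 = 8, r32	// producer: computes the address
	st8 [r14] = r33		// consumer: stores through it

   The bypass applies only when the produced value feeds the store's
   address operand; feeding the store's data operand instead would not
   match, which is what the reg_mentioned_p test on the MEM below
   checks.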
*/ 9581 9582int 9583ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) 9584{ 9585 rtx dest, reg, mem; 9586 9587 gcc_assert (producer && consumer); 9588 dest = ia64_single_set (producer); 9589 gcc_assert (dest); 9590 reg = SET_DEST (dest); 9591 gcc_assert (reg); 9592 if (GET_CODE (reg) == SUBREG) 9593 reg = SUBREG_REG (reg); 9594 gcc_assert (GET_CODE (reg) == REG); 9595 9596 dest = ia64_single_set (consumer); 9597 gcc_assert (dest); 9598 mem = SET_DEST (dest); 9599 gcc_assert (mem && GET_CODE (mem) == MEM); 9600 return reg_mentioned_p (reg, mem); 9601} 9602 9603/* The following function returns TRUE if PRODUCER (of type ilog or 9604 ld) produces address for CONSUMER (of type ld or fld). */ 9605 9606int 9607ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer) 9608{ 9609 rtx dest, src, reg, mem; 9610 9611 gcc_assert (producer && consumer); 9612 dest = ia64_single_set (producer); 9613 gcc_assert (dest); 9614 reg = SET_DEST (dest); 9615 gcc_assert (reg); 9616 if (GET_CODE (reg) == SUBREG) 9617 reg = SUBREG_REG (reg); 9618 gcc_assert (GET_CODE (reg) == REG); 9619 9620 src = ia64_single_set (consumer); 9621 gcc_assert (src); 9622 mem = SET_SRC (src); 9623 gcc_assert (mem); 9624 9625 if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0) 9626 mem = XVECEXP (mem, 0, 0); 9627 else if (GET_CODE (mem) == IF_THEN_ELSE) 9628 /* ??? Is this bypass necessary for ld.c? */ 9629 { 9630 gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR); 9631 mem = XEXP (mem, 1); 9632 } 9633 9634 while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND) 9635 mem = XEXP (mem, 0); 9636 9637 if (GET_CODE (mem) == UNSPEC) 9638 { 9639 int c = XINT (mem, 1); 9640 9641 gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A 9642 || c == UNSPEC_LDSA); 9643 mem = XVECEXP (mem, 0, 0); 9644 } 9645 9646 /* Note that LO_SUM is used for GOT loads. */ 9647 gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM); 9648 9649 return reg_mentioned_p (reg, mem); 9650} 9651 9652/* The following function returns TRUE if INSN produces address for a 9653 load/store insn. We will place such insns into M slot because it 9654 decreases its latency time. */ 9655 9656int 9657ia64_produce_address_p (rtx insn) 9658{ 9659 return insn->call; 9660} 9661 9662 9663/* Emit pseudo-ops for the assembler to describe predicate relations. 9664 At present this assumes that we only consider predicate pairs to 9665 be mutex, and that the assembler can deduce proper values from 9666 straight-line code. */ 9667 9668static void 9669emit_predicate_relation_info (void) 9670{ 9671 basic_block bb; 9672 9673 FOR_EACH_BB_REVERSE_FN (bb, cfun) 9674 { 9675 int r; 9676 rtx_insn *head = BB_HEAD (bb); 9677 9678 /* We only need such notes at code labels. */ 9679 if (! LABEL_P (head)) 9680 continue; 9681 if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head))) 9682 head = NEXT_INSN (head); 9683 9684 /* Skip p0, which may be thought to be live due to (reg:DI p0) 9685 grabbing the entire block of predicate registers. */ 9686 for (r = PR_REG (2); r < PR_REG (64); r += 2) 9687 if (REGNO_REG_SET_P (df_get_live_in (bb), r)) 9688 { 9689 rtx p = gen_rtx_REG (BImode, r); 9690 rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head); 9691 if (head == BB_END (bb)) 9692 BB_END (bb) = n; 9693 head = n; 9694 } 9695 } 9696 9697 /* Look for conditional calls that do not return, and protect predicate 9698 relations around them. 
Otherwise the assembler will assume the call 9699 returns, and complain about uses of call-clobbered predicates after 9700 the call. */ 9701 FOR_EACH_BB_REVERSE_FN (bb, cfun) 9702 { 9703 rtx_insn *insn = BB_HEAD (bb); 9704 9705 while (1) 9706 { 9707 if (CALL_P (insn) 9708 && GET_CODE (PATTERN (insn)) == COND_EXEC 9709 && find_reg_note (insn, REG_NORETURN, NULL_RTX)) 9710 { 9711 rtx_insn *b = 9712 emit_insn_before (gen_safe_across_calls_all (), insn); 9713 rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn); 9714 if (BB_HEAD (bb) == insn) 9715 BB_HEAD (bb) = b; 9716 if (BB_END (bb) == insn) 9717 BB_END (bb) = a; 9718 } 9719 9720 if (insn == BB_END (bb)) 9721 break; 9722 insn = NEXT_INSN (insn); 9723 } 9724 } 9725} 9726 9727/* Perform machine dependent operations on the rtl chain INSNS. */ 9728 9729static void 9730ia64_reorg (void) 9731{ 9732 /* We are freeing block_for_insn in the toplev to keep compatibility 9733 with old MDEP_REORGS that are not CFG based. Recompute it now. */ 9734 compute_bb_for_insn (); 9735 9736 /* If optimizing, we'll have split before scheduling. */ 9737 if (optimize == 0) 9738 split_all_insns (); 9739 9740 if (optimize && flag_schedule_insns_after_reload 9741 && dbg_cnt (ia64_sched2)) 9742 { 9743 basic_block bb; 9744 timevar_push (TV_SCHED2); 9745 ia64_final_schedule = 1; 9746 9747 /* We can't let modulo-sched prevent us from scheduling any bbs, 9748 since we need the final schedule to produce bundle information. */ 9749 FOR_EACH_BB_FN (bb, cfun) 9750 bb->flags &= ~BB_DISABLE_SCHEDULE; 9751 9752 initiate_bundle_states (); 9753 ia64_nop = make_insn_raw (gen_nop ()); 9754 SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX; 9755 recog_memoized (ia64_nop); 9756 clocks_length = get_max_uid () + 1; 9757 stops_p = XCNEWVEC (char, clocks_length); 9758 9759 if (ia64_tune == PROCESSOR_ITANIUM2) 9760 { 9761 pos_1 = get_cpu_unit_code ("2_1"); 9762 pos_2 = get_cpu_unit_code ("2_2"); 9763 pos_3 = get_cpu_unit_code ("2_3"); 9764 pos_4 = get_cpu_unit_code ("2_4"); 9765 pos_5 = get_cpu_unit_code ("2_5"); 9766 pos_6 = get_cpu_unit_code ("2_6"); 9767 _0mii_ = get_cpu_unit_code ("2b_0mii."); 9768 _0mmi_ = get_cpu_unit_code ("2b_0mmi."); 9769 _0mfi_ = get_cpu_unit_code ("2b_0mfi."); 9770 _0mmf_ = get_cpu_unit_code ("2b_0mmf."); 9771 _0bbb_ = get_cpu_unit_code ("2b_0bbb."); 9772 _0mbb_ = get_cpu_unit_code ("2b_0mbb."); 9773 _0mib_ = get_cpu_unit_code ("2b_0mib."); 9774 _0mmb_ = get_cpu_unit_code ("2b_0mmb."); 9775 _0mfb_ = get_cpu_unit_code ("2b_0mfb."); 9776 _0mlx_ = get_cpu_unit_code ("2b_0mlx."); 9777 _1mii_ = get_cpu_unit_code ("2b_1mii."); 9778 _1mmi_ = get_cpu_unit_code ("2b_1mmi."); 9779 _1mfi_ = get_cpu_unit_code ("2b_1mfi."); 9780 _1mmf_ = get_cpu_unit_code ("2b_1mmf."); 9781 _1bbb_ = get_cpu_unit_code ("2b_1bbb."); 9782 _1mbb_ = get_cpu_unit_code ("2b_1mbb."); 9783 _1mib_ = get_cpu_unit_code ("2b_1mib."); 9784 _1mmb_ = get_cpu_unit_code ("2b_1mmb."); 9785 _1mfb_ = get_cpu_unit_code ("2b_1mfb."); 9786 _1mlx_ = get_cpu_unit_code ("2b_1mlx."); 9787 } 9788 else 9789 { 9790 pos_1 = get_cpu_unit_code ("1_1"); 9791 pos_2 = get_cpu_unit_code ("1_2"); 9792 pos_3 = get_cpu_unit_code ("1_3"); 9793 pos_4 = get_cpu_unit_code ("1_4"); 9794 pos_5 = get_cpu_unit_code ("1_5"); 9795 pos_6 = get_cpu_unit_code ("1_6"); 9796 _0mii_ = get_cpu_unit_code ("1b_0mii."); 9797 _0mmi_ = get_cpu_unit_code ("1b_0mmi."); 9798 _0mfi_ = get_cpu_unit_code ("1b_0mfi."); 9799 _0mmf_ = get_cpu_unit_code ("1b_0mmf."); 9800 _0bbb_ = get_cpu_unit_code ("1b_0bbb."); 9801 _0mbb_ = 
get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}

/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
		 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      return 0;
    }
}

/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}

/* Return true if this goes in small data/bss.  */

/* ??? We could also support our own long data here.  Generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
   should make the code faster because there is one less load.  This also
   includes incomplete types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}

/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one in the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn ATTRIBUTE_UNUSED,
		  bool unwind, bool frame ATTRIBUTE_UNUSED)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
}

/* This function processes a SET pattern for REG_CFA_ADJUST_CFA.
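   For illustration (a sketch, not necessarily the exact output): a
   64-byte frame allocation recorded as

	(set (reg sp) (plus (reg sp) (const_int -64)))

   emits ".fframe 64"; the reverse adjustment is treated as the start
   of an epilogue and reaches process_epilogue above; and copying sp
   into the hard frame pointer emits a ".vframe" directive naming the
   frame register.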
*/ 10027 10028static void 10029process_cfa_adjust_cfa (FILE *asm_out_file, rtx pat, rtx insn, 10030 bool unwind, bool frame) 10031{ 10032 rtx dest = SET_DEST (pat); 10033 rtx src = SET_SRC (pat); 10034 10035 if (dest == stack_pointer_rtx) 10036 { 10037 if (GET_CODE (src) == PLUS) 10038 { 10039 rtx op0 = XEXP (src, 0); 10040 rtx op1 = XEXP (src, 1); 10041 10042 gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT); 10043 10044 if (INTVAL (op1) < 0) 10045 { 10046 gcc_assert (!frame_pointer_needed); 10047 if (unwind) 10048 fprintf (asm_out_file, 10049 "\t.fframe "HOST_WIDE_INT_PRINT_DEC"\n", 10050 -INTVAL (op1)); 10051 } 10052 else 10053 process_epilogue (asm_out_file, insn, unwind, frame); 10054 } 10055 else 10056 { 10057 gcc_assert (src == hard_frame_pointer_rtx); 10058 process_epilogue (asm_out_file, insn, unwind, frame); 10059 } 10060 } 10061 else if (dest == hard_frame_pointer_rtx) 10062 { 10063 gcc_assert (src == stack_pointer_rtx); 10064 gcc_assert (frame_pointer_needed); 10065 10066 if (unwind) 10067 fprintf (asm_out_file, "\t.vframe r%d\n", 10068 ia64_dbx_register_number (REGNO (dest))); 10069 } 10070 else 10071 gcc_unreachable (); 10072} 10073 10074/* This function processes a SET pattern for REG_CFA_REGISTER. */ 10075 10076static void 10077process_cfa_register (FILE *asm_out_file, rtx pat, bool unwind) 10078{ 10079 rtx dest = SET_DEST (pat); 10080 rtx src = SET_SRC (pat); 10081 int dest_regno = REGNO (dest); 10082 int src_regno; 10083 10084 if (src == pc_rtx) 10085 { 10086 /* Saving return address pointer. */ 10087 if (unwind) 10088 fprintf (asm_out_file, "\t.save rp, r%d\n", 10089 ia64_dbx_register_number (dest_regno)); 10090 return; 10091 } 10092 10093 src_regno = REGNO (src); 10094 10095 switch (src_regno) 10096 { 10097 case PR_REG (0): 10098 gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]); 10099 if (unwind) 10100 fprintf (asm_out_file, "\t.save pr, r%d\n", 10101 ia64_dbx_register_number (dest_regno)); 10102 break; 10103 10104 case AR_UNAT_REGNUM: 10105 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]); 10106 if (unwind) 10107 fprintf (asm_out_file, "\t.save ar.unat, r%d\n", 10108 ia64_dbx_register_number (dest_regno)); 10109 break; 10110 10111 case AR_LC_REGNUM: 10112 gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]); 10113 if (unwind) 10114 fprintf (asm_out_file, "\t.save ar.lc, r%d\n", 10115 ia64_dbx_register_number (dest_regno)); 10116 break; 10117 10118 default: 10119 /* Everything else should indicate being stored to memory. */ 10120 gcc_unreachable (); 10121 } 10122} 10123 10124/* This function processes a SET pattern for REG_CFA_OFFSET. 
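   For illustration (a hand-written sketch): a save of b0 recorded as

	(set (mem (plus (reg sp) (const_int 16))) (reg b0))

   would emit roughly ".savesp rp, 16", while the same save addressed
   off the hard frame pointer uses ".savepsp" with the offset negated,
   matching the two base-register cases handled below.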
*/ 10125 10126static void 10127process_cfa_offset (FILE *asm_out_file, rtx pat, bool unwind) 10128{ 10129 rtx dest = SET_DEST (pat); 10130 rtx src = SET_SRC (pat); 10131 int src_regno = REGNO (src); 10132 const char *saveop; 10133 HOST_WIDE_INT off; 10134 rtx base; 10135 10136 gcc_assert (MEM_P (dest)); 10137 if (GET_CODE (XEXP (dest, 0)) == REG) 10138 { 10139 base = XEXP (dest, 0); 10140 off = 0; 10141 } 10142 else 10143 { 10144 gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS 10145 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT); 10146 base = XEXP (XEXP (dest, 0), 0); 10147 off = INTVAL (XEXP (XEXP (dest, 0), 1)); 10148 } 10149 10150 if (base == hard_frame_pointer_rtx) 10151 { 10152 saveop = ".savepsp"; 10153 off = - off; 10154 } 10155 else 10156 { 10157 gcc_assert (base == stack_pointer_rtx); 10158 saveop = ".savesp"; 10159 } 10160 10161 src_regno = REGNO (src); 10162 switch (src_regno) 10163 { 10164 case BR_REG (0): 10165 gcc_assert (!current_frame_info.r[reg_save_b0]); 10166 if (unwind) 10167 fprintf (asm_out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n", 10168 saveop, off); 10169 break; 10170 10171 case PR_REG (0): 10172 gcc_assert (!current_frame_info.r[reg_save_pr]); 10173 if (unwind) 10174 fprintf (asm_out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n", 10175 saveop, off); 10176 break; 10177 10178 case AR_LC_REGNUM: 10179 gcc_assert (!current_frame_info.r[reg_save_ar_lc]); 10180 if (unwind) 10181 fprintf (asm_out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n", 10182 saveop, off); 10183 break; 10184 10185 case AR_PFS_REGNUM: 10186 gcc_assert (!current_frame_info.r[reg_save_ar_pfs]); 10187 if (unwind) 10188 fprintf (asm_out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n", 10189 saveop, off); 10190 break; 10191 10192 case AR_UNAT_REGNUM: 10193 gcc_assert (!current_frame_info.r[reg_save_ar_unat]); 10194 if (unwind) 10195 fprintf (asm_out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n", 10196 saveop, off); 10197 break; 10198 10199 case GR_REG (4): 10200 case GR_REG (5): 10201 case GR_REG (6): 10202 case GR_REG (7): 10203 if (unwind) 10204 fprintf (asm_out_file, "\t.save.g 0x%x\n", 10205 1 << (src_regno - GR_REG (4))); 10206 break; 10207 10208 case BR_REG (1): 10209 case BR_REG (2): 10210 case BR_REG (3): 10211 case BR_REG (4): 10212 case BR_REG (5): 10213 if (unwind) 10214 fprintf (asm_out_file, "\t.save.b 0x%x\n", 10215 1 << (src_regno - BR_REG (1))); 10216 break; 10217 10218 case FR_REG (2): 10219 case FR_REG (3): 10220 case FR_REG (4): 10221 case FR_REG (5): 10222 if (unwind) 10223 fprintf (asm_out_file, "\t.save.f 0x%x\n", 10224 1 << (src_regno - FR_REG (2))); 10225 break; 10226 10227 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): 10228 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): 10229 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): 10230 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): 10231 if (unwind) 10232 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 10233 1 << (src_regno - FR_REG (12))); 10234 break; 10235 10236 default: 10237 /* ??? For some reason we mark other general registers, even those 10238 we can't represent in the unwind info. Ignore them. */ 10239 break; 10240 } 10241} 10242 10243/* This function looks at a single insn and emits any directives 10244 required to unwind this insn. 
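   For illustration, a small function's prologue might end up annotated
   roughly as

	.prologue
	.save ar.pfs, r35
	alloc r35 = ar.pfs, 2, 3, 0, 0
	.fframe 16
	adds r12 = -16, r12
	.save rp, r34
	mov r34 = b0
	.body

   (hand-written sketch; the actual registers and offsets come from the
   frame layout computed by ia64_compute_frame_size).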
*/ 10245 10246static void 10247ia64_asm_unwind_emit (FILE *asm_out_file, rtx_insn *insn) 10248{ 10249 bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET; 10250 bool frame = dwarf2out_do_frame (); 10251 rtx note, pat; 10252 bool handled_one; 10253 10254 if (!unwind && !frame) 10255 return; 10256 10257 if (NOTE_INSN_BASIC_BLOCK_P (insn)) 10258 { 10259 last_block = NOTE_BASIC_BLOCK (insn)->next_bb 10260 == EXIT_BLOCK_PTR_FOR_FN (cfun); 10261 10262 /* Restore unwind state from immediately before the epilogue. */ 10263 if (need_copy_state) 10264 { 10265 if (unwind) 10266 { 10267 fprintf (asm_out_file, "\t.body\n"); 10268 fprintf (asm_out_file, "\t.copy_state %d\n", 10269 cfun->machine->state_num); 10270 } 10271 need_copy_state = false; 10272 } 10273 } 10274 10275 if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn)) 10276 return; 10277 10278 /* Look for the ALLOC insn. */ 10279 if (INSN_CODE (insn) == CODE_FOR_alloc) 10280 { 10281 rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0)); 10282 int dest_regno = REGNO (dest); 10283 10284 /* If this is the final destination for ar.pfs, then this must 10285 be the alloc in the prologue. */ 10286 if (dest_regno == current_frame_info.r[reg_save_ar_pfs]) 10287 { 10288 if (unwind) 10289 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", 10290 ia64_dbx_register_number (dest_regno)); 10291 } 10292 else 10293 { 10294 /* This must be an alloc before a sibcall. We must drop the 10295 old frame info. The easiest way to drop the old frame 10296 info is to ensure we had a ".restore sp" directive 10297 followed by a new prologue. If the procedure doesn't 10298 have a memory-stack frame, we'll issue a dummy ".restore 10299 sp" now. */ 10300 if (current_frame_info.total_size == 0 && !frame_pointer_needed) 10301 /* if haven't done process_epilogue() yet, do it now */ 10302 process_epilogue (asm_out_file, insn, unwind, frame); 10303 if (unwind) 10304 fprintf (asm_out_file, "\t.prologue\n"); 10305 } 10306 return; 10307 } 10308 10309 handled_one = false; 10310 for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) 10311 switch (REG_NOTE_KIND (note)) 10312 { 10313 case REG_CFA_ADJUST_CFA: 10314 pat = XEXP (note, 0); 10315 if (pat == NULL) 10316 pat = PATTERN (insn); 10317 process_cfa_adjust_cfa (asm_out_file, pat, insn, unwind, frame); 10318 handled_one = true; 10319 break; 10320 10321 case REG_CFA_OFFSET: 10322 pat = XEXP (note, 0); 10323 if (pat == NULL) 10324 pat = PATTERN (insn); 10325 process_cfa_offset (asm_out_file, pat, unwind); 10326 handled_one = true; 10327 break; 10328 10329 case REG_CFA_REGISTER: 10330 pat = XEXP (note, 0); 10331 if (pat == NULL) 10332 pat = PATTERN (insn); 10333 process_cfa_register (asm_out_file, pat, unwind); 10334 handled_one = true; 10335 break; 10336 10337 case REG_FRAME_RELATED_EXPR: 10338 case REG_CFA_DEF_CFA: 10339 case REG_CFA_EXPRESSION: 10340 case REG_CFA_RESTORE: 10341 case REG_CFA_SET_VDRAP: 10342 /* Not used in the ia64 port. */ 10343 gcc_unreachable (); 10344 10345 default: 10346 /* Not a frame-related note. */ 10347 break; 10348 } 10349 10350 /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the 10351 explicit action to take. No guessing required. */ 10352 gcc_assert (handled_one); 10353} 10354 10355/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. 
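   For a C++ unit this typically produces a line such as

	.personality	__gxx_personality_v0

   (illustrative; the PERSONALITY symbol is supplied by the language
   runtime, not by this port).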
*/ 10356 10357static void 10358ia64_asm_emit_except_personality (rtx personality) 10359{ 10360 fputs ("\t.personality\t", asm_out_file); 10361 output_addr_const (asm_out_file, personality); 10362 fputc ('\n', asm_out_file); 10363} 10364 10365/* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ 10366 10367static void 10368ia64_asm_init_sections (void) 10369{ 10370 exception_section = get_unnamed_section (0, output_section_asm_op, 10371 "\t.handlerdata"); 10372} 10373 10374/* Implement TARGET_DEBUG_UNWIND_INFO. */ 10375 10376static enum unwind_info_type 10377ia64_debug_unwind_info (void) 10378{ 10379 return UI_TARGET; 10380} 10381 10382enum ia64_builtins 10383{ 10384 IA64_BUILTIN_BSP, 10385 IA64_BUILTIN_COPYSIGNQ, 10386 IA64_BUILTIN_FABSQ, 10387 IA64_BUILTIN_FLUSHRS, 10388 IA64_BUILTIN_INFQ, 10389 IA64_BUILTIN_HUGE_VALQ, 10390 IA64_BUILTIN_max 10391}; 10392 10393static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max]; 10394 10395void 10396ia64_init_builtins (void) 10397{ 10398 tree fpreg_type; 10399 tree float80_type; 10400 tree decl; 10401 10402 /* The __fpreg type. */ 10403 fpreg_type = make_node (REAL_TYPE); 10404 TYPE_PRECISION (fpreg_type) = 82; 10405 layout_type (fpreg_type); 10406 (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg"); 10407 10408 /* The __float80 type. */ 10409 float80_type = make_node (REAL_TYPE); 10410 TYPE_PRECISION (float80_type) = 80; 10411 layout_type (float80_type); 10412 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80"); 10413 10414 /* The __float128 type. */ 10415 if (!TARGET_HPUX) 10416 { 10417 tree ftype; 10418 tree float128_type = make_node (REAL_TYPE); 10419 10420 TYPE_PRECISION (float128_type) = 128; 10421 layout_type (float128_type); 10422 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128"); 10423 10424 /* TFmode support builtins. */ 10425 ftype = build_function_type_list (float128_type, NULL_TREE); 10426 decl = add_builtin_function ("__builtin_infq", ftype, 10427 IA64_BUILTIN_INFQ, BUILT_IN_MD, 10428 NULL, NULL_TREE); 10429 ia64_builtins[IA64_BUILTIN_INFQ] = decl; 10430 10431 decl = add_builtin_function ("__builtin_huge_valq", ftype, 10432 IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD, 10433 NULL, NULL_TREE); 10434 ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl; 10435 10436 ftype = build_function_type_list (float128_type, 10437 float128_type, 10438 NULL_TREE); 10439 decl = add_builtin_function ("__builtin_fabsq", ftype, 10440 IA64_BUILTIN_FABSQ, BUILT_IN_MD, 10441 "__fabstf2", NULL_TREE); 10442 TREE_READONLY (decl) = 1; 10443 ia64_builtins[IA64_BUILTIN_FABSQ] = decl; 10444 10445 ftype = build_function_type_list (float128_type, 10446 float128_type, 10447 float128_type, 10448 NULL_TREE); 10449 decl = add_builtin_function ("__builtin_copysignq", ftype, 10450 IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD, 10451 "__copysigntf3", NULL_TREE); 10452 TREE_READONLY (decl) = 1; 10453 ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl; 10454 } 10455 else 10456 /* Under HPUX, this is a synonym for "long double". */ 10457 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 10458 "__float128"); 10459 10460 /* Fwrite on VMS is non-standard. 
 */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}

rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* Return the ia64 builtin for CODE.  */

static tree
ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IA64_BUILTIN_max)
    return error_mark_node;

  return ia64_builtins[code];
}

/* On HP-UX IA64, aggregate parameters are stored in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}

/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.
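   For illustration (a sketch): a referenced external function `bar'
   may need a directive along the lines of

	.global bar#

   or an accompanying visibility marker; the TARGET_GNU_AS and
   TARGET_HPUX_LD checks below decide which, if any, is emitted.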
*/ 10574 10575void 10576ia64_asm_output_external (FILE *file, tree decl, const char *name) 10577{ 10578 /* We output the name if and only if TREE_SYMBOL_REFERENCED is 10579 set in order to avoid putting out names that are never really 10580 used. */ 10581 if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))) 10582 { 10583 /* maybe_assemble_visibility will return 1 if the assembler 10584 visibility directive is output. */ 10585 int need_visibility = ((*targetm.binds_local_p) (decl) 10586 && maybe_assemble_visibility (decl)); 10587 10588 /* GNU as does not need anything here, but the HP linker does 10589 need something for external functions. */ 10590 if ((TARGET_HPUX_LD || !TARGET_GNU_AS) 10591 && TREE_CODE (decl) == FUNCTION_DECL) 10592 (*targetm.asm_out.globalize_decl_name) (file, decl); 10593 else if (need_visibility && !TARGET_GNU_AS) 10594 (*targetm.asm_out.globalize_label) (file, name); 10595 } 10596} 10597 10598/* Set SImode div/mod functions, init_integral_libfuncs only initializes 10599 modes of word_mode and larger. Rename the TFmode libfuncs using the 10600 HPUX conventions. __divtf3 is used for XFmode. We need to keep it for 10601 backward compatibility. */ 10602 10603static void 10604ia64_init_libfuncs (void) 10605{ 10606 set_optab_libfunc (sdiv_optab, SImode, "__divsi3"); 10607 set_optab_libfunc (udiv_optab, SImode, "__udivsi3"); 10608 set_optab_libfunc (smod_optab, SImode, "__modsi3"); 10609 set_optab_libfunc (umod_optab, SImode, "__umodsi3"); 10610 10611 set_optab_libfunc (add_optab, TFmode, "_U_Qfadd"); 10612 set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub"); 10613 set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy"); 10614 set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv"); 10615 set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg"); 10616 10617 set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad"); 10618 set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad"); 10619 set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad"); 10620 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl"); 10621 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl"); 10622 set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80"); 10623 10624 set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl"); 10625 set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl"); 10626 set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad"); 10627 set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl"); 10628 set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl"); 10629 10630 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad"); 10631 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad"); 10632 set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad"); 10633 /* HP-UX 11.23 libc does not have a function for unsigned 10634 SImode-to-TFmode conversion. */ 10635 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad"); 10636} 10637 10638/* Rename all the TFmode libfuncs using the HPUX conventions. */ 10639 10640static void 10641ia64_hpux_init_libfuncs (void) 10642{ 10643 ia64_init_libfuncs (); 10644 10645 /* The HP SI millicode division and mod functions expect DI arguments. 10646 By turning them off completely we avoid using both libgcc and the 10647 non-standard millicode routines and use the HP DI millicode routines 10648 instead. 
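   For illustration (a sketch of the effect): a 32-bit division such as

	int f (int a, int b) { return a / b; }

   finds no SImode libfunc once the entries below are cleared, so the
   middle end widens the operation and ends up calling __milli_divI
   rather than the libgcc __divsi3.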
*/ 10649 10650 set_optab_libfunc (sdiv_optab, SImode, 0); 10651 set_optab_libfunc (udiv_optab, SImode, 0); 10652 set_optab_libfunc (smod_optab, SImode, 0); 10653 set_optab_libfunc (umod_optab, SImode, 0); 10654 10655 set_optab_libfunc (sdiv_optab, DImode, "__milli_divI"); 10656 set_optab_libfunc (udiv_optab, DImode, "__milli_divU"); 10657 set_optab_libfunc (smod_optab, DImode, "__milli_remI"); 10658 set_optab_libfunc (umod_optab, DImode, "__milli_remU"); 10659 10660 /* HP-UX libc has TF min/max/abs routines in it. */ 10661 set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin"); 10662 set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax"); 10663 set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs"); 10664 10665 /* ia64_expand_compare uses this. */ 10666 cmptf_libfunc = init_one_libfunc ("_U_Qfcmp"); 10667 10668 /* These should never be used. */ 10669 set_optab_libfunc (eq_optab, TFmode, 0); 10670 set_optab_libfunc (ne_optab, TFmode, 0); 10671 set_optab_libfunc (gt_optab, TFmode, 0); 10672 set_optab_libfunc (ge_optab, TFmode, 0); 10673 set_optab_libfunc (lt_optab, TFmode, 0); 10674 set_optab_libfunc (le_optab, TFmode, 0); 10675} 10676 10677/* Rename the division and modulus functions in VMS. */ 10678 10679static void 10680ia64_vms_init_libfuncs (void) 10681{ 10682 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I"); 10683 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L"); 10684 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI"); 10685 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL"); 10686 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I"); 10687 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L"); 10688 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI"); 10689 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL"); 10690 abort_libfunc = init_one_libfunc ("decc$abort"); 10691 memcmp_libfunc = init_one_libfunc ("decc$memcmp"); 10692#ifdef MEM_LIBFUNCS_INIT 10693 MEM_LIBFUNCS_INIT; 10694#endif 10695} 10696 10697/* Rename the TFmode libfuncs available from soft-fp in glibc using 10698 the HPUX conventions. */ 10699 10700static void 10701ia64_sysv4_init_libfuncs (void) 10702{ 10703 ia64_init_libfuncs (); 10704 10705 /* These functions are not part of the HPUX TFmode interface. We 10706 use them instead of _U_Qfcmp, which doesn't work the way we 10707 expect. */ 10708 set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq"); 10709 set_optab_libfunc (ne_optab, TFmode, "_U_Qfne"); 10710 set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt"); 10711 set_optab_libfunc (ge_optab, TFmode, "_U_Qfge"); 10712 set_optab_libfunc (lt_optab, TFmode, "_U_Qflt"); 10713 set_optab_libfunc (le_optab, TFmode, "_U_Qfle"); 10714 10715 /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in 10716 glibc doesn't have them. */ 10717} 10718 10719/* Use soft-fp. */ 10720 10721static void 10722ia64_soft_fp_init_libfuncs (void) 10723{ 10724} 10725 10726static bool 10727ia64_vms_valid_pointer_mode (machine_mode mode) 10728{ 10729 return (mode == SImode || mode == DImode); 10730} 10731 10732/* For HPUX, it is illegal to have relocations in shared segments. */ 10733 10734static int 10735ia64_hpux_reloc_rw_mask (void) 10736{ 10737 return 3; 10738} 10739 10740/* For others, relax this so that relocations to local data goes in 10741 read-only segments, but we still cannot allow global relocations 10742 in read-only segments. */ 10743 10744static int 10745ia64_reloc_rw_mask (void) 10746{ 10747 return flag_pic ? 3 : 2; 10748} 10749 10750/* Return the section to use for X. The only special thing we do here 10751 is to honor small data. 
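   For illustration (hand-written sketch): an 8-byte constant forced to
   memory would be placed along the lines of

	.sdata
	.align 8
	.LC0:	data8 0x3ff0000000000000

   whenever GET_MODE_SIZE (mode) fits ia64_section_threshold, so that
   it can be addressed gp-relative; larger constants fall back to the
   default ELF constant pool handling.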
 */

static section *
ia64_select_rtx_section (machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and that the address of that type should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && lang_GNU_CXX ());
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.
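     For illustration (sketch only): a DELTA of 16 typically becomes a
     single

	adds r32 = 16, r32

     when it satisfies the 14-bit immediate constraint checked below,
     and otherwise goes through a scratch register first.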
*/ 10850 delta_rtx = GEN_INT (delta); 10851 if (TARGET_ILP32) 10852 { 10853 rtx tmp = gen_rtx_REG (ptr_mode, this_regno); 10854 REG_POINTER (tmp) = 1; 10855 if (delta && satisfies_constraint_I (delta_rtx)) 10856 { 10857 emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx)); 10858 delta = 0; 10859 } 10860 else 10861 emit_insn (gen_ptr_extend (this_rtx, tmp)); 10862 } 10863 if (delta) 10864 { 10865 if (!satisfies_constraint_I (delta_rtx)) 10866 { 10867 rtx tmp = gen_rtx_REG (Pmode, 2); 10868 emit_move_insn (tmp, delta_rtx); 10869 delta_rtx = tmp; 10870 } 10871 emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx)); 10872 } 10873 10874 /* Apply the offset from the vtable, if required. */ 10875 if (vcall_offset) 10876 { 10877 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 10878 rtx tmp = gen_rtx_REG (Pmode, 2); 10879 10880 if (TARGET_ILP32) 10881 { 10882 rtx t = gen_rtx_REG (ptr_mode, 2); 10883 REG_POINTER (t) = 1; 10884 emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx)); 10885 if (satisfies_constraint_I (vcall_offset_rtx)) 10886 { 10887 emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx)); 10888 vcall_offset = 0; 10889 } 10890 else 10891 emit_insn (gen_ptr_extend (tmp, t)); 10892 } 10893 else 10894 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); 10895 10896 if (vcall_offset) 10897 { 10898 if (!satisfies_constraint_J (vcall_offset_rtx)) 10899 { 10900 rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ()); 10901 emit_move_insn (tmp2, vcall_offset_rtx); 10902 vcall_offset_rtx = tmp2; 10903 } 10904 emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx)); 10905 } 10906 10907 if (TARGET_ILP32) 10908 emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp))); 10909 else 10910 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp)); 10911 10912 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); 10913 } 10914 10915 /* Generate a tail call to the target function. */ 10916 if (! TREE_USED (function)) 10917 { 10918 assemble_external (function); 10919 TREE_USED (function) = 1; 10920 } 10921 funexp = XEXP (DECL_RTL (function), 0); 10922 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 10923 ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1); 10924 insn = get_last_insn (); 10925 SIBLING_CALL_P (insn) = 1; 10926 10927 /* Code generation for calls relies on splitting. */ 10928 reload_completed = 1; 10929 epilogue_completed = 1; 10930 try_split (PATTERN (insn), insn, 0); 10931 10932 emit_barrier (); 10933 10934 /* Run just enough of rest_of_compilation to get the insns emitted. 10935 There's not really enough bulk here to make other passes such as 10936 instruction scheduling worth while. Note that use_thunk calls 10937 assemble_start_function and assemble_end_function. */ 10938 10939 emit_all_insn_group_barriers (NULL); 10940 insn = get_insns (); 10941 shorten_branches (insn); 10942 final_start_function (insn, file, 1); 10943 final (insn, file, 1); 10944 final_end_function (); 10945 10946 reload_completed = 0; 10947 epilogue_completed = 0; 10948} 10949 10950/* Worker function for TARGET_STRUCT_VALUE_RTX. 
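   For illustration: for a plain C function

	struct S { long x[4]; } f (void);

   the aggregate return address is passed in r8, so this hook returns
   that REG; for C++ return types with nontrivial copy semantics (see
   ia64_struct_retval_addr_is_first_parm_p above), and on VMS, it
   returns NULL_RTX and the address travels in out0 as an ordinary
   first argument.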
*/ 10951 10952static rtx 10953ia64_struct_value_rtx (tree fntype, 10954 int incoming ATTRIBUTE_UNUSED) 10955{ 10956 if (TARGET_ABI_OPEN_VMS || 10957 (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype))) 10958 return NULL_RTX; 10959 return gen_rtx_REG (Pmode, GR_REG (8)); 10960} 10961 10962static bool 10963ia64_scalar_mode_supported_p (machine_mode mode) 10964{ 10965 switch (mode) 10966 { 10967 case QImode: 10968 case HImode: 10969 case SImode: 10970 case DImode: 10971 case TImode: 10972 return true; 10973 10974 case SFmode: 10975 case DFmode: 10976 case XFmode: 10977 case RFmode: 10978 return true; 10979 10980 case TFmode: 10981 return true; 10982 10983 default: 10984 return false; 10985 } 10986} 10987 10988static bool 10989ia64_vector_mode_supported_p (machine_mode mode) 10990{ 10991 switch (mode) 10992 { 10993 case V8QImode: 10994 case V4HImode: 10995 case V2SImode: 10996 return true; 10997 10998 case V2SFmode: 10999 return true; 11000 11001 default: 11002 return false; 11003 } 11004} 11005 11006/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P. */ 11007 11008static bool 11009ia64_libgcc_floating_mode_supported_p (machine_mode mode) 11010{ 11011 switch (mode) 11012 { 11013 case SFmode: 11014 case DFmode: 11015 return true; 11016 11017 case XFmode: 11018#ifdef IA64_NO_LIBGCC_XFMODE 11019 return false; 11020#else 11021 return true; 11022#endif 11023 11024 case TFmode: 11025#ifdef IA64_NO_LIBGCC_TFMODE 11026 return false; 11027#else 11028 return true; 11029#endif 11030 11031 default: 11032 return false; 11033 } 11034} 11035 11036/* Implement the FUNCTION_PROFILER macro. */ 11037 11038void 11039ia64_output_function_profiler (FILE *file, int labelno) 11040{ 11041 bool indirect_call; 11042 11043 /* If the function needs a static chain and the static chain 11044 register is r15, we use an indirect call so as to bypass 11045 the PLT stub in case the executable is dynamically linked, 11046 because the stub clobbers r15 as per 5.3.6 of the psABI. 11047 We don't need to do that in non canonical PIC mode. 
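   For illustration, the direct (non-indirect) case below emits roughly

	alloc out0 = ar.pfs, 8, 0, 4, 0
	mov out2 = b0
	mov out1 = r1
	br.call.sptk.many b0 = _mcount ;;

   (hand-written sketch; the real sequence also sets up out3 with the
   counter label and carries the unwind annotations).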
*/ 11048 11049 if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC) 11050 { 11051 gcc_assert (STATIC_CHAIN_REGNUM == 15); 11052 indirect_call = true; 11053 } 11054 else 11055 indirect_call = false; 11056 11057 if (TARGET_GNU_AS) 11058 fputs ("\t.prologue 4, r40\n", file); 11059 else 11060 fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file); 11061 fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file); 11062 11063 if (NO_PROFILE_COUNTERS) 11064 fputs ("\tmov out3 = r0\n", file); 11065 else 11066 { 11067 char buf[20]; 11068 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 11069 11070 if (TARGET_AUTO_PIC) 11071 fputs ("\tmovl out3 = @gprel(", file); 11072 else 11073 fputs ("\taddl out3 = @ltoff(", file); 11074 assemble_name (file, buf); 11075 if (TARGET_AUTO_PIC) 11076 fputs (")\n", file); 11077 else 11078 fputs ("), r1\n", file); 11079 } 11080 11081 if (indirect_call) 11082 fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file); 11083 fputs ("\t;;\n", file); 11084 11085 fputs ("\t.save rp, r42\n", file); 11086 fputs ("\tmov out2 = b0\n", file); 11087 if (indirect_call) 11088 fputs ("\tld8 r14 = [r14]\n\t;;\n", file); 11089 fputs ("\t.body\n", file); 11090 fputs ("\tmov out1 = r1\n", file); 11091 if (indirect_call) 11092 { 11093 fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file); 11094 fputs ("\tmov b6 = r16\n", file); 11095 fputs ("\tld8 r1 = [r14]\n", file); 11096 fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file); 11097 } 11098 else 11099 fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file); 11100} 11101 11102static GTY(()) rtx mcount_func_rtx; 11103static rtx 11104gen_mcount_func_rtx (void) 11105{ 11106 if (!mcount_func_rtx) 11107 mcount_func_rtx = init_one_libfunc ("_mcount"); 11108 return mcount_func_rtx; 11109} 11110 11111void 11112ia64_profile_hook (int labelno) 11113{ 11114 rtx label, ip; 11115 11116 if (NO_PROFILE_COUNTERS) 11117 label = const0_rtx; 11118 else 11119 { 11120 char buf[30]; 11121 const char *label_name; 11122 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 11123 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); 11124 label = gen_rtx_SYMBOL_REF (Pmode, label_name); 11125 SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL; 11126 } 11127 ip = gen_reg_rtx (Pmode); 11128 emit_insn (gen_ip_value (ip)); 11129 emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL, 11130 VOIDmode, 3, 11131 gen_rtx_REG (Pmode, BR_REG (0)), Pmode, 11132 ip, Pmode, 11133 label, Pmode); 11134} 11135 11136/* Return the mangling of TYPE if it is an extended fundamental type. */ 11137 11138static const char * 11139ia64_mangle_type (const_tree type) 11140{ 11141 type = TYPE_MAIN_VARIANT (type); 11142 11143 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 11144 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 11145 return NULL; 11146 11147 /* On HP-UX, "long double" is mangled as "e" so __float128 is 11148 mangled as "e". */ 11149 if (!TARGET_HPUX && TYPE_MODE (type) == TFmode) 11150 return "g"; 11151 /* On HP-UX, "e" is not available as a mangling of __float80 so use 11152 an extended mangling. Elsewhere, "e" is available since long 11153 double is 80 bits. */ 11154 if (TYPE_MODE (type) == XFmode) 11155 return TARGET_HPUX ? "u9__float80" : "e"; 11156 if (TYPE_MODE (type) == RFmode) 11157 return "u7__fpreg"; 11158 return NULL; 11159} 11160 11161/* Return the diagnostic message string if conversion from FROMTYPE to 11162 TOTYPE is not allowed, NULL otherwise. 
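   For illustration of the user-visible effect of this hook and the two
   that follow it:

	__fpreg f;
	double d = f;		// rejected: nontrivial conversion
	__fpreg *p = &f;	// accepted: unary & is allowed

   (sketch; the exact diagnostics are the N_() strings below).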
*/ 11163static const char * 11164ia64_invalid_conversion (const_tree fromtype, const_tree totype) 11165{ 11166 /* Reject nontrivial conversion to or from __fpreg. */ 11167 if (TYPE_MODE (fromtype) == RFmode 11168 && TYPE_MODE (totype) != RFmode 11169 && TYPE_MODE (totype) != VOIDmode) 11170 return N_("invalid conversion from %<__fpreg%>"); 11171 if (TYPE_MODE (totype) == RFmode 11172 && TYPE_MODE (fromtype) != RFmode) 11173 return N_("invalid conversion to %<__fpreg%>"); 11174 return NULL; 11175} 11176 11177/* Return the diagnostic message string if the unary operation OP is 11178 not permitted on TYPE, NULL otherwise. */ 11179static const char * 11180ia64_invalid_unary_op (int op, const_tree type) 11181{ 11182 /* Reject operations on __fpreg other than unary + or &. */ 11183 if (TYPE_MODE (type) == RFmode 11184 && op != CONVERT_EXPR 11185 && op != ADDR_EXPR) 11186 return N_("invalid operation on %<__fpreg%>"); 11187 return NULL; 11188} 11189 11190/* Return the diagnostic message string if the binary operation OP is 11191 not permitted on TYPE1 and TYPE2, NULL otherwise. */ 11192static const char * 11193ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2) 11194{ 11195 /* Reject operations on __fpreg. */ 11196 if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode) 11197 return N_("invalid operation on %<__fpreg%>"); 11198 return NULL; 11199} 11200 11201/* HP-UX version_id attribute. 11202 For object foo, if the version_id is set to 1234 put out an alias 11203 of '.alias foo "foo{1234}" We can't use "foo{1234}" in anything 11204 other than an alias statement because it is an illegal symbol name. */ 11205 11206static tree 11207ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED, 11208 tree name ATTRIBUTE_UNUSED, 11209 tree args, 11210 int flags ATTRIBUTE_UNUSED, 11211 bool *no_add_attrs) 11212{ 11213 tree arg = TREE_VALUE (args); 11214 11215 if (TREE_CODE (arg) != STRING_CST) 11216 { 11217 error("version attribute is not a string"); 11218 *no_add_attrs = true; 11219 return NULL_TREE; 11220 } 11221 return NULL_TREE; 11222} 11223 11224/* Target hook for c_mode_for_suffix. */ 11225 11226static machine_mode 11227ia64_c_mode_for_suffix (char suffix) 11228{ 11229 if (suffix == 'q') 11230 return TFmode; 11231 if (suffix == 'w') 11232 return XFmode; 11233 11234 return VOIDmode; 11235} 11236 11237static GTY(()) rtx ia64_dconst_0_5_rtx; 11238 11239rtx 11240ia64_dconst_0_5 (void) 11241{ 11242 if (! ia64_dconst_0_5_rtx) 11243 { 11244 REAL_VALUE_TYPE rv; 11245 real_from_string (&rv, "0.5"); 11246 ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode); 11247 } 11248 return ia64_dconst_0_5_rtx; 11249} 11250 11251static GTY(()) rtx ia64_dconst_0_375_rtx; 11252 11253rtx 11254ia64_dconst_0_375 (void) 11255{ 11256 if (! ia64_dconst_0_375_rtx) 11257 { 11258 REAL_VALUE_TYPE rv; 11259 real_from_string (&rv, "0.375"); 11260 ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode); 11261 } 11262 return ia64_dconst_0_375_rtx; 11263} 11264 11265static machine_mode 11266ia64_get_reg_raw_mode (int regno) 11267{ 11268 if (FR_REGNO_P (regno)) 11269 return XFmode; 11270 return default_get_reg_raw_mode(regno); 11271} 11272 11273/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. ??? Might not be needed 11274 anymore. */ 11275 11276bool 11277ia64_member_type_forces_blk (const_tree, machine_mode mode) 11278{ 11279 return TARGET_HPUX && mode == TFmode; 11280} 11281 11282/* Always default to .text section until HP-UX linker is fixed. 
*/ 11283 11284ATTRIBUTE_UNUSED static section * 11285ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED, 11286 enum node_frequency freq ATTRIBUTE_UNUSED, 11287 bool startup ATTRIBUTE_UNUSED, 11288 bool exit ATTRIBUTE_UNUSED) 11289{ 11290 return NULL; 11291} 11292 11293/* Construct (set target (vec_select op0 (parallel perm))) and 11294 return true if that's a valid instruction in the active ISA. */ 11295 11296static bool 11297expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) 11298{ 11299 rtx rperm[MAX_VECT_LEN], x; 11300 unsigned i; 11301 11302 for (i = 0; i < nelt; ++i) 11303 rperm[i] = GEN_INT (perm[i]); 11304 11305 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); 11306 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); 11307 x = gen_rtx_SET (VOIDmode, target, x); 11308 11309 rtx_insn *insn = emit_insn (x); 11310 if (recog_memoized (insn) < 0) 11311 { 11312 remove_insn (insn); 11313 return false; 11314 } 11315 return true; 11316} 11317 11318/* Similar, but generate a vec_concat from op0 and op1 as well. */ 11319 11320static bool 11321expand_vselect_vconcat (rtx target, rtx op0, rtx op1, 11322 const unsigned char *perm, unsigned nelt) 11323{ 11324 machine_mode v2mode; 11325 rtx x; 11326 11327 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0)); 11328 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); 11329 return expand_vselect (target, x, perm, nelt); 11330} 11331 11332/* Try to expand a no-op permutation. */ 11333 11334static bool 11335expand_vec_perm_identity (struct expand_vec_perm_d *d) 11336{ 11337 unsigned i, nelt = d->nelt; 11338 11339 for (i = 0; i < nelt; ++i) 11340 if (d->perm[i] != i) 11341 return false; 11342 11343 if (!d->testing_p) 11344 emit_move_insn (d->target, d->op0); 11345 11346 return true; 11347} 11348 11349/* Try to expand D via a shrp instruction. */ 11350 11351static bool 11352expand_vec_perm_shrp (struct expand_vec_perm_d *d) 11353{ 11354 unsigned i, nelt = d->nelt, shift, mask; 11355 rtx tmp, hi, lo; 11356 11357 /* ??? Don't force V2SFmode into the integer registers. */ 11358 if (d->vmode == V2SFmode) 11359 return false; 11360 11361 mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1); 11362 11363 shift = d->perm[0]; 11364 if (BYTES_BIG_ENDIAN && shift > nelt) 11365 return false; 11366 11367 for (i = 1; i < nelt; ++i) 11368 if (d->perm[i] != ((shift + i) & mask)) 11369 return false; 11370 11371 if (d->testing_p) 11372 return true; 11373 11374 hi = shift < nelt ? d->op1 : d->op0; 11375 lo = shift < nelt ? d->op0 : d->op1; 11376 11377 shift %= nelt; 11378 11379 shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT; 11380 11381 /* We've eliminated the shift 0 case via expand_vec_perm_identity. */ 11382 gcc_assert (IN_RANGE (shift, 1, 63)); 11383 11384 /* Recall that big-endian elements are numbered starting at the top of 11385 the register. Ideally we'd have a shift-left-pair. But since we 11386 don't, convert to a shift the other direction. */ 11387 if (BYTES_BIG_ENDIAN) 11388 shift = 64 - shift; 11389 11390 tmp = gen_reg_rtx (DImode); 11391 hi = gen_lowpart (DImode, hi); 11392 lo = gen_lowpart (DImode, lo); 11393 emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift))); 11394 11395 emit_move_insn (d->target, gen_lowpart (d->vmode, tmp)); 11396 return true; 11397} 11398 11399/* Try to instantiate D in a single instruction. */ 11400 11401static bool 11402expand_vec_perm_1 (struct expand_vec_perm_d *d) 11403{ 11404 unsigned i, nelt = d->nelt; 11405 unsigned char perm2[MAX_VECT_LEN]; 11406 11407 /* Try single-operand selections. 
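     (For illustration: a one-operand V4HI reversal, perm {3,2,1,0},
     can be matched here as a single vec_select, whereas an interleave
     such as {0,4,1,5} draws on both operands and is handled by the
     vec_concat path tried next; sketch only.)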
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two-operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave-style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}

/* Pattern match broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case V2SImode:
    case V2SFmode:
      /* Implementable by interleave.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
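/* Example (a sketch; the mnemonics are for illustration and not taken
   from the original source): broadcasting element 2 of a little-endian
   V8QImode vector extracts bits [16,24) of the source into a scratch
   register and then replicates that byte, roughly

     extr.u tmp = src, 16, 8
     mux1   dst = tmp, @brcst
 */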
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two-vector permutation into a single-vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine where the elements come from.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int) nelt) /* shrp */
    {
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
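/* Worked example (illustration, derived from the code above): for
   V4HImode and perm = { 0 2 4 6 } (the even elements of both
   operands), contents == 0x55, so the "mix even elements" case
   applies.  The merge step uses dremap.perm = { 0 4 2 6 }, and the
   final single-operand shuffle on the merged vector becomes
   dfinal.perm = { 0 2 1 3 }.  */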
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (VOIDmode, t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (VOIDmode, t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (VOIDmode, d->target, x));

  return true;
}

/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  if (expand_vec_perm_1 (d))
    return true;
  if (expand_vec_perm_broadcast (d))
    return true;
  if (expand_vec_perm_interleave_2 (d))
    return true;
  if (expand_vec_perm_v4hi_5 (d))
    return true;
  return false;
}

bool
ia64_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
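/* Example (a sketch, not from the original source): for V2SImode with
   sel = { 1, 2 }, element 1 comes from op0 and element 2 (element 0 of
   op1) from op1, so which == 3.  If op0 and op1 are the same register,
   the mask is first folded to { 1, 0 } over a single operand; only if
   that fails is the two-operand form retried.  */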
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements come from the second vector, fold them into the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ia64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}

#include "gt-ia64.h"