/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
   2009, 2010
   Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "hashtab.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "gimple.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Which CPU we are scheduling for.  */
enum processor_type ia64_tune = PROCESSOR_ITANIUM2;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int ia64_flag_var_tracking;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;

enum ia64_frame_regs
{
  reg_fp,
  reg_save_b0,
  reg_save_pr,
  reg_save_ar_pfs,
  reg_save_ar_unat,
  reg_save_ar_lc,
  reg_save_gp,
  number_of_ia64_frame_regs
};

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;       /* size of the stack frame, not including
                                     the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;    /* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;       /* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size; /* size of spill area for others.  */
  HARD_REG_SET mask;              /* mask of saved registers.  */
  unsigned int gr_used_mask;      /* mask of registers in use as gr spill
                                     registers or long-term scratches.  */
  int n_spilled;                  /* number of spilled registers.  */
  int r[number_of_ia64_frame_regs]; /* Frame related registers.  */
  int n_input_regs;               /* number of input registers used.  */
  int n_local_regs;               /* number of local registers used.  */
  int n_output_regs;              /* number of output registers used.  */
  int n_rotate_regs;              /* number of rotating registers used.  */

  char need_regstk;               /* true if a .regstk directive needed.  */
  char initialized;               /* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted.  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];

static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx, rtx);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx);
static bool ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx);
static int ia64_dfa_new_cycle (FILE *, int, rtx, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (enum machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
static bool ia64_needs_block_p (int);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

static bool ia64_can_eliminate (const int, const int);
static enum machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                         tree, int *, int);
static int ia64_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                   tree, bool);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static bool ia64_rtx_costs (rtx, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static bool ia64_handle_option (size_t, const char *, int);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
static int process_set (FILE *, rtx, rtx, bool, bool);

static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);

static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx, int, rtx, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int ia64_variable_issue (FILE *, int, rtx, int);

static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static unsigned bundle_state_hash (const void *);
static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx, int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx get_next_important_insn (rtx, rtx);
static bool important_for_bundling_p (rtx);
static void bundling (FILE *, int, rtx, rtx);

static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
                                  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (enum machine_mode, rtx,
                                         unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (enum machine_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (enum machine_mode mode);
static bool ia64_vector_mode_supported_p (enum machine_mode mode);
static bool ia64_cannot_force_const_mem (rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static enum machine_mode ia64_c_mode_for_suffix (char);
static enum machine_mode ia64_promote_function_mode (const_tree,
                                                     enum machine_mode,
                                                     int *,
                                                     const_tree,
                                                     int);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { "model",           1, 1, true, false, false, ia64_handle_model_attribute },
#if TARGET_ABI_OPEN_VMS
  { "common_object",   1, 1, true, false, false,
    ia64_vms_common_object_attribute },
#endif
  { "version_id",      1, 1, true, false, false,
    ia64_handle_version_id_attribute },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p

#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD_SPEC\
  ia64_first_cycle_multipass_dfa_lookahead_guard_spec

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ia64_promote_function_mode

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT process_for_unwind_directive

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
   in an order different from the specified program order.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ia64_handle_option

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

struct gcc_target targetm = TARGET_INITIALIZER;

typedef enum
  {
    ADDR_AREA_NORMAL,   /* normal address area */
    ADDR_AREA_SMALL     /* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;

static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

/* Lazily create the identifiers recognized by the "model" attribute.  */

static void
init_idents (void)
{
  if (small_ident1 == 0)
    {
      small_ident1 = get_identifier ("small");
      small_ident2 = get_identifier ("__small__");
    }
}

/* Retrieve the address area that has been chosen for the given decl.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr;

  model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  if (model_attr)
    {
      tree id;

      init_idents ();
      id = TREE_VALUE (TREE_VALUE (model_attr));
      if (id == small_ident1 || id == small_ident2)
        return ADDR_AREA_SMALL;
    }
  return ADDR_AREA_NORMAL;
}

/* Handle a "model" attribute: validate its argument and diagnose uses
   on which the attribute cannot take effect.  */

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  ia64_addr_area addr_area = ADDR_AREA_NORMAL;
  ia64_addr_area area;
  tree arg, decl = *node;

  init_idents ();
  arg = TREE_VALUE (args);
  if (arg == small_ident1 || arg == small_ident2)
    {
      addr_area = ADDR_AREA_SMALL;
    }
  else
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
               name);
      *no_add_attrs = true;
    }

  switch (TREE_CODE (decl))
    {
    case VAR_DECL:
      if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
           == FUNCTION_DECL)
          && !TREE_STATIC (decl))
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "an address area attribute cannot be specified for "
                    "local variables");
          *no_add_attrs = true;
        }
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
        {
          error ("address area of %q+D conflicts with previous "
                 "declaration", decl);
          *no_add_attrs = true;
        }
      break;

    case FUNCTION_DECL:
      error_at (DECL_SOURCE_LOCATION (decl),
                "address area attribute cannot be specified for "
                "functions");
      *no_add_attrs = true;
      break;

    default:
      warning (OPT_Wattributes, "%qE attribute ignored",
               name);
      *no_add_attrs = true;
      break;
    }

  return NULL_TREE;
}

/* The section must have global and overlaid attributes.  */
#define SECTION_VMS_OVERLAY SECTION_MACH_DEP

/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree decl = *node;
  tree id, val;
  if (! DECL_P (decl))
    abort ();

  DECL_COMMON (decl) = 1;
  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE)
    val = build_string (IDENTIFIER_LENGTH (id), IDENTIFIER_POINTER (id));
  else if (TREE_CODE (id) == STRING_CST)
    val = id;
  else
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  DECL_SECTION_NAME (decl) = val;
  return NULL_TREE;
}

/* Part of the low level implementation of DEC Ada pragma Common_Object.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
                                     unsigned HOST_WIDE_INT size,
                                     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  /* Since the common_object attribute sets DECL_SECTION_NAME, check it
     before looking up the attribute.  */
  if (DECL_SECTION_NAME (decl) && attr)
    attr = lookup_attribute ("common_object", attr);
  else
    attr = NULL_TREE;

  if (!attr)
    {
      /* Code from elfos.h.  */
      fprintf (file, "%s", COMMON_ASM_OP);
      assemble_name (file, name);
      fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
               size, align / BITS_PER_UNIT);
    }
  else
    {
      ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
      ASM_OUTPUT_LABEL (file, name);
      ASM_OUTPUT_SKIP (file, size ? size : 1);
    }
}

/* Definition of TARGET_ASM_NAMED_SECTION for VMS.  */

void
ia64_vms_elf_asm_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  if (!(flags & SECTION_VMS_OVERLAY))
    {
      default_elf_asm_named_section (name, flags, decl);
      return;
    }
  if (flags != (SECTION_VMS_OVERLAY | SECTION_WRITE))
    abort ();

  if (flags & SECTION_DECLARED)
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  fprintf (asm_out_file, "\t.section\t%s,\"awgO\"\n", name);
}

/* Record the address area chosen for DECL in the flags of SYMBOL.  */

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags;

  flags = SYMBOL_REF_FLAGS (symbol);
  switch (ia64_get_addr_area (decl))
    {
    case ADDR_AREA_NORMAL: break;
    case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    default: gcc_unreachable ();
    }
  SYMBOL_REF_FLAGS (symbol) = flags;
}

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, one that is either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return satisfies_constraint_G (src);
}

/* Return 1 if the operands are ok for a floating point load pair.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  if (GET_CODE (dst) != REG || !FP_REGNO_P (REGNO (dst)))
    return 0;
  if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    return 0;
  switch (GET_CODE (XEXP (src, 0)))
    {
    case REG:
    case POST_INC:
      break;
    case POST_DEC:
      return 0;
    case POST_MODIFY:
      {
        rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);

        if (GET_CODE (adjust) != CONST_INT
            || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
          return 0;
      }
      break;
    default:
      abort ();
    }
  return 1;
}

/* Return nonzero if exactly one of OP1 and OP2 satisfies
   basereg_operand.  */

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  return (basereg_operand (op1, GET_MODE(op1)) !=
          basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */
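/* For instance, a mask of 0x3f00 with a shift count of 8 reduces to
   0x3f after the shift, and exact_log2 (0x3f + 1) yields a field
   length of 6; a non-contiguous mask such as 0x5 (with a zero shift)
   fails, since 0x5 + 1 is not a power of two.  */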

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

bool
ia64_legitimate_constant_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || GET_MODE (x) == SFmode
          || GET_MODE (x) == DFmode)
        return true;
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
         match the code in ia64_expand_move and move_operand, even though they
         are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
        {
          HOST_WIDE_INT addend = 0;
          rtx op = x;

          if (GET_CODE (op) == CONST
              && GET_CODE (XEXP (op, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
            {
              addend = INTVAL (XEXP (XEXP (op, 0), 1));
              op = XEXP (XEXP (op, 0), 0);
            }

          if (any_offset_symbol_operand (op, GET_MODE (op))
              || function_operand (op, GET_MODE (op)))
            return true;
          if (aligned_offset_symbol_operand (op, GET_MODE (op)))
            return (addend & 0x3fff) == 0;
          return false;
        }
      return false;

    case CONST_VECTOR:
      {
        enum machine_mode mode = GET_MODE (x);

        if (mode == V2SFmode)
          return satisfies_constraint_Y (x);

        return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
                && GET_MODE_SIZE (mode) <= 8);
      }

    default:
      return false;
    }
}

/* Don't allow TLS addresses to get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (rtx x)
{
  if (GET_MODE (x) == RFmode)
    return true;
  return tls_symbolic_operand_type (x) != 0;
}

/* Expand a symbolic constant load.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64 bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
                               byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
         to keep them split in move_operand, but we also allowed reload to
         rematerialize arbitrary constants rather than spill the value to
         the stack and reload it.  So we have to be prepared here to split
         them apart again.  */
      if (GET_CODE (src) == CONST)
        {
          HOST_WIDE_INT hi, lo;

          hi = INTVAL (XEXP (XEXP (src, 0), 1));
          /* Split the offset: LO is the low 14 bits sign-extended,
             HI the remainder.  */
          lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
          hi = hi - lo;

          if (lo != 0)
            {
              addend = lo;
              src = plus_constant (XEXP (XEXP (src, 0), 0), hi);
            }
        }

      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, dest, src);
      emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));

      if (addend)
        {
          tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
          emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
        }
    }

  return true;
}

/* Return (and lazily create) the libfunc symbol for __tls_get_addr.  */

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
  if (!gen_tls_tga)
    gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}

/* Return (and lazily create) the thread pointer register, r13.  */

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer (void)
{
  if (!thread_pointer_rtx)
    thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}

/* Expand a reference to the TLS symbol OP1 according to TLS_KIND,
   storing the result in OP0.  ORIG_OP1 is the original operand and
   ADDEND any constant offset that was split from it.  */

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
                         rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
        op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic.
         If the call to __tls_get_addr is used only by a single symbol,
         then we should (somehow) move the dtprel to the second arg
         to avoid the extra add.  */
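      /* The sequence below obtains the module's TLS base by calling
         __tls_get_addr (dtpmod, 0) and then adds the symbol's dtprel
         offset to that base.  */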
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
                                         LCT_CONST, Pmode, 2, tga_op1,
                                         Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_dtprel (op0, op1));
          emit_insn (gen_adddi3 (op0, tmp, op0));
        }
      else
        emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
        op0 = gen_reg_rtx (Pmode);

      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
        {
          emit_insn (gen_load_tprel (op0, op1));
          emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
        }
      else
        emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
                               orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}

/* Expand a move from OP1 into OP0, handling symbolic and TLS operands.
   Return the value that should be assigned to OP0, or NULL_RTX when
   the move has been emitted completely.  */

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      if (GET_CODE (op1) == CONST
          && GET_CODE (XEXP (op1, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
        {
          addend = INTVAL (XEXP (XEXP (op1, 0), 1));
          sym = XEXP (XEXP (op1, 0), 0);
        }

      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
        return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
        addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
        {
          HOST_WIDE_INT addend_lo, addend_hi;

          addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
          addend_hi = addend - addend_lo;

          if (addend_lo != 0)
            {
              op1 = plus_constant (sym, addend_hi);
              addend = addend_lo;
            }
          else
            addend = 0;
        }
      else
        op1 = sym;

      if (reload_completed)
        {
          /* We really should have taken care of this offset earlier.  */
          gcc_assert (addend == 0);
          if (ia64_expand_load_address (op0, op1))
            return NULL_RTX;
        }

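      /* A low offset remains: materialize the symbol first, then add
         the offset explicitly.  When no new pseudos may be created,
         OP0 itself serves as the scratch.  */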
      if (addend)
        {
          rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

          emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));

          op1 = expand_simple_binop (mode, PLUS, subtarget,
                                     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
          if (op0 == op1)
            return NULL_RTX;
        }
    }

  return op1;
}

/* Split a move from OP1 to OP0 conditional on COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  rtx insn, first = get_last_insn ();

  emit_move_insn (op0, op1);

  for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
    if (INSN_P (insn))
      PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                          PATTERN (insn));
}

/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
        split_double (in, &out[0], &out[1]);
      else
        /* split_double does not understand how to split a TFmode
           quantity into a pair of DImode constants.  */
        {
          REAL_VALUE_TYPE r;
          unsigned HOST_WIDE_INT p[2];
          long l[4];  /* TFmode is 128 bits */

          REAL_VALUE_FROM_CONST_DOUBLE (r, in);
          real_to_target (l, &r, TFmode);

          if (FLOAT_WORDS_BIG_ENDIAN)
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
              p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
            }
          else
            {
              p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
              p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
            }
          out[0] = GEN_INT (p[0]);
          out[1] = GEN_INT (p[1]);
        }
      break;

    case MEM:
      {
        rtx base = XEXP (in, 0);
        rtx offset;

        switch (GET_CODE (base))
          {
          case REG:
            if (!reversed)
              {
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
                out[1] = adjust_automodify_address
                  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
              }
            else
              {
                /* Reversal requires a pre-increment, which can only
                   be done as a separate insn.  */
                emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
                out[0] = adjust_automodify_address
                  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
                out[1] = adjust_address (in, DImode, 0);
              }
            break;

          case POST_INC:
            gcc_assert (!reversed && !dead);

            /* Just do the increment in two steps.  */
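            /* Each DImode access keeps the POST_INC and so bumps the
               base by 8; the two accesses together reproduce the
               original TImode increment of 16.  */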
            out[0] = adjust_automodify_address (in, DImode, 0, 0);
            out[1] = adjust_automodify_address (in, DImode, 0, 8);
            break;

          case POST_DEC:
            gcc_assert (!reversed && !dead);

            /* Add 8, subtract 24.  */
            base = XEXP (base, 0);
            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
            out[1] = adjust_automodify_address
              (in, DImode,
               gen_rtx_POST_MODIFY (Pmode, base, plus_constant (base, -24)),
               8);
            break;

          case POST_MODIFY:
            gcc_assert (!reversed && !dead);

            /* Extract and adjust the modification.  This case is
               trickier than the others, because we might have an
               index register, or we might have a combined offset that
               doesn't fit a signed 9-bit displacement field.  We can
               assume the incoming expression is already legitimate.  */
            offset = XEXP (base, 1);
            base = XEXP (base, 0);

            out[0] = adjust_automodify_address
              (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

            if (GET_CODE (XEXP (offset, 1)) == REG)
              {
                /* Can't adjust the postmodify to match.  Emit the
                   original, then a separate addition insn.  */
                out[1] = adjust_automodify_address (in, DImode, 0, 8);
                fixup = gen_adddi3 (base, base, GEN_INT (-8));
              }
            else
              {
                gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
                if (INTVAL (XEXP (offset, 1)) < -256 + 8)
                  {
                    /* Again the postmodify cannot be made to match,
                       but in this case it's more efficient to get rid
                       of the postmodify entirely and fix up with an
                       add insn.  */
                    out[1] = adjust_automodify_address (in, DImode, base, 8);
                    fixup = gen_adddi3
                      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
                  }
                else
                  {
                    /* Combined offset still fits in the displacement field.
                       (We cannot overflow it at the high end.)  */
                    out[1] = adjust_automodify_address
                      (in, DImode, gen_rtx_POST_MODIFY
                       (Pmode, base, gen_rtx_PLUS
                        (Pmode, base,
                         GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
                       8);
                  }
              }
            break;

          default:
            gcc_unreachable ();
          }
        break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}

/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;
      dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
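  /* For example, copying the pair r4:r5 to r5:r6 must write r6 from
     r5 before r5 is overwritten by r4.  */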
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  if (GET_CODE (EXP) == MEM                                             \
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_INC                       \
          || GET_CODE (XEXP (EXP, 0)) == POST_DEC))                     \
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}

/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, enum machine_mode mode)
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx memt = assign_stack_temp (TImode, 16, 0);
      emit_move_insn (memt, SUBREG_REG (in));
      return adjust_address (memt, mode, 0);
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx memx = assign_stack_temp (mode, 16, 0);
      emit_move_insn (memx, in);
      return memx;
    }
  else
    return in;
}

/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
           && GET_MODE (SUBREG_REG (operands[1])) == TImode)
          || (GET_CODE (operands[1]) == REG
              && GR_REGNO_P (REGNO (operands[1]))))
        {
          rtx op1 = operands[1];

          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          else
            op1 = gen_rtx_REG (TImode, REGNO (op1));

          emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
          return true;
        }

      if (GET_CODE (operands[1]) == CONST_DOUBLE)
        {
          /* Don't word-swap when reading in the constant.  */
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
                          operand_subword (operands[1], WORDS_BIG_ENDIAN,
                                           0, mode));
          emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
                          operand_subword (operands[1], !WORDS_BIG_ENDIAN,
                                           0, mode));
          return true;
        }

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
        operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
         quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
         If op0 is a register, then we spill op1, so that we now have a
         MEM operand.  This requires creating an XFmode subreg of a TImode reg
         to force the spill.  */
      if (register_operand (operands[0], mode))
        {
          rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
          op1 = gen_rtx_SUBREG (mode, op1, 0);
          operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
        }

      else
        {
          rtx in[2];

          gcc_assert (GET_CODE (operands[0]) == MEM);

          /* Don't word-swap when writing out the value.  */
          in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
          in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

          emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
          emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
          return true;
        }
    }

  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
        {
          rtx memt, memx, in = operands[1];
          if (CONSTANT_P (in))
            in = validize_mem (force_const_mem (mode, in));
          if (GET_CODE (in) == MEM)
            memt = adjust_address (in, TImode, 0);
          else
            {
              memt = assign_stack_temp (TImode, 16, 0);
              memx = adjust_address (memt, mode, 0);
              emit_move_insn (memx, in);
            }
          emit_move_insn (op0, memt);
          return true;
        }

      if (!ia64_move_ok (operands[0], operands[1]))
        operands[1] = force_reg (mode, operands[1]);
    }

  return false;
}

/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
        QCMP_INV = 1,   /* Raise FP_INVALID on SNaN as a side effect.  */
        QCMP_UNORD = 2,
        QCMP_EQ = 4,
        QCMP_LT = 8,
        QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret, insns;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      switch (code)
        {
          /* 1 = equal, 0 = not equal.  Equality operators do
             not raise FP_INVALID when given an SNaN operand.  */
        case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
        case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
          /* isunordered() from C99.  */
        case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
        case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
          /* Relational operators raise FP_INVALID when given
             an SNaN operand.  */
        case LT:        magic = QCMP_LT        | QCMP_INV; ncode = NE; break;
        case LE:        magic = QCMP_LT | QCMP_EQ | QCMP_INV; ncode = NE; break;
        case GT:        magic = QCMP_GT        | QCMP_INV; ncode = NE; break;
        case GE:        magic = QCMP_GT | QCMP_EQ | QCMP_INV; ncode = NE; break;
          /* FUTURE: Implement UNEQ, UNLT, UNLE, UNGT, UNGE, LTGT.
             Expanders for buneq etc. would have to be added to ia64.md
             for this to be useful.  */
        default: gcc_unreachable ();
        }

      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
                                     *op0, TFmode, *op1, TFmode,
                                     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (ncode, BImode,
                                              ret, const0_rtx)));

      insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
                          gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
                              gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}

/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode,
                            rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
        {
        case V2SImode:
          {
            rtx t1, t2, mask;

            /* Subtract (-(INT MAX) - 1) from both operands to make
               them signed.  */
            mask = GEN_INT (0x80000000);
            mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
            mask = force_reg (mode, mask);
            t1 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t1, op0, mask));
            t2 = gen_reg_rtx (mode);
            emit_insn (gen_subv2si3 (t2, op1, mask));
            op0 = t1;
            op1 = t2;
            code = GT;
          }
          break;

        case V8QImode:
        case V4HImode:
          /* Perform a parallel unsigned saturating subtraction.  */
          x = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, x,
                                  gen_rtx_US_MINUS (mode, op0, op1)));

          code = EQ;
          op0 = x;
          op1 = CONST0_RTX (mode);
          negate = !negate;
          break;

        default:
          gcc_unreachable ();
        }
    }

  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return negate;
}

/* Emit an integral vector conditional move.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
        {
          emit_move_insn (operands[0], ot);
          return;
        }

      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
  else
    {
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (VOIDmode, t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (VOIDmode, f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
    }
}

/* Emit an integral vector min or max operation.  Return true if all done.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];

  /* These four combinations are supported directly.  */
*/
1836 if (mode == V8QImode && (code == UMIN || code == UMAX))
1837 return false;
1838 if (mode == V4HImode && (code == SMIN || code == SMAX))
1839 return false;
1840
1841 /* This combination can be implemented with only saturating subtraction. */
1842 if (mode == V4HImode && code == UMAX)
1843 {
1844 rtx x, tmp = gen_reg_rtx (mode);
1845
1846 x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
1847 emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
1848
1849 emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
1850 return true;
1851 }
1852
1853 /* Everything else implemented via vector comparisons. */
1854 xops[0] = operands[0];
1855 xops[4] = xops[1] = operands[1];
1856 xops[5] = xops[2] = operands[2];
1857
1858 switch (code)
1859 {
1860 case UMIN:
1861 code = LTU;
1862 break;
1863 case UMAX:
1864 code = GTU;
1865 break;
1866 case SMIN:
1867 code = LT;
1868 break;
1869 case SMAX:
1870 code = GT;
1871 break;
1872 default:
1873 gcc_unreachable ();
1874 }
1875 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
1876
1877 ia64_expand_vecint_cmov (xops);
1878 return true;
1879}
1880
1881/* Emit an integral vector widening sum operation. */
1882
1883void
1884ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
1885{
1886 rtx l, h, x, s;
1887 enum machine_mode wmode, mode;
1888 rtx (*unpack_l) (rtx, rtx, rtx);
1889 rtx (*unpack_h) (rtx, rtx, rtx);
1890 rtx (*plus) (rtx, rtx, rtx);
1891
1892 wmode = GET_MODE (operands[0]);
1893 mode = GET_MODE (operands[1]);
1894
1895 switch (mode)
1896 {
1897 case V8QImode:
1898 unpack_l = gen_unpack1_l;
1899 unpack_h = gen_unpack1_h;
1900 plus = gen_addv4hi3;
1901 break;
1902 case V4HImode:
1903 unpack_l = gen_unpack2_l;
1904 unpack_h = gen_unpack2_h;
1905 plus = gen_addv2si3;
1906 break;
1907 default:
1908 gcc_unreachable ();
1909 }
1910
1911 /* Fill in x with the sign extension of each element in op1. */
1912 if (unsignedp)
1913 x = CONST0_RTX (mode);
1914 else
1915 {
1916 bool neg;
1917
1918 x = gen_reg_rtx (mode);
1919
1920 neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
1921 CONST0_RTX (mode));
1922 gcc_assert (!neg);
1923 }
1924
1925 l = gen_reg_rtx (wmode);
1926 h = gen_reg_rtx (wmode);
1927 s = gen_reg_rtx (wmode);
1928
1929 emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
1930 emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
1931 emit_insn (plus (s, l, operands[2]));
1932 emit_insn (plus (operands[0], h, s));
1933}
1934
1935/* Emit a signed or unsigned V8QI dot product operation. */
1936
1937void
1938ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
1939{
1940 rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
1941
1942 /* Fill in x1 and x2 with the sign extension of each element.
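   For unsigned input the extension is simply zero. For signed input we
   build an element-wise mask, roughly

     x = (op < 0) ? 0xff : 0x00

   for each byte, so that unpacking an operand against its mask
   interleaves every element with its sign bits, widening V8QImode
   elements to V4HImode.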
*/ 1943 if (unsignedp) 1944 x1 = x2 = CONST0_RTX (V8QImode); 1945 else 1946 { 1947 bool neg; 1948 1949 x1 = gen_reg_rtx (V8QImode); 1950 x2 = gen_reg_rtx (V8QImode); 1951 1952 neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1], 1953 CONST0_RTX (V8QImode)); 1954 gcc_assert (!neg); 1955 neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2], 1956 CONST0_RTX (V8QImode)); 1957 gcc_assert (!neg); 1958 } 1959 1960 l1 = gen_reg_rtx (V4HImode); 1961 l2 = gen_reg_rtx (V4HImode); 1962 h1 = gen_reg_rtx (V4HImode); 1963 h2 = gen_reg_rtx (V4HImode); 1964 1965 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1)); 1966 emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2)); 1967 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1)); 1968 emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2)); 1969 1970 p1 = gen_reg_rtx (V2SImode); 1971 p2 = gen_reg_rtx (V2SImode); 1972 p3 = gen_reg_rtx (V2SImode); 1973 p4 = gen_reg_rtx (V2SImode); 1974 emit_insn (gen_pmpy2_r (p1, l1, l2)); 1975 emit_insn (gen_pmpy2_l (p2, l1, l2)); 1976 emit_insn (gen_pmpy2_r (p3, h1, h2)); 1977 emit_insn (gen_pmpy2_l (p4, h1, h2)); 1978 1979 s1 = gen_reg_rtx (V2SImode); 1980 s2 = gen_reg_rtx (V2SImode); 1981 s3 = gen_reg_rtx (V2SImode); 1982 emit_insn (gen_addv2si3 (s1, p1, p2)); 1983 emit_insn (gen_addv2si3 (s2, p3, p4)); 1984 emit_insn (gen_addv2si3 (s3, s1, operands[3])); 1985 emit_insn (gen_addv2si3 (operands[0], s2, s3)); 1986} 1987 1988/* Emit the appropriate sequence for a call. */ 1989 1990void 1991ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED, 1992 int sibcall_p) 1993{ 1994 rtx insn, b0; 1995 1996 addr = XEXP (addr, 0); 1997 addr = convert_memory_address (DImode, addr); 1998 b0 = gen_rtx_REG (DImode, R_BR (0)); 1999 2000 /* ??? Should do this for functions known to bind local too. */ 2001 if (TARGET_NO_PIC || TARGET_AUTO_PIC) 2002 { 2003 if (sibcall_p) 2004 insn = gen_sibcall_nogp (addr); 2005 else if (! retval) 2006 insn = gen_call_nogp (addr, b0); 2007 else 2008 insn = gen_call_value_nogp (retval, addr, b0); 2009 insn = emit_call_insn (insn); 2010 } 2011 else 2012 { 2013 if (sibcall_p) 2014 insn = gen_sibcall_gp (addr); 2015 else if (! 
retval) 2016 insn = gen_call_gp (addr, b0); 2017 else 2018 insn = gen_call_value_gp (retval, addr, b0); 2019 insn = emit_call_insn (insn); 2020 2021 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); 2022 } 2023 2024 if (sibcall_p) 2025 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0); 2026 2027 if (TARGET_ABI_OPEN_VMS) 2028 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), 2029 gen_rtx_REG (DImode, GR_REG (25))); 2030} 2031 2032static void 2033reg_emitted (enum ia64_frame_regs r) 2034{ 2035 if (emitted_frame_related_regs[r] == 0) 2036 emitted_frame_related_regs[r] = current_frame_info.r[r]; 2037 else 2038 gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]); 2039} 2040 2041static int 2042get_reg (enum ia64_frame_regs r) 2043{ 2044 reg_emitted (r); 2045 return current_frame_info.r[r]; 2046} 2047 2048static bool 2049is_emitted (int regno) 2050{ 2051 unsigned int r; 2052 2053 for (r = reg_fp; r < number_of_ia64_frame_regs; r++) 2054 if (emitted_frame_related_regs[r] == regno) 2055 return true; 2056 return false; 2057} 2058 2059void 2060ia64_reload_gp (void) 2061{ 2062 rtx tmp; 2063 2064 if (current_frame_info.r[reg_save_gp]) 2065 { 2066 tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp)); 2067 } 2068 else 2069 { 2070 HOST_WIDE_INT offset; 2071 rtx offset_r; 2072 2073 offset = (current_frame_info.spill_cfa_off 2074 + current_frame_info.spill_size); 2075 if (frame_pointer_needed) 2076 { 2077 tmp = hard_frame_pointer_rtx; 2078 offset = -offset; 2079 } 2080 else 2081 { 2082 tmp = stack_pointer_rtx; 2083 offset = current_frame_info.total_size - offset; 2084 } 2085 2086 offset_r = GEN_INT (offset); 2087 if (satisfies_constraint_I (offset_r)) 2088 emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r)); 2089 else 2090 { 2091 emit_move_insn (pic_offset_table_rtx, offset_r); 2092 emit_insn (gen_adddi3 (pic_offset_table_rtx, 2093 pic_offset_table_rtx, tmp)); 2094 } 2095 2096 tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx); 2097 } 2098 2099 emit_move_insn (pic_offset_table_rtx, tmp); 2100} 2101 2102void 2103ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r, 2104 rtx scratch_b, int noreturn_p, int sibcall_p) 2105{ 2106 rtx insn; 2107 bool is_desc = false; 2108 2109 /* If we find we're calling through a register, then we're actually 2110 calling through a descriptor, so load up the values. */ 2111 if (REG_P (addr) && GR_REGNO_P (REGNO (addr))) 2112 { 2113 rtx tmp; 2114 bool addr_dead_p; 2115 2116 /* ??? We are currently constrained to *not* use peep2, because 2117 we can legitimately change the global lifetime of the GP 2118 (in the form of killing where previously live). This is 2119 because a call through a descriptor doesn't use the previous 2120 value of the GP, while a direct call does, and we do not 2121 commit to either form until the split here. 2122 2123 That said, this means that we lack precise life info for 2124 whether ADDR is dead after this call. This is not terribly 2125 important, since we can fix things up essentially for free 2126 with the POST_DEC below, but it's nice to not use it when we 2127 can immediately tell it's not necessary. */ 2128 addr_dead_p = ((noreturn_p || sibcall_p 2129 || TEST_HARD_REG_BIT (regs_invalidated_by_call, 2130 REGNO (addr))) 2131 && !FUNCTION_ARG_REGNO_P (REGNO (addr))); 2132 2133 /* Load the code address into scratch_b. 
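   (An IA-64 function descriptor is a pair of 8-byte words: the entry
   address followed by the callee's gp value. The POST_INC emitted
   next leaves ADDR pointing at the gp word, which is loaded right
   after.)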
*/ 2134 tmp = gen_rtx_POST_INC (Pmode, addr); 2135 tmp = gen_rtx_MEM (Pmode, tmp); 2136 emit_move_insn (scratch_r, tmp); 2137 emit_move_insn (scratch_b, scratch_r); 2138 2139 /* Load the GP address. If ADDR is not dead here, then we must 2140 revert the change made above via the POST_INCREMENT. */ 2141 if (!addr_dead_p) 2142 tmp = gen_rtx_POST_DEC (Pmode, addr); 2143 else 2144 tmp = addr; 2145 tmp = gen_rtx_MEM (Pmode, tmp); 2146 emit_move_insn (pic_offset_table_rtx, tmp); 2147 2148 is_desc = true; 2149 addr = scratch_b; 2150 } 2151 2152 if (sibcall_p) 2153 insn = gen_sibcall_nogp (addr); 2154 else if (retval) 2155 insn = gen_call_value_nogp (retval, addr, retaddr); 2156 else 2157 insn = gen_call_nogp (addr, retaddr); 2158 emit_call_insn (insn); 2159 2160 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) 2161 ia64_reload_gp (); 2162} 2163 2164/* Expand an atomic operation. We want to perform MEM <CODE>= VAL atomically. 2165 2166 This differs from the generic code in that we know about the zero-extending 2167 properties of cmpxchg, and the zero-extending requirements of ar.ccv. We 2168 also know that ld.acq+cmpxchg.rel equals a full barrier. 2169 2170 The loop we want to generate looks like 2171 2172 cmp_reg = mem; 2173 label: 2174 old_reg = cmp_reg; 2175 new_reg = cmp_reg op val; 2176 cmp_reg = compare-and-swap(mem, old_reg, new_reg) 2177 if (cmp_reg != old_reg) 2178 goto label; 2179 2180 Note that we only do the plain load from memory once. Subsequent 2181 iterations use the value loaded by the compare-and-swap pattern. */ 2182 2183void 2184ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, 2185 rtx old_dst, rtx new_dst) 2186{ 2187 enum machine_mode mode = GET_MODE (mem); 2188 rtx old_reg, new_reg, cmp_reg, ar_ccv, label; 2189 enum insn_code icode; 2190 2191 /* Special case for using fetchadd. */ 2192 if ((mode == SImode || mode == DImode) 2193 && (code == PLUS || code == MINUS) 2194 && fetchadd_operand (val, mode)) 2195 { 2196 if (code == MINUS) 2197 val = GEN_INT (-INTVAL (val)); 2198 2199 if (!old_dst) 2200 old_dst = gen_reg_rtx (mode); 2201 2202 emit_insn (gen_memory_barrier ()); 2203 2204 if (mode == SImode) 2205 icode = CODE_FOR_fetchadd_acq_si; 2206 else 2207 icode = CODE_FOR_fetchadd_acq_di; 2208 emit_insn (GEN_FCN (icode) (old_dst, mem, val)); 2209 2210 if (new_dst) 2211 { 2212 new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst, 2213 true, OPTAB_WIDEN); 2214 if (new_reg != new_dst) 2215 emit_move_insn (new_dst, new_reg); 2216 } 2217 return; 2218 } 2219 2220 /* Because of the volatile mem read, we get an ld.acq, which is the 2221 front half of the full barrier. The end half is the cmpxchg.rel. 
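   For a DImode operation the generated sequence therefore looks
   roughly like this pseudo-assembly sketch (illustrative only):

	ld8.acq       cmp = [mem]
     label:
	mov           old = cmp
	...           new = old <op> val
	mov           ar.ccv = old
	cmpxchg8.rel  cmp = [mem], new, ar.ccv
	cmp.ne        p6 = cmp, old
   (p6) br.cond       label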
*/ 2222 gcc_assert (MEM_VOLATILE_P (mem)); 2223 2224 old_reg = gen_reg_rtx (DImode); 2225 cmp_reg = gen_reg_rtx (DImode); 2226 label = gen_label_rtx (); 2227 2228 if (mode != DImode) 2229 { 2230 val = simplify_gen_subreg (DImode, val, mode, 0); 2231 emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1)); 2232 } 2233 else 2234 emit_move_insn (cmp_reg, mem); 2235 2236 emit_label (label); 2237 2238 ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); 2239 emit_move_insn (old_reg, cmp_reg); 2240 emit_move_insn (ar_ccv, cmp_reg); 2241 2242 if (old_dst) 2243 emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg)); 2244 2245 new_reg = cmp_reg; 2246 if (code == NOT) 2247 { 2248 new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX, 2249 true, OPTAB_DIRECT); 2250 new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true); 2251 } 2252 else 2253 new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX, 2254 true, OPTAB_DIRECT); 2255 2256 if (mode != DImode) 2257 new_reg = gen_lowpart (mode, new_reg); 2258 if (new_dst) 2259 emit_move_insn (new_dst, new_reg); 2260 2261 switch (mode) 2262 { 2263 case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break; 2264 case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break; 2265 case SImode: icode = CODE_FOR_cmpxchg_rel_si; break; 2266 case DImode: icode = CODE_FOR_cmpxchg_rel_di; break; 2267 default: 2268 gcc_unreachable (); 2269 } 2270 2271 emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg)); 2272 2273 emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label); 2274} 2275 2276/* Begin the assembly file. */ 2277 2278static void 2279ia64_file_start (void) 2280{ 2281 /* Variable tracking should be run after all optimizations which change order 2282 of insns. It also needs a valid CFG. This can't be done in 2283 ia64_override_options, because flag_var_tracking is finalized after 2284 that. */ 2285 ia64_flag_var_tracking = flag_var_tracking; 2286 flag_var_tracking = 0; 2287 2288 default_file_start (); 2289 emit_safe_across_calls (); 2290} 2291 2292void 2293emit_safe_across_calls (void) 2294{ 2295 unsigned int rs, re; 2296 int out_state; 2297 2298 rs = 1; 2299 out_state = 0; 2300 while (1) 2301 { 2302 while (rs < 64 && call_used_regs[PR_REG (rs)]) 2303 rs++; 2304 if (rs >= 64) 2305 break; 2306 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++) 2307 continue; 2308 if (out_state == 0) 2309 { 2310 fputs ("\t.pred.safe_across_calls ", asm_out_file); 2311 out_state = 1; 2312 } 2313 else 2314 fputc (',', asm_out_file); 2315 if (re == rs + 1) 2316 fprintf (asm_out_file, "p%u", rs); 2317 else 2318 fprintf (asm_out_file, "p%u-p%u", rs, re - 1); 2319 rs = re + 1; 2320 } 2321 if (out_state) 2322 fputc ('\n', asm_out_file); 2323} 2324 2325/* Globalize a declaration. */ 2326 2327static void 2328ia64_globalize_decl_name (FILE * stream, tree decl) 2329{ 2330 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 2331 tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl)); 2332 if (version_attr) 2333 { 2334 tree v = TREE_VALUE (TREE_VALUE (version_attr)); 2335 const char *p = TREE_STRING_POINTER (v); 2336 fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p); 2337 } 2338 targetm.asm_out.globalize_label (stream, name); 2339 if (TREE_CODE (decl) == FUNCTION_DECL) 2340 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function"); 2341} 2342 2343/* Helper function for ia64_compute_frame_size: find an appropriate general 2344 register to spill some special register to. 
SPECIAL_SPILL_MASK contains 2345 bits in GR0 to GR31 that have already been allocated by this routine. 2346 TRY_LOCALS is true if we should attempt to locate a local regnum. */ 2347 2348static int 2349find_gr_spill (enum ia64_frame_regs r, int try_locals) 2350{ 2351 int regno; 2352 2353 if (emitted_frame_related_regs[r] != 0) 2354 { 2355 regno = emitted_frame_related_regs[r]; 2356 if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed) 2357 && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1) 2358 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1; 2359 else if (current_function_is_leaf 2360 && regno >= GR_REG (1) && regno <= GR_REG (31)) 2361 current_frame_info.gr_used_mask |= 1 << regno; 2362 2363 return regno; 2364 } 2365 2366 /* If this is a leaf function, first try an otherwise unused 2367 call-clobbered register. */ 2368 if (current_function_is_leaf) 2369 { 2370 for (regno = GR_REG (1); regno <= GR_REG (31); regno++) 2371 if (! df_regs_ever_live_p (regno) 2372 && call_used_regs[regno] 2373 && ! fixed_regs[regno] 2374 && ! global_regs[regno] 2375 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0 2376 && ! is_emitted (regno)) 2377 { 2378 current_frame_info.gr_used_mask |= 1 << regno; 2379 return regno; 2380 } 2381 } 2382 2383 if (try_locals) 2384 { 2385 regno = current_frame_info.n_local_regs; 2386 /* If there is a frame pointer, then we can't use loc79, because 2387 that is HARD_FRAME_POINTER_REGNUM. In particular, see the 2388 reg_name switching code in ia64_expand_prologue. */ 2389 while (regno < (80 - frame_pointer_needed)) 2390 if (! is_emitted (LOC_REG (regno++))) 2391 { 2392 current_frame_info.n_local_regs = regno; 2393 return LOC_REG (regno - 1); 2394 } 2395 } 2396 2397 /* Failed to find a general register to spill to. Must use stack. */ 2398 return 0; 2399} 2400 2401/* In order to make for nice schedules, we try to allocate every temporary 2402 to a different register. We must of course stay away from call-saved, 2403 fixed, and global registers. We must also stay away from registers 2404 allocated in current_frame_info.gr_used_mask, since those include regs 2405 used all through the prologue. 2406 2407 Any register allocated here must be used immediately. The idea is to 2408 aid scheduling, not to solve data flow problems. */ 2409 2410static int last_scratch_gr_reg; 2411 2412static int 2413next_scratch_gr_reg (void) 2414{ 2415 int i, regno; 2416 2417 for (i = 0; i < 32; ++i) 2418 { 2419 regno = (last_scratch_gr_reg + i + 1) & 31; 2420 if (call_used_regs[regno] 2421 && ! fixed_regs[regno] 2422 && ! global_regs[regno] 2423 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) 2424 { 2425 last_scratch_gr_reg = regno; 2426 return regno; 2427 } 2428 } 2429 2430 /* There must be _something_ available. */ 2431 gcc_unreachable (); 2432} 2433 2434/* Helper function for ia64_compute_frame_size, called through 2435 diddle_return_value. Mark REG in current_frame_info.gr_used_mask. */ 2436 2437static void 2438mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED) 2439{ 2440 unsigned int regno = REGNO (reg); 2441 if (regno < 32) 2442 { 2443 unsigned int i, n = hard_regno_nregs[regno][GET_MODE (reg)]; 2444 for (i = 0; i < n; ++i) 2445 current_frame_info.gr_used_mask |= 1 << (regno + i); 2446 } 2447} 2448 2449 2450/* Returns the number of bytes offset between the frame pointer and the stack 2451 pointer for the current function. SIZE is the number of bytes of space 2452 needed for local variables. 
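   The results are not returned directly; they are recorded in
   current_frame_info for use by the prologue and epilogue expanders.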
*/
2453
2454static void
2455ia64_compute_frame_size (HOST_WIDE_INT size)
2456{
2457 HOST_WIDE_INT total_size;
2458 HOST_WIDE_INT spill_size = 0;
2459 HOST_WIDE_INT extra_spill_size = 0;
2460 HOST_WIDE_INT pretend_args_size;
2461 HARD_REG_SET mask;
2462 int n_spilled = 0;
2463 int spilled_gr_p = 0;
2464 int spilled_fr_p = 0;
2465 unsigned int regno;
2466 int min_regno;
2467 int max_regno;
2468 int i;
2469
2470 if (current_frame_info.initialized)
2471 return;
2472
2473 memset (&current_frame_info, 0, sizeof current_frame_info);
2474 CLEAR_HARD_REG_SET (mask);
2475
2476 /* Don't allocate scratches to the return register. */
2477 diddle_return_value (mark_reg_gr_used_mask, NULL);
2478
2479 /* Don't allocate scratches to the EH scratch registers. */
2480 if (cfun->machine->ia64_eh_epilogue_sp)
2481 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
2482 if (cfun->machine->ia64_eh_epilogue_bsp)
2483 mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
2484
2485 /* Find the size of the register stack frame. We have only 80 local
2486 registers, because we reserve 8 for the inputs and 8 for the
2487 outputs. */
2488
2489 /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
2490 since we'll be adjusting that down later. */
2491 regno = LOC_REG (78) + ! frame_pointer_needed;
2492 for (; regno >= LOC_REG (0); regno--)
2493 if (df_regs_ever_live_p (regno) && !is_emitted (regno))
2494 break;
2495 current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
2496
2497 /* For functions marked with the syscall_linkage attribute, we must mark
2498 all eight input registers as in use, so that locals aren't visible to
2499 the caller. */
2500
2501 if (cfun->machine->n_varargs > 0
2502 || lookup_attribute ("syscall_linkage",
2503 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
2504 current_frame_info.n_input_regs = 8;
2505 else
2506 {
2507 for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
2508 if (df_regs_ever_live_p (regno))
2509 break;
2510 current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
2511 }
2512
2513 for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
2514 if (df_regs_ever_live_p (regno))
2515 break;
2516 i = regno - OUT_REG (0) + 1;
2517
2518#ifndef PROFILE_HOOK
2519 /* When -p profiling, we need one output register for the mcount argument.
2520 Likewise for -a profiling for the bb_init_func argument. For -ax
2521 profiling, we need two output registers for the two bb_init_trace_func
2522 arguments. */
2523 if (crtl->profile)
2524 i = MAX (i, 1);
2525#endif
2526 current_frame_info.n_output_regs = i;
2527
2528 /* ??? No rotating register support yet. */
2529 current_frame_info.n_rotate_regs = 0;
2530
2531 /* Discover which registers need spilling, and how much room that
2532 will take. Begin with floating point and general registers,
2533 which will always wind up on the stack. */
2534
2535 for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
2536 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2537 {
2538 SET_HARD_REG_BIT (mask, regno);
2539 spill_size += 16;
2540 n_spilled += 1;
2541 spilled_fr_p = 1;
2542 }
2543
2544 for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
2545 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2546 {
2547 SET_HARD_REG_BIT (mask, regno);
2548 spill_size += 8;
2549 n_spilled += 1;
2550 spilled_gr_p = 1;
2551 }
2552
2553 for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
2554 if (df_regs_ever_live_p (regno) && !
call_used_regs[regno]) 2555 { 2556 SET_HARD_REG_BIT (mask, regno); 2557 spill_size += 8; 2558 n_spilled += 1; 2559 } 2560 2561 /* Now come all special registers that might get saved in other 2562 general registers. */ 2563 2564 if (frame_pointer_needed) 2565 { 2566 current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1); 2567 /* If we did not get a register, then we take LOC79. This is guaranteed 2568 to be free, even if regs_ever_live is already set, because this is 2569 HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs, 2570 as we don't count loc79 above. */ 2571 if (current_frame_info.r[reg_fp] == 0) 2572 { 2573 current_frame_info.r[reg_fp] = LOC_REG (79); 2574 current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1; 2575 } 2576 } 2577 2578 if (! current_function_is_leaf) 2579 { 2580 /* Emit a save of BR0 if we call other functions. Do this even 2581 if this function doesn't return, as EH depends on this to be 2582 able to unwind the stack. */ 2583 SET_HARD_REG_BIT (mask, BR_REG (0)); 2584 2585 current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1); 2586 if (current_frame_info.r[reg_save_b0] == 0) 2587 { 2588 extra_spill_size += 8; 2589 n_spilled += 1; 2590 } 2591 2592 /* Similarly for ar.pfs. */ 2593 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); 2594 current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1); 2595 if (current_frame_info.r[reg_save_ar_pfs] == 0) 2596 { 2597 extra_spill_size += 8; 2598 n_spilled += 1; 2599 } 2600 2601 /* Similarly for gp. Note that if we're calling setjmp, the stacked 2602 registers are clobbered, so we fall back to the stack. */ 2603 current_frame_info.r[reg_save_gp] 2604 = (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1)); 2605 if (current_frame_info.r[reg_save_gp] == 0) 2606 { 2607 SET_HARD_REG_BIT (mask, GR_REG (1)); 2608 spill_size += 8; 2609 n_spilled += 1; 2610 } 2611 } 2612 else 2613 { 2614 if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)]) 2615 { 2616 SET_HARD_REG_BIT (mask, BR_REG (0)); 2617 extra_spill_size += 8; 2618 n_spilled += 1; 2619 } 2620 2621 if (df_regs_ever_live_p (AR_PFS_REGNUM)) 2622 { 2623 SET_HARD_REG_BIT (mask, AR_PFS_REGNUM); 2624 current_frame_info.r[reg_save_ar_pfs] 2625 = find_gr_spill (reg_save_ar_pfs, 1); 2626 if (current_frame_info.r[reg_save_ar_pfs] == 0) 2627 { 2628 extra_spill_size += 8; 2629 n_spilled += 1; 2630 } 2631 } 2632 } 2633 2634 /* Unwind descriptor hackery: things are most efficient if we allocate 2635 consecutive GR save registers for RP, PFS, FP in that order. However, 2636 it is absolutely critical that FP get the only hard register that's 2637 guaranteed to be free, so we allocated it first. If all three did 2638 happen to be allocated hard regs, and are consecutive, rearrange them 2639 into the preferred order now. 2640 2641 If we have already emitted code for any of those registers, 2642 then it's already too late to change. 
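   For example, if the allocator above handed out r35 for RP, r33 for
   PFS and r34 for FP, the fixup below renames them to r33, r34 and
   r35 respectively, so the unwind descriptors can use the compact
   consecutive-register encoding.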
*/ 2643 min_regno = MIN (current_frame_info.r[reg_fp], 2644 MIN (current_frame_info.r[reg_save_b0], 2645 current_frame_info.r[reg_save_ar_pfs])); 2646 max_regno = MAX (current_frame_info.r[reg_fp], 2647 MAX (current_frame_info.r[reg_save_b0], 2648 current_frame_info.r[reg_save_ar_pfs])); 2649 if (min_regno > 0 2650 && min_regno + 2 == max_regno 2651 && (current_frame_info.r[reg_fp] == min_regno + 1 2652 || current_frame_info.r[reg_save_b0] == min_regno + 1 2653 || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1) 2654 && (emitted_frame_related_regs[reg_save_b0] == 0 2655 || emitted_frame_related_regs[reg_save_b0] == min_regno) 2656 && (emitted_frame_related_regs[reg_save_ar_pfs] == 0 2657 || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1) 2658 && (emitted_frame_related_regs[reg_fp] == 0 2659 || emitted_frame_related_regs[reg_fp] == min_regno + 2)) 2660 { 2661 current_frame_info.r[reg_save_b0] = min_regno; 2662 current_frame_info.r[reg_save_ar_pfs] = min_regno + 1; 2663 current_frame_info.r[reg_fp] = min_regno + 2; 2664 } 2665 2666 /* See if we need to store the predicate register block. */ 2667 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2668 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) 2669 break; 2670 if (regno <= PR_REG (63)) 2671 { 2672 SET_HARD_REG_BIT (mask, PR_REG (0)); 2673 current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1); 2674 if (current_frame_info.r[reg_save_pr] == 0) 2675 { 2676 extra_spill_size += 8; 2677 n_spilled += 1; 2678 } 2679 2680 /* ??? Mark them all as used so that register renaming and such 2681 are free to use them. */ 2682 for (regno = PR_REG (0); regno <= PR_REG (63); regno++) 2683 df_set_regs_ever_live (regno, true); 2684 } 2685 2686 /* If we're forced to use st8.spill, we're forced to save and restore 2687 ar.unat as well. The check for existing liveness allows inline asm 2688 to touch ar.unat. */ 2689 if (spilled_gr_p || cfun->machine->n_varargs 2690 || df_regs_ever_live_p (AR_UNAT_REGNUM)) 2691 { 2692 df_set_regs_ever_live (AR_UNAT_REGNUM, true); 2693 SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM); 2694 current_frame_info.r[reg_save_ar_unat] 2695 = find_gr_spill (reg_save_ar_unat, spill_size == 0); 2696 if (current_frame_info.r[reg_save_ar_unat] == 0) 2697 { 2698 extra_spill_size += 8; 2699 n_spilled += 1; 2700 } 2701 } 2702 2703 if (df_regs_ever_live_p (AR_LC_REGNUM)) 2704 { 2705 SET_HARD_REG_BIT (mask, AR_LC_REGNUM); 2706 current_frame_info.r[reg_save_ar_lc] 2707 = find_gr_spill (reg_save_ar_lc, spill_size == 0); 2708 if (current_frame_info.r[reg_save_ar_lc] == 0) 2709 { 2710 extra_spill_size += 8; 2711 n_spilled += 1; 2712 } 2713 } 2714 2715 /* If we have an odd number of words of pretend arguments written to 2716 the stack, then the FR save area will be unaligned. We round the 2717 size of this area up to keep things 16 byte aligned. */ 2718 if (spilled_fr_p) 2719 pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size); 2720 else 2721 pretend_args_size = crtl->args.pretend_args_size; 2722 2723 total_size = (spill_size + extra_spill_size + size + pretend_args_size 2724 + crtl->outgoing_args_size); 2725 total_size = IA64_STACK_ALIGN (total_size); 2726 2727 /* We always use the 16-byte scratch area provided by the caller, but 2728 if we are a leaf function, there's no one to which we need to provide 2729 a scratch area. 
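   As a worked example (illustrative only): a leaf function needing 40
   bytes of locals and no spills computes a raw size of 40, rounds up
   to 48 via IA64_STACK_ALIGN, and then drops the inherited 16-byte
   scratch area, leaving a final total_size of 32.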
*/ 2730 if (current_function_is_leaf) 2731 total_size = MAX (0, total_size - 16); 2732 2733 current_frame_info.total_size = total_size; 2734 current_frame_info.spill_cfa_off = pretend_args_size - 16; 2735 current_frame_info.spill_size = spill_size; 2736 current_frame_info.extra_spill_size = extra_spill_size; 2737 COPY_HARD_REG_SET (current_frame_info.mask, mask); 2738 current_frame_info.n_spilled = n_spilled; 2739 current_frame_info.initialized = reload_completed; 2740} 2741 2742/* Worker function for TARGET_CAN_ELIMINATE. */ 2743 2744bool 2745ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 2746{ 2747 return (to == BR_REG (0) ? current_function_is_leaf : true); 2748} 2749 2750/* Compute the initial difference between the specified pair of registers. */ 2751 2752HOST_WIDE_INT 2753ia64_initial_elimination_offset (int from, int to) 2754{ 2755 HOST_WIDE_INT offset; 2756 2757 ia64_compute_frame_size (get_frame_size ()); 2758 switch (from) 2759 { 2760 case FRAME_POINTER_REGNUM: 2761 switch (to) 2762 { 2763 case HARD_FRAME_POINTER_REGNUM: 2764 if (current_function_is_leaf) 2765 offset = -current_frame_info.total_size; 2766 else 2767 offset = -(current_frame_info.total_size 2768 - crtl->outgoing_args_size - 16); 2769 break; 2770 2771 case STACK_POINTER_REGNUM: 2772 if (current_function_is_leaf) 2773 offset = 0; 2774 else 2775 offset = 16 + crtl->outgoing_args_size; 2776 break; 2777 2778 default: 2779 gcc_unreachable (); 2780 } 2781 break; 2782 2783 case ARG_POINTER_REGNUM: 2784 /* Arguments start above the 16 byte save area, unless stdarg 2785 in which case we store through the 16 byte save area. */ 2786 switch (to) 2787 { 2788 case HARD_FRAME_POINTER_REGNUM: 2789 offset = 16 - crtl->args.pretend_args_size; 2790 break; 2791 2792 case STACK_POINTER_REGNUM: 2793 offset = (current_frame_info.total_size 2794 + 16 - crtl->args.pretend_args_size); 2795 break; 2796 2797 default: 2798 gcc_unreachable (); 2799 } 2800 break; 2801 2802 default: 2803 gcc_unreachable (); 2804 } 2805 2806 return offset; 2807} 2808 2809/* If there are more than a trivial number of register spills, we use 2810 two interleaved iterators so that we can get two memory references 2811 per insn group. 2812 2813 In order to simplify things in the prologue and epilogue expanders, 2814 we use helper functions to fix up the memory references after the 2815 fact with the appropriate offsets to a POST_MODIFY memory mode. 2816 The following data structure tracks the state of the two iterators 2817 while insns are being emitted. 
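   Illustratively, spilling four registers through two iterators comes
   out shaped something like

	mov  iter0 = base	    adds iter1 = 8, base
	st8  [iter0] = r4, 16	    st8  [iter1] = r5, 16
	st8  [iter0] = r6	    st8  [iter1] = r7

   where the ", 16" post-increments are the POST_MODIFY fixups patched
   in after the fact.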
*/ 2818 2819struct spill_fill_data 2820{ 2821 rtx init_after; /* point at which to emit initializations */ 2822 rtx init_reg[2]; /* initial base register */ 2823 rtx iter_reg[2]; /* the iterator registers */ 2824 rtx *prev_addr[2]; /* address of last memory use */ 2825 rtx prev_insn[2]; /* the insn corresponding to prev_addr */ 2826 HOST_WIDE_INT prev_off[2]; /* last offset */ 2827 int n_iter; /* number of iterators in use */ 2828 int next_iter; /* next iterator to use */ 2829 unsigned int save_gr_used_mask; 2830}; 2831 2832static struct spill_fill_data spill_fill_data; 2833 2834static void 2835setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off) 2836{ 2837 int i; 2838 2839 spill_fill_data.init_after = get_last_insn (); 2840 spill_fill_data.init_reg[0] = init_reg; 2841 spill_fill_data.init_reg[1] = init_reg; 2842 spill_fill_data.prev_addr[0] = NULL; 2843 spill_fill_data.prev_addr[1] = NULL; 2844 spill_fill_data.prev_insn[0] = NULL; 2845 spill_fill_data.prev_insn[1] = NULL; 2846 spill_fill_data.prev_off[0] = cfa_off; 2847 spill_fill_data.prev_off[1] = cfa_off; 2848 spill_fill_data.next_iter = 0; 2849 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; 2850 2851 spill_fill_data.n_iter = 1 + (n_spills > 2); 2852 for (i = 0; i < spill_fill_data.n_iter; ++i) 2853 { 2854 int regno = next_scratch_gr_reg (); 2855 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); 2856 current_frame_info.gr_used_mask |= 1 << regno; 2857 } 2858} 2859 2860static void 2861finish_spill_pointers (void) 2862{ 2863 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; 2864} 2865 2866static rtx 2867spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off) 2868{ 2869 int iter = spill_fill_data.next_iter; 2870 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; 2871 rtx disp_rtx = GEN_INT (disp); 2872 rtx mem; 2873 2874 if (spill_fill_data.prev_addr[iter]) 2875 { 2876 if (satisfies_constraint_N (disp_rtx)) 2877 { 2878 *spill_fill_data.prev_addr[iter] 2879 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], 2880 gen_rtx_PLUS (DImode, 2881 spill_fill_data.iter_reg[iter], 2882 disp_rtx)); 2883 add_reg_note (spill_fill_data.prev_insn[iter], 2884 REG_INC, spill_fill_data.iter_reg[iter]); 2885 } 2886 else 2887 { 2888 /* ??? Could use register post_modify for loads. */ 2889 if (!satisfies_constraint_I (disp_rtx)) 2890 { 2891 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2892 emit_move_insn (tmp, disp_rtx); 2893 disp_rtx = tmp; 2894 } 2895 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2896 spill_fill_data.iter_reg[iter], disp_rtx)); 2897 } 2898 } 2899 /* Micro-optimization: if we've created a frame pointer, it's at 2900 CFA 0, which may allow the real iterator to be initialized lower, 2901 slightly increasing parallelism. Also, if there are few saves 2902 it may eliminate the iterator entirely. 
*/
2903 else if (disp == 0
2904 && spill_fill_data.init_reg[iter] == stack_pointer_rtx
2905 && frame_pointer_needed)
2906 {
2907 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
2908 set_mem_alias_set (mem, get_varargs_alias_set ());
2909 return mem;
2910 }
2911 else
2912 {
2913 rtx seq, insn;
2914
2915 if (disp == 0)
2916 seq = gen_movdi (spill_fill_data.iter_reg[iter],
2917 spill_fill_data.init_reg[iter]);
2918 else
2919 {
2920 start_sequence ();
2921
2922 if (!satisfies_constraint_I (disp_rtx))
2923 {
2924 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
2925 emit_move_insn (tmp, disp_rtx);
2926 disp_rtx = tmp;
2927 }
2928
2929 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
2930 spill_fill_data.init_reg[iter],
2931 disp_rtx));
2932
2933 seq = get_insns ();
2934 end_sequence ();
2935 }
2936
2937 /* Careful for being the first insn in a sequence. */
2938 if (spill_fill_data.init_after)
2939 insn = emit_insn_after (seq, spill_fill_data.init_after);
2940 else
2941 {
2942 rtx first = get_insns ();
2943 if (first)
2944 insn = emit_insn_before (seq, first);
2945 else
2946 insn = emit_insn (seq);
2947 }
2948 spill_fill_data.init_after = insn;
2949 }
2950
2951 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);
2952
2953 /* ??? Not all of the spills are for varargs, but some of them are.
2954 The rest of the spills belong in an alias set of their own. But
2955 it doesn't actually hurt to include them here. */
2956 set_mem_alias_set (mem, get_varargs_alias_set ());
2957
2958 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
2959 spill_fill_data.prev_off[iter] = cfa_off;
2960
2961 if (++iter >= spill_fill_data.n_iter)
2962 iter = 0;
2963 spill_fill_data.next_iter = iter;
2964
2965 return mem;
2966}
2967
2968static void
2969do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
2970 rtx frame_reg)
2971{
2972 int iter = spill_fill_data.next_iter;
2973 rtx mem, insn;
2974
2975 mem = spill_restore_mem (reg, cfa_off);
2976 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
2977 spill_fill_data.prev_insn[iter] = insn;
2978
2979 if (frame_reg)
2980 {
2981 rtx base;
2982 HOST_WIDE_INT off;
2983
2984 RTX_FRAME_RELATED_P (insn) = 1;
2985
2986 /* Don't even pretend that the unwind code can intuit its way
2987 through a pair of interleaved post_modify iterators. Just
2988 provide the correct answer. */
2989
2990 if (frame_pointer_needed)
2991 {
2992 base = hard_frame_pointer_rtx;
2993 off = - cfa_off;
2994 }
2995 else
2996 {
2997 base = stack_pointer_rtx;
2998 off = current_frame_info.total_size - cfa_off;
2999 }
3000
3001 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
3002 gen_rtx_SET (VOIDmode,
3003 gen_rtx_MEM (GET_MODE (reg),
3004 plus_constant (base, off)),
3005 frame_reg));
3006 }
3007}
3008
3009static void
3010do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
3011{
3012 int iter = spill_fill_data.next_iter;
3013 rtx insn;
3014
3015 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
3016 GEN_INT (cfa_off)));
3017 spill_fill_data.prev_insn[iter] = insn;
3018}
3019
3020/* Wrapper functions that discard the CONST_INT spill offset. These
3021 exist so that we can give gr_spill/gr_fill the offset they need and
3022 use a consistent function interface.
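   All three match the rtx (*move_fn) (rtx, rtx, rtx) signature that
   do_spill and do_restore expect; the gr_spill and gr_fill patterns
   are the only movers that actually consume the offset operand.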
*/
3023
3024static rtx
3025gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3026{
3027 return gen_movdi (dest, src);
3028}
3029
3030static rtx
3031gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3032{
3033 return gen_fr_spill (dest, src);
3034}
3035
3036static rtx
3037gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
3038{
3039 return gen_fr_restore (dest, src);
3040}
3041
3042/* Called after register allocation to add any instructions needed for the
3043 prologue. Using a prologue insn is favored compared to putting all of the
3044 instructions in output_function_prologue(), since it allows the scheduler
3045 to intermix instructions with the saves of the caller saved registers. In
3046 some cases, it might be necessary to emit a barrier instruction as the last
3047 insn to prevent such scheduling.
3048
3049 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
3050 so that the debug info generation code can handle them properly.
3051
3052 The register save area is laid out like so:
3053 cfa+16
3054 [ varargs spill area ]
3055 [ fr register spill area ]
3056 [ br register spill area ]
3057 [ ar register spill area ]
3058 [ pr register spill area ]
3059 [ gr register spill area ] */
3060
3061/* ??? Get inefficient code when the frame size is larger than can fit in an
3062 adds instruction. */
3063
3064void
3065ia64_expand_prologue (void)
3066{
3067 rtx insn, ar_pfs_save_reg, ar_unat_save_reg;
3068 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
3069 rtx reg, alt_reg;
3070
3071 ia64_compute_frame_size (get_frame_size ());
3072 last_scratch_gr_reg = 15;
3073
3074 if (dump_file)
3075 {
3076 fprintf (dump_file, "ia64 frame related registers "
3077 "recorded in current_frame_info.r[]:\n");
3078#define PRINTREG(a) if (current_frame_info.r[a]) \
3079 fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
3080 PRINTREG(reg_fp);
3081 PRINTREG(reg_save_b0);
3082 PRINTREG(reg_save_pr);
3083 PRINTREG(reg_save_ar_pfs);
3084 PRINTREG(reg_save_ar_unat);
3085 PRINTREG(reg_save_ar_lc);
3086 PRINTREG(reg_save_gp);
3087#undef PRINTREG
3088 }
3089
3090 /* If there is no epilogue, then we don't need some prologue insns.
3091 We need to avoid emitting the dead prologue insns, because flow
3092 will complain about them. */
3093 if (optimize)
3094 {
3095 edge e;
3096 edge_iterator ei;
3097
3098 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
3099 if ((e->flags & EDGE_FAKE) == 0
3100 && (e->flags & EDGE_FALLTHRU) != 0)
3101 break;
3102 epilogue_p = (e != NULL);
3103 }
3104 else
3105 epilogue_p = 1;
3106
3107 /* Set the local, input, and output register names. We need to do this
3108 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
3109 half. If we use in/loc/out register names, then we get assembler errors
3110 in crtn.S because there is no alloc insn or regstk directive in there. */
3111 if (! TARGET_REG_NAMES)
3112 {
3113 int inputs = current_frame_info.n_input_regs;
3114 int locals = current_frame_info.n_local_regs;
3115 int outputs = current_frame_info.n_output_regs;
3116
3117 for (i = 0; i < inputs; i++)
3118 reg_names[IN_REG (i)] = ia64_reg_numbers[i];
3119 for (i = 0; i < locals; i++)
3120 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
3121 for (i = 0; i < outputs; i++)
3122 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
3123 }
3124
3125 /* Set the frame pointer register name. The regnum is logically loc79,
3126 but of course we'll not have allocated that many locals.
Rather than 3127 worrying about renumbering the existing rtxs, we adjust the name. */ 3128 /* ??? This code means that we can never use one local register when 3129 there is a frame pointer. loc79 gets wasted in this case, as it is 3130 renamed to a register that will never be used. See also the try_locals 3131 code in find_gr_spill. */ 3132 if (current_frame_info.r[reg_fp]) 3133 { 3134 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3135 reg_names[HARD_FRAME_POINTER_REGNUM] 3136 = reg_names[current_frame_info.r[reg_fp]]; 3137 reg_names[current_frame_info.r[reg_fp]] = tmp; 3138 } 3139 3140 /* We don't need an alloc instruction if we've used no outputs or locals. */ 3141 if (current_frame_info.n_local_regs == 0 3142 && current_frame_info.n_output_regs == 0 3143 && current_frame_info.n_input_regs <= crtl->args.info.int_regs 3144 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 3145 { 3146 /* If there is no alloc, but there are input registers used, then we 3147 need a .regstk directive. */ 3148 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 3149 ar_pfs_save_reg = NULL_RTX; 3150 } 3151 else 3152 { 3153 current_frame_info.need_regstk = 0; 3154 3155 if (current_frame_info.r[reg_save_ar_pfs]) 3156 { 3157 regno = current_frame_info.r[reg_save_ar_pfs]; 3158 reg_emitted (reg_save_ar_pfs); 3159 } 3160 else 3161 regno = next_scratch_gr_reg (); 3162 ar_pfs_save_reg = gen_rtx_REG (DImode, regno); 3163 3164 insn = emit_insn (gen_alloc (ar_pfs_save_reg, 3165 GEN_INT (current_frame_info.n_input_regs), 3166 GEN_INT (current_frame_info.n_local_regs), 3167 GEN_INT (current_frame_info.n_output_regs), 3168 GEN_INT (current_frame_info.n_rotate_regs))); 3169 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_pfs] != 0); 3170 } 3171 3172 /* Set up frame pointer, stack pointer, and spill iterators. */ 3173 3174 n_varargs = cfun->machine->n_varargs; 3175 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 3176 stack_pointer_rtx, 0); 3177 3178 if (frame_pointer_needed) 3179 { 3180 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 3181 RTX_FRAME_RELATED_P (insn) = 1; 3182 } 3183 3184 if (current_frame_info.total_size != 0) 3185 { 3186 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 3187 rtx offset; 3188 3189 if (satisfies_constraint_I (frame_size_rtx)) 3190 offset = frame_size_rtx; 3191 else 3192 { 3193 regno = next_scratch_gr_reg (); 3194 offset = gen_rtx_REG (DImode, regno); 3195 emit_move_insn (offset, frame_size_rtx); 3196 } 3197 3198 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 3199 stack_pointer_rtx, offset)); 3200 3201 if (! frame_pointer_needed) 3202 { 3203 RTX_FRAME_RELATED_P (insn) = 1; 3204 if (GET_CODE (offset) != CONST_INT) 3205 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3206 gen_rtx_SET (VOIDmode, 3207 stack_pointer_rtx, 3208 gen_rtx_PLUS (DImode, 3209 stack_pointer_rtx, 3210 frame_size_rtx))); 3211 } 3212 3213 /* ??? At this point we must generate a magic insn that appears to 3214 modify the stack pointer, the frame pointer, and all spill 3215 iterators. This would allow the most scheduling freedom. For 3216 now, just hard stop. */ 3217 emit_insn (gen_blockage ()); 3218 } 3219 3220 /* Must copy out ar.unat before doing any integer spills. 
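   (st8.spill records the NaT bit of each spilled general register in
   ar.unat, so the integer spills below would clobber the caller's
   value had we not copied it out first.)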
*/ 3221 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3222 { 3223 if (current_frame_info.r[reg_save_ar_unat]) 3224 { 3225 ar_unat_save_reg 3226 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3227 reg_emitted (reg_save_ar_unat); 3228 } 3229 else 3230 { 3231 alt_regno = next_scratch_gr_reg (); 3232 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3233 current_frame_info.gr_used_mask |= 1 << alt_regno; 3234 } 3235 3236 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3237 insn = emit_move_insn (ar_unat_save_reg, reg); 3238 RTX_FRAME_RELATED_P (insn) = (current_frame_info.r[reg_save_ar_unat] != 0); 3239 3240 /* Even if we're not going to generate an epilogue, we still 3241 need to save the register so that EH works. */ 3242 if (! epilogue_p && current_frame_info.r[reg_save_ar_unat]) 3243 emit_insn (gen_prologue_use (ar_unat_save_reg)); 3244 } 3245 else 3246 ar_unat_save_reg = NULL_RTX; 3247 3248 /* Spill all varargs registers. Do this before spilling any GR registers, 3249 since we want the UNAT bits for the GR registers to override the UNAT 3250 bits from varargs, which we don't care about. */ 3251 3252 cfa_off = -16; 3253 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 3254 { 3255 reg = gen_rtx_REG (DImode, regno); 3256 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 3257 } 3258 3259 /* Locate the bottom of the register save area. */ 3260 cfa_off = (current_frame_info.spill_cfa_off 3261 + current_frame_info.spill_size 3262 + current_frame_info.extra_spill_size); 3263 3264 /* Save the predicate register block either in a register or in memory. */ 3265 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3266 { 3267 reg = gen_rtx_REG (DImode, PR_REG (0)); 3268 if (current_frame_info.r[reg_save_pr] != 0) 3269 { 3270 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3271 reg_emitted (reg_save_pr); 3272 insn = emit_move_insn (alt_reg, reg); 3273 3274 /* ??? Denote pr spill/fill by a DImode move that modifies all 3275 64 hard registers. */ 3276 RTX_FRAME_RELATED_P (insn) = 1; 3277 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3278 gen_rtx_SET (VOIDmode, alt_reg, reg)); 3279 3280 /* Even if we're not going to generate an epilogue, we still 3281 need to save the register so that EH works. */ 3282 if (! epilogue_p) 3283 emit_insn (gen_prologue_use (alt_reg)); 3284 } 3285 else 3286 { 3287 alt_regno = next_scratch_gr_reg (); 3288 alt_reg = gen_rtx_REG (DImode, alt_regno); 3289 insn = emit_move_insn (alt_reg, reg); 3290 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3291 cfa_off -= 8; 3292 } 3293 } 3294 3295 /* Handle AR regs in numerical order. All of them get special handling. */ 3296 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 3297 && current_frame_info.r[reg_save_ar_unat] == 0) 3298 { 3299 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3300 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 3301 cfa_off -= 8; 3302 } 3303 3304 /* The alloc insn already copied ar.pfs into a general register. The 3305 only thing we have to do now is copy that register to a stack slot 3306 if we'd not allocated a local register for the job. 
*/ 3307 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 3308 && current_frame_info.r[reg_save_ar_pfs] == 0) 3309 { 3310 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3311 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 3312 cfa_off -= 8; 3313 } 3314 3315 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 3316 { 3317 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 3318 if (current_frame_info.r[reg_save_ar_lc] != 0) 3319 { 3320 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 3321 reg_emitted (reg_save_ar_lc); 3322 insn = emit_move_insn (alt_reg, reg); 3323 RTX_FRAME_RELATED_P (insn) = 1; 3324 3325 /* Even if we're not going to generate an epilogue, we still 3326 need to save the register so that EH works. */ 3327 if (! epilogue_p) 3328 emit_insn (gen_prologue_use (alt_reg)); 3329 } 3330 else 3331 { 3332 alt_regno = next_scratch_gr_reg (); 3333 alt_reg = gen_rtx_REG (DImode, alt_regno); 3334 emit_move_insn (alt_reg, reg); 3335 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3336 cfa_off -= 8; 3337 } 3338 } 3339 3340 /* Save the return pointer. */ 3341 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3342 { 3343 reg = gen_rtx_REG (DImode, BR_REG (0)); 3344 if (current_frame_info.r[reg_save_b0] != 0) 3345 { 3346 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3347 reg_emitted (reg_save_b0); 3348 insn = emit_move_insn (alt_reg, reg); 3349 RTX_FRAME_RELATED_P (insn) = 1; 3350 3351 /* Even if we're not going to generate an epilogue, we still 3352 need to save the register so that EH works. */ 3353 if (! epilogue_p) 3354 emit_insn (gen_prologue_use (alt_reg)); 3355 } 3356 else 3357 { 3358 alt_regno = next_scratch_gr_reg (); 3359 alt_reg = gen_rtx_REG (DImode, alt_regno); 3360 emit_move_insn (alt_reg, reg); 3361 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3362 cfa_off -= 8; 3363 } 3364 } 3365 3366 if (current_frame_info.r[reg_save_gp]) 3367 { 3368 reg_emitted (reg_save_gp); 3369 insn = emit_move_insn (gen_rtx_REG (DImode, 3370 current_frame_info.r[reg_save_gp]), 3371 pic_offset_table_rtx); 3372 } 3373 3374 /* We should now be at the base of the gr/br/fr spill area. */ 3375 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off 3376 + current_frame_info.spill_size)); 3377 3378 /* Spill all general registers. */ 3379 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 3380 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3381 { 3382 reg = gen_rtx_REG (DImode, regno); 3383 do_spill (gen_gr_spill, reg, cfa_off, reg); 3384 cfa_off -= 8; 3385 } 3386 3387 /* Spill the rest of the BR registers. */ 3388 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 3389 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3390 { 3391 alt_regno = next_scratch_gr_reg (); 3392 alt_reg = gen_rtx_REG (DImode, alt_regno); 3393 reg = gen_rtx_REG (DImode, regno); 3394 emit_move_insn (alt_reg, reg); 3395 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 3396 cfa_off -= 8; 3397 } 3398 3399 /* Align the frame and spill all FR registers. */ 3400 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 3401 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3402 { 3403 gcc_assert (!(cfa_off & 15)); 3404 reg = gen_rtx_REG (XFmode, regno); 3405 do_spill (gen_fr_spill_x, reg, cfa_off, reg); 3406 cfa_off -= 16; 3407 } 3408 3409 gcc_assert (cfa_off == current_frame_info.spill_cfa_off); 3410 3411 finish_spill_pointers (); 3412} 3413 3414/* Called after register allocation to add any instructions needed for the 3415 epilogue. 
Using an epilogue insn is favored compared to putting all of the 3416 instructions in output_function_prologue(), since it allows the scheduler 3417 to intermix instructions with the saves of the caller saved registers. In 3418 some cases, it might be necessary to emit a barrier instruction as the last 3419 insn to prevent such scheduling. */ 3420 3421void 3422ia64_expand_epilogue (int sibcall_p) 3423{ 3424 rtx insn, reg, alt_reg, ar_unat_save_reg; 3425 int regno, alt_regno, cfa_off; 3426 3427 ia64_compute_frame_size (get_frame_size ()); 3428 3429 /* If there is a frame pointer, then we use it instead of the stack 3430 pointer, so that the stack pointer does not need to be valid when 3431 the epilogue starts. See EXIT_IGNORE_STACK. */ 3432 if (frame_pointer_needed) 3433 setup_spill_pointers (current_frame_info.n_spilled, 3434 hard_frame_pointer_rtx, 0); 3435 else 3436 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 3437 current_frame_info.total_size); 3438 3439 if (current_frame_info.total_size != 0) 3440 { 3441 /* ??? At this point we must generate a magic insn that appears to 3442 modify the spill iterators and the frame pointer. This would 3443 allow the most scheduling freedom. For now, just hard stop. */ 3444 emit_insn (gen_blockage ()); 3445 } 3446 3447 /* Locate the bottom of the register save area. */ 3448 cfa_off = (current_frame_info.spill_cfa_off 3449 + current_frame_info.spill_size 3450 + current_frame_info.extra_spill_size); 3451 3452 /* Restore the predicate registers. */ 3453 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 3454 { 3455 if (current_frame_info.r[reg_save_pr] != 0) 3456 { 3457 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]); 3458 reg_emitted (reg_save_pr); 3459 } 3460 else 3461 { 3462 alt_regno = next_scratch_gr_reg (); 3463 alt_reg = gen_rtx_REG (DImode, alt_regno); 3464 do_restore (gen_movdi_x, alt_reg, cfa_off); 3465 cfa_off -= 8; 3466 } 3467 reg = gen_rtx_REG (DImode, PR_REG (0)); 3468 emit_move_insn (reg, alt_reg); 3469 } 3470 3471 /* Restore the application registers. */ 3472 3473 /* Load the saved unat from the stack, but do not restore it until 3474 after the GRs have been restored. 
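   (ld8.fill takes the NaT bit for its destination from ar.unat, so
   writing the caller's value back too early would corrupt the fills
   of our own spilled GRs; the real ar.unat restore is done near the
   end of this function.)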
*/ 3475 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3476 { 3477 if (current_frame_info.r[reg_save_ar_unat] != 0) 3478 { 3479 ar_unat_save_reg 3480 = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]); 3481 reg_emitted (reg_save_ar_unat); 3482 } 3483 else 3484 { 3485 alt_regno = next_scratch_gr_reg (); 3486 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 3487 current_frame_info.gr_used_mask |= 1 << alt_regno; 3488 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 3489 cfa_off -= 8; 3490 } 3491 } 3492 else 3493 ar_unat_save_reg = NULL_RTX; 3494 3495 if (current_frame_info.r[reg_save_ar_pfs] != 0) 3496 { 3497 reg_emitted (reg_save_ar_pfs); 3498 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]); 3499 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3500 emit_move_insn (reg, alt_reg); 3501 } 3502 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 3503 { 3504 alt_regno = next_scratch_gr_reg (); 3505 alt_reg = gen_rtx_REG (DImode, alt_regno); 3506 do_restore (gen_movdi_x, alt_reg, cfa_off); 3507 cfa_off -= 8; 3508 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 3509 emit_move_insn (reg, alt_reg); 3510 } 3511 3512 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 3513 { 3514 if (current_frame_info.r[reg_save_ar_lc] != 0) 3515 { 3516 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]); 3517 reg_emitted (reg_save_ar_lc); 3518 } 3519 else 3520 { 3521 alt_regno = next_scratch_gr_reg (); 3522 alt_reg = gen_rtx_REG (DImode, alt_regno); 3523 do_restore (gen_movdi_x, alt_reg, cfa_off); 3524 cfa_off -= 8; 3525 } 3526 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 3527 emit_move_insn (reg, alt_reg); 3528 } 3529 3530 /* Restore the return pointer. */ 3531 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3532 { 3533 if (current_frame_info.r[reg_save_b0] != 0) 3534 { 3535 alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3536 reg_emitted (reg_save_b0); 3537 } 3538 else 3539 { 3540 alt_regno = next_scratch_gr_reg (); 3541 alt_reg = gen_rtx_REG (DImode, alt_regno); 3542 do_restore (gen_movdi_x, alt_reg, cfa_off); 3543 cfa_off -= 8; 3544 } 3545 reg = gen_rtx_REG (DImode, BR_REG (0)); 3546 emit_move_insn (reg, alt_reg); 3547 } 3548 3549 /* We should now be at the base of the gr/br/fr spill area. */ 3550 gcc_assert (cfa_off == (current_frame_info.spill_cfa_off 3551 + current_frame_info.spill_size)); 3552 3553 /* The GP may be stored on the stack in the prologue, but it's 3554 never restored in the epilogue. Skip the stack slot. */ 3555 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 3556 cfa_off -= 8; 3557 3558 /* Restore all general registers. */ 3559 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 3560 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3561 { 3562 reg = gen_rtx_REG (DImode, regno); 3563 do_restore (gen_gr_restore, reg, cfa_off); 3564 cfa_off -= 8; 3565 } 3566 3567 /* Restore the branch registers. */ 3568 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 3569 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3570 { 3571 alt_regno = next_scratch_gr_reg (); 3572 alt_reg = gen_rtx_REG (DImode, alt_regno); 3573 do_restore (gen_movdi_x, alt_reg, cfa_off); 3574 cfa_off -= 8; 3575 reg = gen_rtx_REG (DImode, regno); 3576 emit_move_insn (reg, alt_reg); 3577 } 3578 3579 /* Restore floating point registers. 
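   Each FR save slot is 16 bytes wide and 16-byte aligned (hence the
   XFmode moves and the alignment assert below), and is refilled with
   ldf.fill to match the stf.spill done by the prologue.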
*/ 3580 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 3581 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3582 { 3583 gcc_assert (!(cfa_off & 15)); 3584 reg = gen_rtx_REG (XFmode, regno); 3585 do_restore (gen_fr_restore_x, reg, cfa_off); 3586 cfa_off -= 16; 3587 } 3588 3589 /* Restore ar.unat for real. */ 3590 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 3591 { 3592 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 3593 emit_move_insn (reg, ar_unat_save_reg); 3594 } 3595 3596 gcc_assert (cfa_off == current_frame_info.spill_cfa_off); 3597 3598 finish_spill_pointers (); 3599 3600 if (current_frame_info.total_size 3601 || cfun->machine->ia64_eh_epilogue_sp 3602 || frame_pointer_needed) 3603 { 3604 /* ??? At this point we must generate a magic insn that appears to 3605 modify the spill iterators, the stack pointer, and the frame 3606 pointer. This would allow the most scheduling freedom. For now, 3607 just hard stop. */ 3608 emit_insn (gen_blockage ()); 3609 } 3610 3611 if (cfun->machine->ia64_eh_epilogue_sp) 3612 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 3613 else if (frame_pointer_needed) 3614 { 3615 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 3616 RTX_FRAME_RELATED_P (insn) = 1; 3617 } 3618 else if (current_frame_info.total_size) 3619 { 3620 rtx offset, frame_size_rtx; 3621 3622 frame_size_rtx = GEN_INT (current_frame_info.total_size); 3623 if (satisfies_constraint_I (frame_size_rtx)) 3624 offset = frame_size_rtx; 3625 else 3626 { 3627 regno = next_scratch_gr_reg (); 3628 offset = gen_rtx_REG (DImode, regno); 3629 emit_move_insn (offset, frame_size_rtx); 3630 } 3631 3632 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 3633 offset)); 3634 3635 RTX_FRAME_RELATED_P (insn) = 1; 3636 if (GET_CODE (offset) != CONST_INT) 3637 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 3638 gen_rtx_SET (VOIDmode, 3639 stack_pointer_rtx, 3640 gen_rtx_PLUS (DImode, 3641 stack_pointer_rtx, 3642 frame_size_rtx))); 3643 } 3644 3645 if (cfun->machine->ia64_eh_epilogue_bsp) 3646 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 3647 3648 if (! sibcall_p) 3649 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 3650 else 3651 { 3652 int fp = GR_REG (2); 3653 /* We need a throw away register here, r0 and r1 are reserved, so r2 is the 3654 first available call clobbered register. If there was a frame_pointer 3655 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, 3656 so we have to make sure we're using the string "r2" when emitting 3657 the register name for the assembler. */ 3658 if (current_frame_info.r[reg_fp] 3659 && current_frame_info.r[reg_fp] == GR_REG (2)) 3660 fp = HARD_FRAME_POINTER_REGNUM; 3661 3662 /* We must emit an alloc to force the input registers to become output 3663 registers. Otherwise, if the callee tries to pass its parameters 3664 through to another call without an intervening alloc, then these 3665 values get lost. */ 3666 /* ??? We don't need to preserve all input registers. We only need to 3667 preserve those input registers used as arguments to the sibling call. 3668 It is unclear how to compute that number here. 
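   The alloc emitted below is, schematically,

	alloc rX = ar.pfs, 0, 0, n_inputs, 0

   i.e. zero inputs and locals with n_inputs outputs, which renames
   the current input registers into the output area so they survive
   into the sibling call.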
*/ 3669 if (current_frame_info.n_input_regs != 0) 3670 { 3671 rtx n_inputs = GEN_INT (current_frame_info.n_input_regs); 3672 insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 3673 const0_rtx, const0_rtx, 3674 n_inputs, const0_rtx)); 3675 RTX_FRAME_RELATED_P (insn) = 1; 3676 } 3677 } 3678} 3679 3680/* Return 1 if br.ret can do all the work required to return from a 3681 function. */ 3682 3683int 3684ia64_direct_return (void) 3685{ 3686 if (reload_completed && ! frame_pointer_needed) 3687 { 3688 ia64_compute_frame_size (get_frame_size ()); 3689 3690 return (current_frame_info.total_size == 0 3691 && current_frame_info.n_spilled == 0 3692 && current_frame_info.r[reg_save_b0] == 0 3693 && current_frame_info.r[reg_save_pr] == 0 3694 && current_frame_info.r[reg_save_ar_pfs] == 0 3695 && current_frame_info.r[reg_save_ar_unat] == 0 3696 && current_frame_info.r[reg_save_ar_lc] == 0); 3697 } 3698 return 0; 3699} 3700 3701/* Return the magic cookie that we use to hold the return address 3702 during early compilation. */ 3703 3704rtx 3705ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED) 3706{ 3707 if (count != 0) 3708 return NULL; 3709 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); 3710} 3711 3712/* Split this value after reload, now that we know where the return 3713 address is saved. */ 3714 3715void 3716ia64_split_return_addr_rtx (rtx dest) 3717{ 3718 rtx src; 3719 3720 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 3721 { 3722 if (current_frame_info.r[reg_save_b0] != 0) 3723 { 3724 src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]); 3725 reg_emitted (reg_save_b0); 3726 } 3727 else 3728 { 3729 HOST_WIDE_INT off; 3730 unsigned int regno; 3731 rtx off_r; 3732 3733 /* Compute offset from CFA for BR0. */ 3734 /* ??? Must be kept in sync with ia64_expand_prologue. */ 3735 off = (current_frame_info.spill_cfa_off 3736 + current_frame_info.spill_size); 3737 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 3738 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 3739 off -= 8; 3740 3741 /* Convert CFA offset to a register based offset. */ 3742 if (frame_pointer_needed) 3743 src = hard_frame_pointer_rtx; 3744 else 3745 { 3746 src = stack_pointer_rtx; 3747 off += current_frame_info.total_size; 3748 } 3749 3750 /* Load address into scratch register. */ 3751 off_r = GEN_INT (off); 3752 if (satisfies_constraint_I (off_r)) 3753 emit_insn (gen_adddi3 (dest, src, off_r)); 3754 else 3755 { 3756 emit_move_insn (dest, off_r); 3757 emit_insn (gen_adddi3 (dest, src, dest)); 3758 } 3759 3760 src = gen_rtx_MEM (Pmode, dest); 3761 } 3762 } 3763 else 3764 src = gen_rtx_REG (DImode, BR_REG (0)); 3765 3766 emit_move_insn (dest, src); 3767} 3768 3769int 3770ia64_hard_regno_rename_ok (int from, int to) 3771{ 3772 /* Don't clobber any of the registers we reserved for the prologue. */ 3773 unsigned int r; 3774 3775 for (r = reg_fp; r <= reg_save_ar_lc; r++) 3776 if (to == current_frame_info.r[r] 3777 || from == current_frame_info.r[r] 3778 || to == emitted_frame_related_regs[r] 3779 || from == emitted_frame_related_regs[r]) 3780 return 0; 3781 3782 /* Don't use output registers outside the register frame. */ 3783 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 3784 return 0; 3785 3786 /* Retain even/oddness on predicate register pairs. */ 3787 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 3788 return (from & 1) == (to & 1); 3789 3790 return 1; 3791} 3792 3793/* Target hook for assembling integer objects. 
Handle word-sized 3794 aligned objects and detect the cases when @fptr is needed. */ 3795 3796static bool 3797ia64_assemble_integer (rtx x, unsigned int size, int aligned_p) 3798{ 3799 if (size == POINTER_SIZE / BITS_PER_UNIT 3800 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 3801 && GET_CODE (x) == SYMBOL_REF 3802 && SYMBOL_REF_FUNCTION_P (x)) 3803 { 3804 static const char * const directive[2][2] = { 3805 /* 64-bit pointer */ /* 32-bit pointer */ 3806 { "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("}, /* unaligned */ 3807 { "\tdata8\t@fptr(", "\tdata4\t@fptr("} /* aligned */ 3808 }; 3809 fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file); 3810 output_addr_const (asm_out_file, x); 3811 fputs (")\n", asm_out_file); 3812 return true; 3813 } 3814 return default_assemble_integer (x, size, aligned_p); 3815} 3816 3817/* Emit the function prologue. */ 3818 3819static void 3820ia64_output_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 3821{ 3822 int mask, grsave, grsave_prev; 3823 3824 if (current_frame_info.need_regstk) 3825 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 3826 current_frame_info.n_input_regs, 3827 current_frame_info.n_local_regs, 3828 current_frame_info.n_output_regs, 3829 current_frame_info.n_rotate_regs); 3830 3831 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3832 return; 3833 3834 /* Emit the .prologue directive. */ 3835 3836 mask = 0; 3837 grsave = grsave_prev = 0; 3838 if (current_frame_info.r[reg_save_b0] != 0) 3839 { 3840 mask |= 8; 3841 grsave = grsave_prev = current_frame_info.r[reg_save_b0]; 3842 } 3843 if (current_frame_info.r[reg_save_ar_pfs] != 0 3844 && (grsave_prev == 0 3845 || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1)) 3846 { 3847 mask |= 4; 3848 if (grsave_prev == 0) 3849 grsave = current_frame_info.r[reg_save_ar_pfs]; 3850 grsave_prev = current_frame_info.r[reg_save_ar_pfs]; 3851 } 3852 if (current_frame_info.r[reg_fp] != 0 3853 && (grsave_prev == 0 3854 || current_frame_info.r[reg_fp] == grsave_prev + 1)) 3855 { 3856 mask |= 2; 3857 if (grsave_prev == 0) 3858 grsave = HARD_FRAME_POINTER_REGNUM; 3859 grsave_prev = current_frame_info.r[reg_fp]; 3860 } 3861 if (current_frame_info.r[reg_save_pr] != 0 3862 && (grsave_prev == 0 3863 || current_frame_info.r[reg_save_pr] == grsave_prev + 1)) 3864 { 3865 mask |= 1; 3866 if (grsave_prev == 0) 3867 grsave = current_frame_info.r[reg_save_pr]; 3868 } 3869 3870 if (mask && TARGET_GNU_AS) 3871 fprintf (file, "\t.prologue %d, %d\n", mask, 3872 ia64_dbx_register_number (grsave)); 3873 else 3874 fputs ("\t.prologue\n", file); 3875 3876 /* Emit a .spill directive, if necessary, to relocate the base of 3877 the register spill area. */ 3878 if (current_frame_info.spill_cfa_off != -16) 3879 fprintf (file, "\t.spill %ld\n", 3880 (long) (current_frame_info.spill_cfa_off 3881 + current_frame_info.spill_size)); 3882} 3883 3884/* Emit the .body directive at the scheduled end of the prologue. */ 3885 3886static void 3887ia64_output_function_end_prologue (FILE *file) 3888{ 3889 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3890 return; 3891 3892 fputs ("\t.body\n", file); 3893} 3894 3895/* Emit the function epilogue. 
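No assembly is printed here; we only undo the register renaming performed for the prologue and restore the default register names.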
*/ 3896 3897static void 3898ia64_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 3899 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 3900{ 3901 int i; 3902 3903 if (current_frame_info.r[reg_fp]) 3904 { 3905 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3906 reg_names[HARD_FRAME_POINTER_REGNUM] 3907 = reg_names[current_frame_info.r[reg_fp]]; 3908 reg_names[current_frame_info.r[reg_fp]] = tmp; 3909 reg_emitted (reg_fp); 3910 } 3911 if (! TARGET_REG_NAMES) 3912 { 3913 for (i = 0; i < current_frame_info.n_input_regs; i++) 3914 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 3915 for (i = 0; i < current_frame_info.n_local_regs; i++) 3916 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 3917 for (i = 0; i < current_frame_info.n_output_regs; i++) 3918 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 3919 } 3920 3921 current_frame_info.initialized = 0; 3922} 3923 3924int 3925ia64_dbx_register_number (int regno) 3926{ 3927 /* In ia64_expand_prologue we quite literally renamed the frame pointer 3928 from its home at loc79 to something inside the register frame. We 3929 must perform the same renumbering here for the debug info. */ 3930 if (current_frame_info.r[reg_fp]) 3931 { 3932 if (regno == HARD_FRAME_POINTER_REGNUM) 3933 regno = current_frame_info.r[reg_fp]; 3934 else if (regno == current_frame_info.r[reg_fp]) 3935 regno = HARD_FRAME_POINTER_REGNUM; 3936 } 3937 3938 if (IN_REGNO_P (regno)) 3939 return 32 + regno - IN_REG (0); 3940 else if (LOC_REGNO_P (regno)) 3941 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 3942 else if (OUT_REGNO_P (regno)) 3943 return (32 + current_frame_info.n_input_regs 3944 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 3945 else 3946 return regno; 3947} 3948 3949/* Implement TARGET_TRAMPOLINE_INIT. 3950 3951 The trampoline should set the static chain pointer to value placed 3952 into the trampoline and should branch to the specified routine. 3953 To make the normal indirect-subroutine calling convention work, 3954 the trampoline must look like a function descriptor; the first 3955 word being the target address and the second being the target's 3956 global pointer. 3957 3958 We abuse the concept of a global pointer by arranging for it 3959 to point to the data we need to load. The complete trampoline 3960 has the following form: 3961 3962 +-------------------+ \ 3963 TRAMP: | __ia64_trampoline | | 3964 +-------------------+ > fake function descriptor 3965 | TRAMP+16 | | 3966 +-------------------+ / 3967 | target descriptor | 3968 +-------------------+ 3969 | static link | 3970 +-------------------+ 3971*/ 3972 3973static void 3974ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain) 3975{ 3976 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 3977 rtx addr, addr_reg, tramp, eight = GEN_INT (8); 3978 3979 /* The Intel assembler requires that the global __ia64_trampoline symbol 3980 be declared explicitly */ 3981 if (!TARGET_GNU_AS) 3982 { 3983 static bool declared_ia64_trampoline = false; 3984 3985 if (!declared_ia64_trampoline) 3986 { 3987 declared_ia64_trampoline = true; 3988 (*targetm.asm_out.globalize_label) (asm_out_file, 3989 "__ia64_trampoline"); 3990 } 3991 } 3992 3993 /* Make sure addresses are Pmode even if we are in ILP32 mode. */ 3994 addr = convert_memory_address (Pmode, XEXP (m_tramp, 0)); 3995 fnaddr = convert_memory_address (Pmode, fnaddr); 3996 static_chain = convert_memory_address (Pmode, static_chain); 3997 3998 /* Load up our iterator. 
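addr_reg walks through the four 8-byte words of the trampoline, advancing by eight after each store.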
*/
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
    {
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
	 relocation against function symbols to make it identical to the
	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
	 strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
      tramp = reg;
    }
  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
}

/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument, which has type TYPE and mode MODE.

   We generate the actual spill instructions during prologue generation.  */

static void
ia64_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     tree type, int * pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS next_cum = *cum;

  /* Skip the current argument.  */
  ia64_function_arg_advance (&next_cum, mode, type, 1);

  if (next_cum.words < MAX_ARGUMENT_SLOTS)
    {
      int n = MAX_ARGUMENT_SLOTS - next_cum.words;
      *pretend_size = n * UNITS_PER_WORD;
      cfun->machine->n_varargs = n;
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leaves.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.

   Variable sized aggregates should never arrive here, since we should
   have already decided to pass them by reference.  Top-level zero-sized
   aggregates are excluded because our parallels crash the middle-end.
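For example, struct { float x; float y[2]; } is an SFmode HFA, while a struct mixing float and double fields is not.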
*/ 4069 4070static enum machine_mode 4071hfa_element_mode (const_tree type, bool nested) 4072{ 4073 enum machine_mode element_mode = VOIDmode; 4074 enum machine_mode mode; 4075 enum tree_code code = TREE_CODE (type); 4076 int know_element_mode = 0; 4077 tree t; 4078 4079 if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type)))) 4080 return VOIDmode; 4081 4082 switch (code) 4083 { 4084 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: 4085 case BOOLEAN_TYPE: case POINTER_TYPE: 4086 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: 4087 case LANG_TYPE: case FUNCTION_TYPE: 4088 return VOIDmode; 4089 4090 /* Fortran complex types are supposed to be HFAs, so we need to handle 4091 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex 4092 types though. */ 4093 case COMPLEX_TYPE: 4094 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT 4095 && TYPE_MODE (type) != TCmode) 4096 return GET_MODE_INNER (TYPE_MODE (type)); 4097 else 4098 return VOIDmode; 4099 4100 case REAL_TYPE: 4101 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual 4102 mode if this is contained within an aggregate. */ 4103 if (nested && TYPE_MODE (type) != TFmode) 4104 return TYPE_MODE (type); 4105 else 4106 return VOIDmode; 4107 4108 case ARRAY_TYPE: 4109 return hfa_element_mode (TREE_TYPE (type), 1); 4110 4111 case RECORD_TYPE: 4112 case UNION_TYPE: 4113 case QUAL_UNION_TYPE: 4114 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) 4115 { 4116 if (TREE_CODE (t) != FIELD_DECL) 4117 continue; 4118 4119 mode = hfa_element_mode (TREE_TYPE (t), 1); 4120 if (know_element_mode) 4121 { 4122 if (mode != element_mode) 4123 return VOIDmode; 4124 } 4125 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 4126 return VOIDmode; 4127 else 4128 { 4129 know_element_mode = 1; 4130 element_mode = mode; 4131 } 4132 } 4133 return element_mode; 4134 4135 default: 4136 /* If we reach here, we probably have some front-end specific type 4137 that the backend doesn't know about. This can happen via the 4138 aggregate_value_p call in init_function_start. All we can do is 4139 ignore unknown tree types. */ 4140 return VOIDmode; 4141 } 4142 4143 return VOIDmode; 4144} 4145 4146/* Return the number of words required to hold a quantity of TYPE and MODE 4147 when passed as an argument. */ 4148static int 4149ia64_function_arg_words (tree type, enum machine_mode mode) 4150{ 4151 int words; 4152 4153 if (mode == BLKmode) 4154 words = int_size_in_bytes (type); 4155 else 4156 words = GET_MODE_SIZE (mode); 4157 4158 return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD; /* round up */ 4159} 4160 4161/* Return the number of registers that should be skipped so the current 4162 argument (described by TYPE and WORDS) will be properly aligned. 4163 4164 Integer and float arguments larger than 8 bytes start at the next 4165 even boundary. Aggregates larger than 8 bytes start at the next 4166 even boundary if the aggregate has 16 byte alignment. Note that 4167 in the 32-bit ABI, TImode and TFmode have only 8-byte alignment 4168 but are still to be aligned in registers. 4169 4170 ??? The ABI does not specify how to handle aggregates with 4171 alignment from 9 to 15 bytes, or greater than 16. We handle them 4172 all as if they had 16 byte alignment. Such aggregates can occur 4173 only if gcc extensions are used. */ 4174static int 4175ia64_function_arg_offset (CUMULATIVE_ARGS *cum, tree type, int words) 4176{ 4177 /* No registers are skipped on VMS. 
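Elsewhere, a slot is skipped only when the next free slot (cum->words) is odd and the argument must start on an even boundary, per the rules above.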
*/ 4178 if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0) 4179 return 0; 4180 4181 if (type 4182 && TREE_CODE (type) != INTEGER_TYPE 4183 && TREE_CODE (type) != REAL_TYPE) 4184 return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT; 4185 else 4186 return words > 1; 4187} 4188 4189/* Return rtx for register where argument is passed, or zero if it is passed 4190 on the stack. */ 4191/* ??? 128-bit quad-precision floats are always passed in general 4192 registers. */ 4193 4194rtx 4195ia64_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type, 4196 int named, int incoming) 4197{ 4198 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 4199 int words = ia64_function_arg_words (type, mode); 4200 int offset = ia64_function_arg_offset (cum, type, words); 4201 enum machine_mode hfa_mode = VOIDmode; 4202 4203 /* For OPEN VMS, emit the instruction setting up the argument register here, 4204 when we know this will be together with the other arguments setup related 4205 insns. This is not the conceptually best place to do this, but this is 4206 the easiest as we have convenient access to cumulative args info. */ 4207 4208 if (TARGET_ABI_OPEN_VMS && mode == VOIDmode && type == void_type_node 4209 && named == 1) 4210 { 4211 unsigned HOST_WIDE_INT regval = cum->words; 4212 int i; 4213 4214 for (i = 0; i < 8; i++) 4215 regval |= ((int) cum->atypes[i]) << (i * 3 + 8); 4216 4217 emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)), 4218 GEN_INT (regval)); 4219 } 4220 4221 /* If all argument slots are used, then it must go on the stack. */ 4222 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 4223 return 0; 4224 4225 /* Check for and handle homogeneous FP aggregates. */ 4226 if (type) 4227 hfa_mode = hfa_element_mode (type, 0); 4228 4229 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 4230 and unprototyped hfas are passed specially. */ 4231 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 4232 { 4233 rtx loc[16]; 4234 int i = 0; 4235 int fp_regs = cum->fp_regs; 4236 int int_regs = cum->words + offset; 4237 int hfa_size = GET_MODE_SIZE (hfa_mode); 4238 int byte_size; 4239 int args_byte_size; 4240 4241 /* If prototyped, pass it in FR regs then GR regs. 4242 If not prototyped, pass it in both FR and GR regs. 4243 4244 If this is an SFmode aggregate, then it is possible to run out of 4245 FR regs while GR regs are still left. In that case, we pass the 4246 remaining part in the GR regs. */ 4247 4248 /* Fill the FP regs. We do this always. We stop if we reach the end 4249 of the argument, the last FP register, or the last argument slot. */ 4250 4251 byte_size = ((mode == BLKmode) 4252 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4253 args_byte_size = int_regs * UNITS_PER_WORD; 4254 offset = 0; 4255 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 4256 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 4257 { 4258 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4259 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 4260 + fp_regs)), 4261 GEN_INT (offset)); 4262 offset += hfa_size; 4263 args_byte_size += hfa_size; 4264 fp_regs++; 4265 } 4266 4267 /* If no prototype, then the whole thing must go in GR regs. */ 4268 if (! cum->prototype) 4269 offset = 0; 4270 /* If this is an SFmode aggregate, then we might have some left over 4271 that needs to go in GR regs. */ 4272 else if (byte_size != offset) 4273 int_regs += offset / UNITS_PER_WORD; 4274 4275 /* Fill in the GR regs. We must use DImode here, not the hfa mode. 
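Any odd or trailing 4-byte hunk is handled in SImode, as the comments below explain.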
*/ 4276 4277 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 4278 { 4279 enum machine_mode gr_mode = DImode; 4280 unsigned int gr_size; 4281 4282 /* If we have an odd 4 byte hunk because we ran out of FR regs, 4283 then this goes in a GR reg left adjusted/little endian, right 4284 adjusted/big endian. */ 4285 /* ??? Currently this is handled wrong, because 4-byte hunks are 4286 always right adjusted/little endian. */ 4287 if (offset & 0x4) 4288 gr_mode = SImode; 4289 /* If we have an even 4 byte hunk because the aggregate is a 4290 multiple of 4 bytes in size, then this goes in a GR reg right 4291 adjusted/little endian. */ 4292 else if (byte_size - offset == 4) 4293 gr_mode = SImode; 4294 4295 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4296 gen_rtx_REG (gr_mode, (basereg 4297 + int_regs)), 4298 GEN_INT (offset)); 4299 4300 gr_size = GET_MODE_SIZE (gr_mode); 4301 offset += gr_size; 4302 if (gr_size == UNITS_PER_WORD 4303 || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0)) 4304 int_regs++; 4305 else if (gr_size > UNITS_PER_WORD) 4306 int_regs += gr_size / UNITS_PER_WORD; 4307 } 4308 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 4309 } 4310 4311 /* On OpenVMS variable argument is either in Rn or Fn. */ 4312 else if (TARGET_ABI_OPEN_VMS && named == 0) 4313 { 4314 if (FLOAT_MODE_P (mode)) 4315 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->words); 4316 else 4317 return gen_rtx_REG (mode, basereg + cum->words); 4318 } 4319 4320 /* Integral and aggregates go in general registers. If we have run out of 4321 FR registers, then FP values must also go in general registers. This can 4322 happen when we have a SFmode HFA. */ 4323 else if (mode == TFmode || mode == TCmode 4324 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 4325 { 4326 int byte_size = ((mode == BLKmode) 4327 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4328 if (BYTES_BIG_ENDIAN 4329 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 4330 && byte_size < UNITS_PER_WORD 4331 && byte_size > 0) 4332 { 4333 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 4334 gen_rtx_REG (DImode, 4335 (basereg + cum->words 4336 + offset)), 4337 const0_rtx); 4338 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 4339 } 4340 else 4341 return gen_rtx_REG (mode, basereg + cum->words + offset); 4342 4343 } 4344 4345 /* If there is a prototype, then FP values go in a FR register when 4346 named, and in a GR register when unnamed. */ 4347 else if (cum->prototype) 4348 { 4349 if (named) 4350 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); 4351 /* In big-endian mode, an anonymous SFmode value must be represented 4352 as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force 4353 the value into the high half of the general register. */ 4354 else if (BYTES_BIG_ENDIAN && mode == SFmode) 4355 return gen_rtx_PARALLEL (mode, 4356 gen_rtvec (1, 4357 gen_rtx_EXPR_LIST (VOIDmode, 4358 gen_rtx_REG (DImode, basereg + cum->words + offset), 4359 const0_rtx))); 4360 else 4361 return gen_rtx_REG (mode, basereg + cum->words + offset); 4362 } 4363 /* If there is no prototype, then FP values go in both FR and GR 4364 registers. */ 4365 else 4366 { 4367 /* See comment above. */ 4368 enum machine_mode inner_mode = 4369 (BYTES_BIG_ENDIAN && mode == SFmode) ? 
DImode : mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

/* Return the number of bytes, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

static int
ia64_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			tree type, bool named ATTRIBUTE_UNUSED)
{
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If
     it fits within the 8 argument slots, then it goes entirely in
     registers.  If it extends past the last argument slot, then the rest
     goes on the stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
}

/* Return ivms_arg_type based on machine_mode.  */

static enum ivms_arg_type
ia64_arg_type (enum machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
      return FS;
    case DFmode:
      return FT;
    default:
      return I64;
    }
}

/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			   tree type, int named)
{
  int words = ia64_function_arg_words (type, mode);
  int offset = ia64_function_arg_offset (cum, type, words);
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    {
      cum->words += words + offset;
      return;
    }

  cum->atypes[cum->words] = ia64_arg_type (mode);
  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = ((mode == BLKmode)
		   ?
int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4475 args_byte_size = int_regs * UNITS_PER_WORD; 4476 offset = 0; 4477 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 4478 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) 4479 { 4480 offset += hfa_size; 4481 args_byte_size += hfa_size; 4482 fp_regs++; 4483 } 4484 4485 cum->fp_regs = fp_regs; 4486 } 4487 4488 /* On OpenVMS variable argument is either in Rn or Fn. */ 4489 else if (TARGET_ABI_OPEN_VMS && named == 0) 4490 { 4491 cum->int_regs = cum->words; 4492 cum->fp_regs = cum->words; 4493 } 4494 4495 /* Integral and aggregates go in general registers. So do TFmode FP values. 4496 If we have run out of FR registers, then other FP values must also go in 4497 general registers. This can happen when we have a SFmode HFA. */ 4498 else if (mode == TFmode || mode == TCmode 4499 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 4500 cum->int_regs = cum->words; 4501 4502 /* If there is a prototype, then FP values go in a FR register when 4503 named, and in a GR register when unnamed. */ 4504 else if (cum->prototype) 4505 { 4506 if (! named) 4507 cum->int_regs = cum->words; 4508 else 4509 /* ??? Complex types should not reach here. */ 4510 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 4511 } 4512 /* If there is no prototype, then FP values go in both FR and GR 4513 registers. */ 4514 else 4515 { 4516 /* ??? Complex types should not reach here. */ 4517 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 4518 cum->int_regs = cum->words; 4519 } 4520} 4521 4522/* Arguments with alignment larger than 8 bytes start at the next even 4523 boundary. On ILP32 HPUX, TFmode arguments start on next even boundary 4524 even though their normal alignment is 8 bytes. See ia64_function_arg. */ 4525 4526int 4527ia64_function_arg_boundary (enum machine_mode mode, tree type) 4528{ 4529 4530 if (mode == TFmode && TARGET_HPUX && TARGET_ILP32) 4531 return PARM_BOUNDARY * 2; 4532 4533 if (type) 4534 { 4535 if (TYPE_ALIGN (type) > PARM_BOUNDARY) 4536 return PARM_BOUNDARY * 2; 4537 else 4538 return PARM_BOUNDARY; 4539 } 4540 4541 if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY) 4542 return PARM_BOUNDARY * 2; 4543 else 4544 return PARM_BOUNDARY; 4545} 4546 4547/* True if it is OK to do sibling call optimization for the specified 4548 call expression EXP. DECL will be the called function, or NULL if 4549 this is an indirect call. */ 4550static bool 4551ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 4552{ 4553 /* We can't perform a sibcall if the current function has the syscall_linkage 4554 attribute. */ 4555 if (lookup_attribute ("syscall_linkage", 4556 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) 4557 return false; 4558 4559 /* We must always return with our current GP. This means we can 4560 only sibcall to functions defined in the current module unless 4561 TARGET_CONST_GP is set to true. */ 4562 return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP; 4563} 4564 4565 4566/* Implement va_arg. */ 4567 4568static tree 4569ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 4570 gimple_seq *post_p) 4571{ 4572 /* Variable sized types are passed by reference. 
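The value is therefore fetched indirectly, through a pointer read from the argument save area.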
*/ 4573 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 4574 { 4575 tree ptrtype = build_pointer_type (type); 4576 tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); 4577 return build_va_arg_indirect_ref (addr); 4578 } 4579 4580 /* Aggregate arguments with alignment larger than 8 bytes start at 4581 the next even boundary. Integer and floating point arguments 4582 do so if they are larger than 8 bytes, whether or not they are 4583 also aligned larger than 8 bytes. */ 4584 if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE) 4585 ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 4586 { 4587 tree t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (valist), valist, 4588 size_int (2 * UNITS_PER_WORD - 1)); 4589 t = fold_convert (sizetype, t); 4590 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 4591 size_int (-2 * UNITS_PER_WORD)); 4592 t = fold_convert (TREE_TYPE (valist), t); 4593 gimplify_assign (unshare_expr (valist), t, pre_p); 4594 } 4595 4596 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 4597} 4598 4599/* Return 1 if function return value returned in memory. Return 0 if it is 4600 in a register. */ 4601 4602static bool 4603ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED) 4604{ 4605 enum machine_mode mode; 4606 enum machine_mode hfa_mode; 4607 HOST_WIDE_INT byte_size; 4608 4609 mode = TYPE_MODE (valtype); 4610 byte_size = GET_MODE_SIZE (mode); 4611 if (mode == BLKmode) 4612 { 4613 byte_size = int_size_in_bytes (valtype); 4614 if (byte_size < 0) 4615 return true; 4616 } 4617 4618 /* Hfa's with up to 8 elements are returned in the FP argument registers. */ 4619 4620 hfa_mode = hfa_element_mode (valtype, 0); 4621 if (hfa_mode != VOIDmode) 4622 { 4623 int hfa_size = GET_MODE_SIZE (hfa_mode); 4624 4625 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) 4626 return true; 4627 else 4628 return false; 4629 } 4630 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) 4631 return true; 4632 else 4633 return false; 4634} 4635 4636/* Return rtx for register that holds the function return value. */ 4637 4638rtx 4639ia64_function_value (const_tree valtype, const_tree func) 4640{ 4641 enum machine_mode mode; 4642 enum machine_mode hfa_mode; 4643 int unsignedp; 4644 4645 mode = TYPE_MODE (valtype); 4646 hfa_mode = hfa_element_mode (valtype, 0); 4647 4648 if (hfa_mode != VOIDmode) 4649 { 4650 rtx loc[8]; 4651 int i; 4652 int hfa_size; 4653 int byte_size; 4654 int offset; 4655 4656 hfa_size = GET_MODE_SIZE (hfa_mode); 4657 byte_size = ((mode == BLKmode) 4658 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); 4659 offset = 0; 4660 for (i = 0; offset < byte_size; i++) 4661 { 4662 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4663 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), 4664 GEN_INT (offset)); 4665 offset += hfa_size; 4666 } 4667 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 4668 } 4669 else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode) 4670 return gen_rtx_REG (mode, FR_ARG_FIRST); 4671 else 4672 { 4673 bool need_parallel = false; 4674 4675 /* In big-endian mode, we need to manage the layout of aggregates 4676 in the registers so that we get the bits properly aligned in 4677 the highpart of the registers. */ 4678 if (BYTES_BIG_ENDIAN 4679 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) 4680 need_parallel = true; 4681 4682 /* Something like struct S { long double x; char a[0] } is not an 4683 HFA structure, and therefore doesn't go in fp registers. 
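(The char field already fails the all-FP-leaves test, zero-sized or not.)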
But 4684 the middle-end will give it XFmode anyway, and XFmode values 4685 don't normally fit in integer registers. So we need to smuggle 4686 the value inside a parallel. */ 4687 else if (mode == XFmode || mode == XCmode || mode == RFmode) 4688 need_parallel = true; 4689 4690 if (need_parallel) 4691 { 4692 rtx loc[8]; 4693 int offset; 4694 int bytesize; 4695 int i; 4696 4697 offset = 0; 4698 bytesize = int_size_in_bytes (valtype); 4699 /* An empty PARALLEL is invalid here, but the return value 4700 doesn't matter for empty structs. */ 4701 if (bytesize == 0) 4702 return gen_rtx_REG (mode, GR_RET_FIRST); 4703 for (i = 0; offset < bytesize; i++) 4704 { 4705 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 4706 gen_rtx_REG (DImode, 4707 GR_RET_FIRST + i), 4708 GEN_INT (offset)); 4709 offset += UNITS_PER_WORD; 4710 } 4711 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 4712 } 4713 4714 mode = ia64_promote_function_mode (valtype, mode, &unsignedp, 4715 func ? TREE_TYPE (func) : NULL_TREE, 4716 true); 4717 4718 return gen_rtx_REG (mode, GR_RET_FIRST); 4719 } 4720} 4721 4722/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 4723 We need to emit DTP-relative relocations. */ 4724 4725static void 4726ia64_output_dwarf_dtprel (FILE *file, int size, rtx x) 4727{ 4728 gcc_assert (size == 4 || size == 8); 4729 if (size == 4) 4730 fputs ("\tdata4.ua\t@dtprel(", file); 4731 else 4732 fputs ("\tdata8.ua\t@dtprel(", file); 4733 output_addr_const (file, x); 4734 fputs (")", file); 4735} 4736 4737/* Print a memory address as an operand to reference that memory location. */ 4738 4739/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps 4740 also call this from ia64_print_operand for memory addresses. */ 4741 4742void 4743ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, 4744 rtx address ATTRIBUTE_UNUSED) 4745{ 4746} 4747 4748/* Print an operand to an assembler instruction. 4749 C Swap and print a comparison operator. 4750 D Print an FP comparison operator. 4751 E Print 32 - constant, for SImode shifts as extract. 4752 e Print 64 - constant, for DImode rotates. 4753 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 4754 a floating point register emitted normally. 4755 G A floating point constant. 4756 I Invert a predicate register by adding 1. 4757 J Select the proper predicate register for a condition. 4758 j Select the inverse predicate register for a condition. 4759 O Append .acq for volatile load. 4760 P Postincrement of a MEM. 4761 Q Append .rel for volatile store. 4762 R Print .s .d or nothing for a single, double or no truncation. 4763 S Shift amount for shladd instruction. 4764 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 4765 for Intel assembler. 4766 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 4767 for Intel assembler. 4768 X A pair of floating point registers. 4769 r Print register name, or constant 0 as r0. HP compatibility for 4770 Linux kernel. 4771 v Print vector constant value as an 8-byte integer value. */ 4772 4773void 4774ia64_print_operand (FILE * file, rtx x, int code) 4775{ 4776 const char *str; 4777 4778 switch (code) 4779 { 4780 case 0: 4781 /* Handled below. 
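A zero code means no modifier; the operand is printed by the switch on GET_CODE at the end of the function.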
*/ 4782 break; 4783 4784 case 'C': 4785 { 4786 enum rtx_code c = swap_condition (GET_CODE (x)); 4787 fputs (GET_RTX_NAME (c), file); 4788 return; 4789 } 4790 4791 case 'D': 4792 switch (GET_CODE (x)) 4793 { 4794 case NE: 4795 str = "neq"; 4796 break; 4797 case UNORDERED: 4798 str = "unord"; 4799 break; 4800 case ORDERED: 4801 str = "ord"; 4802 break; 4803 case UNLT: 4804 str = "nge"; 4805 break; 4806 case UNLE: 4807 str = "ngt"; 4808 break; 4809 case UNGT: 4810 str = "nle"; 4811 break; 4812 case UNGE: 4813 str = "nlt"; 4814 break; 4815 default: 4816 str = GET_RTX_NAME (GET_CODE (x)); 4817 break; 4818 } 4819 fputs (str, file); 4820 return; 4821 4822 case 'E': 4823 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 4824 return; 4825 4826 case 'e': 4827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 4828 return; 4829 4830 case 'F': 4831 if (x == CONST0_RTX (GET_MODE (x))) 4832 str = reg_names [FR_REG (0)]; 4833 else if (x == CONST1_RTX (GET_MODE (x))) 4834 str = reg_names [FR_REG (1)]; 4835 else 4836 { 4837 gcc_assert (GET_CODE (x) == REG); 4838 str = reg_names [REGNO (x)]; 4839 } 4840 fputs (str, file); 4841 return; 4842 4843 case 'G': 4844 { 4845 long val[4]; 4846 REAL_VALUE_TYPE rv; 4847 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 4848 real_to_target (val, &rv, GET_MODE (x)); 4849 if (GET_MODE (x) == SFmode) 4850 fprintf (file, "0x%08lx", val[0] & 0xffffffff); 4851 else if (GET_MODE (x) == DFmode) 4852 fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1]) 4853 & 0xffffffff, 4854 (WORDS_BIG_ENDIAN ? val[1] : val[0]) 4855 & 0xffffffff); 4856 else 4857 output_operand_lossage ("invalid %%G mode"); 4858 } 4859 return; 4860 4861 case 'I': 4862 fputs (reg_names [REGNO (x) + 1], file); 4863 return; 4864 4865 case 'J': 4866 case 'j': 4867 { 4868 unsigned int regno = REGNO (XEXP (x, 0)); 4869 if (GET_CODE (x) == EQ) 4870 regno += 1; 4871 if (code == 'j') 4872 regno ^= 1; 4873 fputs (reg_names [regno], file); 4874 } 4875 return; 4876 4877 case 'O': 4878 if (MEM_VOLATILE_P (x)) 4879 fputs(".acq", file); 4880 return; 4881 4882 case 'P': 4883 { 4884 HOST_WIDE_INT value; 4885 4886 switch (GET_CODE (XEXP (x, 0))) 4887 { 4888 default: 4889 return; 4890 4891 case POST_MODIFY: 4892 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 4893 if (GET_CODE (x) == CONST_INT) 4894 value = INTVAL (x); 4895 else 4896 { 4897 gcc_assert (GET_CODE (x) == REG); 4898 fprintf (file, ", %s", reg_names[REGNO (x)]); 4899 return; 4900 } 4901 break; 4902 4903 case POST_INC: 4904 value = GET_MODE_SIZE (GET_MODE (x)); 4905 break; 4906 4907 case POST_DEC: 4908 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 4909 break; 4910 } 4911 4912 fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value); 4913 return; 4914 } 4915 4916 case 'Q': 4917 if (MEM_VOLATILE_P (x)) 4918 fputs(".rel", file); 4919 return; 4920 4921 case 'R': 4922 if (x == CONST0_RTX (GET_MODE (x))) 4923 fputs(".s", file); 4924 else if (x == CONST1_RTX (GET_MODE (x))) 4925 fputs(".d", file); 4926 else if (x == CONST2_RTX (GET_MODE (x))) 4927 ; 4928 else 4929 output_operand_lossage ("invalid %%R value"); 4930 return; 4931 4932 case 'S': 4933 fprintf (file, "%d", exact_log2 (INTVAL (x))); 4934 return; 4935 4936 case 'T': 4937 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 4938 { 4939 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 4940 return; 4941 } 4942 break; 4943 4944 case 'U': 4945 if (! 
TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 4946 { 4947 const char *prefix = "0x"; 4948 if (INTVAL (x) & 0x80000000) 4949 { 4950 fprintf (file, "0xffffffff"); 4951 prefix = ""; 4952 } 4953 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 4954 return; 4955 } 4956 break; 4957 4958 case 'X': 4959 { 4960 unsigned int regno = REGNO (x); 4961 fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]); 4962 } 4963 return; 4964 4965 case 'r': 4966 /* If this operand is the constant zero, write it as register zero. 4967 Any register, zero, or CONST_INT value is OK here. */ 4968 if (GET_CODE (x) == REG) 4969 fputs (reg_names[REGNO (x)], file); 4970 else if (x == CONST0_RTX (GET_MODE (x))) 4971 fputs ("r0", file); 4972 else if (GET_CODE (x) == CONST_INT) 4973 output_addr_const (file, x); 4974 else 4975 output_operand_lossage ("invalid %%r value"); 4976 return; 4977 4978 case 'v': 4979 gcc_assert (GET_CODE (x) == CONST_VECTOR); 4980 x = simplify_subreg (DImode, x, GET_MODE (x), 0); 4981 break; 4982 4983 case '+': 4984 { 4985 const char *which; 4986 4987 /* For conditional branches, returns or calls, substitute 4988 sptk, dptk, dpnt, or spnt for %s. */ 4989 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 4990 if (x) 4991 { 4992 int pred_val = INTVAL (XEXP (x, 0)); 4993 4994 /* Guess top and bottom 10% statically predicted. */ 4995 if (pred_val < REG_BR_PROB_BASE / 50 4996 && br_prob_note_reliable_p (x)) 4997 which = ".spnt"; 4998 else if (pred_val < REG_BR_PROB_BASE / 2) 4999 which = ".dpnt"; 5000 else if (pred_val < REG_BR_PROB_BASE / 100 * 98 5001 || !br_prob_note_reliable_p (x)) 5002 which = ".dptk"; 5003 else 5004 which = ".sptk"; 5005 } 5006 else if (GET_CODE (current_output_insn) == CALL_INSN) 5007 which = ".sptk"; 5008 else 5009 which = ".dptk"; 5010 5011 fputs (which, file); 5012 return; 5013 } 5014 5015 case ',': 5016 x = current_insn_predicate; 5017 if (x) 5018 { 5019 unsigned int regno = REGNO (XEXP (x, 0)); 5020 if (GET_CODE (x) == EQ) 5021 regno += 1; 5022 fprintf (file, "(%s) ", reg_names [regno]); 5023 } 5024 return; 5025 5026 default: 5027 output_operand_lossage ("ia64_print_operand: unknown code"); 5028 return; 5029 } 5030 5031 switch (GET_CODE (x)) 5032 { 5033 /* This happens for the spill/restore instructions. */ 5034 case POST_INC: 5035 case POST_DEC: 5036 case POST_MODIFY: 5037 x = XEXP (x, 0); 5038 /* ... fall through ... */ 5039 5040 case REG: 5041 fputs (reg_names [REGNO (x)], file); 5042 break; 5043 5044 case MEM: 5045 { 5046 rtx addr = XEXP (x, 0); 5047 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) 5048 addr = XEXP (addr, 0); 5049 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 5050 break; 5051 } 5052 5053 default: 5054 output_addr_const (file, x); 5055 break; 5056 } 5057 5058 return; 5059} 5060 5061/* Compute a (partial) cost for rtx X. Return true if the complete 5062 cost has been computed, and false if subexpressions should be 5063 scanned. In either case, *TOTAL contains the cost result. */ 5064/* ??? This is incomplete. */ 5065 5066static bool 5067ia64_rtx_costs (rtx x, int code, int outer_code, int *total, 5068 bool speed ATTRIBUTE_UNUSED) 5069{ 5070 switch (code) 5071 { 5072 case CONST_INT: 5073 switch (outer_code) 5074 { 5075 case SET: 5076 *total = satisfies_constraint_J (x) ? 
0 : COSTS_N_INSNS (1); 5077 return true; 5078 case PLUS: 5079 if (satisfies_constraint_I (x)) 5080 *total = 0; 5081 else if (satisfies_constraint_J (x)) 5082 *total = 1; 5083 else 5084 *total = COSTS_N_INSNS (1); 5085 return true; 5086 default: 5087 if (satisfies_constraint_K (x) || satisfies_constraint_L (x)) 5088 *total = 0; 5089 else 5090 *total = COSTS_N_INSNS (1); 5091 return true; 5092 } 5093 5094 case CONST_DOUBLE: 5095 *total = COSTS_N_INSNS (1); 5096 return true; 5097 5098 case CONST: 5099 case SYMBOL_REF: 5100 case LABEL_REF: 5101 *total = COSTS_N_INSNS (3); 5102 return true; 5103 5104 case MULT: 5105 /* For multiplies wider than HImode, we have to go to the FPU, 5106 which normally involves copies. Plus there's the latency 5107 of the multiply itself, and the latency of the instructions to 5108 transfer integer regs to FP regs. */ 5109 /* ??? Check for FP mode. */ 5110 if (GET_MODE_SIZE (GET_MODE (x)) > 2) 5111 *total = COSTS_N_INSNS (10); 5112 else 5113 *total = COSTS_N_INSNS (2); 5114 return true; 5115 5116 case PLUS: 5117 case MINUS: 5118 case ASHIFT: 5119 case ASHIFTRT: 5120 case LSHIFTRT: 5121 *total = COSTS_N_INSNS (1); 5122 return true; 5123 5124 case DIV: 5125 case UDIV: 5126 case MOD: 5127 case UMOD: 5128 /* We make divide expensive, so that divide-by-constant will be 5129 optimized to a multiply. */ 5130 *total = COSTS_N_INSNS (60); 5131 return true; 5132 5133 default: 5134 return false; 5135 } 5136} 5137 5138/* Calculate the cost of moving data from a register in class FROM to 5139 one in class TO, using MODE. */ 5140 5141int 5142ia64_register_move_cost (enum machine_mode mode, enum reg_class from, 5143 enum reg_class to) 5144{ 5145 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 5146 if (to == ADDL_REGS) 5147 to = GR_REGS; 5148 if (from == ADDL_REGS) 5149 from = GR_REGS; 5150 5151 /* All costs are symmetric, so reduce cases by putting the 5152 lower number class as the destination. */ 5153 if (from < to) 5154 { 5155 enum reg_class tmp = to; 5156 to = from, from = tmp; 5157 } 5158 5159 /* Moving from FR<->GR in XFmode must be more expensive than 2, 5160 so that we get secondary memory reloads. Between FR_REGS, 5161 we have to make this at least as expensive as MEMORY_MOVE_COST 5162 to avoid spectacularly poor register class preferencing. */ 5163 if (mode == XFmode || mode == RFmode) 5164 { 5165 if (to != GR_REGS || from != GR_REGS) 5166 return MEMORY_MOVE_COST (mode, to, 0); 5167 else 5168 return 3; 5169 } 5170 5171 switch (to) 5172 { 5173 case PR_REGS: 5174 /* Moving between PR registers takes two insns. */ 5175 if (from == PR_REGS) 5176 return 3; 5177 /* Moving between PR and anything but GR is impossible. */ 5178 if (from != GR_REGS) 5179 return MEMORY_MOVE_COST (mode, to, 0); 5180 break; 5181 5182 case BR_REGS: 5183 /* Moving between BR and anything but GR is impossible. */ 5184 if (from != GR_REGS && from != GR_AND_BR_REGS) 5185 return MEMORY_MOVE_COST (mode, to, 0); 5186 break; 5187 5188 case AR_I_REGS: 5189 case AR_M_REGS: 5190 /* Moving between AR and anything but GR is impossible. */ 5191 if (from != GR_REGS) 5192 return MEMORY_MOVE_COST (mode, to, 0); 5193 break; 5194 5195 case GR_REGS: 5196 case FR_REGS: 5197 case FP_REGS: 5198 case GR_AND_FR_REGS: 5199 case GR_AND_BR_REGS: 5200 case ALL_REGS: 5201 break; 5202 5203 default: 5204 gcc_unreachable (); 5205 } 5206 5207 return 2; 5208} 5209 5210/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on RCLASS 5211 to use when copying X into that class. 
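Returning NO_REGS makes reload choose another alternative; e.g. a volatile memory operand is then copied through a general register rather than loaded straight into an FR register.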
*/ 5212 5213enum reg_class 5214ia64_preferred_reload_class (rtx x, enum reg_class rclass) 5215{ 5216 switch (rclass) 5217 { 5218 case FR_REGS: 5219 case FP_REGS: 5220 /* Don't allow volatile mem reloads into floating point registers. 5221 This is defined to force reload to choose the r/m case instead 5222 of the f/f case when reloading (set (reg fX) (mem/v)). */ 5223 if (MEM_P (x) && MEM_VOLATILE_P (x)) 5224 return NO_REGS; 5225 5226 /* Force all unrecognized constants into the constant pool. */ 5227 if (CONSTANT_P (x)) 5228 return NO_REGS; 5229 break; 5230 5231 case AR_M_REGS: 5232 case AR_I_REGS: 5233 if (!OBJECT_P (x)) 5234 return NO_REGS; 5235 break; 5236 5237 default: 5238 break; 5239 } 5240 5241 return rclass; 5242} 5243 5244/* This function returns the register class required for a secondary 5245 register when copying between one of the registers in RCLASS, and X, 5246 using MODE. A return value of NO_REGS means that no secondary register 5247 is required. */ 5248 5249enum reg_class 5250ia64_secondary_reload_class (enum reg_class rclass, 5251 enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) 5252{ 5253 int regno = -1; 5254 5255 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 5256 regno = true_regnum (x); 5257 5258 switch (rclass) 5259 { 5260 case BR_REGS: 5261 case AR_M_REGS: 5262 case AR_I_REGS: 5263 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 5264 interaction. We end up with two pseudos with overlapping lifetimes 5265 both of which are equiv to the same constant, and both which need 5266 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 5267 changes depending on the path length, which means the qty_first_reg 5268 check in make_regs_eqv can give different answers at different times. 5269 At some point I'll probably need a reload_indi pattern to handle 5270 this. 5271 5272 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 5273 wound up with a FP register from GR_AND_FR_REGS. Extend that to all 5274 non-general registers for good measure. */ 5275 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 5276 return GR_REGS; 5277 5278 /* This is needed if a pseudo used as a call_operand gets spilled to a 5279 stack slot. */ 5280 if (GET_CODE (x) == MEM) 5281 return GR_REGS; 5282 break; 5283 5284 case FR_REGS: 5285 case FP_REGS: 5286 /* Need to go through general registers to get to other class regs. */ 5287 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) 5288 return GR_REGS; 5289 5290 /* This can happen when a paradoxical subreg is an operand to the 5291 muldi3 pattern. */ 5292 /* ??? This shouldn't be necessary after instruction scheduling is 5293 enabled, because paradoxical subregs are not accepted by 5294 register_operand when INSN_SCHEDULING is defined. Or alternatively, 5295 stop the paradoxical subreg stupidity in the *_operand functions 5296 in recog.c. */ 5297 if (GET_CODE (x) == MEM 5298 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode 5299 || GET_MODE (x) == QImode)) 5300 return GR_REGS; 5301 5302 /* This can happen because of the ior/and/etc patterns that accept FP 5303 registers as operands. If the third operand is a constant, then it 5304 needs to be reloaded into a FP register. */ 5305 if (GET_CODE (x) == CONST_INT) 5306 return GR_REGS; 5307 5308 /* This can happen because of register elimination in a muldi3 insn. 5309 E.g. `26107 * (unsigned long)&u'. */ 5310 if (GET_CODE (x) == PLUS) 5311 return GR_REGS; 5312 break; 5313 5314 case PR_REGS: 5315 /* ??? 
This happens if we cse/gcse a BImode value across a call, 5316 and the function has a nonlocal goto. This is because global 5317 does not allocate call crossing pseudos to hard registers when 5318 crtl->has_nonlocal_goto is true. This is relatively 5319 common for C++ programs that use exceptions. To reproduce, 5320 return NO_REGS and compile libstdc++. */ 5321 if (GET_CODE (x) == MEM) 5322 return GR_REGS; 5323 5324 /* This can happen when we take a BImode subreg of a DImode value, 5325 and that DImode value winds up in some non-GR register. */ 5326 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) 5327 return GR_REGS; 5328 break; 5329 5330 default: 5331 break; 5332 } 5333 5334 return NO_REGS; 5335} 5336 5337 5338/* Implement targetm.unspec_may_trap_p hook. */ 5339static int 5340ia64_unspec_may_trap_p (const_rtx x, unsigned flags) 5341{ 5342 if (GET_CODE (x) == UNSPEC) 5343 { 5344 switch (XINT (x, 1)) 5345 { 5346 case UNSPEC_LDA: 5347 case UNSPEC_LDS: 5348 case UNSPEC_LDSA: 5349 case UNSPEC_LDCCLR: 5350 case UNSPEC_CHKACLR: 5351 case UNSPEC_CHKS: 5352 /* These unspecs are just wrappers. */ 5353 return may_trap_p_1 (XVECEXP (x, 0, 0), flags); 5354 } 5355 } 5356 5357 return default_unspec_may_trap_p (x, flags); 5358} 5359 5360 5361/* Parse the -mfixed-range= option string. */ 5362 5363static void 5364fix_range (const char *const_str) 5365{ 5366 int i, first, last; 5367 char *str, *dash, *comma; 5368 5369 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 5370 REG2 are either register names or register numbers. The effect 5371 of this option is to mark the registers in the range from REG1 to 5372 REG2 as ``fixed'' so they won't be used by the compiler. This is 5373 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ 5374 5375 i = strlen (const_str); 5376 str = (char *) alloca (i + 1); 5377 memcpy (str, const_str, i + 1); 5378 5379 while (1) 5380 { 5381 dash = strchr (str, '-'); 5382 if (!dash) 5383 { 5384 warning (0, "value of -mfixed-range must have form REG1-REG2"); 5385 return; 5386 } 5387 *dash = '\0'; 5388 5389 comma = strchr (dash + 1, ','); 5390 if (comma) 5391 *comma = '\0'; 5392 5393 first = decode_reg_name (str); 5394 if (first < 0) 5395 { 5396 warning (0, "unknown register name: %s", str); 5397 return; 5398 } 5399 5400 last = decode_reg_name (dash + 1); 5401 if (last < 0) 5402 { 5403 warning (0, "unknown register name: %s", dash + 1); 5404 return; 5405 } 5406 5407 *dash = '-'; 5408 5409 if (first > last) 5410 { 5411 warning (0, "%s-%s is an empty range", str, dash + 1); 5412 return; 5413 } 5414 5415 for (i = first; i <= last; ++i) 5416 fixed_regs[i] = call_used_regs[i] = 1; 5417 5418 if (!comma) 5419 break; 5420 5421 *comma = ','; 5422 str = comma + 1; 5423 } 5424} 5425 5426/* Implement TARGET_HANDLE_OPTION. */ 5427 5428static bool 5429ia64_handle_option (size_t code, const char *arg, int value) 5430{ 5431 switch (code) 5432 { 5433 case OPT_mfixed_range_: 5434 fix_range (arg); 5435 return true; 5436 5437 case OPT_mtls_size_: 5438 if (value != 14 && value != 22 && value != 64) 5439 error ("bad value %<%s%> for -mtls-size= switch", arg); 5440 return true; 5441 5442 case OPT_mtune_: 5443 { 5444 static struct pta 5445 { 5446 const char *name; /* processor name or nickname. 
*/
	enum processor_type processor;
      }
      const processor_alias_table[] =
	{
	  {"itanium2", PROCESSOR_ITANIUM2},
	  {"mckinley", PROCESSOR_ITANIUM2},
	};
      int const pta_size = ARRAY_SIZE (processor_alias_table);
      int i;

      for (i = 0; i < pta_size; i++)
	if (!strcmp (arg, processor_alias_table[i].name))
	  {
	    ia64_tune = processor_alias_table[i].processor;
	    break;
	  }
      if (i == pta_size)
	error ("bad value %<%s%> for -mtune= switch", arg);
      return true;
    }

    default:
      return true;
    }
}

/* Implement OVERRIDE_OPTIONS.  */

void
ia64_override_options (void)
{
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;

  ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE;

  init_machine_status = ia64_init_machine_status;

  if (align_functions <= 0)
    align_functions = 64;
  if (align_loops <= 0)
    align_loops = 32;
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  ia64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
ia64_override_options_after_change (void)
{
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

  if (optimize >= 3
      && ! sel_sched_switch_set)
    {
      flag_selective_scheduling2 = 1;
      flag_sel_sched_pipelining = 1;
    }
  if (mflag_sched_control_spec == 2)
    {
      /* Control speculation is on by default for the selective scheduler,
	 but not for the Haifa scheduler.  */
      mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
    }
  if (flag_sel_sched_pipelining && flag_auto_inc_dec)
    {
      /* FIXME: remove this when we implement breaking autoinsns as
	 a transformation.  */
      flag_auto_inc_dec = 0;
    }
}

/* Initialize the record of emitted frame-related registers.  */

void
ia64_init_expanders (void)
{
  memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
}

static struct machine_function *
ia64_init_machine_status (void)
{
  return GGC_CNEW (struct machine_function);
}

static enum attr_itanium_class ia64_safe_itanium_class (rtx);
static enum attr_type ia64_safe_type (rtx);

static enum attr_itanium_class
ia64_safe_itanium_class (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_itanium_class (insn);
  else if (DEBUG_INSN_P (insn))
    return ITANIUM_CLASS_IGNORE;
  else
    return ITANIUM_CLASS_UNKNOWN;
}

static enum attr_type
ia64_safe_type (rtx insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_UNKNOWN;
}

/* The following collection of routines emit instruction group stop bits as
   necessary to avoid dependencies.  */

/* Need to track some additional registers as far as serialization is
   concerned so we can properly handle br.call and br.ret.
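A br.call, for instance, implicitly writes the return pointer and the current frame marker, neither of which appears explicitly in the call pattern.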
We could 5572 make these registers visible to gcc, but since these registers are 5573 never explicitly used in gcc generated code, it seems wasteful to 5574 do so (plus it would make the call and return patterns needlessly 5575 complex). */ 5576#define REG_RP (BR_REG (0)) 5577#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 5578/* This is used for volatile asms which may require a stop bit immediately 5579 before and after them. */ 5580#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 5581#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 5582#define NUM_REGS (AR_UNAT_BIT_0 + 64) 5583 5584/* For each register, we keep track of how it has been written in the 5585 current instruction group. 5586 5587 If a register is written unconditionally (no qualifying predicate), 5588 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 5589 5590 If a register is written if its qualifying predicate P is true, we 5591 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register 5592 may be written again by the complement of P (P^1) and when this happens, 5593 WRITE_COUNT gets set to 2. 5594 5595 The result of this is that whenever an insn attempts to write a register 5596 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 5597 5598 If a predicate register is written by a floating-point insn, we set 5599 WRITTEN_BY_FP to true. 5600 5601 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 5602 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 5603 5604#if GCC_VERSION >= 4000 5605#define RWS_FIELD_TYPE __extension__ unsigned short 5606#else 5607#define RWS_FIELD_TYPE unsigned int 5608#endif 5609struct reg_write_state 5610{ 5611 RWS_FIELD_TYPE write_count : 2; 5612 RWS_FIELD_TYPE first_pred : 10; 5613 RWS_FIELD_TYPE written_by_fp : 1; 5614 RWS_FIELD_TYPE written_by_and : 1; 5615 RWS_FIELD_TYPE written_by_or : 1; 5616}; 5617 5618/* Cumulative info for the current instruction group. */ 5619struct reg_write_state rws_sum[NUM_REGS]; 5620#ifdef ENABLE_CHECKING 5621/* Bitmap whether a register has been written in the current insn. */ 5622HARD_REG_ELT_TYPE rws_insn[(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1) 5623 / HOST_BITS_PER_WIDEST_FAST_INT]; 5624 5625static inline void 5626rws_insn_set (int regno) 5627{ 5628 gcc_assert (!TEST_HARD_REG_BIT (rws_insn, regno)); 5629 SET_HARD_REG_BIT (rws_insn, regno); 5630} 5631 5632static inline int 5633rws_insn_test (int regno) 5634{ 5635 return TEST_HARD_REG_BIT (rws_insn, regno); 5636} 5637#else 5638/* When not checking, track just REG_AR_CFM and REG_VOLATILE. */ 5639unsigned char rws_insn[2]; 5640 5641static inline void 5642rws_insn_set (int regno) 5643{ 5644 if (regno == REG_AR_CFM) 5645 rws_insn[0] = 1; 5646 else if (regno == REG_VOLATILE) 5647 rws_insn[1] = 1; 5648} 5649 5650static inline int 5651rws_insn_test (int regno) 5652{ 5653 if (regno == REG_AR_CFM) 5654 return rws_insn[0]; 5655 if (regno == REG_VOLATILE) 5656 return rws_insn[1]; 5657 return 0; 5658} 5659#endif 5660 5661/* Indicates whether this is the first instruction after a stop bit, 5662 in which case we don't need another stop bit. Without this, 5663 ia64_variable_issue will die when scheduling an alloc. */ 5664static int first_instruction; 5665 5666/* Misc flags needed to compute RAW/WAW dependencies while we are traversing 5667 RTL for one instruction. */ 5668struct reg_flags 5669{ 5670 unsigned int is_write : 1; /* Is register being written? */ 5671 unsigned int is_fp : 1; /* Is register used as part of an fp op? 
*/ 5672 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 5673 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 5674 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 5675 unsigned int is_sibcall : 1; /* Is this a sibling call (rather than a normal call)? */ 5676}; 5677 5678static void rws_update (int, struct reg_flags, int); 5679static int rws_access_regno (int, struct reg_flags, int); 5680static int rws_access_reg (rtx, struct reg_flags, int); 5681static void update_set_flags (rtx, struct reg_flags *); 5682static int set_src_needs_barrier (rtx, struct reg_flags, int); 5683static int rtx_needs_barrier (rtx, struct reg_flags, int); 5684static void init_insn_group_barriers (void); 5685static int group_barrier_needed (rtx); 5686static int safe_group_barrier_needed (rtx); 5687static int in_safe_group_barrier; 5688 5689/* Update RWS_SUM for REGNO, which is being written by the current instruction, 5690 with qualifying predicate PRED, and associated register flags in FLAGS. */ 5691 5692static void 5693rws_update (int regno, struct reg_flags flags, int pred) 5694{ 5695 if (pred) 5696 rws_sum[regno].write_count++; 5697 else 5698 rws_sum[regno].write_count = 2; 5699 rws_sum[regno].written_by_fp |= flags.is_fp; 5700 /* ??? Not tracking and/or across differing predicates. */ 5701 rws_sum[regno].written_by_and = flags.is_and; 5702 rws_sum[regno].written_by_or = flags.is_or; 5703 rws_sum[regno].first_pred = pred; 5704} 5705 5706/* Handle an access to register REGNO of type FLAGS using predicate register 5707 PRED. Update the rws_sum array. Return 1 if this access creates 5708 a dependency with an earlier instruction in the same group. */ 5709 5710static int 5711rws_access_regno (int regno, struct reg_flags flags, int pred) 5712{ 5713 int need_barrier = 0; 5714 5715 gcc_assert (regno < NUM_REGS); 5716 5717 if (! PR_REGNO_P (regno)) 5718 flags.is_and = flags.is_or = 0; 5719 5720 if (flags.is_write) 5721 { 5722 int write_count; 5723 5724 rws_insn_set (regno); 5725 write_count = rws_sum[regno].write_count; 5726 5727 switch (write_count) 5728 { 5729 case 0: 5730 /* The register has not been written yet. */ 5731 if (!in_safe_group_barrier) 5732 rws_update (regno, flags, pred); 5733 break; 5734 5735 case 1: 5736 /* The register has been written via a predicate. If this is 5737 not a complementary predicate, then we need a barrier. */ 5738 /* ??? This assumes that P and P+1 are always complementary 5739 predicates for P even. */ 5740 if (flags.is_and && rws_sum[regno].written_by_and) 5741 ; 5742 else if (flags.is_or && rws_sum[regno].written_by_or) 5743 ; 5744 else if ((rws_sum[regno].first_pred ^ 1) != pred) 5745 need_barrier = 1; 5746 if (!in_safe_group_barrier) 5747 rws_update (regno, flags, pred); 5748 break; 5749 5750 case 2: 5751 /* The register has been unconditionally written already. We 5752 need a barrier. */ 5753 if (flags.is_and && rws_sum[regno].written_by_and) 5754 ; 5755 else if (flags.is_or && rws_sum[regno].written_by_or) 5756 ; 5757 else 5758 need_barrier = 1; 5759 if (!in_safe_group_barrier) 5760 { 5761 rws_sum[regno].written_by_and = flags.is_and; 5762 rws_sum[regno].written_by_or = flags.is_or; 5763 } 5764 break; 5765 5766 default: 5767 gcc_unreachable (); 5768 } 5769 } 5770 else 5771 { 5772 if (flags.is_branch) 5773 { 5774 /* Branches have several RAW exceptions that allow us to avoid 5775 barriers.
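For example (informally), a predicate written by an integer compare may be read by a branch in the same instruction group without a stop bit,

	 cmp.eq p6, p7 = r8, r9
	(p6) br.cond.dptk .Lskip	// no ;; needed in between

       but the same predicate written by an fcmp would need one.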
*/ 5776 5777 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) 5778 /* RAW dependencies on branch regs are permissible as long 5779 as the writer is a non-branch instruction. Since we 5780 never generate code that uses a branch register written 5781 by a branch instruction, handling this case is 5782 easy. */ 5783 return 0; 5784 5785 if (REGNO_REG_CLASS (regno) == PR_REGS 5786 && ! rws_sum[regno].written_by_fp) 5787 /* The predicates of a branch are available within the 5788 same insn group as long as the predicate was written by 5789 something other than a floating-point instruction. */ 5790 return 0; 5791 } 5792 5793 if (flags.is_and && rws_sum[regno].written_by_and) 5794 return 0; 5795 if (flags.is_or && rws_sum[regno].written_by_or) 5796 return 0; 5797 5798 switch (rws_sum[regno].write_count) 5799 { 5800 case 0: 5801 /* The register has not been written yet. */ 5802 break; 5803 5804 case 1: 5805 /* The register has been written via a predicate. If this is 5806 not a complementary predicate, then we need a barrier. */ 5807 /* ??? This assumes that P and P+1 are always complementary 5808 predicates for P even. */ 5809 if ((rws_sum[regno].first_pred ^ 1) != pred) 5810 need_barrier = 1; 5811 break; 5812 5813 case 2: 5814 /* The register has been unconditionally written already. We 5815 need a barrier. */ 5816 need_barrier = 1; 5817 break; 5818 5819 default: 5820 gcc_unreachable (); 5821 } 5822 } 5823 5824 return need_barrier; 5825} 5826 5827static int 5828rws_access_reg (rtx reg, struct reg_flags flags, int pred) 5829{ 5830 int regno = REGNO (reg); 5831 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); 5832 5833 if (n == 1) 5834 return rws_access_regno (regno, flags, pred); 5835 else 5836 { 5837 int need_barrier = 0; 5838 while (--n >= 0) 5839 need_barrier |= rws_access_regno (regno + n, flags, pred); 5840 return need_barrier; 5841 } 5842} 5843 5844/* Examine X, which is a SET rtx, and update the register flags for its 5845 source, stored in *PFLAGS. */ 5846 5847static void 5848update_set_flags (rtx x, struct reg_flags *pflags) 5849{ 5850 rtx src = SET_SRC (x); 5851 5852 switch (GET_CODE (src)) 5853 { 5854 case CALL: 5855 return; 5856 5857 case IF_THEN_ELSE: 5858 /* There are four cases here: 5859 (1) The destination is (pc), in which case this is a branch, 5860 nothing here applies. 5861 (2) The destination is ar.lc, in which case this is a 5862 doloop_end_internal. 5863 (3) The destination is an fp register, in which case this is 5864 an fselect instruction. 5865 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case 5866 this is a check load. 5867 In all cases, nothing we do in this function applies. */ 5868 return; 5869 5870 default: 5871 if (COMPARISON_P (src) 5872 && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0)))) 5873 /* Set pflags->is_fp to 1 so that we know we're dealing 5874 with a floating point comparison when processing the 5875 destination of the SET. */ 5876 pflags->is_fp = 1; 5877 5878 /* Discover if this is a parallel comparison. We only handle 5879 and.orcm and or.andcm at present, since we must retain a 5880 strict inverse on the predicate pair. */ 5881 else if (GET_CODE (src) == AND) 5882 pflags->is_and = 1; 5883 else if (GET_CODE (src) == IOR) 5884 pflags->is_or = 1; 5885 5886 break; 5887 } 5888} 5889 5890/* Subroutine of rtx_needs_barrier; this function determines whether the 5891 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 5892 are as in rtx_needs_barrier.
*/ 5894 5895static int 5896set_src_needs_barrier (rtx x, struct reg_flags flags, int pred) 5897{ 5898 int need_barrier = 0; 5899 rtx dst; 5900 rtx src = SET_SRC (x); 5901 5902 if (GET_CODE (src) == CALL) 5903 /* We don't need to worry about the result registers that 5904 get written by a subroutine call. */ 5905 return rtx_needs_barrier (src, flags, pred); 5906 else if (SET_DEST (x) == pc_rtx) 5907 { 5908 /* X is a conditional branch. */ 5909 /* ??? This seems redundant, as the caller sets this bit for 5910 all JUMP_INSNs. */ 5911 if (!ia64_spec_check_src_p (src)) 5912 flags.is_branch = 1; 5913 return rtx_needs_barrier (src, flags, pred); 5914 } 5915 5916 if (ia64_spec_check_src_p (src)) 5917 /* Avoid checking one register twice (in the condition 5918 and in the 'then' section) for the ldc pattern. */ 5919 { 5920 gcc_assert (REG_P (XEXP (src, 2))); 5921 need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred); 5922 5923 /* We process MEM below. */ 5924 src = XEXP (src, 1); 5925 } 5926 5927 need_barrier |= rtx_needs_barrier (src, flags, pred); 5928 5929 dst = SET_DEST (x); 5930 if (GET_CODE (dst) == ZERO_EXTRACT) 5931 { 5932 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 5933 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 5934 } 5935 return need_barrier; 5936} 5937 5938/* Handle an access to rtx X of type FLAGS using predicate register 5939 PRED. Return 1 if this access creates a dependency with an earlier 5940 instruction in the same group. */ 5941 5942static int 5943rtx_needs_barrier (rtx x, struct reg_flags flags, int pred) 5944{ 5945 int i, j; 5946 int is_complemented = 0; 5947 int need_barrier = 0; 5948 const char *format_ptr; 5949 struct reg_flags new_flags; 5950 rtx cond; 5951 5952 if (! x) 5953 return 0; 5954 5955 new_flags = flags; 5956 5957 switch (GET_CODE (x)) 5958 { 5959 case SET: 5960 update_set_flags (x, &new_flags); 5961 need_barrier = set_src_needs_barrier (x, new_flags, pred); 5962 if (GET_CODE (SET_SRC (x)) != CALL) 5963 { 5964 new_flags.is_write = 1; 5965 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 5966 } 5967 break; 5968 5969 case CALL: 5970 new_flags.is_write = 0; 5971 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 5972 5973 /* Avoid multiple register writes, in case this is a pattern with 5974 multiple CALL rtx. This avoids a failure in rws_access_reg. */ 5975 if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM)) 5976 { 5977 new_flags.is_write = 1; 5978 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 5979 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 5980 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 5981 } 5982 break; 5983 5984 case COND_EXEC: 5985 /* X is a predicated instruction. */ 5986 5987 cond = COND_EXEC_TEST (x); 5988 gcc_assert (!pred); 5989 need_barrier = rtx_needs_barrier (cond, flags, 0); 5990 5991 if (GET_CODE (cond) == EQ) 5992 is_complemented = 1; 5993 cond = XEXP (cond, 0); 5994 gcc_assert (GET_CODE (cond) == REG 5995 && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS); 5996 pred = REGNO (cond); 5997 if (is_complemented) 5998 ++pred; 5999 6000 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 6001 return need_barrier; 6002 6003 case CLOBBER: 6004 case USE: 6005 /* CLOBBER and USE are for earlier compiler phases only. */ 6006 break; 6007 6008 case ASM_OPERANDS: 6009 case ASM_INPUT: 6010 /* We always emit stop bits for traditional asms.
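(A traditional asm is one without operands, e.g. asm ("mf"); an extended asm, e.g. asm volatile ("mf" ::: "memory"), is the ASM_OPERANDS form.)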
We emit stop bits 6011 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 6012 if (GET_CODE (x) != ASM_OPERANDS 6013 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 6014 { 6015 /* Avoid writing the register multiple times if we have multiple 6016 asm outputs. This avoids a failure in rws_access_reg. */ 6017 if (! rws_insn_test (REG_VOLATILE)) 6018 { 6019 new_flags.is_write = 1; 6020 rws_access_regno (REG_VOLATILE, new_flags, pred); 6021 } 6022 return 1; 6023 } 6024 6025 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 6026 We cannot just fall through here since then we would be confused 6027 by the ASM_INPUT rtx inside ASM_OPERANDS, which, unlike its normal 6028 usage, does not indicate a traditional asm. */ 6029 6030 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 6031 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 6032 need_barrier = 1; 6033 break; 6034 6035 case PARALLEL: 6036 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6037 { 6038 rtx pat = XVECEXP (x, 0, i); 6039 switch (GET_CODE (pat)) 6040 { 6041 case SET: 6042 update_set_flags (pat, &new_flags); 6043 need_barrier |= set_src_needs_barrier (pat, new_flags, pred); 6044 break; 6045 6046 case USE: 6047 case CALL: 6048 case ASM_OPERANDS: 6049 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6050 break; 6051 6052 case CLOBBER: 6053 if (REG_P (XEXP (pat, 0)) 6054 && extract_asm_operands (x) != NULL_RTX 6055 && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM) 6056 { 6057 new_flags.is_write = 1; 6058 need_barrier |= rtx_needs_barrier (XEXP (pat, 0), 6059 new_flags, pred); 6060 new_flags = flags; 6061 } 6062 break; 6063 6064 case RETURN: 6065 break; 6066 6067 default: 6068 gcc_unreachable (); 6069 } 6070 } 6071 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 6072 { 6073 rtx pat = XVECEXP (x, 0, i); 6074 if (GET_CODE (pat) == SET) 6075 { 6076 if (GET_CODE (SET_SRC (pat)) != CALL) 6077 { 6078 new_flags.is_write = 1; 6079 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 6080 pred); 6081 } 6082 } 6083 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 6084 need_barrier |= rtx_needs_barrier (pat, flags, pred); 6085 } 6086 break; 6087 6088 case SUBREG: 6089 need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred); 6090 break; 6091 case REG: 6092 if (REGNO (x) == AR_UNAT_REGNUM) 6093 { 6094 for (i = 0; i < 64; ++i) 6095 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 6096 } 6097 else 6098 need_barrier = rws_access_reg (x, flags, pred); 6099 break; 6100 6101 case MEM: 6102 /* Find the regs used in memory address computation. */ 6103 new_flags.is_write = 0; 6104 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6105 break; 6106 6107 case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR: 6108 case SYMBOL_REF: case LABEL_REF: case CONST: 6109 break; 6110 6111 /* Operators with side-effects.
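For instance, a post-increment load such as ld8 r8 = [r9], 16 both reads and writes r9, which is why the cases below access the address register once with is_write clear and once with it set.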
*/ 6112 case POST_INC: case POST_DEC: 6113 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6114 6115 new_flags.is_write = 0; 6116 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6117 new_flags.is_write = 1; 6118 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6119 break; 6120 6121 case POST_MODIFY: 6122 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); 6123 6124 new_flags.is_write = 0; 6125 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 6126 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6127 new_flags.is_write = 1; 6128 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 6129 break; 6130 6131 /* Handle common unary and binary ops for efficiency. */ 6132 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 6133 case MOD: case UDIV: case UMOD: case AND: case IOR: 6134 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 6135 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 6136 case NE: case EQ: case GE: case GT: case LE: 6137 case LT: case GEU: case GTU: case LEU: case LTU: 6138 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 6139 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 6140 break; 6141 6142 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 6143 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 6144 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 6145 case SQRT: case FFS: case POPCOUNT: 6146 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6147 break; 6148 6149 case VEC_SELECT: 6150 /* VEC_SELECT's second argument is a PARALLEL with integers that 6151 describe the elements selected. On ia64, those integers are 6152 always constants. Avoid walking the PARALLEL so that we don't 6153 get confused with "normal" parallels and then die. 
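(Roughly: (vec_select:V2HI (reg:V4HI x) (parallel [(const_int 0) (const_int 2)])); the PARALLEL holds nothing but CONST_INTs.)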
*/ 6154 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 6155 break; 6156 6157 case UNSPEC: 6158 switch (XINT (x, 1)) 6159 { 6160 case UNSPEC_LTOFF_DTPMOD: 6161 case UNSPEC_LTOFF_DTPREL: 6162 case UNSPEC_DTPREL: 6163 case UNSPEC_LTOFF_TPREL: 6164 case UNSPEC_TPREL: 6165 case UNSPEC_PRED_REL_MUTEX: 6166 case UNSPEC_PIC_CALL: 6167 case UNSPEC_MF: 6168 case UNSPEC_FETCHADD_ACQ: 6169 case UNSPEC_BSP_VALUE: 6170 case UNSPEC_FLUSHRS: 6171 case UNSPEC_BUNDLE_SELECTOR: 6172 break; 6173 6174 case UNSPEC_GR_SPILL: 6175 case UNSPEC_GR_RESTORE: 6176 { 6177 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 6178 HOST_WIDE_INT bit = (offset >> 3) & 63; 6179 6180 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6181 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); 6182 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 6183 new_flags, pred); 6184 break; 6185 } 6186 6187 case UNSPEC_FR_SPILL: 6188 case UNSPEC_FR_RESTORE: 6189 case UNSPEC_GETF_EXP: 6190 case UNSPEC_SETF_EXP: 6191 case UNSPEC_ADDP4: 6192 case UNSPEC_FR_SQRT_RECIP_APPROX: 6193 case UNSPEC_FR_SQRT_RECIP_APPROX_RES: 6194 case UNSPEC_LDA: 6195 case UNSPEC_LDS: 6196 case UNSPEC_LDS_A: 6197 case UNSPEC_LDSA: 6198 case UNSPEC_CHKACLR: 6199 case UNSPEC_CHKS: 6200 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6201 break; 6202 6203 case UNSPEC_FR_RECIP_APPROX: 6204 case UNSPEC_SHRP: 6205 case UNSPEC_COPYSIGN: 6206 case UNSPEC_FR_RECIP_APPROX_RES: 6207 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 6208 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6209 break; 6210 6211 case UNSPEC_CMPXCHG_ACQ: 6212 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 6213 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 6214 break; 6215 6216 default: 6217 gcc_unreachable (); 6218 } 6219 break; 6220 6221 case UNSPEC_VOLATILE: 6222 switch (XINT (x, 1)) 6223 { 6224 case UNSPECV_ALLOC: 6225 /* Alloc must always be the first instruction of a group. 6226 We force this by always returning true. */ 6227 /* ??? We might get better scheduling if we explicitly check for 6228 input/local/output register dependencies, and modify the 6229 scheduler so that alloc is always reordered to the start of 6230 the current group. We could then eliminate all of the 6231 first_instruction code. 
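(An alloc such as alloc r35 = ar.pfs, 2, 3, 2, 0 reads ar.pfs and writes the current frame marker, which is exactly what the two register accesses below record.)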
*/ 6232 rws_access_regno (AR_PFS_REGNUM, flags, pred); 6233 6234 new_flags.is_write = 1; 6235 rws_access_regno (REG_AR_CFM, new_flags, pred); 6236 return 1; 6237 6238 case UNSPECV_SET_BSP: 6239 need_barrier = 1; 6240 break; 6241 6242 case UNSPECV_BLOCKAGE: 6243 case UNSPECV_INSN_GROUP_BARRIER: 6244 case UNSPECV_BREAK: 6245 case UNSPECV_PSAC_ALL: 6246 case UNSPECV_PSAC_NORMAL: 6247 return 0; 6248 6249 default: 6250 gcc_unreachable (); 6251 } 6252 break; 6253 6254 case RETURN: 6255 new_flags.is_write = 0; 6256 need_barrier = rws_access_regno (REG_RP, flags, pred); 6257 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 6258 6259 new_flags.is_write = 1; 6260 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 6261 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 6262 break; 6263 6264 default: 6265 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 6266 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 6267 switch (format_ptr[i]) 6268 { 6269 case '0': /* unused field */ 6270 case 'i': /* integer */ 6271 case 'n': /* note */ 6272 case 'w': /* wide integer */ 6273 case 's': /* pointer to string */ 6274 case 'S': /* optional pointer to string */ 6275 break; 6276 6277 case 'e': 6278 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 6279 need_barrier = 1; 6280 break; 6281 6282 case 'E': 6283 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 6284 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 6285 need_barrier = 1; 6286 break; 6287 6288 default: 6289 gcc_unreachable (); 6290 } 6291 break; 6292 } 6293 return need_barrier; 6294} 6295 6296/* Clear out the state for group_barrier_needed at the start of a 6297 sequence of insns. */ 6298 6299static void 6300init_insn_group_barriers (void) 6301{ 6302 memset (rws_sum, 0, sizeof (rws_sum)); 6303 first_instruction = 1; 6304} 6305 6306/* Given the current state, determine whether a group barrier (a stop bit) is 6307 necessary before INSN. Return nonzero if so. This modifies the state to 6308 include the effects of INSN as a side-effect. */ 6309 6310static int 6311group_barrier_needed (rtx insn) 6312{ 6313 rtx pat; 6314 int need_barrier = 0; 6315 struct reg_flags flags; 6316 6317 memset (&flags, 0, sizeof (flags)); 6318 switch (GET_CODE (insn)) 6319 { 6320 case NOTE: 6321 case DEBUG_INSN: 6322 break; 6323 6324 case BARRIER: 6325 /* A barrier doesn't imply an instruction group boundary. */ 6326 break; 6327 6328 case CODE_LABEL: 6329 memset (rws_insn, 0, sizeof (rws_insn)); 6330 return 1; 6331 6332 case CALL_INSN: 6333 flags.is_branch = 1; 6334 flags.is_sibcall = SIBLING_CALL_P (insn); 6335 memset (rws_insn, 0, sizeof (rws_insn)); 6336 6337 /* Don't bundle a call following another call. */ 6338 if ((pat = prev_active_insn (insn)) 6339 && GET_CODE (pat) == CALL_INSN) 6340 { 6341 need_barrier = 1; 6342 break; 6343 } 6344 6345 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 6346 break; 6347 6348 case JUMP_INSN: 6349 if (!ia64_spec_check_p (insn)) 6350 flags.is_branch = 1; 6351 6352 /* Don't bundle a jump following a call. */ 6353 if ((pat = prev_active_insn (insn)) 6354 && GET_CODE (pat) == CALL_INSN) 6355 { 6356 need_barrier = 1; 6357 break; 6358 } 6359 /* FALLTHRU */ 6360 6361 case INSN: 6362 if (GET_CODE (PATTERN (insn)) == USE 6363 || GET_CODE (PATTERN (insn)) == CLOBBER) 6364 /* Don't care about USE and CLOBBER "insns"---those are used to 6365 indicate to the optimizer that it shouldn't get rid of 6366 certain operations. */ 6367 break; 6368 6369 pat = PATTERN (insn); 6370 6371 /* Ug. 
Hack hacks hacked elsewhere. */ 6372 switch (recog_memoized (insn)) 6373 { 6374 /* We play dependency tricks with the epilogue in order 6375 to get proper schedules. Undo this for dv analysis. */ 6376 case CODE_FOR_epilogue_deallocate_stack: 6377 case CODE_FOR_prologue_allocate_stack: 6378 pat = XVECEXP (pat, 0, 0); 6379 break; 6380 6381 /* The pattern we use for br.cloop confuses the code above. 6382 The second element of the vector is representative. */ 6383 case CODE_FOR_doloop_end_internal: 6384 pat = XVECEXP (pat, 0, 1); 6385 break; 6386 6387 /* Doesn't generate code. */ 6388 case CODE_FOR_pred_rel_mutex: 6389 case CODE_FOR_prologue_use: 6390 return 0; 6391 6392 default: 6393 break; 6394 } 6395 6396 memset (rws_insn, 0, sizeof (rws_insn)); 6397 need_barrier = rtx_needs_barrier (pat, flags, 0); 6398 6399 /* Check to see if the previous instruction was a volatile 6400 asm. */ 6401 if (! need_barrier) 6402 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 6403 6404 break; 6405 6406 default: 6407 gcc_unreachable (); 6408 } 6409 6410 if (first_instruction && INSN_P (insn) 6411 && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE 6412 && GET_CODE (PATTERN (insn)) != USE 6413 && GET_CODE (PATTERN (insn)) != CLOBBER) 6414 { 6415 need_barrier = 0; 6416 first_instruction = 0; 6417 } 6418 6419 return need_barrier; 6420} 6421 6422/* Like group_barrier_needed, but do not clobber the current state. */ 6423 6424static int 6425safe_group_barrier_needed (rtx insn) 6426{ 6427 int saved_first_instruction; 6428 int t; 6429 6430 saved_first_instruction = first_instruction; 6431 in_safe_group_barrier = 1; 6432 6433 t = group_barrier_needed (insn); 6434 6435 first_instruction = saved_first_instruction; 6436 in_safe_group_barrier = 0; 6437 6438 return t; 6439} 6440 6441/* Scan the current function and insert stop bits as necessary to 6442 eliminate dependencies. This function assumes that a final 6443 instruction scheduling pass has been run which has already 6444 inserted most of the necessary stop bits. This function only 6445 inserts new ones at basic block boundaries, since these are 6446 invisible to the scheduler. */ 6447 6448static void 6449emit_insn_group_barriers (FILE *dump) 6450{ 6451 rtx insn; 6452 rtx last_label = 0; 6453 int insns_since_last_label = 0; 6454 6455 init_insn_group_barriers (); 6456 6457 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6458 { 6459 if (GET_CODE (insn) == CODE_LABEL) 6460 { 6461 if (insns_since_last_label) 6462 last_label = insn; 6463 insns_since_last_label = 0; 6464 } 6465 else if (GET_CODE (insn) == NOTE 6466 && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK) 6467 { 6468 if (insns_since_last_label) 6469 last_label = insn; 6470 insns_since_last_label = 0; 6471 } 6472 else if (GET_CODE (insn) == INSN 6473 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 6474 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 6475 { 6476 init_insn_group_barriers (); 6477 last_label = 0; 6478 } 6479 else if (NONDEBUG_INSN_P (insn)) 6480 { 6481 insns_since_last_label = 1; 6482 6483 if (group_barrier_needed (insn)) 6484 { 6485 if (last_label) 6486 { 6487 if (dump) 6488 fprintf (dump, "Emitting stop before label %d\n", 6489 INSN_UID (last_label)); 6490 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 6491 insn = last_label; 6492 6493 init_insn_group_barriers (); 6494 last_label = 0; 6495 } 6496 } 6497 } 6498 } 6499} 6500 6501/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 
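(That is, when ia64_flag_schedule_insns2 is clear, e.g. at -O0.)  */

#if 0
/* Illustration only -- a hypothetical driver, roughly what the
   machine-dependent reorg pass does, not actual code from this file:
   the final-scheduling flag picks between the two emitters.  */
static void
emit_group_barriers_sketch (FILE *dump)
{
  if (ia64_flag_schedule_insns2)
    emit_insn_group_barriers (dump);	/* Patch block boundaries only.  */
  else
    emit_all_insn_group_barriers (dump);	/* Emit every stop bit.  */
}
#endif

/*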
6502 This function has to emit all necessary group barriers. */ 6503 6504static void 6505emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED) 6506{ 6507 rtx insn; 6508 6509 init_insn_group_barriers (); 6510 6511 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 6512 { 6513 if (GET_CODE (insn) == BARRIER) 6514 { 6515 rtx last = prev_active_insn (insn); 6516 6517 if (! last) 6518 continue; 6519 if (GET_CODE (last) == JUMP_INSN 6520 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) 6521 last = prev_active_insn (last); 6522 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 6523 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 6524 6525 init_insn_group_barriers (); 6526 } 6527 else if (NONDEBUG_INSN_P (insn)) 6528 { 6529 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 6530 init_insn_group_barriers (); 6531 else if (group_barrier_needed (insn)) 6532 { 6533 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 6534 init_insn_group_barriers (); 6535 group_barrier_needed (insn); 6536 } 6537 } 6538 } 6539} 6540 6541 6542 6543/* Instruction scheduling support. */ 6544 6545#define NR_BUNDLES 10 6546 6547/* A list of names of all available bundles. */ 6548 6549static const char *bundle_name [NR_BUNDLES] = 6550{ 6551 ".mii", 6552 ".mmi", 6553 ".mfi", 6554 ".mmf", 6555#if NR_BUNDLES == 10 6556 ".bbb", 6557 ".mbb", 6558#endif 6559 ".mib", 6560 ".mmb", 6561 ".mfb", 6562 ".mlx" 6563}; 6564 6565/* Nonzero if we should insert stop bits into the schedule. */ 6566 6567int ia64_final_schedule = 0; 6568 6569/* Codes of the corresponding queried units: */ 6570 6571static int _0mii_, _0mmi_, _0mfi_, _0mmf_; 6572static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_; 6573 6574static int _1mii_, _1mmi_, _1mfi_, _1mmf_; 6575static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_; 6576 6577static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6; 6578 6579/* The following variable holds an insn group barrier. */ 6580 6581static rtx dfa_stop_insn; 6582 6583/* The following variable holds the last issued insn. */ 6584 6585static rtx last_scheduled_insn; 6586 6587/* The following variable is a pointer to a DFA state used as 6588 a temporary variable. */ 6589 6590static state_t temp_dfa_state = NULL; 6591 6592/* The following variable is the DFA state after issuing the last 6593 insn. */ 6594 6595static state_t prev_cycle_state = NULL; 6596 6597/* The following array element values are TRUE if the corresponding 6598 insn requires stop bits to be added before it. */ 6599 6600static char *stops_p = NULL; 6601 6602/* The following variable is used to set up the array mentioned above. */ 6603 6604static int stop_before_p = 0; 6605 6606/* The following variable is the length of the 6607 array `stops_p'. */ 6608 6609static int clocks_length; 6610 6611/* The following variable is the number of data speculations in progress. */ 6612static int pending_data_specs = 0; 6613 6614/* Number of memory references on the current and three future processor cycles. */ 6615static char mem_ops_in_group[4]; 6616 6617/* Number of the current processor cycle (from the scheduler's point of view). */ 6618static int current_cycle; 6619 6620static rtx ia64_single_set (rtx); 6621static void ia64_emit_insn_before (rtx, rtx); 6622 6623/* Map a bundle number to its pseudo-op. */ 6624 6625const char * 6626get_bundle_name (int b) 6627{ 6628 return bundle_name[b]; 6629} 6630 6631 6632/* Return the maximum number of instructions a cpu can issue.
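(Itanium 2 can issue two bundles of three instructions each per clock, hence the 6 below.)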
*/ 6633 6634static int 6635ia64_issue_rate (void) 6636{ 6637 return 6; 6638} 6639 6640/* Helper function - like single_set, but look inside COND_EXEC. */ 6641 6642static rtx 6643ia64_single_set (rtx insn) 6644{ 6645 rtx x = PATTERN (insn), ret; 6646 if (GET_CODE (x) == COND_EXEC) 6647 x = COND_EXEC_CODE (x); 6648 if (GET_CODE (x) == SET) 6649 return x; 6650 6651 /* Special-case prologue_allocate_stack and epilogue_deallocate_stack here. 6652 Although they are not classical single sets, the second set is there just 6653 to keep the insn from being moved past FP-relative stack accesses. */ 6654 switch (recog_memoized (insn)) 6655 { 6656 case CODE_FOR_prologue_allocate_stack: 6657 case CODE_FOR_epilogue_deallocate_stack: 6658 ret = XVECEXP (x, 0, 0); 6659 break; 6660 6661 default: 6662 ret = single_set_2 (insn, x); 6663 break; 6664 } 6665 6666 return ret; 6667} 6668 6669/* Adjust the cost of a scheduling dependency. 6670 Return the new cost of a dependency of type DEP_TYPE of INSN on DEP_INSN. 6671 COST is the current cost, DW is dependency weakness. */ 6672static int 6673ia64_adjust_cost_2 (rtx insn, int dep_type1, rtx dep_insn, int cost, dw_t dw) 6674{ 6675 enum reg_note dep_type = (enum reg_note) dep_type1; 6676 enum attr_itanium_class dep_class; 6677 enum attr_itanium_class insn_class; 6678 6679 insn_class = ia64_safe_itanium_class (insn); 6680 dep_class = ia64_safe_itanium_class (dep_insn); 6681 6682 /* Treat true memory dependencies separately. Ignore apparent true 6683 dependence between store and call (call has a MEM inside a SYMBOL_REF). */ 6684 if (dep_type == REG_DEP_TRUE 6685 && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF) 6686 && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL)) 6687 return 0; 6688 6689 if (dw == MIN_DEP_WEAK) 6690 /* Store and load are likely to alias, use higher cost to avoid stall. */ 6691 return PARAM_VALUE (PARAM_SCHED_MEM_TRUE_DEP_COST); 6692 else if (dw > MIN_DEP_WEAK) 6693 { 6694 /* Store and load are less likely to alias. */ 6695 if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF) 6696 /* Assume there will be no cache conflict for floating-point data. 6697 For integer data, L1 conflict penalty is huge (17 cycles), so we 6698 never assume it will not cause a conflict. */ 6699 return 0; 6700 else 6701 return cost; 6702 } 6703 6704 if (dep_type != REG_DEP_OUTPUT) 6705 return cost; 6706 6707 if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF 6708 || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF) 6709 return 0; 6710 6711 return cost; 6712} 6713 6714/* Like emit_insn_before, but skip cycle_display notes. 6715 ??? When cycle display notes are implemented, update this. */ 6716 6717static void 6718ia64_emit_insn_before (rtx insn, rtx before) 6719{ 6720 emit_insn_before (insn, before); 6721} 6722 6723/* The following function marks insns that produce addresses for load 6724 and store insns. Such insns will be placed into M slots because that 6725 decreases latency on Itanium 1 (see function 6726 `ia64_produce_address_p' and the DFA descriptions). */ 6727 6728static void 6729ia64_dependencies_evaluation_hook (rtx head, rtx tail) 6730{ 6731 rtx insn, next, next_tail; 6732 6733 /* Before reload, which_alternative is not set, which means that 6734 ia64_safe_itanium_class will produce wrong results for (at least) 6735 move instructions.
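(A move pattern typically has GR, FR and BR alternatives whose itanium_class attribute differs per alternative, so the class is unknown until an alternative has been chosen.)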
*/ 6736 if (!reload_completed) 6737 return; 6738 6739 next_tail = NEXT_INSN (tail); 6740 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 6741 if (INSN_P (insn)) 6742 insn->call = 0; 6743 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) 6744 if (INSN_P (insn) 6745 && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU) 6746 { 6747 sd_iterator_def sd_it; 6748 dep_t dep; 6749 bool has_mem_op_consumer_p = false; 6750 6751 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) 6752 { 6753 enum attr_itanium_class c; 6754 6755 if (DEP_TYPE (dep) != REG_DEP_TRUE) 6756 continue; 6757 6758 next = DEP_CON (dep); 6759 c = ia64_safe_itanium_class (next); 6760 if ((c == ITANIUM_CLASS_ST 6761 || c == ITANIUM_CLASS_STF) 6762 && ia64_st_address_bypass_p (insn, next)) 6763 { 6764 has_mem_op_consumer_p = true; 6765 break; 6766 } 6767 else if ((c == ITANIUM_CLASS_LD 6768 || c == ITANIUM_CLASS_FLD 6769 || c == ITANIUM_CLASS_FLDP) 6770 && ia64_ld_address_bypass_p (insn, next)) 6771 { 6772 has_mem_op_consumer_p = true; 6773 break; 6774 } 6775 } 6776 6777 insn->call = has_mem_op_consumer_p; 6778 } 6779} 6780 6781/* We're beginning a new block. Initialize data structures as necessary. */ 6782 6783static void 6784ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED, 6785 int sched_verbose ATTRIBUTE_UNUSED, 6786 int max_ready ATTRIBUTE_UNUSED) 6787{ 6788#ifdef ENABLE_CHECKING 6789 rtx insn; 6790 6791 if (!sel_sched_p () && reload_completed) 6792 for (insn = NEXT_INSN (current_sched_info->prev_head); 6793 insn != current_sched_info->next_tail; 6794 insn = NEXT_INSN (insn)) 6795 gcc_assert (!SCHED_GROUP_P (insn)); 6796#endif 6797 last_scheduled_insn = NULL_RTX; 6798 init_insn_group_barriers (); 6799 6800 current_cycle = 0; 6801 memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group)); 6802} 6803 6804/* We're beginning a scheduling pass. Check assertion. */ 6805 6806static void 6807ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, 6808 int sched_verbose ATTRIBUTE_UNUSED, 6809 int max_ready ATTRIBUTE_UNUSED) 6810{ 6811 gcc_assert (pending_data_specs == 0); 6812} 6813 6814/* Scheduling pass is now finished. Free/reset static variable. */ 6815static void 6816ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED, 6817 int sched_verbose ATTRIBUTE_UNUSED) 6818{ 6819 gcc_assert (pending_data_specs == 0); 6820} 6821 6822/* Return TRUE if INSN is a load (either normal or speculative, but not a 6823 speculation check), FALSE otherwise. */ 6824static bool 6825is_load_p (rtx insn) 6826{ 6827 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); 6828 6829 return 6830 ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD) 6831 && get_attr_check_load (insn) == CHECK_LOAD_NO); 6832} 6833 6834/* If INSN is a memory reference, record it in the MEM_OPS_IN_GROUP global 6835 array (taking into account the 3-cycle cache reference postponing for 6836 stores: Intel Itanium 2 Reference Manual for Software Development and 6837 Optimization, 6.7.3.1). */ 6838static void 6839record_memory_reference (rtx insn) 6840{ 6841 enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn); 6842 6843 switch (insn_class) { 6844 case ITANIUM_CLASS_FLD: 6845 case ITANIUM_CLASS_LD: 6846 mem_ops_in_group[current_cycle % 4]++; 6847 break; 6848 case ITANIUM_CLASS_STF: 6849 case ITANIUM_CLASS_ST: 6850 mem_ops_in_group[(current_cycle + 3) % 4]++; 6851 break; 6852 default:; 6853 } 6854} 6855 6856/* We are about to begin issuing insns for this clock cycle. 6857 Override the default sort algorithm to better slot instructions.
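Roughly: asms are moved to the front of the ready list, and, during the final schedule, insns that would need a stop bit and excess memory operations are moved toward the back; see the reorder_type handling below.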
*/ 6858 6859static int 6860ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, 6861 int *pn_ready, int clock_var, 6862 int reorder_type) 6863{ 6864 int n_asms; 6865 int n_ready = *pn_ready; 6866 rtx *e_ready = ready + n_ready; 6867 rtx *insnp; 6868 6869 if (sched_verbose) 6870 fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type); 6871 6872 if (reorder_type == 0) 6873 { 6874 /* First, move all USEs, CLOBBERs and other crud out of the way. */ 6875 n_asms = 0; 6876 for (insnp = ready; insnp < e_ready; insnp++) 6877 if (insnp < e_ready) 6878 { 6879 rtx insn = *insnp; 6880 enum attr_type t = ia64_safe_type (insn); 6881 if (t == TYPE_UNKNOWN) 6882 { 6883 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6884 || asm_noperands (PATTERN (insn)) >= 0) 6885 { 6886 rtx lowest = ready[n_asms]; 6887 ready[n_asms] = insn; 6888 *insnp = lowest; 6889 n_asms++; 6890 } 6891 else 6892 { 6893 rtx highest = ready[n_ready - 1]; 6894 ready[n_ready - 1] = insn; 6895 *insnp = highest; 6896 return 1; 6897 } 6898 } 6899 } 6900 6901 if (n_asms < n_ready) 6902 { 6903 /* Some normal insns to process. Skip the asms. */ 6904 ready += n_asms; 6905 n_ready -= n_asms; 6906 } 6907 else if (n_ready > 0) 6908 return 1; 6909 } 6910 6911 if (ia64_final_schedule) 6912 { 6913 int deleted = 0; 6914 int nr_need_stop = 0; 6915 6916 for (insnp = ready; insnp < e_ready; insnp++) 6917 if (safe_group_barrier_needed (*insnp)) 6918 nr_need_stop++; 6919 6920 if (reorder_type == 1 && n_ready == nr_need_stop) 6921 return 0; 6922 if (reorder_type == 0) 6923 return 1; 6924 insnp = e_ready; 6925 /* Move down everything that needs a stop bit, preserving 6926 relative order. */ 6927 while (insnp-- > ready + deleted) 6928 while (insnp >= ready + deleted) 6929 { 6930 rtx insn = *insnp; 6931 if (! safe_group_barrier_needed (insn)) 6932 break; 6933 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 6934 *ready = insn; 6935 deleted++; 6936 } 6937 n_ready -= deleted; 6938 ready += deleted; 6939 } 6940 6941 current_cycle = clock_var; 6942 if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns) 6943 { 6944 int moved = 0; 6945 6946 insnp = e_ready; 6947 /* Move down loads/stores, preserving relative order. */ 6948 while (insnp-- > ready + moved) 6949 while (insnp >= ready + moved) 6950 { 6951 rtx insn = *insnp; 6952 if (! is_load_p (insn)) 6953 break; 6954 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 6955 *ready = insn; 6956 moved++; 6957 } 6958 n_ready -= moved; 6959 ready += moved; 6960 } 6961 6962 return 1; 6963} 6964 6965/* We are about to begin issuing insns for this clock cycle. Override 6966 the default sort algorithm to better slot instructions. */ 6967 6968static int 6969ia64_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready, 6970 int clock_var) 6971{ 6972 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, 6973 pn_ready, clock_var, 0); 6974} 6975 6976/* Like ia64_sched_reorder, but called after issuing each insn. 6977 Override the default sort algorithm to better slot instructions. */ 6978 6979static int 6980ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED, 6981 int sched_verbose ATTRIBUTE_UNUSED, rtx *ready, 6982 int *pn_ready, int clock_var) 6983{ 6984 return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready, 6985 clock_var, 1); 6986} 6987 6988/* We are about to issue INSN. Return the number of insns left on the 6989 ready queue that can be issued this cycle.
*/ 6990 6991static int 6992ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED, 6993 int sched_verbose ATTRIBUTE_UNUSED, 6994 rtx insn ATTRIBUTE_UNUSED, 6995 int can_issue_more ATTRIBUTE_UNUSED) 6996{ 6997 if (sched_deps_info->generate_spec_deps && !sel_sched_p ()) 6998 /* Modulo scheduling does not extend h_i_d when emitting 6999 new instructions. Don't use h_i_d if we don't have to. */ 7000 { 7001 if (DONE_SPEC (insn) & BEGIN_DATA) 7002 pending_data_specs++; 7003 if (CHECK_SPEC (insn) & BEGIN_DATA) 7004 pending_data_specs--; 7005 } 7006 7007 if (DEBUG_INSN_P (insn)) 7008 return 1; 7009 7010 last_scheduled_insn = insn; 7011 memcpy (prev_cycle_state, curr_state, dfa_state_size); 7012 if (reload_completed) 7013 { 7014 int needed = group_barrier_needed (insn); 7015 7016 gcc_assert (!needed); 7017 if (GET_CODE (insn) == CALL_INSN) 7018 init_insn_group_barriers (); 7019 stops_p [INSN_UID (insn)] = stop_before_p; 7020 stop_before_p = 0; 7021 7022 record_memory_reference (insn); 7023 } 7024 return 1; 7025} 7026 7027/* We are choosing an insn from the ready queue. Return nonzero if INSN 7028 can be chosen. */ 7029 7030static int 7031ia64_first_cycle_multipass_dfa_lookahead_guard (rtx insn) 7032{ 7033 gcc_assert (insn && INSN_P (insn)); 7034 return ((!reload_completed 7035 || !safe_group_barrier_needed (insn)) 7036 && ia64_first_cycle_multipass_dfa_lookahead_guard_spec (insn) 7037 && (!mflag_sched_mem_insns_hard_limit 7038 || !is_load_p (insn) 7039 || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns)); 7040} 7041 7042/* We are choosing an insn from the ready queue. Return nonzero if INSN 7043 can be chosen. */ 7044 7045static bool 7046ia64_first_cycle_multipass_dfa_lookahead_guard_spec (const_rtx insn) 7047{ 7048 gcc_assert (insn && INSN_P (insn)); 7049 /* The ALAT has 32 entries. Since we perform conservative data speculation, 7050 we keep the ALAT half empty. */ 7051 return (pending_data_specs < 16 7052 || !(TODO_SPEC (insn) & BEGIN_DATA)); 7053} 7054 7055/* The following variable value is a pseudo-insn used by the DFA insn 7056 scheduler to change the DFA state when the simulated clock is 7057 increased. */ 7058 7059static rtx dfa_pre_cycle_insn; 7060 7061/* Returns 1 when a meaningful insn was scheduled between the last group 7062 barrier and LAST. */ 7063static int 7064scheduled_good_insn (rtx last) 7065{ 7066 if (last && recog_memoized (last) >= 0) 7067 return 1; 7068 7069 for ( ; 7070 last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last) 7071 && !stops_p[INSN_UID (last)]; 7072 last = PREV_INSN (last)) 7073 /* We could hit a NOTE_INSN_DELETED here which is actually outside 7074 the ebb we're scheduling. */ 7075 if (INSN_P (last) && recog_memoized (last) >= 0) 7076 return 1; 7077 7078 return 0; 7079} 7080 7081/* We are about to begin issuing INSN. Return nonzero if we cannot 7082 issue it on the given cycle CLOCK and return zero if we should not sort 7083 the ready queue on the next clock start. */ 7084 7085static int 7086ia64_dfa_new_cycle (FILE *dump, int verbose, rtx insn, int last_clock, 7087 int clock, int *sort_p) 7088{ 7089 int setup_clocks_p = FALSE; 7090 7091 gcc_assert (insn && INSN_P (insn)); 7092 7093 if (DEBUG_INSN_P (insn)) 7094 return 0; 7095 7096 /* When a group barrier is needed for INSN, last_scheduled_insn 7097 should be set.
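(It is only NULL immediately after ia64_sched_init, when the group state has just been cleared and no barrier can be needed yet.)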
*/ 7098 gcc_assert (!(reload_completed && safe_group_barrier_needed (insn)) 7099 || last_scheduled_insn); 7100 7101 if ((reload_completed 7102 && (safe_group_barrier_needed (insn) 7103 || (mflag_sched_stop_bits_after_every_cycle 7104 && last_clock != clock 7105 && last_scheduled_insn 7106 && scheduled_good_insn (last_scheduled_insn)))) 7107 || (last_scheduled_insn 7108 && (GET_CODE (last_scheduled_insn) == CALL_INSN 7109 || GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT 7110 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0))) 7111 { 7112 init_insn_group_barriers (); 7113 7114 if (verbose && dump) 7115 fprintf (dump, "// Stop should be before %d%s\n", INSN_UID (insn), 7116 last_clock == clock ? " + cycle advance" : ""); 7117 7118 stop_before_p = 1; 7119 current_cycle = clock; 7120 mem_ops_in_group[current_cycle % 4] = 0; 7121 7122 if (last_clock == clock) 7123 { 7124 state_transition (curr_state, dfa_stop_insn); 7125 if (TARGET_EARLY_STOP_BITS) 7126 *sort_p = (last_scheduled_insn == NULL_RTX 7127 || GET_CODE (last_scheduled_insn) != CALL_INSN); 7128 else 7129 *sort_p = 0; 7130 return 1; 7131 } 7132 else if (reload_completed) 7133 setup_clocks_p = TRUE; 7134 7135 if (last_scheduled_insn) 7136 { 7137 if (GET_CODE (PATTERN (last_scheduled_insn)) == ASM_INPUT 7138 || asm_noperands (PATTERN (last_scheduled_insn)) >= 0) 7139 state_reset (curr_state); 7140 else 7141 { 7142 memcpy (curr_state, prev_cycle_state, dfa_state_size); 7143 state_transition (curr_state, dfa_stop_insn); 7144 state_transition (curr_state, dfa_pre_cycle_insn); 7145 state_transition (curr_state, NULL); 7146 } 7147 } 7148 } 7149 else if (reload_completed) 7150 setup_clocks_p = TRUE; 7151 7152 return 0; 7153} 7154 7155/* Implement targetm.sched.h_i_d_extended hook. 7156 Extend internal data structures. */ 7157static void 7158ia64_h_i_d_extended (void) 7159{ 7160 if (stops_p != NULL) 7161 { 7162 int new_clocks_length = get_max_uid () * 3 / 2; 7163 stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1); 7164 clocks_length = new_clocks_length; 7165 } 7166} 7167 7168 7169/* This structure describes the data used by the backend to guide scheduling. 7170 When the current scheduling point is switched, this data should be saved 7171 and restored later, if the scheduler returns to this point. */ 7172struct _ia64_sched_context 7173{ 7174 state_t prev_cycle_state; 7175 rtx last_scheduled_insn; 7176 struct reg_write_state rws_sum[NUM_REGS]; 7177 struct reg_write_state rws_insn[NUM_REGS]; 7178 int first_instruction; 7179 int pending_data_specs; 7180 int current_cycle; 7181 char mem_ops_in_group[4]; 7182}; 7183typedef struct _ia64_sched_context *ia64_sched_context_t; 7184 7185/* Allocates a scheduling context. */ 7186static void * 7187ia64_alloc_sched_context (void) 7188{ 7189 return xmalloc (sizeof (struct _ia64_sched_context)); 7190} 7191 7192/* Initializes the _SC context with clean data, if CLEAN_P, and from 7193 the global context otherwise. 
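The selective scheduler saves a context before leaving a scheduling point and, if it comes back, restores it with ia64_set_sched_context below.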
*/ 7194static void 7195ia64_init_sched_context (void *_sc, bool clean_p) 7196{ 7197 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7198 7199 sc->prev_cycle_state = xmalloc (dfa_state_size); 7200 if (clean_p) 7201 { 7202 state_reset (sc->prev_cycle_state); 7203 sc->last_scheduled_insn = NULL_RTX; 7204 memset (sc->rws_sum, 0, sizeof (rws_sum)); 7205 memset (sc->rws_insn, 0, sizeof (rws_insn)); 7206 sc->first_instruction = 1; 7207 sc->pending_data_specs = 0; 7208 sc->current_cycle = 0; 7209 memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group)); 7210 } 7211 else 7212 { 7213 memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size); 7214 sc->last_scheduled_insn = last_scheduled_insn; 7215 memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum)); 7216 memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn)); 7217 sc->first_instruction = first_instruction; 7218 sc->pending_data_specs = pending_data_specs; 7219 sc->current_cycle = current_cycle; 7220 memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group)); 7221 } 7222} 7223 7224/* Sets the global scheduling context to the one pointed to by _SC. */ 7225static void 7226ia64_set_sched_context (void *_sc) 7227{ 7228 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7229 7230 gcc_assert (sc != NULL); 7231 7232 memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size); 7233 last_scheduled_insn = sc->last_scheduled_insn; 7234 memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum)); 7235 memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn)); 7236 first_instruction = sc->first_instruction; 7237 pending_data_specs = sc->pending_data_specs; 7238 current_cycle = sc->current_cycle; 7239 memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group)); 7240} 7241 7242/* Clears the data in the _SC scheduling context. */ 7243static void 7244ia64_clear_sched_context (void *_sc) 7245{ 7246 ia64_sched_context_t sc = (ia64_sched_context_t) _sc; 7247 7248 free (sc->prev_cycle_state); 7249 sc->prev_cycle_state = NULL; 7250} 7251 7252/* Frees the _SC scheduling context. */ 7253static void 7254ia64_free_sched_context (void *_sc) 7255{ 7256 gcc_assert (_sc != NULL); 7257 7258 free (_sc); 7259} 7260 7261typedef rtx (* gen_func_t) (rtx, rtx); 7262 7263/* Return a function that will generate a load of mode MODE_NO 7264 with speculation types TS. 
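For instance, TS == (BEGIN_DATA | BEGIN_CONTROL) with MODE_NO == 4 (DImode) selects gen_movdi_speculative_advanced from the gen_ld_sa table below, i.e. an ld8.sa.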
*/ 7265static gen_func_t 7266get_spec_load_gen_function (ds_t ts, int mode_no) 7267{ 7268 static gen_func_t gen_ld_[] = { 7269 gen_movbi, 7270 gen_movqi_internal, 7271 gen_movhi_internal, 7272 gen_movsi_internal, 7273 gen_movdi_internal, 7274 gen_movsf_internal, 7275 gen_movdf_internal, 7276 gen_movxf_internal, 7277 gen_movti_internal, 7278 gen_zero_extendqidi2, 7279 gen_zero_extendhidi2, 7280 gen_zero_extendsidi2, 7281 }; 7282 7283 static gen_func_t gen_ld_a[] = { 7284 gen_movbi_advanced, 7285 gen_movqi_advanced, 7286 gen_movhi_advanced, 7287 gen_movsi_advanced, 7288 gen_movdi_advanced, 7289 gen_movsf_advanced, 7290 gen_movdf_advanced, 7291 gen_movxf_advanced, 7292 gen_movti_advanced, 7293 gen_zero_extendqidi2_advanced, 7294 gen_zero_extendhidi2_advanced, 7295 gen_zero_extendsidi2_advanced, 7296 }; 7297 static gen_func_t gen_ld_s[] = { 7298 gen_movbi_speculative, 7299 gen_movqi_speculative, 7300 gen_movhi_speculative, 7301 gen_movsi_speculative, 7302 gen_movdi_speculative, 7303 gen_movsf_speculative, 7304 gen_movdf_speculative, 7305 gen_movxf_speculative, 7306 gen_movti_speculative, 7307 gen_zero_extendqidi2_speculative, 7308 gen_zero_extendhidi2_speculative, 7309 gen_zero_extendsidi2_speculative, 7310 }; 7311 static gen_func_t gen_ld_sa[] = { 7312 gen_movbi_speculative_advanced, 7313 gen_movqi_speculative_advanced, 7314 gen_movhi_speculative_advanced, 7315 gen_movsi_speculative_advanced, 7316 gen_movdi_speculative_advanced, 7317 gen_movsf_speculative_advanced, 7318 gen_movdf_speculative_advanced, 7319 gen_movxf_speculative_advanced, 7320 gen_movti_speculative_advanced, 7321 gen_zero_extendqidi2_speculative_advanced, 7322 gen_zero_extendhidi2_speculative_advanced, 7323 gen_zero_extendsidi2_speculative_advanced, 7324 }; 7325 static gen_func_t gen_ld_s_a[] = { 7326 gen_movbi_speculative_a, 7327 gen_movqi_speculative_a, 7328 gen_movhi_speculative_a, 7329 gen_movsi_speculative_a, 7330 gen_movdi_speculative_a, 7331 gen_movsf_speculative_a, 7332 gen_movdf_speculative_a, 7333 gen_movxf_speculative_a, 7334 gen_movti_speculative_a, 7335 gen_zero_extendqidi2_speculative_a, 7336 gen_zero_extendhidi2_speculative_a, 7337 gen_zero_extendsidi2_speculative_a, 7338 }; 7339 7340 gen_func_t *gen_ld; 7341 7342 if (ts & BEGIN_DATA) 7343 { 7344 if (ts & BEGIN_CONTROL) 7345 gen_ld = gen_ld_sa; 7346 else 7347 gen_ld = gen_ld_a; 7348 } 7349 else if (ts & BEGIN_CONTROL) 7350 { 7351 if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL) 7352 || ia64_needs_block_p (ts)) 7353 gen_ld = gen_ld_s; 7354 else 7355 gen_ld = gen_ld_s_a; 7356 } 7357 else if (ts == 0) 7358 gen_ld = gen_ld_; 7359 else 7360 gcc_unreachable (); 7361 7362 return gen_ld[mode_no]; 7363} 7364 7365/* Constants that help map 'enum machine_mode' to int. */ 7366enum SPEC_MODES 7367 { 7368 SPEC_MODE_INVALID = -1, 7369 SPEC_MODE_FIRST = 0, 7370 SPEC_MODE_FOR_EXTEND_FIRST = 1, 7371 SPEC_MODE_FOR_EXTEND_LAST = 3, 7372 SPEC_MODE_LAST = 8 7373 }; 7374 7375enum 7376 { 7377 /* Offset to reach ZERO_EXTEND patterns. */ 7378 SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1 7379 }; 7380 7381/* Return the index of MODE.
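For example, SImode maps to 3; for a zero-extending SImode load the caller adds SPEC_GEN_EXTEND_OFFSET, giving index 11, the gen_zero_extendsidi2 slot in the tables above.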
*/ 7382static int 7383ia64_mode_to_int (enum machine_mode mode) 7384{ 7385 switch (mode) 7386 { 7387 case BImode: return 0; /* SPEC_MODE_FIRST */ 7388 case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */ 7389 case HImode: return 2; 7390 case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */ 7391 case DImode: return 4; 7392 case SFmode: return 5; 7393 case DFmode: return 6; 7394 case XFmode: return 7; 7395 case TImode: 7396 /* ??? This mode needs testing. Bypasses for ldfp8 instruction are not 7397 mentioned in itanium[12].md. Predicate fp_register_operand also 7398 needs to be defined. Bottom line: better disable for now. */ 7399 return SPEC_MODE_INVALID; 7400 default: return SPEC_MODE_INVALID; 7401 } 7402} 7403 7404/* Provide information about speculation capabilities. */ 7405static void 7406ia64_set_sched_flags (spec_info_t spec_info) 7407{ 7408 unsigned int *flags = &(current_sched_info->flags); 7409 7410 if (*flags & SCHED_RGN 7411 || *flags & SCHED_EBB 7412 || *flags & SEL_SCHED) 7413 { 7414 int mask = 0; 7415 7416 if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0) 7417 || (mflag_sched_ar_data_spec && reload_completed)) 7418 { 7419 mask |= BEGIN_DATA; 7420 7421 if (!sel_sched_p () 7422 && ((mflag_sched_br_in_data_spec && !reload_completed) 7423 || (mflag_sched_ar_in_data_spec && reload_completed))) 7424 mask |= BE_IN_DATA; 7425 } 7426 7427 if (mflag_sched_control_spec 7428 && (!sel_sched_p () 7429 || reload_completed)) 7430 { 7431 mask |= BEGIN_CONTROL; 7432 7433 if (!sel_sched_p () && mflag_sched_in_control_spec) 7434 mask |= BE_IN_CONTROL; 7435 } 7436 7437 spec_info->mask = mask; 7438 7439 if (mask) 7440 { 7441 *flags |= USE_DEPS_LIST | DO_SPECULATION; 7442 7443 if (mask & BE_IN_SPEC) 7444 *flags |= NEW_BBS; 7445 7446 spec_info->flags = 0; 7447 7448 if ((mask & DATA_SPEC) && mflag_sched_prefer_non_data_spec_insns) 7449 spec_info->flags |= PREFER_NON_DATA_SPEC; 7450 7451 if (mask & CONTROL_SPEC) 7452 { 7453 if (mflag_sched_prefer_non_control_spec_insns) 7454 spec_info->flags |= PREFER_NON_CONTROL_SPEC; 7455 7456 if (sel_sched_p () && mflag_sel_sched_dont_check_control_spec) 7457 spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL; 7458 } 7459 7460 if (sched_verbose >= 1) 7461 spec_info->dump = sched_dump; 7462 else 7463 spec_info->dump = 0; 7464 7465 if (mflag_sched_count_spec_in_critical_path) 7466 spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH; 7467 } 7468 } 7469 else 7470 spec_info->mask = 0; 7471} 7472 7473/* If INSN is an appropriate load return its mode. 7474 Return -1 otherwise. */ 7475static int 7476get_mode_no_for_insn (rtx insn) 7477{ 7478 rtx reg, mem, mode_rtx; 7479 int mode_no; 7480 bool extend_p; 7481 7482 extract_insn_cached (insn); 7483 7484 /* We use WHICH_ALTERNATIVE only after reload. This will 7485 guarantee that reload won't touch a speculative insn. */ 7486 7487 if (recog_data.n_operands != 2) 7488 return -1; 7489 7490 reg = recog_data.operand[0]; 7491 mem = recog_data.operand[1]; 7492 7493 /* We should use MEM's mode since REG's mode in presence of 7494 ZERO_EXTEND will always be DImode. */ 7495 if (get_attr_speculable1 (insn) == SPECULABLE1_YES) 7496 /* Process non-speculative ld. */ 7497 { 7498 if (!reload_completed) 7499 { 7500 /* Do not speculate into regs like ar.lc. 
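Speculative loads can only target general or floating-point registers; there is no ld.s or ld.a form that writes an application register.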
/* If INSN is an appropriate load, return its mode index.
   Return -1 otherwise.  */
static int
get_mode_no_for_insn (rtx insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in the presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }

	  mode_rtx = mem;
	}
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
	{
	  gcc_assert (REG_P (reg) && MEM_P (mem));
	  mode_rtx = mem;
	}
      else
	return -1;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}

/* If X is an unspec part of a speculative load, return its code.
   Return -1 otherwise.  */
static int
get_spec_unspec_code (const_rtx x)
{
  if (GET_CODE (x) != UNSPEC)
    return -1;

  {
    int code;

    code = XINT (x, 1);

    switch (code)
      {
      case UNSPEC_LDA:
      case UNSPEC_LDS:
      case UNSPEC_LDS_A:
      case UNSPEC_LDSA:
	return code;

      default:
	return -1;
      }
  }
}

/* Implement skip_rtx_p hook.  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}

/* If INSN is a speculative load, return its UNSPEC code.
   Return -1 otherwise.  */
static int
get_insn_spec_code (const_rtx insn)
{
  rtx pat, reg, mem;

  pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    pat = COND_EXEC_CODE (pat);

  if (GET_CODE (pat) != SET)
    return -1;

  reg = SET_DEST (pat);
  if (!REG_P (reg))
    return -1;

  mem = SET_SRC (pat);
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  return get_spec_unspec_code (mem);
}

/* If INSN is a speculative load, return a ds with the speculation types.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_spec_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA;

    case UNSPEC_LDS:
    case UNSPEC_LDS_A:
      return BEGIN_CONTROL;

    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}
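/* For reference, the UNSPEC codes used by the two functions above and
   below correspond (roughly -- see the .md patterns for the details)
   to the IA-64 speculative load flavors:

     UNSPEC_LDA    ld.a   advanced (data-speculative) load
     UNSPEC_LDS    ld.s   control-speculative load
     UNSPEC_LDS_A         control-speculative load that also allocates
			  an ALAT entry, so a simple ld.c check suffices
     UNSPEC_LDSA   ld.sa  combined data- and control-speculative load  */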
/* If INSN is a speculative load, return a ds with the speculation types
   that will be checked.
   Otherwise [if INSN is a normal instruction] return 0.  */
static ds_t
ia64_get_insn_checked_ds (rtx insn)
{
  int code = get_insn_spec_code (insn);

  switch (code)
    {
    case UNSPEC_LDA:
      return BEGIN_DATA | BEGIN_CONTROL;

    case UNSPEC_LDS:
      return BEGIN_CONTROL;

    case UNSPEC_LDS_A:
    case UNSPEC_LDSA:
      return BEGIN_DATA | BEGIN_CONTROL;

    default:
      return 0;
    }
}

/* Return the speculative pattern for INSN, given speculation types TS
   and mode index MODE_NO.  The operands are taken from recog_data, so
   the caller must have extracted INSN first.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}

static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}

/* Implement targetm.sched.speculate_insn hook.
   Check if the INSN can be TS speculative.
   If 'no' - return -1.
   If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   If current pattern of the INSN already provides TS speculation,
   return 0.  */
static int
ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
{
  int mode_no;
  int res;

  gcc_assert (!(ts & ~SPECULATIVE));

  if (ia64_spec_check_p (insn))
    return -1;

  if ((ts & BE_IN_SPEC)
      && !insn_can_be_in_speculative_p (insn, ts))
    return -1;

  mode_no = get_mode_no_for_insn (insn);

  if (mode_no != SPEC_MODE_INVALID)
    {
      if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
	res = 0;
      else
	{
	  res = 1;
	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
	}
    }
  else
    res = -1;

  return res;
}
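/* A minimal sketch of how a scheduler might drive the hook above
   (illustration only; the real call sites live in the haifa and
   selective schedulers, not in this file):

     rtx new_pat;
     int res = ia64_speculate_insn (insn, ts, &new_pat);

     if (res == 1)
       validate_change (insn, &PATTERN (insn), new_pat, 0);
     else if (res == 0)
       ;  (INSN already provides TS speculation)
     else
       ;  (res == -1: INSN cannot be made speculative)  */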
/* Return a function that will generate a check for speculation TS with
   mode MODE_NO.
   If a simple check is needed, pass true for SIMPLE_CHECK_P.
   If a clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  static gen_func_t gen_ld_c_clr[] = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  static gen_func_t gen_ld_c_nc[] = {
    gen_movbi_nc,
    gen_movqi_nc,
    gen_movhi_nc,
    gen_movsi_nc,
    gen_movdi_nc,
    gen_movsf_nc,
    gen_movdf_nc,
    gen_movxf_nc,
    gen_movti_nc,
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa, the ALAT
	 entry will be allocated only if the NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	/* We might want to use ld.sa -> ld.c instead of
	   ld.s -> chk.s.  */
	{
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	gen_check = gen_chk_s;
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}
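/* For reference, the five tables above select among the IA-64 check
   flavors (named after the corresponding .md patterns):

     gen_ld_c_clr   ld.c.clr   simple check, clearing the ALAT entry
     gen_ld_c_nc    ld.c.nc    simple check, keeping the ALAT entry
     gen_chk_a_clr  chk.a.clr  branchy advanced-load check, clearing
     gen_chk_a_nc   chk.a.nc   branchy advanced-load check, non-clearing
     gen_chk_s      chk.s      branchy control-speculation check  */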
/* Return true if a check for speculation types TS must be implemented
   as a branchy recovery check rather than a simple ld.c.  */
static bool
ia64_needs_block_p (ds_t ts)
{
  if (ts & BEGIN_DATA)
    return !mflag_sched_spec_ldc;

  gcc_assert ((ts & BEGIN_CONTROL) != 0);

  return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}

/* Generate a recovery check for INSN with speculation types DS.
   If LABEL is nonnull, generate a branchy recovery check that branches
   to LABEL.  Otherwise, generate a simple (in-place) check.  */
static rtx
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}

/* Return nonzero if X is a recovery check insn; the nonzero value is
   the UNSPEC code of the check.  */
static int
ia64_spec_check_p (rtx x)
{
  x = PATTERN (x);
  if (GET_CODE (x) == COND_EXEC)
    x = COND_EXEC_CODE (x);
  if (GET_CODE (x) == SET)
    return ia64_spec_check_src_p (SET_SRC (x));
  return 0;
}

/* Return nonzero if SRC belongs to a recovery check; the nonzero value
   is the UNSPEC code of the check.  */
static int
ia64_spec_check_src_p (rtx src)
{
  if (GET_CODE (src) == IF_THEN_ELSE)
    {
      rtx t;

      t = XEXP (src, 0);
      if (GET_CODE (t) == NE)
	{
	  t = XEXP (t, 0);

	  if (GET_CODE (t) == UNSPEC)
	    {
	      int code;

	      code = XINT (t, 1);

	      if (code == UNSPEC_LDCCLR
		  || code == UNSPEC_LDCNC
		  || code == UNSPEC_CHKACLR
		  || code == UNSPEC_CHKANC
		  || code == UNSPEC_CHKS)
		{
		  gcc_assert (code != 0);
		  return code;
		}
	    }
	}
    }
  return 0;
}


/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes the state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output.  */
  int unique_num;
  rtx insn;     /* corresponding insn, NULL for the 1st and the last state  */
  /* number of nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 for the initial state, 1 for the 1st
		   insn)  */
  int cost;     /* cost of the state in cycles  */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns  */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles  */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};
/* The following maps an insn number to the corresponding bundle
   state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of the next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;


/* The following function returns a free bundle state.  */

static struct bundle_state *
get_free_bundle_state (void)
{
  struct bundle_state *result;

  if (free_bundle_state_chain != NULL)
    {
      result = free_bundle_state_chain;
      free_bundle_state_chain = result->next;
    }
  else
    {
      result = XNEW (struct bundle_state);
      result->dfa_state = xmalloc (dfa_state_size);
      result->allocated_states_chain = allocated_bundle_states_chain;
      allocated_bundle_states_chain = result;
    }
  result->unique_num = bundle_states_num++;
  return result;
}

/* The following function frees the given bundle state.  */

static void
free_bundle_state (struct bundle_state *state)
{
  state->next = free_bundle_state_chain;
  free_bundle_state_chain = state;
}

/* Start work with abstract data `bundle states'.  */

static void
initiate_bundle_states (void)
{
  bundle_states_num = 0;
  free_bundle_state_chain = NULL;
  allocated_bundle_states_chain = NULL;
}

/* Finish work with abstract data `bundle states'.  */

static void
finish_bundle_states (void)
{
  struct bundle_state *curr_state, *next_state;

  for (curr_state = allocated_bundle_states_chain;
       curr_state != NULL;
       curr_state = next_state)
    {
      next_state = curr_state->allocated_states_chain;
      free (curr_state->dfa_state);
      free (curr_state);
    }
}

/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  */

static htab_t bundle_state_table;

/* The function returns hash of BUNDLE_STATE.  */

static unsigned
bundle_state_hash (const void *bundle_state)
{
  const struct bundle_state *const state
    = (const struct bundle_state *) bundle_state;
  unsigned result, i;

  for (result = i = 0; i < dfa_state_size; i++)
    result += (((unsigned char *) state->dfa_state) [i]
	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
  return result + state->insn_num;
}

/* The function returns nonzero if the bundle state keys are equal.  */

static int
bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
{
  const struct bundle_state *const state1
    = (const struct bundle_state *) bundle_state_1;
  const struct bundle_state *const state2
    = (const struct bundle_state *) bundle_state_2;

  return (state1->insn_num == state2->insn_num
	  && memcmp (state1->dfa_state, state2->dfa_state,
		     dfa_state_size) == 0);
}
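/* A minimal usage sketch of the two callbacks above with libiberty's
   hashtab API (compiled out; insert_bundle_state below is the real
   consumer).  */
#if 0
{
  struct bundle_state *s = get_free_bundle_state ();
  void **slot;

  bundle_state_table = htab_create (50, bundle_state_hash,
				    bundle_state_eq_p, (htab_del) 0);
  slot = htab_find_slot (bundle_state_table, s, INSERT);
  if (*slot == NULL)
    /* First state seen with this (dfa_state, insn_num) key.  */
    *slot = (void *) s;
}
#endif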
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle state has been inserted into
   the table.  The table contains the best bundle state with the given
   key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  void **entry_ptr;

  entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = (void *) bundle_state;
      return TRUE;
    }
  else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
	   || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
	       && (((struct bundle_state *) *entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || (((struct bundle_state *)
			*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && (((struct bundle_state *)
			    *entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || (((struct bundle_state *)
				*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && ((struct bundle_state *)
				   *entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))
    {
      /* The new state is better than the recorded one: swap their
	 contents, keeping the recorded state's chain intact.  */
      struct bundle_state temp;

      temp = *(struct bundle_state *) *entry_ptr;
      *(struct bundle_state *) *entry_ptr = *bundle_state;
      ((struct bundle_state *) *entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}

/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
				    (htab_del) 0);
}

/* Finish work with the hash table.  */

static void
finish_bundle_state_table (void)
{
  htab_delete (bundle_state_table);
}



/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx ia64_nop;

/* The following function tries to issue NOPS_NUM nops for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_nops (struct bundle_state *curr_state, int nops_num)
{
  int i;

  for (i = 0; i < nops_num; i++)
    if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
      {
	free_bundle_state (curr_state);
	return FALSE;
      }
  return TRUE;
}

/* The following function tries to issue INSN for the current
   state without advancing the processor cycle.  If it fails, the
   function returns FALSE and frees the current state.  */

static int
try_issue_insn (struct bundle_state *curr_state, rtx insn)
{
  if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
    {
      free_bundle_state (curr_state);
      return FALSE;
    }
  return TRUE;
}
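/* Note for both helpers above: as used here, a negative result from
   state_transition means the insn (or nop) was accepted by the
   automaton without advancing the simulated cycle, so ">= 0" is the
   failure case.  For example (illustration only):

     if (state_transition (curr_state->dfa_state, ia64_nop) < 0)
       ;  (the nop fits into the current cycle)  */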
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing the processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill the whole
   bundle.  If it was successful, the function creates a new bundle
   state and inserts it into the hash table and into
   `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx insn, int try_bundle_end_p, int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (GET_CODE (PATTERN (insn)) != ASM_INPUT
		  && asm_noperands (PATTERN (insn)) < 0);

      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	{
	  /* Finish the bundle containing the asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}

/* The following function returns the position in the two-bundle window
   for the given STATE.  */

static int
get_max_pos (state_t state)
{
  if (cpu_unit_reservation_p (state, pos_6))
    return 6;
  else if (cpu_unit_reservation_p (state, pos_5))
    return 5;
  else if (cpu_unit_reservation_p (state, pos_4))
    return 4;
  else if (cpu_unit_reservation_p (state, pos_3))
    return 3;
  else if (cpu_unit_reservation_p (state, pos_2))
    return 2;
  else if (cpu_unit_reservation_p (state, pos_1))
    return 1;
  else
    return 0;
}
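/* For reference, the template numbers returned by get_template below
   denote the IA-64 bundle templates as follows (derived from the unit
   names it queries):

     0  .mii    1  .mmi    2  .mfi    3  .mmf    4  .bbb
     5  .mbb    6  .mib    7  .mmb    8  .mfb    9  .mlx  */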
/* The function returns the code of a possible template for the given
   position and state.  The function should be called only with 2 values
   of position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search,
   because of an undocumented anomaly in McKinley-derived cores which
   can cause stalls if an F-unit insn (including a NOP) is issued within
   a six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power considerations also argue against the
   use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}

/* True when INSN is important for bundling.  */
static bool
important_for_bundling_p (rtx insn)
{
  return (INSN_P (insn)
	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER);
}

/* The following function returns the first insn important for insn
   bundling that follows INSN and is located before TAIL.  */

static rtx
get_next_important_insn (rtx insn, rtx tail)
{
  for (; insn && insn != tail; insn = NEXT_INSN (insn))
    if (important_for_bundling_p (insn))
      return insn;
  return NULL_RTX;
}
*/ 8460 insn = PREV_INSN (insn); 8461 for (i = 0; i < 3; i++) 8462 { 8463 do 8464 insn = next_active_insn (insn); 8465 while (GET_CODE (insn) == INSN 8466 && get_attr_empty (insn) == EMPTY_YES); 8467 if (GET_CODE (insn) == CALL_INSN) 8468 note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); 8469 else if (note) 8470 { 8471 int code; 8472 8473 gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop 8474 || code == CODE_FOR_nop_b); 8475 if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) 8476 note = NULL_RTX; 8477 else 8478 add_reg_note (insn, REG_EH_REGION, XEXP (note, 0)); 8479 } 8480 } 8481 } 8482#endif 8483} 8484 8485/* The following function does insn bundling. Bundling means 8486 inserting templates and nop insns to fit insn groups into permitted 8487 templates. Instruction scheduling uses NDFA (non-deterministic 8488 finite automata) encoding informations about the templates and the 8489 inserted nops. Nondeterminism of the automata permits follows 8490 all possible insn sequences very fast. 8491 8492 Unfortunately it is not possible to get information about inserting 8493 nop insns and used templates from the automata states. The 8494 automata only says that we can issue an insn possibly inserting 8495 some nops before it and using some template. Therefore insn 8496 bundling in this function is implemented by using DFA 8497 (deterministic finite automata). We follow all possible insn 8498 sequences by inserting 0-2 nops (that is what the NDFA describe for 8499 insn scheduling) before/after each insn being bundled. We know the 8500 start of simulated processor cycle from insn scheduling (insn 8501 starting a new cycle has TImode). 8502 8503 Simple implementation of insn bundling would create enormous 8504 number of possible insn sequences satisfying information about new 8505 cycle ticks taken from the insn scheduling. To make the algorithm 8506 practical we use dynamic programming. Each decision (about 8507 inserting nops and implicitly about previous decisions) is described 8508 by structure bundle_state (see above). If we generate the same 8509 bundle state (key is automaton state after issuing the insns and 8510 nops for it), we reuse already generated one. As consequence we 8511 reject some decisions which cannot improve the solution and 8512 reduce memory for the algorithm. 8513 8514 When we reach the end of EBB (extended basic block), we choose the 8515 best sequence and then, moving back in EBB, insert templates for 8516 the best alternative. The templates are taken from querying 8517 automaton state for each insn in chosen bundle states. 8518 8519 So the algorithm makes two (forward and backward) passes through 8520 EBB. */ 8521 8522static void 8523bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail) 8524{ 8525 struct bundle_state *curr_state, *next_state, *best_state; 8526 rtx insn, next_insn; 8527 int insn_num; 8528 int i, bundle_end_p, only_bundle_end_p, asm_p; 8529 int pos = 0, max_pos, template0, template1; 8530 rtx b; 8531 rtx nop; 8532 enum attr_type type; 8533 8534 insn_num = 0; 8535 /* Count insns in the EBB. */ 8536 for (insn = NEXT_INSN (prev_head_insn); 8537 insn && insn != tail; 8538 insn = NEXT_INSN (insn)) 8539 if (INSN_P (insn)) 8540 insn_num++; 8541 if (insn_num == 0) 8542 return; 8543 bundling_p = 1; 8544 dfa_clean_insn_cache (); 8545 initiate_bundle_state_table (); 8546 index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2); 8547 /* First (forward) pass -- generation of bundle states. 
static void
bundling (FILE *dump, int verbose, rtx prev_head_insn, rtx tail)
{
  struct bundle_state *curr_state, *next_state, *best_state;
  rtx insn, next_insn;
  int insn_num;
  int i, bundle_end_p, only_bundle_end_p, asm_p;
  int pos = 0, max_pos, template0, template1;
  rtx b;
  rtx nop;
  enum attr_type type;

  insn_num = 0;
  /* Count insns in the EBB.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn && insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn_num++;
  if (insn_num == 0)
    return;
  bundling_p = 1;
  dfa_clean_insn_cache ();
  initiate_bundle_state_table ();
  index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
  /* First (forward) pass -- generation of bundle states.  */
  curr_state = get_free_bundle_state ();
  curr_state->insn = NULL;
  curr_state->before_nops_num = 0;
  curr_state->after_nops_num = 0;
  curr_state->insn_num = 0;
  curr_state->cost = 0;
  curr_state->accumulated_insns_num = 0;
  curr_state->branch_deviation = 0;
  curr_state->middle_bundle_stops = 0;
  curr_state->next = NULL;
  curr_state->originator = NULL;
  state_reset (curr_state->dfa_state);
  index_to_bundle_states [0] = curr_state;
  insn_num = 0;
  /* Shift the cycle mark if it is put on an insn which could be ignored.  */
  for (insn = NEXT_INSN (prev_head_insn);
       insn != tail;
       insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& (ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IGNORE
	    || GET_CODE (PATTERN (insn)) == USE
	    || GET_CODE (PATTERN (insn)) == CLOBBER)
	&& GET_MODE (insn) == TImode)
      {
	PUT_MODE (insn, VOIDmode);
	for (next_insn = NEXT_INSN (insn);
	     next_insn != tail;
	     next_insn = NEXT_INSN (next_insn))
	  if (INSN_P (next_insn)
	      && ia64_safe_itanium_class (next_insn) != ITANIUM_CLASS_IGNORE
	      && GET_CODE (PATTERN (next_insn)) != USE
	      && GET_CODE (PATTERN (next_insn)) != CLOBBER
	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
	    {
	      PUT_MODE (next_insn, TImode);
	      break;
	    }
      }
  /* Forward pass: generation of bundle states.  */
  for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
       insn != NULL_RTX;
       insn = next_insn)
    {
      gcc_assert (INSN_P (insn)
		  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
		  && GET_CODE (PATTERN (insn)) != USE
		  && GET_CODE (PATTERN (insn)) != CLOBBER);
      type = ia64_safe_type (insn);
      next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
      insn_num++;
      index_to_bundle_states [insn_num] = NULL;
      for (curr_state = index_to_bundle_states [insn_num - 1];
	   curr_state != NULL;
	   curr_state = next_state)
	{
	  pos = curr_state->accumulated_insns_num % 3;
	  next_state = curr_state->next;
	  /* We must fill up the current bundle in order to start a
	     subsequent asm insn in a new bundle.  An asm insn is always
	     placed in a separate bundle.  */
	  only_bundle_end_p
	    = (next_insn != NULL_RTX
	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
	       && ia64_safe_type (next_insn) == TYPE_UNKNOWN);
	  /* We may fill up the current bundle if it is the cycle end
	     without a group barrier.  */
	  bundle_end_p
	    = (only_bundle_end_p || next_insn == NULL_RTX
	       || (GET_MODE (next_insn) == TImode
		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
	      || type == TYPE_S)
	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
				 only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
			       only_bundle_end_p);
	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
			       only_bundle_end_p);
	}
      gcc_assert (index_to_bundle_states [insn_num]);
      for (curr_state = index_to_bundle_states [insn_num];
	   curr_state != NULL;
	   curr_state = curr_state->next)
	if (verbose >= 2 && dump)
	  {
	    /* This structure is taken from the generated code of the
	       pipeline hazard recognizer (see file insn-attrtab.c).
	       Please don't forget to change the structure if a new
	       automaton is added to the .md file.  */
	    struct DFA_chip
	    {
	      unsigned short one_automaton_state;
	      unsigned short oneb_automaton_state;
	      unsigned short two_automaton_state;
	      unsigned short twob_automaton_state;
	    };

	    fprintf
	      (dump,
	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
	       curr_state->unique_num,
	       (curr_state->originator == NULL
		? -1 : curr_state->originator->unique_num),
	       curr_state->cost,
	       curr_state->before_nops_num, curr_state->after_nops_num,
	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
	       curr_state->middle_bundle_stops,
	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	       INSN_UID (insn));
	  }
    }

  /* We should find a solution because the 2nd insn scheduling has
     found one.  */
  gcc_assert (index_to_bundle_states [insn_num]);
  /* Find a state corresponding to the best insn sequence.  */
  best_state = NULL;
  for (curr_state = index_to_bundle_states [insn_num];
       curr_state != NULL;
       curr_state = curr_state->next)
    /* We are only looking at the states with a fully filled up last
       bundle.  First we prefer insn sequences with minimal cost, then
       with minimal inserted nops, and finally with branch insns placed
       in the 3rd slots.  */
    if (curr_state->accumulated_insns_num % 3 == 0
	&& (best_state == NULL || best_state->cost > curr_state->cost
	    || (best_state->cost == curr_state->cost
		&& (curr_state->accumulated_insns_num
		    < best_state->accumulated_insns_num
		    || (curr_state->accumulated_insns_num
			== best_state->accumulated_insns_num
			&& (curr_state->branch_deviation
			    < best_state->branch_deviation
			    || (curr_state->branch_deviation
				== best_state->branch_deviation
				&& curr_state->middle_bundle_stops
				< best_state->middle_bundle_stops)))))))
      best_state = curr_state;
  /* Second (backward) pass: adding nops and templates.  */
  gcc_assert (best_state);
  insn_num = best_state->before_nops_num;
  template0 = template1 = -1;
  for (curr_state = best_state;
       curr_state->originator != NULL;
       curr_state = curr_state->originator)
    {
      insn = curr_state->insn;
      asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT
	       || asm_noperands (PATTERN (insn)) >= 0);
      insn_num++;
      if (verbose >= 2 && dump)
	{
	  struct DFA_chip
	  {
	    unsigned short one_automaton_state;
	    unsigned short oneb_automaton_state;
	    unsigned short two_automaton_state;
	    unsigned short twob_automaton_state;
	  };

	  fprintf
	    (dump,
	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
	     curr_state->unique_num,
	     (curr_state->originator == NULL
	      ? -1 : curr_state->originator->unique_num),
	     curr_state->cost,
	     curr_state->before_nops_num, curr_state->after_nops_num,
	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
	     curr_state->middle_bundle_stops,
	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
	     INSN_UID (insn));
	}
      /* Find the position in the current bundle window.  The window can
	 contain at most two bundles.  A two-bundle window means that
	 the processor will make two bundle rotations.  */
      max_pos = get_max_pos (curr_state->dfa_state);
      if (max_pos == 6
	  /* The following (negative template number) means that the
	     processor did one bundle rotation.  */
	  || (max_pos == 3 && template0 < 0))
	{
	  /* We are at the end of the window -- find template(s) for
	     its bundle(s).  */
	  pos = max_pos;
	  if (max_pos == 3)
	    template0 = get_template (curr_state->dfa_state, 3);
	  else
	    {
	      template1 = get_template (curr_state->dfa_state, 3);
	      template0 = get_template (curr_state->dfa_state, 6);
	    }
	}
      if (max_pos > 3 && template1 < 0)
	/* It may happen when we have the stop inside a bundle.  */
	{
	  gcc_assert (pos <= 3);
	  template1 = get_template (curr_state->dfa_state, 3);
	  pos += 3;
	}
      if (!asm_p)
	/* Emit nops after the current insn.  */
	for (i = 0; i < curr_state->after_nops_num; i++)
	  {
	    nop = gen_nop ();
	    emit_insn_after (nop, insn);
	    pos--;
	    gcc_assert (pos >= 0);
	    if (pos % 3 == 0)
	      {
		/* We are at the start of a bundle: emit the template
		   (it should be defined).  */
		gcc_assert (template0 >= 0);
		ia64_add_bundle_selector_before (template0, nop);
		/* If we have a two-bundle window, we make one bundle
		   rotation.  Otherwise template0 will be undefined
		   (negative value).  */
		template0 = template1;
		template1 = -1;
	      }
	  }
      /* Move the position backward in the window.  A group barrier has
	 no slot.  An asm insn takes a whole bundle.  */
      if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
	  && asm_noperands (PATTERN (insn)) < 0)
	pos--;
      /* A long (L) insn takes 2 slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	pos--;
      gcc_assert (pos >= 0);
      if (pos % 3 == 0
	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
	  && GET_CODE (PATTERN (insn)) != ASM_INPUT
	  && asm_noperands (PATTERN (insn)) < 0)
	{
	  /* The current insn is at the bundle start: emit the
	     template.  */
	  gcc_assert (template0 >= 0);
	  ia64_add_bundle_selector_before (template0, insn);
	  b = PREV_INSN (insn);
	  insn = b;
	  /* See comment above in analogous place for emitting nops
	     after the insn.  */
	  template0 = template1;
	  template1 = -1;
	}
      /* Emit nops before the current insn.  */
      for (i = 0; i < curr_state->before_nops_num; i++)
	{
	  nop = gen_nop ();
	  ia64_emit_insn_before (nop, insn);
	  nop = PREV_INSN (insn);
	  insn = nop;
	  pos--;
	  gcc_assert (pos >= 0);
	  if (pos % 3 == 0)
	    {
	      /* See comment above in analogous place for emitting nops
		 after the insn.  */
	      gcc_assert (template0 >= 0);
	      ia64_add_bundle_selector_before (template0, insn);
	      b = PREV_INSN (insn);
	      insn = b;
	      template0 = template1;
	      template1 = -1;
	    }
	}
    }

#ifdef ENABLE_CHECKING
  {
    /* Assert the correct calculation of middle_bundle_stops.  */
    int num = best_state->middle_bundle_stops;
    bool start_bundle = true, end_bundle = false;

    for (insn = NEXT_INSN (prev_head_insn);
	 insn && insn != tail;
	 insn = NEXT_INSN (insn))
      {
	if (!INSN_P (insn))
	  continue;
	if (recog_memoized (insn) == CODE_FOR_bundle_selector)
	  start_bundle = true;
	else
	  {
	    rtx next_insn;

	    for (next_insn = NEXT_INSN (insn);
		 next_insn && next_insn != tail;
		 next_insn = NEXT_INSN (next_insn))
	      if (INSN_P (next_insn)
		  && (ia64_safe_itanium_class (next_insn)
		      != ITANIUM_CLASS_IGNORE
		      || recog_memoized (next_insn)
		      == CODE_FOR_bundle_selector)
		  && GET_CODE (PATTERN (next_insn)) != USE
		  && GET_CODE (PATTERN (next_insn)) != CLOBBER)
		break;

	    end_bundle = next_insn == NULL_RTX
	      || next_insn == tail
	      || (INSN_P (next_insn)
		  && recog_memoized (next_insn)
		  == CODE_FOR_bundle_selector);
	    if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
		&& !start_bundle && !end_bundle
		&& next_insn
		&& GET_CODE (PATTERN (next_insn)) != ASM_INPUT
		&& asm_noperands (PATTERN (next_insn)) < 0)
	      num--;

	    start_bundle = false;
	  }
      }

    gcc_assert (num == 0);
  }
#endif

  free (index_to_bundle_states);
  finish_bundle_state_table ();
  bundling_p = 0;
  dfa_clean_insn_cache ();
}

/* The following function is called at the end of scheduling a BB or
   EBB.  After reload, it inserts stop bits and does insn bundling.  */

static void
ia64_sched_finish (FILE *dump, int sched_verbose)
{
  if (sched_verbose)
    fprintf (dump, "// Finishing schedule.\n");
  if (!reload_completed)
    return;

  final_emit_insn_group_barriers (dump);
  bundling (dump, sched_verbose, current_sched_info->prev_head,
	    current_sched_info->next_tail);
  if (sched_verbose && dump)
    fprintf (dump, "//    finishing %d-%d\n",
	     INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
	     INSN_UID (PREV_INSN (current_sched_info->next_tail)));
}
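/* For illustration: the group barrier emitted below is printed as the
   ";;" stop bit in IA-64 assembly, e.g.

     add r14 = r15, r16
     ;;
     ld8 r17 = [r14]

   where the stop separates the address computation from its use.  */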
/* The following function inserts stop bits in the scheduled BB or EBB.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx insn;
  int need_barrier_p = 0;
  int seen_good_insn = 0;
  rtx prev_insn = NULL_RTX;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == BARRIER)
	{
	  rtx last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (GET_CODE (last) == JUMP_INSN
	      && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC)
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	  prev_insn = NULL_RTX;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	      prev_insn = NULL_RTX;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  rtx last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	      prev_insn = NULL_RTX;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    {
	      prev_insn = insn;
	      seen_good_insn = 1;
	    }
	  need_barrier_p = (GET_CODE (insn) == CALL_INSN
			    || GET_CODE (PATTERN (insn)) == ASM_INPUT
			    || asm_noperands (PATTERN (insn)) >= 0);
	}
    }
}



/* The following function returns the depth of the DFA lookahead used
   by the first-cycle multipass insn scheduling; a nonzero value
   enables that multipass lookahead.  */

static int
ia64_first_cycle_multipass_dfa_lookahead (void)
{
  return (reload_completed ? 6 : 4);
}

/* The following function initiates the variable `dfa_pre_cycle_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  PREV_INSN (dfa_pre_cycle_insn) = NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  PREV_INSN (dfa_stop_insn) = NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}

/* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   used by the DFA insn scheduler.  */

static rtx
ia64_dfa_pre_cycle_insn (void)
{
  return dfa_pre_cycle_insn;
}
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type st or stf).  */

int
ia64_st_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  dest = ia64_single_set (consumer);
  gcc_assert (dest);
  mem = SET_DEST (dest);
  gcc_assert (mem && GET_CODE (mem) == MEM);
  return reg_mentioned_p (reg, mem);
}

/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces the address for CONSUMER (of type ld or fld).  */

int
ia64_ld_address_bypass_p (rtx producer, rtx consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  return reg_mentioned_p (reg, mem);
}

/* The following function returns TRUE if INSN produces the address for
   a load/store insn.  We will place such insns into an M slot because
   that decreases their latency.  */

int
ia64_produce_address_p (rtx insn)
{
  /* The insn's `call' flag is reused here as a mark that the insn
     produces an address; it is set elsewhere in this file during
     dependence analysis.  */
  return insn->call;
}
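/* For illustration: the bypass predicates above let the .md bypasses
   model an address producer feeding a memory insn, e.g.

     add r14 = r15, r16    // producer: ilog insn writing r14
     ;;
     st8 [r14] = r17       // consumer: st whose address mentions r14

   a pairing for which ia64_st_address_bypass_p returns TRUE.  */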
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE (bb)
    {
      int r;
      rtx head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (GET_CODE (head) != CODE_LABEL)
	continue;
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      rtx insn = BB_HEAD (bb);

      while (1)
	{
	  if (GET_CODE (insn) == CALL_INSN
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      rtx b = emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}

/* Perform machine dependent operations on the rtl chain INSNS.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && ia64_flag_schedule_insns2
      && dbg_cnt (ia64_sched2))
    {
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      initiate_bundle_states ();
      ia64_nop = make_insn_raw (gen_nop ());
      PREV_INSN (ia64_nop) = NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      stops_p = XCNEWVEC (char, clocks_length);

      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
	run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might have gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))
    {
      rtx insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
	insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (GET_CODE (insn) == INSN
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (GET_CODE (insn) == CALL_INSN)
	    {
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  if (ia64_flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}

/* Return true if REGNO is used by the epilogue.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
	 the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
/* Return true if REGNO is used by the frame unwinder.  */

int
ia64_eh_uses (int regno)
{
  unsigned int r;

  if (! reload_completed)
    return 0;

  if (regno == 0)
    return 0;

  for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
    if (regno == current_frame_info.r[r]
	|| regno == emitted_frame_related_regs[r])
      return 1;

  return 0;
}

/* Return true if this goes in small data/bss.  */

/* ??? We could also support our own long data here.  Generating
   movl/add/ld8 instead of addl,ld8/ld8.  This makes the code bigger, but
   should make the code faster because there is one less load.  This also
   includes incomplete types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (const_tree exp)
{
  if (TARGET_NO_SDATA)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (exp) == STRING_CST)
    return false;

  /* Functions are never small data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));

      if (strcmp (section, ".sdata") == 0
	  || strncmp (section, ".sdata.", 7) == 0
	  || strncmp (section, ".gnu.linkonce.s.", 16) == 0
	  || strcmp (section, ".sbss") == 0
	  || strncmp (section, ".sbss.", 6) == 0
	  || strncmp (section, ".gnu.linkonce.sb.", 17) == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}
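/* Illustration (assumed default section threshold): a 4-byte global
   such as "int counter;" satisfies the size test above and is placed
   in .sbss, so it can be addressed gp-relative in one instruction,
   e.g. "addl r14 = @gprel(counter), gp", rather than loading its
   address from the GOT first.  */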
/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last block of the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif

/* Emit a debugging label after a call-frame-related insn.  We'd
   rather output the label right away, but we'd have to output it
   after, not before, the instruction, and the instruction has not
   been output yet.  So we emit the label after the insn, delete it to
   avoid introducing basic blocks, and mark it as preserved, such that
   it is still output, given that it is referenced in debug info.  */

static const char *
ia64_emit_deleted_label_after_insn (rtx insn)
{
  char label[MAX_ARTIFICIAL_LABEL_BYTES];
  rtx lb = gen_label_rtx ();
  rtx label_insn = emit_label_after (lb, insn);

  LABEL_PRESERVE_P (lb) = 1;

  delete_insn (label_insn);

  ASM_GENERATE_INTERNAL_LABEL (label, "L", CODE_LABEL_NUMBER (label_insn));

  return xstrdup (label);
}

/* Define the CFA after INSN with the steady-state definition.  */

static void
ia64_dwarf2out_def_steady_cfa (rtx insn, bool frame)
{
  rtx fp = frame_pointer_needed
    ? hard_frame_pointer_rtx
    : stack_pointer_rtx;
  const char *label = ia64_emit_deleted_label_after_insn (insn);

  if (!frame)
    return;

  dwarf2out_def_cfa
    (label, REGNO (fp),
     ia64_initial_elimination_offset
     (REGNO (arg_pointer_rtx), REGNO (fp))
     + ARG_POINTER_CFA_OFFSET (current_function_decl));
}

/* The generic dwarf2 frame debug info generator does not define a
   separate region for the very end of the epilogue, so refrain from
   doing so in the IA64-specific code as well.  */

#define IA64_CHANGE_CFA_IN_EPILOGUE 0

/* This function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue (FILE *asm_out_file, rtx insn, bool unwind, bool frame)
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      if (unwind)
	fprintf (asm_out_file, "\t.label_state %d\n",
		 ++cfun->machine->state_num);
      need_copy_state = true;
    }

  if (unwind)
    fprintf (asm_out_file, "\t.restore sp\n");
  if (IA64_CHANGE_CFA_IN_EPILOGUE && frame)
    dwarf2out_def_cfa (ia64_emit_deleted_label_after_insn (insn),
		       STACK_POINTER_REGNUM, INCOMING_FRAME_SP_OFFSET);
}
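/* For illustration (assumed output): an epilogue in the middle of a
   function yields

	.label_state 1
	.restore sp

   and the block that follows then starts with ".body" and
   ".copy_state 1", emitted from process_for_unwind_directive below.  */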
/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (FILE *asm_out_file, rtx pat, rtx insn, bool unwind, bool frame)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* If we haven't done process_epilogue () yet, do it now.  */
	    process_epilogue (asm_out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (asm_out_file, "\t.prologue\n");
	}
      return 1;
    }

  /* Look for SP = ....  */
  if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM)
    {
      if (GET_CODE (src) == PLUS)
	{
	  rtx op0 = XEXP (src, 0);
	  rtx op1 = XEXP (src, 1);

	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);

	  if (INTVAL (op1) < 0)
	    {
	      gcc_assert (!frame_pointer_needed);
	      if (unwind)
		fprintf (asm_out_file,
			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC "\n",
			 -INTVAL (op1));
	      ia64_dwarf2out_def_steady_cfa (insn, frame);
	    }
	  else
	    process_epilogue (asm_out_file, insn, unwind, frame);
	}
      else
	{
	  gcc_assert (GET_CODE (src) == REG
		      && REGNO (src) == HARD_FRAME_POINTER_REGNUM);
	  process_epilogue (asm_out_file, insn, unwind, frame);
	}

      return 1;
    }

  /* Register move we need to look at.  */
  if (GET_CODE (dest) == REG && GET_CODE (src) == REG)
    {
      src_regno = REGNO (src);
      dest_regno = REGNO (dest);

      switch (src_regno)
	{
	case BR_REG (0):
	  /* Saving return address pointer.  */
	  gcc_assert (dest_regno == current_frame_info.r[reg_save_b0]);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save rp, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case PR_REG (0):
	  gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save pr, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_UNAT_REGNUM:
	  gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.unat, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case AR_LC_REGNUM:
	  gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
	  if (unwind)
	    fprintf (asm_out_file, "\t.save ar.lc, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  return 1;

	case STACK_POINTER_REGNUM:
	  gcc_assert (dest_regno == HARD_FRAME_POINTER_REGNUM
		      && frame_pointer_needed);
	  if (unwind)
	    fprintf (asm_out_file, "\t.vframe r%d\n",
		     ia64_dbx_register_number (dest_regno));
	  ia64_dwarf2out_def_steady_cfa (insn, frame);
	  return 1;

	default:
	  /* Everything else should indicate being stored to memory.  */
	  gcc_unreachable ();
	}
    }
  /* Memory store we need to look at.  */
  if (GET_CODE (dest) == MEM && GET_CODE (src) == REG)
    {
      long off;
      rtx base;
      const char *saveop;

      if (GET_CODE (XEXP (dest, 0)) == REG)
	{
	  base = XEXP (dest, 0);
	  off = 0;
	}
      else
	{
	  gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
	  base = XEXP (XEXP (dest, 0), 0);
	  off = INTVAL (XEXP (XEXP (dest, 0), 1));
	}

      if (base == hard_frame_pointer_rtx)
	{
	  saveop = ".savepsp";
	  off = -off;
	}
      else
	{
	  gcc_assert (base == stack_pointer_rtx);
	  saveop = ".savesp";
	}

      src_regno = REGNO (src);
      switch (src_regno)
	{
	case BR_REG (0):
	  gcc_assert (!current_frame_info.r[reg_save_b0]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off);
	  return 1;

	case PR_REG (0):
	  gcc_assert (!current_frame_info.r[reg_save_pr]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off);
	  return 1;

	case AR_LC_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off);
	  return 1;

	case AR_PFS_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off);
	  return 1;

	case AR_UNAT_REGNUM:
	  gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
	  if (unwind)
	    fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off);
	  return 1;

	case GR_REG (4):
	case GR_REG (5):
	case GR_REG (6):
	case GR_REG (7):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.g 0x%x\n",
		     1 << (src_regno - GR_REG (4)));
	  return 1;

	case BR_REG (1):
	case BR_REG (2):
	case BR_REG (3):
	case BR_REG (4):
	case BR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.b 0x%x\n",
		     1 << (src_regno - BR_REG (1)));
	  return 1;

	case FR_REG (2):
	case FR_REG (3):
	case FR_REG (4):
	case FR_REG (5):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.f 0x%x\n",
		     1 << (src_regno - FR_REG (2)));
	  return 1;

	case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
	case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
	case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
	case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
	  if (unwind)
	    fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n",
		     1 << (src_regno - FR_REG (12)));
	  return 1;

	default:
	  return 0;
	}
    }

  return 0;
}
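/* For illustration (assumed output; register numbers vary with what
   ia64_expand_prologue chose): the SETs of a typical prologue produce
   directives such as

	.save ar.pfs, r35
	.fframe 16
	.save rp, r34

   which tell the unwinder where each resource was saved.  */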
/* This function looks at a single insn and emits any directives
   required to unwind this insn.  */

void
process_for_unwind_directive (FILE *asm_out_file, rtx insn)
{
  bool unwind = (flag_unwind_tables
		 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS));
  bool frame = dwarf2out_do_frame ();

  if (unwind || frame)
    {
      rtx pat;

      if (NOTE_INSN_BASIC_BLOCK_P (insn))
	{
	  last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR;

	  /* Restore unwind state from immediately before the epilogue.  */
	  if (need_copy_state)
	    {
	      if (unwind)
		{
		  fprintf (asm_out_file, "\t.body\n");
		  fprintf (asm_out_file, "\t.copy_state %d\n",
			   cfun->machine->state_num);
		}
	      if (IA64_CHANGE_CFA_IN_EPILOGUE)
		ia64_dwarf2out_def_steady_cfa (insn, frame);
	      need_copy_state = false;
	    }
	}

      if (GET_CODE (insn) == NOTE || ! RTX_FRAME_RELATED_P (insn))
	return;

      pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
      if (pat)
	pat = XEXP (pat, 0);
      else
	pat = PATTERN (insn);

      switch (GET_CODE (pat))
	{
	case SET:
	  process_set (asm_out_file, pat, insn, unwind, frame);
	  break;

	case PARALLEL:
	  {
	    int par_index;
	    int limit = XVECLEN (pat, 0);
	    for (par_index = 0; par_index < limit; par_index++)
	      {
		rtx x = XVECEXP (pat, 0, par_index);
		if (GET_CODE (x) == SET)
		  process_set (asm_out_file, x, insn, unwind, frame);
	      }
	    break;
	  }

	default:
	  gcc_unreachable ();
	}
    }
}


enum ia64_builtins
{
  IA64_BUILTIN_BSP,
  IA64_BUILTIN_COPYSIGNQ,
  IA64_BUILTIN_FABSQ,
  IA64_BUILTIN_FLUSHRS,
  IA64_BUILTIN_INFQ,
  IA64_BUILTIN_HUGE_VALQ
};

void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  */
  float80_type = make_node (REAL_TYPE);
  TYPE_PRECISION (float80_type) = 80;
  layout_type (float80_type);
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype, decl;
      tree float128_type = make_node (REAL_TYPE);

      TYPE_PRECISION (float128_type) = 128;
      layout_type (float128_type);
      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type (float128_type, void_list_node);
      add_builtin_function ("__builtin_infq", ftype,
			    IA64_BUILTIN_INFQ, BUILT_IN_MD,
			    NULL, NULL_TREE);

      add_builtin_function ("__builtin_huge_valq", ftype,
			    IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
			    NULL, NULL_TREE);

      ftype = build_function_type_list (float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;

      ftype = build_function_type_list (float128_type,
					float128_type,
					float128_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      implicit_built_in_decls[(int) BUILT_IN_FWRITE] = NULL_TREE;
      implicit_built_in_decls[(int) BUILT_IN_FWRITE_UNLOCKED] = NULL_TREE;
    }

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,		\
			NULL, NULL_TREE)

  def_builtin ("__builtin_ia64_bsp",
	       build_function_type (ptr_type_node, void_list_node),
	       IA64_BUILTIN_BSP);

  def_builtin ("__builtin_ia64_flushrs",
	       build_function_type (void_type_node, void_list_node),
	       IA64_BUILTIN_FLUSHRS);

#undef def_builtin

  if (TARGET_HPUX)
    {
      if (built_in_decls [BUILT_IN_FINITE])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITE],
	  "_Isfinite");
      if (built_in_decls [BUILT_IN_FINITEF])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF],
	  "_Isfinitef");
      if (built_in_decls [BUILT_IN_FINITEL])
	set_user_assembler_name (built_in_decls [BUILT_IN_FINITEL],
	  "_Isfinitef128");
    }
}
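/* Example use of the builtins registered above (illustrative user
   code): "void *p = __builtin_ia64_bsp ();" reads the RSE backing
   store pointer, and "__builtin_ia64_flushrs ();" flushes the
   register stack to the backing store.  __builtin_infq,
   __builtin_fabsq and __builtin_copysignq exist only where __float128
   is a distinct type, i.e. not on HP-UX.  */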
rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}

/* For HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (enum machine_mode mode, const_tree type)
{
  /* Exception to normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* Fall back to the default.  */
  return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
}
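/* Illustration of the rule above (HP-UX only; assumed layout): a
   3-byte struct passed on the stack is padded "upward", i.e. placed in
   the most significant bytes of its word-sized slot, while scalars
   keep the default padding for their mode.  */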
/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */

void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used.  */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

#ifdef DO_CRTL_NAMES
      DO_CRTL_NAMES;
#endif

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	(*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}

/* Set SImode div/mod functions; init_integral_libfuncs only initializes
   word_mode and wider modes.  Rename the TFmode libfuncs using the HPUX
   conventions.  __divtf3 is used for XFmode; we need to keep it for
   backward compatibility.  */

static void
ia64_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
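/* Example effect (illustrative): with the table above in place, a
   TFmode addition such as

     __float128 f (__float128 a, __float128 b) { return a + b; }

   is expanded as a call to _U_Qfadd rather than the default __addtf3,
   matching the HP-UX quad-precision libcall names.  */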
/* Rename all the TFmode libfuncs using the HPUX conventions.  */

static void
ia64_hpux_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}

/* Rename the division and modulus functions in VMS.  */

static void
ia64_vms_init_libfuncs (void)
{
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
  abort_libfunc = init_one_libfunc ("decc$abort");
  memcmp_libfunc = init_one_libfunc ("decc$memcmp");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}

/* Rename the TFmode libfuncs available from soft-fp in glibc using
   the HPUX conventions.  */

static void
ia64_sysv4_init_libfuncs (void)
{
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}

/* Use soft-fp.  */

static void
ia64_soft_fp_init_libfuncs (void)
{
}

static bool
ia64_vms_valid_pointer_mode (enum machine_mode mode)
{
  return (mode == SImode || mode == DImode);
}

/* For HPUX, it is illegal to have relocations in shared segments.  */

static int
ia64_hpux_reloc_rw_mask (void)
{
  return 3;
}

/* For others, relax this so that relocations to local data go in
   read-only segments, but we still cannot allow global relocations
   in read-only segments.  */

static int
ia64_reloc_rw_mask (void)
{
  return flag_pic ? 3 : 2;
}
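/* Reading the masks above (this assumes the generic convention that
   bit 0 stands for local and bit 1 for global relocations): 3 keeps
   everything carrying a relocation out of read-only sections, while 2
   still allows locally bound relocations, e.g. a static pointer to a
   static variable, into .rodata for non-PIC code.  */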
/* Return the section to use for X.  The only special thing we do here
   is to honor small data.  */

static section *
ia64_select_rtx_section (enum machine_mode mode, rtx x,
			 unsigned HOST_WIDE_INT align)
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold
      && !TARGET_NO_SDATA)
    return sdata_section;
  else
    return default_elf_select_rtx_section (mode, x, align);
}

static unsigned int
ia64_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = 0;

  if (strcmp (name, ".sdata") == 0
      || strncmp (name, ".sdata.", 7) == 0
      || strncmp (name, ".gnu.linkonce.s.", 16) == 0
      || strncmp (name, ".sdata2.", 8) == 0
      || strncmp (name, ".gnu.linkonce.s2.", 17) == 0
      || strcmp (name, ".sbss") == 0
      || strncmp (name, ".sbss.", 6) == 0
      || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
    flags = SECTION_SMALL;

#if TARGET_ABI_OPEN_VMS
  if (decl && DECL_ATTRIBUTES (decl)
      && lookup_attribute ("common_object", DECL_ATTRIBUTES (decl)))
    flags |= SECTION_VMS_OVERLAY;
#endif

  flags |= default_section_type_flags (decl, name, reloc);
  return flags;
}

/* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   structure type and the address of its return value should be passed
   in out0, rather than in r8.  */

static bool
ia64_struct_retval_addr_is_first_parm_p (tree fntype)
{
  tree ret_type = TREE_TYPE (fntype);

  /* The Itanium C++ ABI requires that out0, rather than r8, be used
     as the structure return address parameter, if the return value
     type has a non-trivial copy constructor or destructor.  It is not
     clear if this same convention should be used for other
     programming languages.  Until G++ 3.4, we incorrectly used r8 for
     these return values.  */
  return (abi_version_at_least (2)
	  && ret_type
	  && TYPE_MODE (ret_type) == BLKmode
	  && TREE_ADDRESSABLE (ret_type)
	  && strcmp (lang_hooks.name, "GNU C++") == 0);
}
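/* Illustrative C++ example (not from this file): given

     struct S { S (const S &); ~S (); char buf[32]; };
     S make_s (void);

   the hidden return-slot address for make_s is passed in out0, since
   S has a non-trivial copy constructor; a plain-old-data struct of
   the same size would have its return slot addressed through r8.  */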
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  rtx this_rtx, insn, funexp;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      if (!satisfies_constraint_I (delta_rtx))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
	  if (satisfies_constraint_I (vcall_offset_rtx))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
	{
	  if (!satisfies_constraint_J (vcall_offset_rtx))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      if (TARGET_ILP32)
	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_alloc ();
  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
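/* For illustration (assumed output for the simple LP64 case): a thunk
   with DELTA = -8 and no vcall offset reduces to roughly

	adds r32 = -8, r32
	br.sptk.many some_method#

   i.e. adjust the incoming "this" in in0 and branch straight to the
   target function, with no frame of its own.  */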
/* Worker function for TARGET_STRUCT_VALUE_RTX.  */

static rtx
ia64_struct_value_rtx (tree fntype,
		       int incoming ATTRIBUTE_UNUSED)
{
  if (TARGET_ABI_OPEN_VMS
      || (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
    return NULL_RTX;
  return gen_rtx_REG (Pmode, GR_REG (8));
}

static bool
ia64_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case DImode:
    case TImode:
      return true;

    case SFmode:
    case DFmode:
    case XFmode:
    case RFmode:
      return true;

    case TFmode:
      return true;

    default:
      return false;
    }
}

static bool
ia64_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V8QImode:
    case V4HImode:
    case V2SImode:
      return true;

    case V2SFmode:
      return true;

    default:
      return false;
    }
}

/* Implement the FUNCTION_PROFILER macro.  */

void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non-canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
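/* Sketch of the -pg instrumentation above (illustrative): each
   instrumented function entry allocates four output registers and
   emits

	br.call.sptk.many b0 = _mcount

   passing the caller's ar.pfs (out0, written by the alloc), gp
   (out1), the return branch register b0 (out2) and, unless
   NO_PROFILE_COUNTERS, the address of a per-call-site counter word
   (out3).  */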
static GTY(()) rtx mcount_func_rtx;

static rtx
gen_mcount_func_rtx (void)
{
  if (!mcount_func_rtx)
    mcount_func_rtx = init_one_libfunc ("_mcount");
  return mcount_func_rtx;
}

void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      label_name = (*targetm.strip_name_encoding) (ggc_strdup (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
		     VOIDmode, 3,
		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
		     ip, Pmode,
		     label, Pmode);
}

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ia64_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* On HP-UX, "long double" is mangled as "e", and __float128 is that
     same type, so it is mangled as "e" there too; elsewhere TFmode
     __float128 is distinct and gets the vendor mangling "g".  */
  if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
    return "g";
  /* On HP-UX, "e" is not available as a mangling of __float80 so use
     an extended mangling.  Elsewhere, "e" is available since long
     double is 80 bits.  */
  if (TYPE_MODE (type) == XFmode)
    return TARGET_HPUX ? "u9__float80" : "e";
  if (TYPE_MODE (type) == RFmode)
    return "u7__fpreg";
  return NULL;
}
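/* Mangling examples (standard Itanium C++ ABI vendor-extended
   encoding "u<len><name>"; shown for illustration):

     void f (__fpreg);      mangles to _Z1fu7__fpreg
     void g (__float80);    mangles to _Z1gu9__float80 on HP-UX and to
			    _Z1ge elsewhere, where long double is
			    80 bits.  */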
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */
static const char *
ia64_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Reject nontrivial conversion to or from __fpreg.  */
  if (TYPE_MODE (fromtype) == RFmode
      && TYPE_MODE (totype) != RFmode
      && TYPE_MODE (totype) != VOIDmode)
    return N_("invalid conversion from %<__fpreg%>");
  if (TYPE_MODE (totype) == RFmode
      && TYPE_MODE (fromtype) != RFmode)
    return N_("invalid conversion to %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the unary operation OP is
   not permitted on TYPE, NULL otherwise.  */
static const char *
ia64_invalid_unary_op (int op, const_tree type)
{
  /* Reject operations on __fpreg other than unary + or &.  */
  if (TYPE_MODE (type) == RFmode
      && op != CONVERT_EXPR
      && op != ADDR_EXPR)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
			const_tree type2)
{
  /* Reject operations on __fpreg.  */
  if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
    return N_("invalid operation on %<__fpreg%>");
  return NULL;
}

/* Implement overriding of the optimization options.  */
void
ia64_optimization_options (int level ATTRIBUTE_UNUSED,
			   int size ATTRIBUTE_UNUSED)
{
  /* Let the scheduler form additional regions.  */
  set_param_value ("max-sched-extend-regions-iters", 2);

  /* Set the default values for cache-related parameters.  */
  set_param_value ("simultaneous-prefetches", 6);
  set_param_value ("l1-cache-line-size", 32);

  set_param_value ("sched-mem-true-dep-cost", 4);
}

/* HP-UX version_id attribute.
   For object foo, if the version_id is set to 1234 put out an alias
   of '.alias foo "foo{1234}"'.  We can't use "foo{1234}" in anything
   other than an alias statement because it is an illegal symbol name.  */

static tree
ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
				  tree name ATTRIBUTE_UNUSED,
				  tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree arg = TREE_VALUE (args);

  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("version attribute is not a string");
      *no_add_attrs = true;
      return NULL_TREE;
    }
  return NULL_TREE;
}

/* Target hook for c_mode_for_suffix.  The 'q' suffix selects TFmode
   (__float128) and the 'w' suffix selects XFmode (__float80).  */

static enum machine_mode
ia64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

static enum machine_mode
ia64_promote_function_mode (const_tree type,
			    enum machine_mode mode,
			    int *punsignedp,
			    const_tree funtype,
			    int for_return)
{
  /* Special processing required for OpenVMS ...  */

  if (!TARGET_ABI_OPEN_VMS)
    return default_promote_function_mode (type, mode, punsignedp, funtype,
					  for_return);

  /* HP OpenVMS Calling Standard dated June, 2004, which describes
     HP OpenVMS I64 Version 8.2EFT,
     chapter 4 "OpenVMS I64 Conventions"
     section 4.7 "Procedure Linkage"
     subsection 4.7.5.2, "Normal Register Parameters"

     "Unsigned integral (except unsigned 32-bit), set, and VAX
     floating-point values passed in registers are zero-filled;
     signed integral values as well as unsigned 32-bit integral
     values are sign-extended to 64 bits.  For all other types
     passed in the general registers, unused bits are undefined."  */

  if (!AGGREGATE_TYPE_P (type)
      && GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
    {
      if (mode == SImode)
	*punsignedp = 0;
      return DImode;
    }
  else
    return promote_mode (type, mode, punsignedp);
}

static GTY(()) rtx ia64_dconst_0_5_rtx;

rtx
ia64_dconst_0_5 (void)
{
  if (! ia64_dconst_0_5_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.5");
      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_5_rtx;
}

static GTY(()) rtx ia64_dconst_0_375_rtx;

rtx
ia64_dconst_0_375 (void)
{
  if (! ia64_dconst_0_375_rtx)
    {
      REAL_VALUE_TYPE rv;
      real_from_string (&rv, "0.375");
      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
    }
  return ia64_dconst_0_375_rtx;
}


#include "gt-ia64.h"