/* ia64.c revision 117395 */
/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "langhooks.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };
/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};
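/* A note on conventions, derived from find_gr_spill below: each
   reg_save_* field holds the number of the general register chosen to
   hold that save, or 0 if no GR was available and the value is kept in
   the memory spill area instead.  */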
/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					  HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
						     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;
static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true, true, NULL },
  { NULL,	       0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}
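/* For reference, the name encodings tested by the predicates above
   (derived from the tests themselves, not from the encoding side):
   after ENCODE_SECTION_INFO_CHAR, 's' marks an sdata/sbss symbol,
   while 'G', 'L', 'i' and 'l' select the global-dynamic,
   local-dynamic, initial-exec and local-exec TLS models.  */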
/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}
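/* Note that gr_register_operand and the register predicates that
   follow accept any pseudo (REGNO >= FIRST_PSEUDO_REGISTER), since
   before reload a pseudo may still end up in the class being tested;
   only hard registers are checked against GENERAL_REGNO_P and
   friends.  */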
/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
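/* A quick key to the constraint letters used above, with the widths
   taken from the function comments: M is a 6 bit immediate, K an
   8 bit, L an 8 bit "adjusted", I a 14 bit and J a 22 bit immediate.
   The 14 and 22 bit forms match what the adds and addl instructions
   can encode (an assumption; compare the ia64_tls_size comment).  */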
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}
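/* Illustrative note; the assembly sketches are assumptions about the
   insn patterns these predicates feed.  shladd_operand's multipliers
   2/4/8/16 correspond to shladd shift counts 1-4, as in
   "shladd r14 = r15, 2, r16" for r14 = (r15 << 2) + r16; the values
   accepted by fetchadd_operand are exactly the increments a fetchadd
   instruction can encode, e.g. "fetchadd8.acq r8 = [r32], 16".  */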
/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and then either 0 or 0.0 or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
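/* For illustration, restating ia64_move_ok: reg <- reg, reg <- mem and
   mem <- reg are always OK; mem <- mem is rejected and must go through
   a register; a constant may be stored directly only if it is 0
   (integer modes) or satisfies CONST_DOUBLE_OK_FOR_G, i.e. the 0.0 and
   1.0 that the hardware supplies in f0 and f1.  */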
/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of GNAT c++/6685.  */

int
addp4_optimize_ok (op1, op2)
     rtx op1, op2;
{

  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
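/* Worked example for the mask check above: ROP = 0x7f0 with SHIFT = 4
   shifts down to 0x7f; 0x7f + 1 == 0x80 and exact_log2 (0x80) == 7, so
   this is a valid 7 bit field for dep.z.  ROP = 0x6f0 fails:
   0x6f + 1 == 0x70 is not a power of two, so exact_log2 returns -1.  */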
/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    if (! scratch || ! register_operand (scratch, DImode))
      temp = gen_reg_rtx (DImode);
    else
      temp = scratch;
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
#ifdef POINTERS_EXTEND_UNSIGNED
      if (GET_MODE (temp) != GET_MODE (src))
	src = convert_memory_address (GET_MODE (temp), src);
#endif
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    {
      if (GET_MODE (dest) != GET_MODE (temp))
	temp = convert_to_mode (GET_MODE (dest), temp, 0);
      emit_move_insn (dest, temp);
    }
}
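/* Worked example for the offset split above: with ofs = 0x12345,
   ofs & 0x3fff is 0x2345; XORing with 0x2000 and subtracting 0x2000
   sign-extends bit 13, giving lo = -0x1cbb and hi = ofs - lo = 0x14000.
   Thus hi + lo reconstructs the offset, lo always fits in the signed
   14 bit immediate of an adds, and hi is folded into the symbol used
   for the GOT load.  */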
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    {
      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
    }
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, op0, tga_ret, op1);
	      return NULL_RTX;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (register_operand (op0, Pmode))
		tga_ret = op0;
	      else
		tga_ret = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (tga_ret, op1));
		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
		}
	      else
		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
	      if (tga_ret == op0)
		return NULL_RTX;
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
	      if (op1 == op0)
		return NULL_RTX;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (register_operand (op0, Pmode))
		tmp = op0;
	      else
		tmp = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (tmp, op1));
		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
		}
	      else
		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
	      if (tmp == op0)
		return NULL_RTX;
	      op1 = tmp;
	      break;

	    default:
	      abort ();
	    }
	}
      else if (!TARGET_NO_PIC &&
	       (symbolic_operand (op1, Pmode) ||
		symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    if (GET_MODE (op1) == DImode)
	      emit_insn (gen_movdi_symbolic (op0, op1));
	    else
	      emit_insn (gen_movsi_symbolic (op0, op1));
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
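/* For illustration: (reg:TI 32) splits into out[0] = (reg:DI 32) and
   out[1] = (reg:DI 33) with no fixup insn needed.  For a MEM, out[1]
   is addressed through SCRATCH and the returned insn computes
   scratch = base + 8; a POST_INC base is rewritten as a POST_MODIFY
   adding 16 so that the full TImode increment is preserved.  */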
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
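/* A sketch of the expansion (the assembly is an assumption about the
   compare patterns): for code == LT the else arm emits
   (set (reg:BI cmp) (lt:BI op0 op1)), roughly
   "cmp.lt p6, p7 = r14, r15", and the function returns
   (ne:MODE cmp 0), so the caller just tests the predicate result
   against zero.  */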
This is not terribly 1541 important, since we can fix things up essentially for free 1542 with the POST_DEC below, but it's nice to not use it when we 1543 can immediately tell it's not necessary. */ 1544 addr_dead_p = ((noreturn_p || sibcall_p 1545 || TEST_HARD_REG_BIT (regs_invalidated_by_call, 1546 REGNO (addr))) 1547 && !FUNCTION_ARG_REGNO_P (REGNO (addr))); 1548 1549 /* Load the code address into scratch_b. */ 1550 tmp = gen_rtx_POST_INC (Pmode, addr); 1551 tmp = gen_rtx_MEM (Pmode, tmp); 1552 emit_move_insn (scratch_r, tmp); 1553 emit_move_insn (scratch_b, scratch_r); 1554 1555 /* Load the GP address. If ADDR is not dead here, then we must 1556 revert the change made above via the POST_INCREMENT. */ 1557 if (!addr_dead_p) 1558 tmp = gen_rtx_POST_DEC (Pmode, addr); 1559 else 1560 tmp = addr; 1561 tmp = gen_rtx_MEM (Pmode, tmp); 1562 emit_move_insn (pic_offset_table_rtx, tmp); 1563 1564 is_desc = true; 1565 addr = scratch_b; 1566 } 1567 1568 if (sibcall_p) 1569 insn = gen_sibcall_nogp (addr); 1570 else if (retval) 1571 insn = gen_call_value_nogp (retval, addr, retaddr); 1572 else 1573 insn = gen_call_nogp (addr, retaddr); 1574 emit_call_insn (insn); 1575 1576 if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p) 1577 ia64_reload_gp (); 1578} 1579 1580/* Begin the assembly file. */ 1581 1582void 1583emit_safe_across_calls (f) 1584 FILE *f; 1585{ 1586 unsigned int rs, re; 1587 int out_state; 1588 1589 rs = 1; 1590 out_state = 0; 1591 while (1) 1592 { 1593 while (rs < 64 && call_used_regs[PR_REG (rs)]) 1594 rs++; 1595 if (rs >= 64) 1596 break; 1597 for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++) 1598 continue; 1599 if (out_state == 0) 1600 { 1601 fputs ("\t.pred.safe_across_calls ", f); 1602 out_state = 1; 1603 } 1604 else 1605 fputc (',', f); 1606 if (re == rs + 1) 1607 fprintf (f, "p%u", rs); 1608 else 1609 fprintf (f, "p%u-p%u", rs, re - 1); 1610 rs = re + 1; 1611 } 1612 if (out_state) 1613 fputc ('\n', f); 1614} 1615 1616/* Helper function for ia64_compute_frame_size: find an appropriate general 1617 register to spill some special register to. SPECIAL_SPILL_MASK contains 1618 bits in GR0 to GR31 that have already been allocated by this routine. 1619 TRY_LOCALS is true if we should attempt to locate a local regnum. */ 1620 1621static int 1622find_gr_spill (try_locals) 1623 int try_locals; 1624{ 1625 int regno; 1626 1627 /* If this is a leaf function, first try an otherwise unused 1628 call-clobbered register. */ 1629 if (current_function_is_leaf) 1630 { 1631 for (regno = GR_REG (1); regno <= GR_REG (31); regno++) 1632 if (! regs_ever_live[regno] 1633 && call_used_regs[regno] 1634 && ! fixed_regs[regno] 1635 && ! global_regs[regno] 1636 && ((current_frame_info.gr_used_mask >> regno) & 1) == 0) 1637 { 1638 current_frame_info.gr_used_mask |= 1 << regno; 1639 return regno; 1640 } 1641 } 1642 1643 if (try_locals) 1644 { 1645 regno = current_frame_info.n_local_regs; 1646 /* If there is a frame pointer, then we can't use loc79, because 1647 that is HARD_FRAME_POINTER_REGNUM. In particular, see the 1648 reg_name switching code in ia64_expand_prologue. */ 1649 if (regno < (80 - frame_pointer_needed)) 1650 { 1651 current_frame_info.n_local_regs = regno + 1; 1652 return LOC_REG (0) + regno; 1653 } 1654 } 1655 1656 /* Failed to find a general register to spill to. Must use stack. */ 1657 return 0; 1658} 1659 1660/* In order to make for nice schedules, we try to allocate every temporary 1661 to a different register. 
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */
  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}

      if (regs_ever_live[AR_PFS_REGNUM])
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
	  if (current_frame_info.reg_save_ar_pfs == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
*/ 1955 if (spilled_fr_p) 1956 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size); 1957 else 1958 pretend_args_size = current_function_pretend_args_size; 1959 1960 total_size = (spill_size + extra_spill_size + size + pretend_args_size 1961 + current_function_outgoing_args_size); 1962 total_size = IA64_STACK_ALIGN (total_size); 1963 1964 /* We always use the 16-byte scratch area provided by the caller, but 1965 if we are a leaf function, there's no one to which we need to provide 1966 a scratch area. */ 1967 if (current_function_is_leaf) 1968 total_size = MAX (0, total_size - 16); 1969 1970 current_frame_info.total_size = total_size; 1971 current_frame_info.spill_cfa_off = pretend_args_size - 16; 1972 current_frame_info.spill_size = spill_size; 1973 current_frame_info.extra_spill_size = extra_spill_size; 1974 COPY_HARD_REG_SET (current_frame_info.mask, mask); 1975 current_frame_info.n_spilled = n_spilled; 1976 current_frame_info.initialized = reload_completed; 1977} 1978 1979/* Compute the initial difference between the specified pair of registers. */ 1980 1981HOST_WIDE_INT 1982ia64_initial_elimination_offset (from, to) 1983 int from, to; 1984{ 1985 HOST_WIDE_INT offset; 1986 1987 ia64_compute_frame_size (get_frame_size ()); 1988 switch (from) 1989 { 1990 case FRAME_POINTER_REGNUM: 1991 if (to == HARD_FRAME_POINTER_REGNUM) 1992 { 1993 if (current_function_is_leaf) 1994 offset = -current_frame_info.total_size; 1995 else 1996 offset = -(current_frame_info.total_size 1997 - current_function_outgoing_args_size - 16); 1998 } 1999 else if (to == STACK_POINTER_REGNUM) 2000 { 2001 if (current_function_is_leaf) 2002 offset = 0; 2003 else 2004 offset = 16 + current_function_outgoing_args_size; 2005 } 2006 else 2007 abort (); 2008 break; 2009 2010 case ARG_POINTER_REGNUM: 2011 /* Arguments start above the 16 byte save area, unless stdarg 2012 in which case we store through the 16 byte save area. */ 2013 if (to == HARD_FRAME_POINTER_REGNUM) 2014 offset = 16 - current_function_pretend_args_size; 2015 else if (to == STACK_POINTER_REGNUM) 2016 offset = (current_frame_info.total_size 2017 + 16 - current_function_pretend_args_size); 2018 else 2019 abort (); 2020 break; 2021 2022 case RETURN_ADDRESS_POINTER_REGNUM: 2023 offset = 0; 2024 break; 2025 2026 default: 2027 abort (); 2028 } 2029 2030 return offset; 2031} 2032 2033/* If there are more than a trivial number of register spills, we use 2034 two interleaved iterators so that we can get two memory references 2035 per insn group. 2036 2037 In order to simplify things in the prologue and epilogue expanders, 2038 we use helper functions to fix up the memory references after the 2039 fact with the appropriate offsets to a POST_MODIFY memory mode. 2040 The following data structure tracks the state of the two iterators 2041 while insns are being emitted. 
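   As a sketch of the intended code shape (register numbers and offsets here are hypothetical): with iterators in r2 and r3, consecutive spills come out as st8 [r2] = rA, 16 and st8 [r3] = rB, 16 in alternation, so an insn group can carry two memory operations rather than serializing every store on a single post-incremented base register.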
*/ 2042 2043struct spill_fill_data 2044{ 2045 rtx init_after; /* point at which to emit initializations */ 2046 rtx init_reg[2]; /* initial base register */ 2047 rtx iter_reg[2]; /* the iterator registers */ 2048 rtx *prev_addr[2]; /* address of last memory use */ 2049 rtx prev_insn[2]; /* the insn corresponding to prev_addr */ 2050 HOST_WIDE_INT prev_off[2]; /* last offset */ 2051 int n_iter; /* number of iterators in use */ 2052 int next_iter; /* next iterator to use */ 2053 unsigned int save_gr_used_mask; 2054}; 2055 2056static struct spill_fill_data spill_fill_data; 2057 2058static void 2059setup_spill_pointers (n_spills, init_reg, cfa_off) 2060 int n_spills; 2061 rtx init_reg; 2062 HOST_WIDE_INT cfa_off; 2063{ 2064 int i; 2065 2066 spill_fill_data.init_after = get_last_insn (); 2067 spill_fill_data.init_reg[0] = init_reg; 2068 spill_fill_data.init_reg[1] = init_reg; 2069 spill_fill_data.prev_addr[0] = NULL; 2070 spill_fill_data.prev_addr[1] = NULL; 2071 spill_fill_data.prev_insn[0] = NULL; 2072 spill_fill_data.prev_insn[1] = NULL; 2073 spill_fill_data.prev_off[0] = cfa_off; 2074 spill_fill_data.prev_off[1] = cfa_off; 2075 spill_fill_data.next_iter = 0; 2076 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; 2077 2078 spill_fill_data.n_iter = 1 + (n_spills > 2); 2079 for (i = 0; i < spill_fill_data.n_iter; ++i) 2080 { 2081 int regno = next_scratch_gr_reg (); 2082 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); 2083 current_frame_info.gr_used_mask |= 1 << regno; 2084 } 2085} 2086 2087static void 2088finish_spill_pointers () 2089{ 2090 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; 2091} 2092 2093static rtx 2094spill_restore_mem (reg, cfa_off) 2095 rtx reg; 2096 HOST_WIDE_INT cfa_off; 2097{ 2098 int iter = spill_fill_data.next_iter; 2099 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; 2100 rtx disp_rtx = GEN_INT (disp); 2101 rtx mem; 2102 2103 if (spill_fill_data.prev_addr[iter]) 2104 { 2105 if (CONST_OK_FOR_N (disp)) 2106 { 2107 *spill_fill_data.prev_addr[iter] 2108 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], 2109 gen_rtx_PLUS (DImode, 2110 spill_fill_data.iter_reg[iter], 2111 disp_rtx)); 2112 REG_NOTES (spill_fill_data.prev_insn[iter]) 2113 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter], 2114 REG_NOTES (spill_fill_data.prev_insn[iter])); 2115 } 2116 else 2117 { 2118 /* ??? Could use register post_modify for loads. */ 2119 if (! CONST_OK_FOR_I (disp)) 2120 { 2121 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2122 emit_move_insn (tmp, disp_rtx); 2123 disp_rtx = tmp; 2124 } 2125 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2126 spill_fill_data.iter_reg[iter], disp_rtx)); 2127 } 2128 } 2129 /* Micro-optimization: if we've created a frame pointer, it's at 2130 CFA 0, which may allow the real iterator to be initialized lower, 2131 slightly increasing parallelism. Also, if there are few saves 2132 it may eliminate the iterator entirely. */ 2133 else if (disp == 0 2134 && spill_fill_data.init_reg[iter] == stack_pointer_rtx 2135 && frame_pointer_needed) 2136 { 2137 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); 2138 set_mem_alias_set (mem, get_varargs_alias_set ()); 2139 return mem; 2140 } 2141 else 2142 { 2143 rtx seq, insn; 2144 2145 if (disp == 0) 2146 seq = gen_movdi (spill_fill_data.iter_reg[iter], 2147 spill_fill_data.init_reg[iter]); 2148 else 2149 { 2150 start_sequence (); 2151 2152 if (! 
CONST_OK_FOR_I (disp)) 2153 { 2154 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2155 emit_move_insn (tmp, disp_rtx); 2156 disp_rtx = tmp; 2157 } 2158 2159 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2160 spill_fill_data.init_reg[iter], 2161 disp_rtx)); 2162 2163 seq = get_insns (); 2164 end_sequence (); 2165 } 2166 2167 /* Careful for being the first insn in a sequence. */ 2168 if (spill_fill_data.init_after) 2169 insn = emit_insn_after (seq, spill_fill_data.init_after); 2170 else 2171 { 2172 rtx first = get_insns (); 2173 if (first) 2174 insn = emit_insn_before (seq, first); 2175 else 2176 insn = emit_insn (seq); 2177 } 2178 spill_fill_data.init_after = insn; 2179 2180 /* If DISP is 0, we may or may not have a further adjustment 2181 afterward. If we do, then the load/store insn may be modified 2182 to be a post-modify. If we don't, then this copy may be 2183 eliminated by copyprop_hardreg_forward, which makes this 2184 insn garbage, which runs afoul of the sanity check in 2185 propagate_one_insn. So mark this insn as legal to delete. */ 2186 if (disp == 0) 2187 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, 2188 REG_NOTES (insn)); 2189 } 2190 2191 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); 2192 2193 /* ??? Not all of the spills are for varargs, but some of them are. 2194 The rest of the spills belong in an alias set of their own. But 2195 it doesn't actually hurt to include them here. */ 2196 set_mem_alias_set (mem, get_varargs_alias_set ()); 2197 2198 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); 2199 spill_fill_data.prev_off[iter] = cfa_off; 2200 2201 if (++iter >= spill_fill_data.n_iter) 2202 iter = 0; 2203 spill_fill_data.next_iter = iter; 2204 2205 return mem; 2206} 2207 2208static void 2209do_spill (move_fn, reg, cfa_off, frame_reg) 2210 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2211 rtx reg, frame_reg; 2212 HOST_WIDE_INT cfa_off; 2213{ 2214 int iter = spill_fill_data.next_iter; 2215 rtx mem, insn; 2216 2217 mem = spill_restore_mem (reg, cfa_off); 2218 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); 2219 spill_fill_data.prev_insn[iter] = insn; 2220 2221 if (frame_reg) 2222 { 2223 rtx base; 2224 HOST_WIDE_INT off; 2225 2226 RTX_FRAME_RELATED_P (insn) = 1; 2227 2228 /* Don't even pretend that the unwind code can intuit its way 2229 through a pair of interleaved post_modify iterators. Just 2230 provide the correct answer. */ 2231 2232 if (frame_pointer_needed) 2233 { 2234 base = hard_frame_pointer_rtx; 2235 off = - cfa_off; 2236 } 2237 else 2238 { 2239 base = stack_pointer_rtx; 2240 off = current_frame_info.total_size - cfa_off; 2241 } 2242 2243 REG_NOTES (insn) 2244 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2245 gen_rtx_SET (VOIDmode, 2246 gen_rtx_MEM (GET_MODE (reg), 2247 plus_constant (base, off)), 2248 frame_reg), 2249 REG_NOTES (insn)); 2250 } 2251} 2252 2253static void 2254do_restore (move_fn, reg, cfa_off) 2255 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2256 rtx reg; 2257 HOST_WIDE_INT cfa_off; 2258{ 2259 int iter = spill_fill_data.next_iter; 2260 rtx insn; 2261 2262 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), 2263 GEN_INT (cfa_off))); 2264 spill_fill_data.prev_insn[iter] = insn; 2265} 2266 2267/* Wrapper functions that discard the CONST_INT spill offset. These 2268 exist so that we can give gr_spill/gr_fill the offset they need and 2269 use a consistent function interface.
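   All three wrappers below have the rtx (*) (rtx, rtx, rtx) shape that do_spill and do_restore expect for MOVE_FN, so a caller can pass gen_movdi_x, gen_fr_spill_x, gen_fr_restore_x, or the gr_spill/gr_fill generators without special-casing the offset operand.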
*/ 2270 2271static rtx 2272gen_movdi_x (dest, src, offset) 2273 rtx dest, src; 2274 rtx offset ATTRIBUTE_UNUSED; 2275{ 2276 return gen_movdi (dest, src); 2277} 2278 2279static rtx 2280gen_fr_spill_x (dest, src, offset) 2281 rtx dest, src; 2282 rtx offset ATTRIBUTE_UNUSED; 2283{ 2284 return gen_fr_spill (dest, src); 2285} 2286 2287static rtx 2288gen_fr_restore_x (dest, src, offset) 2289 rtx dest, src; 2290 rtx offset ATTRIBUTE_UNUSED; 2291{ 2292 return gen_fr_restore (dest, src); 2293} 2294 2295/* Called after register allocation to add any instructions needed for the 2296 prologue. Using a prologue insn is favored compared to putting all of the 2297 instructions in output_function_prologue(), since it allows the scheduler 2298 to intermix instructions with the saves of the caller saved registers. In 2299 some cases, it might be necessary to emit a barrier instruction as the last 2300 insn to prevent such scheduling. 2301 2302 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 2303 so that the debug info generation code can handle them properly. 2304 2305 The register save area is laid out like so: 2306 cfa+16 2307 [ varargs spill area ] 2308 [ fr register spill area ] 2309 [ br register spill area ] 2310 [ ar register spill area ] 2311 [ pr register spill area ] 2312 [ gr register spill area ] */ 2313 2314/* ??? Get inefficient code when the frame size is larger than can fit in an 2315 adds instruction. */ 2316 2317void 2318ia64_expand_prologue () 2319{ 2320 rtx insn, ar_pfs_save_reg, ar_unat_save_reg; 2321 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; 2322 rtx reg, alt_reg; 2323 2324 ia64_compute_frame_size (get_frame_size ()); 2325 last_scratch_gr_reg = 15; 2326 2327 /* If there is no epilogue, then we don't need some prologue insns. 2328 We need to avoid emitting the dead prologue insns, because flow 2329 will complain about them. */ 2330 if (optimize) 2331 { 2332 edge e; 2333 2334 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) 2335 if ((e->flags & EDGE_FAKE) == 0 2336 && (e->flags & EDGE_FALLTHRU) != 0) 2337 break; 2338 epilogue_p = (e != NULL); 2339 } 2340 else 2341 epilogue_p = 1; 2342 2343 /* Set the local, input, and output register names. We need to do this 2344 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in 2345 half. If we use in/loc/out register names, then we get assembler errors 2346 in crtn.S because there is no alloc insn or regstk directive in there. */ 2347 if (! TARGET_REG_NAMES) 2348 { 2349 int inputs = current_frame_info.n_input_regs; 2350 int locals = current_frame_info.n_local_regs; 2351 int outputs = current_frame_info.n_output_regs; 2352 2353 for (i = 0; i < inputs; i++) 2354 reg_names[IN_REG (i)] = ia64_reg_numbers[i]; 2355 for (i = 0; i < locals; i++) 2356 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; 2357 for (i = 0; i < outputs; i++) 2358 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; 2359 } 2360 2361 /* Set the frame pointer register name. The regnum is logically loc79, 2362 but of course we'll not have allocated that many locals. Rather than 2363 worrying about renumbering the existing rtxs, we adjust the name. */ 2364 /* ??? This code means that we can never use one local register when 2365 there is a frame pointer. loc79 gets wasted in this case, as it is 2366 renamed to a register that will never be used. See also the try_locals 2367 code in find_gr_spill.
*/ 2368 if (current_frame_info.reg_fp) 2369 { 2370 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 2371 reg_names[HARD_FRAME_POINTER_REGNUM] 2372 = reg_names[current_frame_info.reg_fp]; 2373 reg_names[current_frame_info.reg_fp] = tmp; 2374 } 2375 2376 /* Fix up the return address placeholder. */ 2377 /* ??? We can fail if __builtin_return_address is used, and we didn't 2378 allocate a register in which to save b0. I can't think of a way to 2379 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and 2380 then be sure that I got the right one. Further, reload doesn't seem 2381 to care if an eliminable register isn't used, and "eliminates" it 2382 anyway. */ 2383 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM] 2384 && current_frame_info.reg_save_b0 != 0) 2385 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0; 2386 2387 /* We don't need an alloc instruction if we've used no outputs or locals. */ 2388 if (current_frame_info.n_local_regs == 0 2389 && current_frame_info.n_output_regs == 0 2390 && current_frame_info.n_input_regs <= current_function_args_info.int_regs 2391 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 2392 { 2393 /* If there is no alloc, but there are input registers used, then we 2394 need a .regstk directive. */ 2395 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 2396 ar_pfs_save_reg = NULL_RTX; 2397 } 2398 else 2399 { 2400 current_frame_info.need_regstk = 0; 2401 2402 if (current_frame_info.reg_save_ar_pfs) 2403 regno = current_frame_info.reg_save_ar_pfs; 2404 else 2405 regno = next_scratch_gr_reg (); 2406 ar_pfs_save_reg = gen_rtx_REG (DImode, regno); 2407 2408 insn = emit_insn (gen_alloc (ar_pfs_save_reg, 2409 GEN_INT (current_frame_info.n_input_regs), 2410 GEN_INT (current_frame_info.n_local_regs), 2411 GEN_INT (current_frame_info.n_output_regs), 2412 GEN_INT (current_frame_info.n_rotate_regs))); 2413 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0); 2414 } 2415 2416 /* Set up frame pointer, stack pointer, and spill iterators. */ 2417 2418 n_varargs = cfun->machine->n_varargs; 2419 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 2420 stack_pointer_rtx, 0); 2421 2422 if (frame_pointer_needed) 2423 { 2424 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 2425 RTX_FRAME_RELATED_P (insn) = 1; 2426 } 2427 2428 if (current_frame_info.total_size != 0) 2429 { 2430 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 2431 rtx offset; 2432 2433 if (CONST_OK_FOR_I (- current_frame_info.total_size)) 2434 offset = frame_size_rtx; 2435 else 2436 { 2437 regno = next_scratch_gr_reg (); 2438 offset = gen_rtx_REG (DImode, regno); 2439 emit_move_insn (offset, frame_size_rtx); 2440 } 2441 2442 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 2443 stack_pointer_rtx, offset)); 2444 2445 if (! frame_pointer_needed) 2446 { 2447 RTX_FRAME_RELATED_P (insn) = 1; 2448 if (GET_CODE (offset) != CONST_INT) 2449 { 2450 REG_NOTES (insn) 2451 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2452 gen_rtx_SET (VOIDmode, 2453 stack_pointer_rtx, 2454 gen_rtx_PLUS (DImode, 2455 stack_pointer_rtx, 2456 frame_size_rtx)), 2457 REG_NOTES (insn)); 2458 } 2459 } 2460 2461 /* ??? At this point we must generate a magic insn that appears to 2462 modify the stack pointer, the frame pointer, and all spill 2463 iterators. This would allow the most scheduling freedom. For 2464 now, just hard stop. 
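   (The blockage insn emitted below is an ordinary scheduling barrier: nothing may be moved across it in either direction, which is safe but gives up the extra parallelism the magic insn would permit.)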
*/ 2465 emit_insn (gen_blockage ()); 2466 } 2467 2468 /* Must copy out ar.unat before doing any integer spills. */ 2469 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2470 { 2471 if (current_frame_info.reg_save_ar_unat) 2472 ar_unat_save_reg 2473 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2474 else 2475 { 2476 alt_regno = next_scratch_gr_reg (); 2477 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2478 current_frame_info.gr_used_mask |= 1 << alt_regno; 2479 } 2480 2481 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2482 insn = emit_move_insn (ar_unat_save_reg, reg); 2483 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0); 2484 2485 /* Even if we're not going to generate an epilogue, we still 2486 need to save the register so that EH works. */ 2487 if (! epilogue_p && current_frame_info.reg_save_ar_unat) 2488 emit_insn (gen_prologue_use (ar_unat_save_reg)); 2489 } 2490 else 2491 ar_unat_save_reg = NULL_RTX; 2492 2493 /* Spill all varargs registers. Do this before spilling any GR registers, 2494 since we want the UNAT bits for the GR registers to override the UNAT 2495 bits from varargs, which we don't care about. */ 2496 2497 cfa_off = -16; 2498 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 2499 { 2500 reg = gen_rtx_REG (DImode, regno); 2501 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 2502 } 2503 2504 /* Locate the bottom of the register save area. */ 2505 cfa_off = (current_frame_info.spill_cfa_off 2506 + current_frame_info.spill_size 2507 + current_frame_info.extra_spill_size); 2508 2509 /* Save the predicate register block either in a register or in memory. */ 2510 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2511 { 2512 reg = gen_rtx_REG (DImode, PR_REG (0)); 2513 if (current_frame_info.reg_save_pr != 0) 2514 { 2515 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2516 insn = emit_move_insn (alt_reg, reg); 2517 2518 /* ??? Denote pr spill/fill by a DImode move that modifies all 2519 64 hard registers. */ 2520 RTX_FRAME_RELATED_P (insn) = 1; 2521 REG_NOTES (insn) 2522 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2523 gen_rtx_SET (VOIDmode, alt_reg, reg), 2524 REG_NOTES (insn)); 2525 2526 /* Even if we're not going to generate an epilogue, we still 2527 need to save the register so that EH works. */ 2528 if (! epilogue_p) 2529 emit_insn (gen_prologue_use (alt_reg)); 2530 } 2531 else 2532 { 2533 alt_regno = next_scratch_gr_reg (); 2534 alt_reg = gen_rtx_REG (DImode, alt_regno); 2535 insn = emit_move_insn (alt_reg, reg); 2536 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2537 cfa_off -= 8; 2538 } 2539 } 2540 2541 /* Handle AR regs in numerical order. All of them get special handling. */ 2542 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 2543 && current_frame_info.reg_save_ar_unat == 0) 2544 { 2545 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2546 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 2547 cfa_off -= 8; 2548 } 2549 2550 /* The alloc insn already copied ar.pfs into a general register. The 2551 only thing we have to do now is copy that register to a stack slot 2552 if we'd not allocated a local register for the job. 
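   (ar_pfs_save_reg was chosen back when the alloc was emitted above: either the permanent save register from reg_save_ar_pfs, or a scratch whose contents must now be stored.)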
*/ 2553 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 2554 && current_frame_info.reg_save_ar_pfs == 0) 2555 { 2556 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2557 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 2558 cfa_off -= 8; 2559 } 2560 2561 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2562 { 2563 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2564 if (current_frame_info.reg_save_ar_lc != 0) 2565 { 2566 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2567 insn = emit_move_insn (alt_reg, reg); 2568 RTX_FRAME_RELATED_P (insn) = 1; 2569 2570 /* Even if we're not going to generate an epilogue, we still 2571 need to save the register so that EH works. */ 2572 if (! epilogue_p) 2573 emit_insn (gen_prologue_use (alt_reg)); 2574 } 2575 else 2576 { 2577 alt_regno = next_scratch_gr_reg (); 2578 alt_reg = gen_rtx_REG (DImode, alt_regno); 2579 emit_move_insn (alt_reg, reg); 2580 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2581 cfa_off -= 8; 2582 } 2583 } 2584 2585 if (current_frame_info.reg_save_gp) 2586 { 2587 insn = emit_move_insn (gen_rtx_REG (DImode, 2588 current_frame_info.reg_save_gp), 2589 pic_offset_table_rtx); 2590 /* We don't know for sure yet if this is actually needed, since 2591 we've not split the PIC call patterns. If all of the calls 2592 are indirect, and not followed by any uses of the gp, then 2593 this save is dead. Allow it to go away. */ 2594 REG_NOTES (insn) 2595 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn)); 2596 } 2597 2598 /* We should now be at the base of the gr/br/fr spill area. */ 2599 if (cfa_off != (current_frame_info.spill_cfa_off 2600 + current_frame_info.spill_size)) 2601 abort (); 2602 2603 /* Spill all general registers. */ 2604 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 2605 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2606 { 2607 reg = gen_rtx_REG (DImode, regno); 2608 do_spill (gen_gr_spill, reg, cfa_off, reg); 2609 cfa_off -= 8; 2610 } 2611 2612 /* Handle BR0 specially -- it may be getting stored permanently in 2613 some GR register. */ 2614 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2615 { 2616 reg = gen_rtx_REG (DImode, BR_REG (0)); 2617 if (current_frame_info.reg_save_b0 != 0) 2618 { 2619 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2620 insn = emit_move_insn (alt_reg, reg); 2621 RTX_FRAME_RELATED_P (insn) = 1; 2622 2623 /* Even if we're not going to generate an epilogue, we still 2624 need to save the register so that EH works. */ 2625 if (! epilogue_p) 2626 emit_insn (gen_prologue_use (alt_reg)); 2627 } 2628 else 2629 { 2630 alt_regno = next_scratch_gr_reg (); 2631 alt_reg = gen_rtx_REG (DImode, alt_regno); 2632 emit_move_insn (alt_reg, reg); 2633 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2634 cfa_off -= 8; 2635 } 2636 } 2637 2638 /* Spill the rest of the BR registers. */ 2639 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2640 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2641 { 2642 alt_regno = next_scratch_gr_reg (); 2643 alt_reg = gen_rtx_REG (DImode, alt_regno); 2644 reg = gen_rtx_REG (DImode, regno); 2645 emit_move_insn (alt_reg, reg); 2646 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2647 cfa_off -= 8; 2648 } 2649 2650 /* Align the frame and spill all FR registers. 
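   Each FR register spills as a 16 byte quantity (ia64_compute_frame_size charged 16 bytes per saved FR register for the same reason), so cfa_off must be 16 byte aligned on every iteration; the abort below enforces this.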
*/ 2651 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2652 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2653 { 2654 if (cfa_off & 15) 2655 abort (); 2656 reg = gen_rtx_REG (TFmode, regno); 2657 do_spill (gen_fr_spill_x, reg, cfa_off, reg); 2658 cfa_off -= 16; 2659 } 2660 2661 if (cfa_off != current_frame_info.spill_cfa_off) 2662 abort (); 2663 2664 finish_spill_pointers (); 2665} 2666 2667/* Called after register allocation to add any instructions needed for the 2668 epilogue. Using an epilogue insn is favored compared to putting all of the 2669 instructions in output_function_prologue(), since it allows the scheduler 2670 to intermix instructions with the saves of the caller saved registers. In 2671 some cases, it might be necessary to emit a barrier instruction as the last 2672 insn to prevent such scheduling. */ 2673 2674void 2675ia64_expand_epilogue (sibcall_p) 2676 int sibcall_p; 2677{ 2678 rtx insn, reg, alt_reg, ar_unat_save_reg; 2679 int regno, alt_regno, cfa_off; 2680 2681 ia64_compute_frame_size (get_frame_size ()); 2682 2683 /* If there is a frame pointer, then we use it instead of the stack 2684 pointer, so that the stack pointer does not need to be valid when 2685 the epilogue starts. See EXIT_IGNORE_STACK. */ 2686 if (frame_pointer_needed) 2687 setup_spill_pointers (current_frame_info.n_spilled, 2688 hard_frame_pointer_rtx, 0); 2689 else 2690 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 2691 current_frame_info.total_size); 2692 2693 if (current_frame_info.total_size != 0) 2694 { 2695 /* ??? At this point we must generate a magic insn that appears to 2696 modify the spill iterators and the frame pointer. This would 2697 allow the most scheduling freedom. For now, just hard stop. */ 2698 emit_insn (gen_blockage ()); 2699 } 2700 2701 /* Locate the bottom of the register save area. */ 2702 cfa_off = (current_frame_info.spill_cfa_off 2703 + current_frame_info.spill_size 2704 + current_frame_info.extra_spill_size); 2705 2706 /* Restore the predicate registers. */ 2707 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2708 { 2709 if (current_frame_info.reg_save_pr != 0) 2710 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2711 else 2712 { 2713 alt_regno = next_scratch_gr_reg (); 2714 alt_reg = gen_rtx_REG (DImode, alt_regno); 2715 do_restore (gen_movdi_x, alt_reg, cfa_off); 2716 cfa_off -= 8; 2717 } 2718 reg = gen_rtx_REG (DImode, PR_REG (0)); 2719 emit_move_insn (reg, alt_reg); 2720 } 2721 2722 /* Restore the application registers. */ 2723 2724 /* Load the saved unat from the stack, but do not restore it until 2725 after the GRs have been restored. */ 2726 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2727 { 2728 if (current_frame_info.reg_save_ar_unat != 0) 2729 ar_unat_save_reg 2730 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2731 else 2732 { 2733 alt_regno = next_scratch_gr_reg (); 2734 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2735 current_frame_info.gr_used_mask |= 1 << alt_regno; 2736 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 2737 cfa_off -= 8; 2738 } 2739 } 2740 else 2741 ar_unat_save_reg = NULL_RTX; 2742 2743 if (current_frame_info.reg_save_ar_pfs != 0) 2744 { 2745 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs); 2746 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2747 emit_move_insn (reg, alt_reg); 2748 } 2749 else if (! 
current_function_is_leaf) 2750 { 2751 alt_regno = next_scratch_gr_reg (); 2752 alt_reg = gen_rtx_REG (DImode, alt_regno); 2753 do_restore (gen_movdi_x, alt_reg, cfa_off); 2754 cfa_off -= 8; 2755 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2756 emit_move_insn (reg, alt_reg); 2757 } 2758 2759 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2760 { 2761 if (current_frame_info.reg_save_ar_lc != 0) 2762 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2763 else 2764 { 2765 alt_regno = next_scratch_gr_reg (); 2766 alt_reg = gen_rtx_REG (DImode, alt_regno); 2767 do_restore (gen_movdi_x, alt_reg, cfa_off); 2768 cfa_off -= 8; 2769 } 2770 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2771 emit_move_insn (reg, alt_reg); 2772 } 2773 2774 /* We should now be at the base of the gr/br/fr spill area. */ 2775 if (cfa_off != (current_frame_info.spill_cfa_off 2776 + current_frame_info.spill_size)) 2777 abort (); 2778 2779 /* The GP may be stored on the stack in the prologue, but it's 2780 never restored in the epilogue. Skip the stack slot. */ 2781 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 2782 cfa_off -= 8; 2783 2784 /* Restore all general registers. */ 2785 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 2786 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2787 { 2788 reg = gen_rtx_REG (DImode, regno); 2789 do_restore (gen_gr_restore, reg, cfa_off); 2790 cfa_off -= 8; 2791 } 2792 2793 /* Restore the branch registers. Handle B0 specially, as it may 2794 have gotten stored in some GR register. */ 2795 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2796 { 2797 if (current_frame_info.reg_save_b0 != 0) 2798 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2799 else 2800 { 2801 alt_regno = next_scratch_gr_reg (); 2802 alt_reg = gen_rtx_REG (DImode, alt_regno); 2803 do_restore (gen_movdi_x, alt_reg, cfa_off); 2804 cfa_off -= 8; 2805 } 2806 reg = gen_rtx_REG (DImode, BR_REG (0)); 2807 emit_move_insn (reg, alt_reg); 2808 } 2809 2810 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2811 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2812 { 2813 alt_regno = next_scratch_gr_reg (); 2814 alt_reg = gen_rtx_REG (DImode, alt_regno); 2815 do_restore (gen_movdi_x, alt_reg, cfa_off); 2816 cfa_off -= 8; 2817 reg = gen_rtx_REG (DImode, regno); 2818 emit_move_insn (reg, alt_reg); 2819 } 2820 2821 /* Restore floating point registers. */ 2822 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2823 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2824 { 2825 if (cfa_off & 15) 2826 abort (); 2827 reg = gen_rtx_REG (TFmode, regno); 2828 do_restore (gen_fr_restore_x, reg, cfa_off); 2829 cfa_off -= 16; 2830 } 2831 2832 /* Restore ar.unat for real. */ 2833 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2834 { 2835 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2836 emit_move_insn (reg, ar_unat_save_reg); 2837 } 2838 2839 if (cfa_off != current_frame_info.spill_cfa_off) 2840 abort (); 2841 2842 finish_spill_pointers (); 2843 2844 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp) 2845 { 2846 /* ??? At this point we must generate a magic insn that appears to 2847 modify the spill iterators, the stack pointer, and the frame 2848 pointer. This would allow the most scheduling freedom. For now, 2849 just hard stop. 
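   (As in the prologue, gen_blockage is a blunt but safe substitute: it is a full scheduler barrier, so no restore can drift across this point.)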
*/ 2850 emit_insn (gen_blockage ()); 2851 } 2852 2853 if (cfun->machine->ia64_eh_epilogue_sp) 2854 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 2855 else if (frame_pointer_needed) 2856 { 2857 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 2858 RTX_FRAME_RELATED_P (insn) = 1; 2859 } 2860 else if (current_frame_info.total_size) 2861 { 2862 rtx offset, frame_size_rtx; 2863 2864 frame_size_rtx = GEN_INT (current_frame_info.total_size); 2865 if (CONST_OK_FOR_I (current_frame_info.total_size)) 2866 offset = frame_size_rtx; 2867 else 2868 { 2869 regno = next_scratch_gr_reg (); 2870 offset = gen_rtx_REG (DImode, regno); 2871 emit_move_insn (offset, frame_size_rtx); 2872 } 2873 2874 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 2875 offset)); 2876 2877 RTX_FRAME_RELATED_P (insn) = 1; 2878 if (GET_CODE (offset) != CONST_INT) 2879 { 2880 REG_NOTES (insn) 2881 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2882 gen_rtx_SET (VOIDmode, 2883 stack_pointer_rtx, 2884 gen_rtx_PLUS (DImode, 2885 stack_pointer_rtx, 2886 frame_size_rtx)), 2887 REG_NOTES (insn)); 2888 } 2889 } 2890 2891 if (cfun->machine->ia64_eh_epilogue_bsp) 2892 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 2893 2894 if (! sibcall_p) 2895 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 2896 else 2897 { 2898 int fp = GR_REG (2); 2899 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the 2900 first available call clobbered register. If there was a frame pointer 2901 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, 2902 so we have to make sure we're using the string "r2" when emitting 2903 the register name for the assembler. */ 2904 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2)) 2905 fp = HARD_FRAME_POINTER_REGNUM; 2906 2907 /* We must emit an alloc to force the input registers to become output 2908 registers. Otherwise, if the callee tries to pass its parameters 2909 through to another call without an intervening alloc, then these 2910 values get lost. */ 2911 /* ??? We don't need to preserve all input registers. We only need to 2912 preserve those input registers used as arguments to the sibling call. 2913 It is unclear how to compute that number here. */ 2914 if (current_frame_info.n_input_regs != 0) 2915 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 2916 GEN_INT (0), GEN_INT (0), 2917 GEN_INT (current_frame_info.n_input_regs), 2918 GEN_INT (0))); 2919 } 2920} 2921 2922/* Return 1 if br.ret can do all the work required to return from a 2923 function. */ 2924 2925int 2926ia64_direct_return () 2927{ 2928 if (reload_completed && ! frame_pointer_needed) 2929 { 2930 ia64_compute_frame_size (get_frame_size ()); 2931 2932 return (current_frame_info.total_size == 0 2933 && current_frame_info.n_spilled == 0 2934 && current_frame_info.reg_save_b0 == 0 2935 && current_frame_info.reg_save_pr == 0 2936 && current_frame_info.reg_save_ar_pfs == 0 2937 && current_frame_info.reg_save_ar_unat == 0 2938 && current_frame_info.reg_save_ar_lc == 0); 2939 } 2940 return 0; 2941} 2942 2943int 2944ia64_hard_regno_rename_ok (from, to) 2945 int from; 2946 int to; 2947{ 2948 /* Don't clobber any of the registers we reserved for the prologue.
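   Note that the check below rejects a reserved register as either end of the rename; presumably renaming into one would clobber the saved value, while renaming out of one would break the epilogue's assumption about where that value lives.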
*/ 2949 if (to == current_frame_info.reg_fp 2950 || to == current_frame_info.reg_save_b0 2951 || to == current_frame_info.reg_save_pr 2952 || to == current_frame_info.reg_save_ar_pfs 2953 || to == current_frame_info.reg_save_ar_unat 2954 || to == current_frame_info.reg_save_ar_lc) 2955 return 0; 2956 2957 if (from == current_frame_info.reg_fp 2958 || from == current_frame_info.reg_save_b0 2959 || from == current_frame_info.reg_save_pr 2960 || from == current_frame_info.reg_save_ar_pfs 2961 || from == current_frame_info.reg_save_ar_unat 2962 || from == current_frame_info.reg_save_ar_lc) 2963 return 0; 2964 2965 /* Don't use output registers outside the register frame. */ 2966 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 2967 return 0; 2968 2969 /* Retain even/oddness on predicate register pairs. */ 2970 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 2971 return (from & 1) == (to & 1); 2972 2973 return 1; 2974} 2975 2976/* Target hook for assembling integer objects. Handle word-sized 2977 aligned objects and detect the cases when @fptr is needed. */ 2978 2979static bool 2980ia64_assemble_integer (x, size, aligned_p) 2981 rtx x; 2982 unsigned int size; 2983 int aligned_p; 2984{ 2985 if (size == (TARGET_ILP32 ? 4 : 8) 2986 && aligned_p 2987 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 2988 && GET_CODE (x) == SYMBOL_REF 2989 && SYMBOL_REF_FLAG (x)) 2990 { 2991 if (TARGET_ILP32) 2992 fputs ("\tdata4\t@fptr(", asm_out_file); 2993 else 2994 fputs ("\tdata8\t@fptr(", asm_out_file); 2995 output_addr_const (asm_out_file, x); 2996 fputs (")\n", asm_out_file); 2997 return true; 2998 } 2999 return default_assemble_integer (x, size, aligned_p); 3000} 3001 3002/* Emit the function prologue. */ 3003 3004static void 3005ia64_output_function_prologue (file, size) 3006 FILE *file; 3007 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3008{ 3009 int mask, grsave, grsave_prev; 3010 3011 if (current_frame_info.need_regstk) 3012 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 3013 current_frame_info.n_input_regs, 3014 current_frame_info.n_local_regs, 3015 current_frame_info.n_output_regs, 3016 current_frame_info.n_rotate_regs); 3017 3018 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3019 return; 3020 3021 /* Emit the .prologue directive. */ 3022 3023 mask = 0; 3024 grsave = grsave_prev = 0; 3025 if (current_frame_info.reg_save_b0 != 0) 3026 { 3027 mask |= 8; 3028 grsave = grsave_prev = current_frame_info.reg_save_b0; 3029 } 3030 if (current_frame_info.reg_save_ar_pfs != 0 3031 && (grsave_prev == 0 3032 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1)) 3033 { 3034 mask |= 4; 3035 if (grsave_prev == 0) 3036 grsave = current_frame_info.reg_save_ar_pfs; 3037 grsave_prev = current_frame_info.reg_save_ar_pfs; 3038 } 3039 if (current_frame_info.reg_fp != 0 3040 && (grsave_prev == 0 3041 || current_frame_info.reg_fp == grsave_prev + 1)) 3042 { 3043 mask |= 2; 3044 if (grsave_prev == 0) 3045 grsave = HARD_FRAME_POINTER_REGNUM; 3046 grsave_prev = current_frame_info.reg_fp; 3047 } 3048 if (current_frame_info.reg_save_pr != 0 3049 && (grsave_prev == 0 3050 || current_frame_info.reg_save_pr == grsave_prev + 1)) 3051 { 3052 mask |= 1; 3053 if (grsave_prev == 0) 3054 grsave = current_frame_info.reg_save_pr; 3055 } 3056 3057 if (mask) 3058 fprintf (file, "\t.prologue %d, %d\n", mask, 3059 ia64_dbx_register_number (grsave)); 3060 else 3061 fputs ("\t.prologue\n", file); 3062 3063 /* Emit a .spill directive, if necessary, to relocate the base of 3064 the register spill area. 
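   As the guard below implies, spill_cfa_off == -16 (the usual case, when there are no pretend arguments) matches the assembler's default, so the directive is only needed for a non-default offset.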
*/ 3065 if (current_frame_info.spill_cfa_off != -16) 3066 fprintf (file, "\t.spill %ld\n", 3067 (long) (current_frame_info.spill_cfa_off 3068 + current_frame_info.spill_size)); 3069} 3070 3071/* Emit the .body directive at the scheduled end of the prologue. */ 3072 3073static void 3074ia64_output_function_end_prologue (file) 3075 FILE *file; 3076{ 3077 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3078 return; 3079 3080 fputs ("\t.body\n", file); 3081} 3082 3083/* Emit the function epilogue. */ 3084 3085static void 3086ia64_output_function_epilogue (file, size) 3087 FILE *file ATTRIBUTE_UNUSED; 3088 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3089{ 3090 int i; 3091 3092 /* Reset from the function's potential modifications. */ 3093 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM; 3094 3095 if (current_frame_info.reg_fp) 3096 { 3097 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3098 reg_names[HARD_FRAME_POINTER_REGNUM] 3099 = reg_names[current_frame_info.reg_fp]; 3100 reg_names[current_frame_info.reg_fp] = tmp; 3101 } 3102 if (! TARGET_REG_NAMES) 3103 { 3104 for (i = 0; i < current_frame_info.n_input_regs; i++) 3105 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 3106 for (i = 0; i < current_frame_info.n_local_regs; i++) 3107 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 3108 for (i = 0; i < current_frame_info.n_output_regs; i++) 3109 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 3110 } 3111 3112 current_frame_info.initialized = 0; 3113} 3114 3115int 3116ia64_dbx_register_number (regno) 3117 int regno; 3118{ 3119 /* In ia64_expand_prologue we quite literally renamed the frame pointer 3120 from its home at loc79 to something inside the register frame. We 3121 must perform the same renumbering here for the debug info. */ 3122 if (current_frame_info.reg_fp) 3123 { 3124 if (regno == HARD_FRAME_POINTER_REGNUM) 3125 regno = current_frame_info.reg_fp; 3126 else if (regno == current_frame_info.reg_fp) 3127 regno = HARD_FRAME_POINTER_REGNUM; 3128 } 3129 3130 if (IN_REGNO_P (regno)) 3131 return 32 + regno - IN_REG (0); 3132 else if (LOC_REGNO_P (regno)) 3133 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 3134 else if (OUT_REGNO_P (regno)) 3135 return (32 + current_frame_info.n_input_regs 3136 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 3137 else 3138 return regno; 3139} 3140 3141void 3142ia64_initialize_trampoline (addr, fnaddr, static_chain) 3143 rtx addr, fnaddr, static_chain; 3144{ 3145 rtx addr_reg, eight = GEN_INT (8); 3146 3147 /* Load up our iterator. */ 3148 addr_reg = gen_reg_rtx (Pmode); 3149 emit_move_insn (addr_reg, addr); 3150 3151 /* The first two words are the fake descriptor: 3152 __ia64_trampoline, ADDR+16. */ 3153 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3154 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline")); 3155 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3156 3157 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3158 copy_to_reg (plus_constant (addr, 16))); 3159 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3160 3161 /* The third word is the target descriptor. */ 3162 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr); 3163 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3164 3165 /* The fourth word is the static chain. */ 3166 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain); 3167} 3168 3169/* Do any needed setup for a variadic function. CUM has not been updated 3170 for the last named argument which has type TYPE and mode MODE. 
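   For instance (a hypothetical signature): for f (int fmt, ...), advancing past the one named argument leaves cum.words == 1, so the code records n_varargs == 7 of the MAX_ARGUMENT_SLOTS (8) slots and a pretend size of 7 * UNITS_PER_WORD bytes.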
3171 3172 We generate the actual spill instructions during prologue generation. */ 3173 3174void 3175ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) 3176 CUMULATIVE_ARGS cum; 3177 int int_mode; 3178 tree type; 3179 int * pretend_size; 3180 int second_time ATTRIBUTE_UNUSED; 3181{ 3182 /* Skip the current argument. */ 3183 ia64_function_arg_advance (&cum, int_mode, type, 1); 3184 3185 if (cum.words < MAX_ARGUMENT_SLOTS) 3186 { 3187 int n = MAX_ARGUMENT_SLOTS - cum.words; 3188 *pretend_size = n * UNITS_PER_WORD; 3189 cfun->machine->n_varargs = n; 3190 } 3191} 3192 3193/* Check whether TYPE is a homogeneous floating point aggregate. If 3194 it is, return the mode of the floating point type that appears 3195 in all leaves. If it is not, return VOIDmode. 3196 3197 An aggregate is a homogeneous floating point aggregate if all 3198 fields/elements in it have the same floating point type (e.g., 3199 SFmode). 128-bit quad-precision floats are excluded. */ 3200 3201static enum machine_mode 3202hfa_element_mode (type, nested) 3203 tree type; 3204 int nested; 3205{ 3206 enum machine_mode element_mode = VOIDmode; 3207 enum machine_mode mode; 3208 enum tree_code code = TREE_CODE (type); 3209 int know_element_mode = 0; 3210 tree t; 3211 3212 switch (code) 3213 { 3214 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: 3215 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: 3216 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: 3217 case FILE_TYPE: case SET_TYPE: case LANG_TYPE: 3218 case FUNCTION_TYPE: 3219 return VOIDmode; 3220 3221 /* Fortran complex types are supposed to be HFAs, so we need to handle 3222 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex 3223 types though. */ 3224 case COMPLEX_TYPE: 3225 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT 3226 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT)) 3227 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) 3228 * BITS_PER_UNIT, MODE_FLOAT, 0); 3229 else 3230 return VOIDmode; 3231 3232 case REAL_TYPE: 3233 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual 3234 mode if this is contained within an aggregate. */ 3235 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT)) 3236 return TYPE_MODE (type); 3237 else 3238 return VOIDmode; 3239 3240 case ARRAY_TYPE: 3241 return hfa_element_mode (TREE_TYPE (type), 1); 3242 3243 case RECORD_TYPE: 3244 case UNION_TYPE: 3245 case QUAL_UNION_TYPE: 3246 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) 3247 { 3248 if (TREE_CODE (t) != FIELD_DECL) 3249 continue; 3250 3251 mode = hfa_element_mode (TREE_TYPE (t), 1); 3252 if (know_element_mode) 3253 { 3254 if (mode != element_mode) 3255 return VOIDmode; 3256 } 3257 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 3258 return VOIDmode; 3259 else 3260 { 3261 know_element_mode = 1; 3262 element_mode = mode; 3263 } 3264 } 3265 return element_mode; 3266 3267 default: 3268 /* If we reach here, we probably have some front-end specific type 3269 that the backend doesn't know about. This can happen via the 3270 aggregate_value_p call in init_function_start. All we can do is 3271 ignore unknown tree types. */ 3272 return VOIDmode; 3273 } 3274 3275 return VOIDmode; 3276} 3277 3278/* Return rtx for register where argument is passed, or zero if it is passed 3279 on the stack. */ 3280 3281/* ??? 128-bit quad-precision floats are always passed in general 3282 registers.
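   To make the HFA cases below concrete (types invented for illustration): struct { float x, y, z; } is an SFmode HFA and is passed in FR registers, whereas struct { float x; double y; } mixes element modes, is not an HFA, and is passed like any other aggregate.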
*/ 3283 3284rtx 3285ia64_function_arg (cum, mode, type, named, incoming) 3286 CUMULATIVE_ARGS *cum; 3287 enum machine_mode mode; 3288 tree type; 3289 int named; 3290 int incoming; 3291{ 3292 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 3293 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3294 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3295 / UNITS_PER_WORD); 3296 int offset = 0; 3297 enum machine_mode hfa_mode = VOIDmode; 3298 3299 /* Integer and float arguments larger than 8 bytes start at the next even 3300 boundary. Aggregates larger than 8 bytes start at the next even boundary 3301 if the aggregate has 16 byte alignment. Net effect is that types with 3302 alignment greater than 8 start at the next even boundary. */ 3303 /* ??? The ABI does not specify how to handle aggregates with alignment from 3304 9 to 15 bytes, or greater than 16. We handle them all as if they had 3305 16 byte alignment. Such aggregates can occur only if gcc extensions are 3306 used. */ 3307 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3308 : (words > 1)) 3309 && (cum->words & 1)) 3310 offset = 1; 3311 3312 /* If all argument slots are used, then it must go on the stack. */ 3313 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3314 return 0; 3315 3316 /* Check for and handle homogeneous FP aggregates. */ 3317 if (type) 3318 hfa_mode = hfa_element_mode (type, 0); 3319 3320 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3321 and unprototyped hfas are passed specially. */ 3322 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 3323 { 3324 rtx loc[16]; 3325 int i = 0; 3326 int fp_regs = cum->fp_regs; 3327 int int_regs = cum->words + offset; 3328 int hfa_size = GET_MODE_SIZE (hfa_mode); 3329 int byte_size; 3330 int args_byte_size; 3331 3332 /* If prototyped, pass it in FR regs then GR regs. 3333 If not prototyped, pass it in both FR and GR regs. 3334 3335 If this is an SFmode aggregate, then it is possible to run out of 3336 FR regs while GR regs are still left. In that case, we pass the 3337 remaining part in the GR regs. */ 3338 3339 /* Fill the FP regs. We do this always. We stop if we reach the end 3340 of the argument, the last FP register, or the last argument slot. */ 3341 3342 byte_size = ((mode == BLKmode) 3343 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3344 args_byte_size = int_regs * UNITS_PER_WORD; 3345 offset = 0; 3346 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3347 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 3348 { 3349 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3350 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 3351 + fp_regs)), 3352 GEN_INT (offset)); 3353 offset += hfa_size; 3354 args_byte_size += hfa_size; 3355 fp_regs++; 3356 } 3357 3358 /* If no prototype, then the whole thing must go in GR regs. */ 3359 if (! cum->prototype) 3360 offset = 0; 3361 /* If this is an SFmode aggregate, then we might have some left over 3362 that needs to go in GR regs. */ 3363 else if (byte_size != offset) 3364 int_regs += offset / UNITS_PER_WORD; 3365 3366 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ 3367 3368 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 3369 { 3370 enum machine_mode gr_mode = DImode; 3371 3372 /* If we have an odd 4 byte hunk because we ran out of FR regs, 3373 then this goes in a GR reg left adjusted/little endian, right 3374 adjusted/big endian. */ 3375 /* ??? Currently this is handled wrong, because 4-byte hunks are 3376 always right adjusted/little endian. 
*/ 3377 if (offset & 0x4) 3378 gr_mode = SImode; 3379 /* If we have an even 4 byte hunk because the aggregate is a 3380 multiple of 4 bytes in size, then this goes in a GR reg right 3381 adjusted/little endian. */ 3382 else if (byte_size - offset == 4) 3383 gr_mode = SImode; 3384 /* Complex floats need to have float mode. */ 3385 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 3386 gr_mode = hfa_mode; 3387 3388 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3389 gen_rtx_REG (gr_mode, (basereg 3390 + int_regs)), 3391 GEN_INT (offset)); 3392 offset += GET_MODE_SIZE (gr_mode); 3393 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD 3394 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD; 3395 } 3396 3397 /* If we ended up using just one location, just return that one loc. */ 3398 if (i == 1) 3399 return XEXP (loc[0], 0); 3400 else 3401 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3402 } 3403 3404 /* Integral and aggregates go in general registers. If we have run out of 3405 FR registers, then FP values must also go in general registers. This can 3406 happen when we have an SFmode HFA. */ 3407 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT) 3408 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 3409 { 3410 int byte_size = ((mode == BLKmode) 3411 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3412 if (BYTES_BIG_ENDIAN 3413 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3414 && byte_size < UNITS_PER_WORD 3415 && byte_size > 0) 3416 { 3417 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3418 gen_rtx_REG (DImode, 3419 (basereg + cum->words 3420 + offset)), 3421 const0_rtx); 3422 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 3423 } 3424 else 3425 return gen_rtx_REG (mode, basereg + cum->words + offset); 3426 3427 } 3428 3429 /* If there is a prototype, then FP values go in an FR register when 3430 named, and in a GR register when unnamed. */ 3431 else if (cum->prototype) 3432 { 3433 if (! named) 3434 return gen_rtx_REG (mode, basereg + cum->words + offset); 3435 else 3436 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); 3437 } 3438 /* If there is no prototype, then FP values go in both FR and GR 3439 registers. */ 3440 else 3441 { 3442 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, 3443 gen_rtx_REG (mode, (FR_ARG_FIRST 3444 + cum->fp_regs)), 3445 const0_rtx); 3446 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3447 gen_rtx_REG (mode, 3448 (basereg + cum->words 3449 + offset)), 3450 const0_rtx); 3451 3452 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); 3453 } 3454} 3455 3456/* Return number of words, at the beginning of the argument, that must be 3457 put in registers. 0 if the argument is entirely in registers or entirely 3458 in memory. */ 3459 3460int 3461ia64_function_arg_partial_nregs (cum, mode, type, named) 3462 CUMULATIVE_ARGS *cum; 3463 enum machine_mode mode; 3464 tree type; 3465 int named ATTRIBUTE_UNUSED; 3466{ 3467 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3468 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3469 / UNITS_PER_WORD); 3470 int offset = 0; 3471 3472 /* Arguments with alignment larger than 8 bytes start at the next even 3473 boundary. */ 3474 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3475 : (words > 1)) 3476 && (cum->words & 1)) 3477 offset = 1; 3478 3479 /* If all argument slots are used, then it must go on the stack. */ 3480 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3481 return 0; 3482 3483 /* It doesn't matter whether the argument goes in FR or GR regs.
If 3484 it fits within the 8 argument slots, then it goes entirely in 3485 registers. If it extends past the last argument slot, then the rest 3486 goes on the stack. */ 3487 3488 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) 3489 return 0; 3490 3491 return MAX_ARGUMENT_SLOTS - cum->words - offset; 3492} 3493 3494/* Update CUM to point after this argument. This is patterned after 3495 ia64_function_arg. */ 3496 3497void 3498ia64_function_arg_advance (cum, mode, type, named) 3499 CUMULATIVE_ARGS *cum; 3500 enum machine_mode mode; 3501 tree type; 3502 int named; 3503{ 3504 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3505 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3506 / UNITS_PER_WORD); 3507 int offset = 0; 3508 enum machine_mode hfa_mode = VOIDmode; 3509 3510 /* If all arg slots are already full, then there is nothing to do. */ 3511 if (cum->words >= MAX_ARGUMENT_SLOTS) 3512 return; 3513 3514 /* Arguments with alignment larger than 8 bytes start at the next even 3515 boundary. */ 3516 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3517 : (words > 1)) 3518 && (cum->words & 1)) 3519 offset = 1; 3520 3521 cum->words += words + offset; 3522 3523 /* Check for and handle homogeneous FP aggregates. */ 3524 if (type) 3525 hfa_mode = hfa_element_mode (type, 0); 3526 3527 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3528 and unprototyped hfas are passed specially. */ 3529 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 3530 { 3531 int fp_regs = cum->fp_regs; 3532 /* This is the original value of cum->words + offset. */ 3533 int int_regs = cum->words - words; 3534 int hfa_size = GET_MODE_SIZE (hfa_mode); 3535 int byte_size; 3536 int args_byte_size; 3537 3538 /* If prototyped, pass it in FR regs then GR regs. 3539 If not prototyped, pass it in both FR and GR regs. 3540 3541 If this is an SFmode aggregate, then it is possible to run out of 3542 FR regs while GR regs are still left. In that case, we pass the 3543 remaining part in the GR regs. */ 3544 3545 /* Fill the FP regs. We do this always. We stop if we reach the end 3546 of the argument, the last FP register, or the last argument slot. */ 3547 3548 byte_size = ((mode == BLKmode) 3549 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3550 args_byte_size = int_regs * UNITS_PER_WORD; 3551 offset = 0; 3552 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3553 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) 3554 { 3555 offset += hfa_size; 3556 args_byte_size += hfa_size; 3557 fp_regs++; 3558 } 3559 3560 cum->fp_regs = fp_regs; 3561 } 3562 3563 /* Integral and aggregates go in general registers. If we have run out of 3564 FR registers, then FP values must also go in general registers. This can 3565 happen when we have an SFmode HFA. */ 3566 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) 3567 cum->int_regs = cum->words; 3568 3569 /* If there is a prototype, then FP values go in an FR register when 3570 named, and in a GR register when unnamed. */ 3571 else if (cum->prototype) 3572 { 3573 if (! named) 3574 cum->int_regs = cum->words; 3575 else 3576 /* ??? Complex types should not reach here. */ 3577 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 3578 } 3579 /* If there is no prototype, then FP values go in both FR and GR 3580 registers. */ 3581 else 3582 { 3583 /* ??? Complex types should not reach here. */ 3584 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ?
2 : 1); 3585 cum->int_regs = cum->words; 3586 } 3587} 3588 3589/* Variable sized types are passed by reference. */ 3590/* ??? At present this is a GCC extension to the IA-64 ABI. */ 3591 3592int 3593ia64_function_arg_pass_by_reference (cum, mode, type, named) 3594 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; 3595 enum machine_mode mode ATTRIBUTE_UNUSED; 3596 tree type; 3597 int named ATTRIBUTE_UNUSED; 3598{ 3599 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 3600} 3601 3602 3603/* Implement va_arg. */ 3604 3605rtx 3606ia64_va_arg (valist, type) 3607 tree valist, type; 3608{ 3609 tree t; 3610 3611 /* Variable sized types are passed by reference. */ 3612 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 3613 { 3614 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type)); 3615 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr)); 3616 } 3617 3618 /* Arguments with alignment larger than 8 bytes start at the next even 3619 boundary. */ 3620 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3621 { 3622 t = build (PLUS_EXPR, TREE_TYPE (valist), valist, 3623 build_int_2 (2 * UNITS_PER_WORD - 1, 0)); 3624 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, 3625 build_int_2 (-2 * UNITS_PER_WORD, -1)); 3626 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); 3627 TREE_SIDE_EFFECTS (t) = 1; 3628 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 3629 } 3630 3631 return std_expand_builtin_va_arg (valist, type); 3632} 3633 3634/* Return 1 if the function return value is returned in memory. Return 0 if 3635 it is in a register. */ 3636 3637int 3638ia64_return_in_memory (valtype) 3639 tree valtype; 3640{ 3641 enum machine_mode mode; 3642 enum machine_mode hfa_mode; 3643 HOST_WIDE_INT byte_size; 3644 3645 mode = TYPE_MODE (valtype); 3646 byte_size = GET_MODE_SIZE (mode); 3647 if (mode == BLKmode) 3648 { 3649 byte_size = int_size_in_bytes (valtype); 3650 if (byte_size < 0) 3651 return 1; 3652 } 3653 3654 /* HFAs with up to 8 elements are returned in the FP argument registers. */ 3655 3656 hfa_mode = hfa_element_mode (valtype, 0); 3657 if (hfa_mode != VOIDmode) 3658 { 3659 int hfa_size = GET_MODE_SIZE (hfa_mode); 3660 3661 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) 3662 return 1; 3663 else 3664 return 0; 3665 } 3666 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) 3667 return 1; 3668 else 3669 return 0; 3670} 3671 3672/* Return rtx for register that holds the function return value. */ 3673 3674rtx 3675ia64_function_value (valtype, func) 3676 tree valtype; 3677 tree func ATTRIBUTE_UNUSED; 3678{ 3679 enum machine_mode mode; 3680 enum machine_mode hfa_mode; 3681 3682 mode = TYPE_MODE (valtype); 3683 hfa_mode = hfa_element_mode (valtype, 0); 3684 3685 if (hfa_mode != VOIDmode) 3686 { 3687 rtx loc[8]; 3688 int i; 3689 int hfa_size; 3690 int byte_size; 3691 int offset; 3692 3693 hfa_size = GET_MODE_SIZE (hfa_mode); 3694 byte_size = ((mode == BLKmode) 3695 ?
int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); 3696 offset = 0; 3697 for (i = 0; offset < byte_size; i++) 3698 { 3699 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3700 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), 3701 GEN_INT (offset)); 3702 offset += hfa_size; 3703 } 3704 3705 if (i == 1) 3706 return XEXP (loc[0], 0); 3707 else 3708 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3709 } 3710 else if (FLOAT_TYPE_P (valtype) && 3711 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT)) 3712 return gen_rtx_REG (mode, FR_ARG_FIRST); 3713 else 3714 { 3715 if (BYTES_BIG_ENDIAN 3716 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) 3717 { 3718 rtx loc[8]; 3719 int offset; 3720 int bytesize; 3721 int i; 3722 3723 offset = 0; 3724 bytesize = int_size_in_bytes (valtype); 3725 for (i = 0; offset < bytesize; i++) 3726 { 3727 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3728 gen_rtx_REG (DImode, 3729 GR_RET_FIRST + i), 3730 GEN_INT (offset)); 3731 offset += UNITS_PER_WORD; 3732 } 3733 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3734 } 3735 else 3736 return gen_rtx_REG (mode, GR_RET_FIRST); 3737 } 3738} 3739 3740/* Print a memory address as an operand to reference that memory location. */ 3741 3742/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps 3743 also call this from ia64_print_operand for memory addresses. */ 3744 3745void 3746ia64_print_operand_address (stream, address) 3747 FILE * stream ATTRIBUTE_UNUSED; 3748 rtx address ATTRIBUTE_UNUSED; 3749{ 3750} 3751 3752/* Print an operand to an assembler instruction. 3753 C Swap and print a comparison operator. 3754 D Print an FP comparison operator. 3755 E Print 32 - constant, for SImode shifts as extract. 3756 e Print 64 - constant, for DImode rotates. 3757 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 3758 a floating point register emitted normally. 3759 I Invert a predicate register by adding 1. 3760 J Select the proper predicate register for a condition. 3761 j Select the inverse predicate register for a condition. 3762 O Append .acq for volatile load. 3763 P Postincrement of a MEM. 3764 Q Append .rel for volatile store. 3765 S Shift amount for shladd instruction. 3766 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 3767 for Intel assembler. 3768 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 3769 for Intel assembler. 3770 r Print register name, or constant 0 as r0. HP compatibility for 3771 Linux kernel. */ 3772void 3773ia64_print_operand (file, x, code) 3774 FILE * file; 3775 rtx x; 3776 int code; 3777{ 3778 const char *str; 3779 3780 switch (code) 3781 { 3782 case 0: 3783 /* Handled below. 
*/ 3784 break; 3785 3786 case 'C': 3787 { 3788 enum rtx_code c = swap_condition (GET_CODE (x)); 3789 fputs (GET_RTX_NAME (c), file); 3790 return; 3791 } 3792 3793 case 'D': 3794 switch (GET_CODE (x)) 3795 { 3796 case NE: 3797 str = "neq"; 3798 break; 3799 case UNORDERED: 3800 str = "unord"; 3801 break; 3802 case ORDERED: 3803 str = "ord"; 3804 break; 3805 default: 3806 str = GET_RTX_NAME (GET_CODE (x)); 3807 break; 3808 } 3809 fputs (str, file); 3810 return; 3811 3812 case 'E': 3813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 3814 return; 3815 3816 case 'e': 3817 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 3818 return; 3819 3820 case 'F': 3821 if (x == CONST0_RTX (GET_MODE (x))) 3822 str = reg_names [FR_REG (0)]; 3823 else if (x == CONST1_RTX (GET_MODE (x))) 3824 str = reg_names [FR_REG (1)]; 3825 else if (GET_CODE (x) == REG) 3826 str = reg_names [REGNO (x)]; 3827 else 3828 abort (); 3829 fputs (str, file); 3830 return; 3831 3832 case 'I': 3833 fputs (reg_names [REGNO (x) + 1], file); 3834 return; 3835 3836 case 'J': 3837 case 'j': 3838 { 3839 unsigned int regno = REGNO (XEXP (x, 0)); 3840 if (GET_CODE (x) == EQ) 3841 regno += 1; 3842 if (code == 'j') 3843 regno ^= 1; 3844 fputs (reg_names [regno], file); 3845 } 3846 return; 3847 3848 case 'O': 3849 if (MEM_VOLATILE_P (x)) 3850 fputs(".acq", file); 3851 return; 3852 3853 case 'P': 3854 { 3855 HOST_WIDE_INT value; 3856 3857 switch (GET_CODE (XEXP (x, 0))) 3858 { 3859 default: 3860 return; 3861 3862 case POST_MODIFY: 3863 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 3864 if (GET_CODE (x) == CONST_INT) 3865 value = INTVAL (x); 3866 else if (GET_CODE (x) == REG) 3867 { 3868 fprintf (file, ", %s", reg_names[REGNO (x)]); 3869 return; 3870 } 3871 else 3872 abort (); 3873 break; 3874 3875 case POST_INC: 3876 value = GET_MODE_SIZE (GET_MODE (x)); 3877 break; 3878 3879 case POST_DEC: 3880 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 3881 break; 3882 } 3883 3884 putc (',', file); 3885 putc (' ', file); 3886 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value); 3887 return; 3888 } 3889 3890 case 'Q': 3891 if (MEM_VOLATILE_P (x)) 3892 fputs(".rel", file); 3893 return; 3894 3895 case 'S': 3896 fprintf (file, "%d", exact_log2 (INTVAL (x))); 3897 return; 3898 3899 case 'T': 3900 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3901 { 3902 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 3903 return; 3904 } 3905 break; 3906 3907 case 'U': 3908 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3909 { 3910 const char *prefix = "0x"; 3911 if (INTVAL (x) & 0x80000000) 3912 { 3913 fprintf (file, "0xffffffff"); 3914 prefix = ""; 3915 } 3916 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 3917 return; 3918 } 3919 break; 3920 3921 case 'r': 3922 /* If this operand is the constant zero, write it as register zero. 3923 Any register, zero, or CONST_INT value is OK here. */ 3924 if (GET_CODE (x) == REG) 3925 fputs (reg_names[REGNO (x)], file); 3926 else if (x == CONST0_RTX (GET_MODE (x))) 3927 fputs ("r0", file); 3928 else if (GET_CODE (x) == CONST_INT) 3929 output_addr_const (file, x); 3930 else 3931 output_operand_lossage ("invalid %%r value"); 3932 return; 3933 3934 case '+': 3935 { 3936 const char *which; 3937 3938 /* For conditional branches, returns or calls, substitute 3939 sptk, dptk, dpnt, or spnt for %s. */ 3940 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 3941 if (x) 3942 { 3943 int pred_val = INTVAL (XEXP (x, 0)); 3944 3945 /* Guess top and bottom 10% statically predicted. 
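(Concretely, the thresholds used below are 2% and 98% of REG_BR_PROB_BASE; e.g. a branch whose pred_val is below REG_BR_PROB_BASE / 50 gets the .spnt static-not-taken hint.)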
*/ 3946 if (pred_val < REG_BR_PROB_BASE / 50) 3947 which = ".spnt"; 3948 else if (pred_val < REG_BR_PROB_BASE / 2) 3949 which = ".dpnt"; 3950 else if (pred_val < REG_BR_PROB_BASE / 100 * 98) 3951 which = ".dptk"; 3952 else 3953 which = ".sptk"; 3954 } 3955 else if (GET_CODE (current_output_insn) == CALL_INSN) 3956 which = ".sptk"; 3957 else 3958 which = ".dptk"; 3959 3960 fputs (which, file); 3961 return; 3962 } 3963 3964 case ',': 3965 x = current_insn_predicate; 3966 if (x) 3967 { 3968 unsigned int regno = REGNO (XEXP (x, 0)); 3969 if (GET_CODE (x) == EQ) 3970 regno += 1; 3971 fprintf (file, "(%s) ", reg_names [regno]); 3972 } 3973 return; 3974 3975 default: 3976 output_operand_lossage ("ia64_print_operand: unknown code"); 3977 return; 3978 } 3979 3980 switch (GET_CODE (x)) 3981 { 3982 /* This happens for the spill/restore instructions. */ 3983 case POST_INC: 3984 case POST_DEC: 3985 case POST_MODIFY: 3986 x = XEXP (x, 0); 3987 /* ... fall through ... */ 3988 3989 case REG: 3990 fputs (reg_names [REGNO (x)], file); 3991 break; 3992 3993 case MEM: 3994 { 3995 rtx addr = XEXP (x, 0); 3996 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a') 3997 addr = XEXP (addr, 0); 3998 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 3999 break; 4000 } 4001 4002 default: 4003 output_addr_const (file, x); 4004 break; 4005 } 4006 4007 return; 4008} 4009 4010/* Calculate the cost of moving data from a register in class FROM to 4011 one in class TO, using MODE. */ 4012 4013int 4014ia64_register_move_cost (mode, from, to) 4015 enum machine_mode mode; 4016 enum reg_class from, to; 4017{ 4018 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 4019 if (to == ADDL_REGS) 4020 to = GR_REGS; 4021 if (from == ADDL_REGS) 4022 from = GR_REGS; 4023 4024 /* All costs are symmetric, so reduce cases by putting the 4025 lower-numbered class as the destination. */ 4026 if (from < to) 4027 { 4028 enum reg_class tmp = to; 4029 to = from, from = tmp; 4030 } 4031 4032 /* Moving from FR<->GR in TFmode must be more expensive than 2, 4033 so that we get secondary memory reloads. Between FR_REGS, 4034 we have to make this at least as expensive as MEMORY_MOVE_COST 4035 to avoid spectacularly poor register class preferencing. */ 4036 if (mode == TFmode) 4037 { 4038 if (to != GR_REGS || from != GR_REGS) 4039 return MEMORY_MOVE_COST (mode, to, 0); 4040 else 4041 return 3; 4042 } 4043 4044 switch (to) 4045 { 4046 case PR_REGS: 4047 /* Moving between PR registers takes two insns. */ 4048 if (from == PR_REGS) 4049 return 3; 4050 /* Moving between PR and anything but GR is impossible. */ 4051 if (from != GR_REGS) 4052 return MEMORY_MOVE_COST (mode, to, 0); 4053 break; 4054 4055 case BR_REGS: 4056 /* Moving between BR and anything but GR is impossible. */ 4057 if (from != GR_REGS && from != GR_AND_BR_REGS) 4058 return MEMORY_MOVE_COST (mode, to, 0); 4059 break; 4060 4061 case AR_I_REGS: 4062 case AR_M_REGS: 4063 /* Moving between AR and anything but GR is impossible. */ 4064 if (from != GR_REGS) 4065 return MEMORY_MOVE_COST (mode, to, 0); 4066 break; 4067 4068 case GR_REGS: 4069 case FR_REGS: 4070 case GR_AND_FR_REGS: 4071 case GR_AND_BR_REGS: 4072 case ALL_REGS: 4073 break; 4074 4075 default: 4076 abort (); 4077 } 4078 4079 return 2; 4080} 4081 4082/* This function returns the register class required for a secondary 4083 register when copying between one of the registers in CLASS and X, 4084 using MODE. A return value of NO_REGS means that no secondary register 4085 is required.
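For example (an illustrative case): a copy between a stack slot and a branch register cannot be done directly, so the BR_REGS case below returns GR_REGS and the value is bounced through a general register (a ld8 into a GR followed by a mov into the BR).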
*/ 4086 4087enum reg_class 4088ia64_secondary_reload_class (class, mode, x) 4089 enum reg_class class; 4090 enum machine_mode mode ATTRIBUTE_UNUSED; 4091 rtx x; 4092{ 4093 int regno = -1; 4094 4095 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 4096 regno = true_regnum (x); 4097 4098 switch (class) 4099 { 4100 case BR_REGS: 4101 case AR_M_REGS: 4102 case AR_I_REGS: 4103 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 4104 interaction. We end up with two pseudos with overlapping lifetimes 4105 both of which are equiv to the same constant, and both of which need 4106 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 4107 changes depending on the path length, which means the qty_first_reg 4108 check in make_regs_eqv can give different answers at different times. 4109 At some point I'll probably need a reload_indi pattern to handle 4110 this. 4111 4112 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 4113 wound up with an FP register from GR_AND_FR_REGS. Extend that to all 4114 non-general registers for good measure. */ 4115 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 4116 return GR_REGS; 4117 4118 /* This is needed if a pseudo used as a call_operand gets spilled to a 4119 stack slot. */ 4120 if (GET_CODE (x) == MEM) 4121 return GR_REGS; 4122 break; 4123 4124 case FR_REGS: 4125 /* Need to go through general registers to get to other class regs. */ 4126 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) 4127 return GR_REGS; 4128 4129 /* This can happen when a paradoxical subreg is an operand to the 4130 muldi3 pattern. */ 4131 /* ??? This shouldn't be necessary after instruction scheduling is 4132 enabled, because paradoxical subregs are not accepted by 4133 register_operand when INSN_SCHEDULING is defined. Or alternatively, 4134 stop the paradoxical subreg stupidity in the *_operand functions 4135 in recog.c. */ 4136 if (GET_CODE (x) == MEM 4137 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode 4138 || GET_MODE (x) == QImode)) 4139 return GR_REGS; 4140 4141 /* This can happen because of the ior/and/etc patterns that accept FP 4142 registers as operands. If the third operand is a constant, then it 4143 needs to be reloaded into an FP register. */ 4144 if (GET_CODE (x) == CONST_INT) 4145 return GR_REGS; 4146 4147 /* This can happen because of register elimination in a muldi3 insn. 4148 E.g. `26107 * (unsigned long)&u'. */ 4149 if (GET_CODE (x) == PLUS) 4150 return GR_REGS; 4151 break; 4152 4153 case PR_REGS: 4154 /* ??? This happens if we cse/gcse a BImode value across a call, 4155 and the function has a nonlocal goto. This is because global 4156 does not allocate call crossing pseudos to hard registers when 4157 current_function_has_nonlocal_goto is true. This is relatively 4158 common for C++ programs that use exceptions. To reproduce, 4159 return NO_REGS and compile libstdc++. */ 4160 if (GET_CODE (x) == MEM) 4161 return GR_REGS; 4162 4163 /* This can happen when we take a BImode subreg of a DImode value, 4164 and that DImode value winds up in some non-GR register. */ 4165 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) 4166 return GR_REGS; 4167 break; 4168 4169 case GR_REGS: 4170 /* Since we have no offsettable memory addresses, we need a temporary 4171 to hold the address of the second word.
*/ 4172 if (mode == TImode) 4173 return GR_REGS; 4174 break; 4175 4176 default: 4177 break; 4178 } 4179 4180 return NO_REGS; 4181} 4182 4183/* Emit text to declare externally defined variables and functions, because 4184 the Intel assembler does not support undefined externals. */ 4185 4186void 4187ia64_asm_output_external (file, decl, name) 4188 FILE *file; 4189 tree decl; 4190 const char *name; 4191{ 4192 int save_referenced; 4193 4194 /* GNU as does not need anything here, but the HP linker does need 4195 something for external functions. */ 4196 4197 if (TARGET_GNU_AS 4198 && (!TARGET_HPUX_LD 4199 || TREE_CODE (decl) != FUNCTION_DECL 4200 || strstr (name, "__builtin_") == name)) 4201 return; 4202 4203 /* ??? The Intel assembler creates a reference that needs to be satisfied by 4204 the linker when we do this, so we need to be careful not to do this for 4205 builtin functions which have no library equivalent. Unfortunately, we 4206 can't tell here whether or not a function will actually be called by 4207 expand_expr, so we pull in library functions even if we may not need 4208 them later. */ 4209 if (! strcmp (name, "__builtin_next_arg") 4210 || ! strcmp (name, "alloca") 4211 || ! strcmp (name, "__builtin_constant_p") 4212 || ! strcmp (name, "__builtin_args_info")) 4213 return; 4214 4215 if (TARGET_HPUX_LD) 4216 ia64_hpux_add_extern_decl (name); 4217 else 4218 { 4219 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and 4220 restore it. */ 4221 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); 4222 if (TREE_CODE (decl) == FUNCTION_DECL) 4223 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 4224 (*targetm.asm_out.globalize_label) (file, name); 4225 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; 4226 } 4227} 4228 4229/* Parse the -mfixed-range= option string. */ 4230 4231static void 4232fix_range (const_str) 4233 const char *const_str; 4234{ 4235 int i, first, last; 4236 char *str, *dash, *comma; 4237 4238 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and 4239 REG2 are either register names or register numbers. The effect 4240 of this option is to mark the registers in the range from REG1 to 4241 REG2 as ``fixed'' so they won't be used by the compiler. This is 4242 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ 4243 4244 i = strlen (const_str); 4245 str = (char *) alloca (i + 1); 4246 memcpy (str, const_str, i + 1); 4247 4248 while (1) 4249 { 4250 dash = strchr (str, '-'); 4251 if (!dash) 4252 { 4253 warning ("value of -mfixed-range must have form REG1-REG2"); 4254 return; 4255 } 4256 *dash = '\0'; 4257 4258 comma = strchr (dash + 1, ','); 4259 if (comma) 4260 *comma = '\0'; 4261 4262 first = decode_reg_name (str); 4263 if (first < 0) 4264 { 4265 warning ("unknown register name: %s", str); 4266 return; 4267 } 4268 4269 last = decode_reg_name (dash + 1); 4270 if (last < 0) 4271 { 4272 warning ("unknown register name: %s", dash + 1); 4273 return; 4274 } 4275 4276 *dash = '-'; 4277 4278 if (first > last) 4279 { 4280 warning ("%s-%s is an empty range", str, dash + 1); 4281 return; 4282 } 4283 4284 for (i = first; i <= last; ++i) 4285 fixed_regs[i] = call_used_regs[i] = 1; 4286 4287 if (!comma) 4288 break; 4289 4290 *comma = ','; 4291 str = comma + 1; 4292 } 4293} 4294 4295static struct machine_function * 4296ia64_init_machine_status () 4297{ 4298 return ggc_alloc_cleared (sizeof (struct machine_function)); 4299} 4300 4301/* Handle TARGET_OPTIONS switches.
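For example (illustrative usage, not from this file): `-mfixed-range=f32-f127' is handled by fix_range above, and `-mtls-size=22' (the accepted values are 14, 22 and 64) sets ia64_tls_size below.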
*/ 4302 4303void 4304ia64_override_options () 4305{ 4306 if (TARGET_AUTO_PIC) 4307 target_flags |= MASK_CONST_GP; 4308 4309 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR) 4310 { 4311 warning ("cannot optimize floating point division for both latency and throughput"); 4312 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR; 4313 } 4314 4315 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR) 4316 { 4317 warning ("cannot optimize integer division for both latency and throughput"); 4318 target_flags &= ~MASK_INLINE_INT_DIV_THR; 4319 } 4320 4321 if (ia64_fixed_range_string) 4322 fix_range (ia64_fixed_range_string); 4323 4324 if (ia64_tls_size_string) 4325 { 4326 char *end; 4327 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10); 4328 if (*end || (tmp != 14 && tmp != 22 && tmp != 64)) 4329 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string); 4330 else 4331 ia64_tls_size = tmp; 4332 } 4333 4334 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; 4335 flag_schedule_insns_after_reload = 0; 4336 4337 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; 4338 4339 init_machine_status = ia64_init_machine_status; 4340 4341 /* Tell the compiler which flavor of TFmode we're using. */ 4342 if (INTEL_EXTENDED_IEEE_FORMAT) 4343 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; 4344} 4345 4346static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); 4347static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); 4348static enum attr_type ia64_safe_type PARAMS((rtx)); 4349 4350static enum attr_itanium_requires_unit0 4351ia64_safe_itanium_requires_unit0 (insn) 4352 rtx insn; 4353{ 4354 if (recog_memoized (insn) >= 0) 4355 return get_attr_itanium_requires_unit0 (insn); 4356 else 4357 return ITANIUM_REQUIRES_UNIT0_NO; 4358} 4359 4360static enum attr_itanium_class 4361ia64_safe_itanium_class (insn) 4362 rtx insn; 4363{ 4364 if (recog_memoized (insn) >= 0) 4365 return get_attr_itanium_class (insn); 4366 else 4367 return ITANIUM_CLASS_UNKNOWN; 4368} 4369 4370static enum attr_type 4371ia64_safe_type (insn) 4372 rtx insn; 4373{ 4374 if (recog_memoized (insn) >= 0) 4375 return get_attr_type (insn); 4376 else 4377 return TYPE_UNKNOWN; 4378} 4379 4380/* The following collection of routines emit instruction group stop bits as 4381 necessary to avoid dependencies. */ 4382 4383/* Need to track some additional registers as far as serialization is 4384 concerned so we can properly handle br.call and br.ret. We could 4385 make these registers visible to gcc, but since these registers are 4386 never explicitly used in gcc generated code, it seems wasteful to 4387 do so (plus it would make the call and return patterns needlessly 4388 complex). */ 4389#define REG_GP (GR_REG (1)) 4390#define REG_RP (BR_REG (0)) 4391#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 4392/* This is used for volatile asms which may require a stop bit immediately 4393 before and after them. */ 4394#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 4395#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 4396#define NUM_REGS (AR_UNAT_BIT_0 + 64) 4397 4398/* For each register, we keep track of how it has been written in the 4399 current instruction group. 4400 4401 If a register is written unconditionally (no qualifying predicate), 4402 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 4403 4404 If a register is written if its qualifying predicate P is true, we 4405 set WRITE_COUNT to 1 and FIRST_PRED to P. 
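(Illustrative example: after `(p6) mov r8 = r2' we record WRITE_COUNT = 1 and FIRST_PRED = 6 for r8.)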
Later on, the same register 4406 may be written again by the complement of P (P^1) and when this happens, 4407 WRITE_COUNT gets set to 2. 4408 4409 The result of this is that whenever an insn attempts to write a register 4410 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 4411 4412 If a predicate register is written by a floating-point insn, we set 4413 WRITTEN_BY_FP to true. 4414 4415 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 4416 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 4417 4418struct reg_write_state 4419{ 4420 unsigned int write_count : 2; 4421 unsigned int first_pred : 16; 4422 unsigned int written_by_fp : 1; 4423 unsigned int written_by_and : 1; 4424 unsigned int written_by_or : 1; 4425}; 4426 4427/* Cumulative info for the current instruction group. */ 4428struct reg_write_state rws_sum[NUM_REGS]; 4429/* Info for the current instruction. This gets copied to rws_sum after a 4430 stop bit is emitted. */ 4431struct reg_write_state rws_insn[NUM_REGS]; 4432 4433/* Indicates whether this is the first instruction after a stop bit, 4434 in which case we don't need another stop bit. Without this, we hit 4435 the abort in ia64_variable_issue when scheduling an alloc. */ 4436static int first_instruction; 4437 4438/* Misc flags needed to compute RAW/WAW dependencies while we are traversing 4439 RTL for one instruction. */ 4440struct reg_flags 4441{ 4442 unsigned int is_write : 1; /* Is register being written? */ 4443 unsigned int is_fp : 1; /* Is register used as part of an fp op? */ 4444 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 4445 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 4446 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 4447 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ 4448}; 4449 4450static void rws_update PARAMS ((struct reg_write_state *, int, 4451 struct reg_flags, int)); 4452static int rws_access_regno PARAMS ((int, struct reg_flags, int)); 4453static int rws_access_reg PARAMS ((rtx, struct reg_flags, int)); 4454static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *)); 4455static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx)); 4456static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int)); 4457static void init_insn_group_barriers PARAMS ((void)); 4458static int group_barrier_needed_p PARAMS ((rtx)); 4459static int safe_group_barrier_needed_p PARAMS ((rtx)); 4460 4461/* Update *RWS for REGNO, which is being written by the current instruction, 4462 with predicate PRED, and associated register flags in FLAGS. */ 4463 4464static void 4465rws_update (rws, regno, flags, pred) 4466 struct reg_write_state *rws; 4467 int regno; 4468 struct reg_flags flags; 4469 int pred; 4470{ 4471 if (pred) 4472 rws[regno].write_count++; 4473 else 4474 rws[regno].write_count = 2; 4475 rws[regno].written_by_fp |= flags.is_fp; 4476 /* ??? Not tracking and/or across differing predicates. */ 4477 rws[regno].written_by_and = flags.is_and; 4478 rws[regno].written_by_or = flags.is_or; 4479 rws[regno].first_pred = pred; 4480} 4481 4482/* Handle an access to register REGNO of type FLAGS using predicate register 4483 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates 4484 a dependency with an earlier instruction in the same group. 
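(For example -- illustrative -- if r8 was written unconditionally earlier in the group, a second access to r8 reported here returns 1 and the caller must emit a stop bit before the insn.)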
*/ 4485 4486static int 4487rws_access_regno (regno, flags, pred) 4488 int regno; 4489 struct reg_flags flags; 4490 int pred; 4491{ 4492 int need_barrier = 0; 4493 4494 if (regno >= NUM_REGS) 4495 abort (); 4496 4497 if (! PR_REGNO_P (regno)) 4498 flags.is_and = flags.is_or = 0; 4499 4500 if (flags.is_write) 4501 { 4502 int write_count; 4503 4504 /* One insn writes same reg multiple times? */ 4505 if (rws_insn[regno].write_count > 0) 4506 abort (); 4507 4508 /* Update info for current instruction. */ 4509 rws_update (rws_insn, regno, flags, pred); 4510 write_count = rws_sum[regno].write_count; 4511 4512 switch (write_count) 4513 { 4514 case 0: 4515 /* The register has not been written yet. */ 4516 rws_update (rws_sum, regno, flags, pred); 4517 break; 4518 4519 case 1: 4520 /* The register has been written via a predicate. If this is 4521 not a complementary predicate, then we need a barrier. */ 4522 /* ??? This assumes that P and P+1 are always complementary 4523 predicates for P even. */ 4524 if (flags.is_and && rws_sum[regno].written_by_and) 4525 ; 4526 else if (flags.is_or && rws_sum[regno].written_by_or) 4527 ; 4528 else if ((rws_sum[regno].first_pred ^ 1) != pred) 4529 need_barrier = 1; 4530 rws_update (rws_sum, regno, flags, pred); 4531 break; 4532 4533 case 2: 4534 /* The register has been unconditionally written already. We 4535 need a barrier. */ 4536 if (flags.is_and && rws_sum[regno].written_by_and) 4537 ; 4538 else if (flags.is_or && rws_sum[regno].written_by_or) 4539 ; 4540 else 4541 need_barrier = 1; 4542 rws_sum[regno].written_by_and = flags.is_and; 4543 rws_sum[regno].written_by_or = flags.is_or; 4544 break; 4545 4546 default: 4547 abort (); 4548 } 4549 } 4550 else 4551 { 4552 if (flags.is_branch) 4553 { 4554 /* Branches have several RAW exceptions that allow to avoid 4555 barriers. */ 4556 4557 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) 4558 /* RAW dependencies on branch regs are permissible as long 4559 as the writer is a non-branch instruction. Since we 4560 never generate code that uses a branch register written 4561 by a branch instruction, handling this case is 4562 easy. */ 4563 return 0; 4564 4565 if (REGNO_REG_CLASS (regno) == PR_REGS 4566 && ! rws_sum[regno].written_by_fp) 4567 /* The predicates of a branch are available within the 4568 same insn group as long as the predicate was written by 4569 something other than a floating-point instruction. */ 4570 return 0; 4571 } 4572 4573 if (flags.is_and && rws_sum[regno].written_by_and) 4574 return 0; 4575 if (flags.is_or && rws_sum[regno].written_by_or) 4576 return 0; 4577 4578 switch (rws_sum[regno].write_count) 4579 { 4580 case 0: 4581 /* The register has not been written yet. */ 4582 break; 4583 4584 case 1: 4585 /* The register has been written via a predicate. If this is 4586 not a complementary predicate, then we need a barrier. */ 4587 /* ??? This assumes that P and P+1 are always complementary 4588 predicates for P even. */ 4589 if ((rws_sum[regno].first_pred ^ 1) != pred) 4590 need_barrier = 1; 4591 break; 4592 4593 case 2: 4594 /* The register has been unconditionally written already. We 4595 need a barrier. 
*/ 4596 need_barrier = 1; 4597 break; 4598 4599 default: 4600 abort (); 4601 } 4602 } 4603 4604 return need_barrier; 4605} 4606 4607static int 4608rws_access_reg (reg, flags, pred) 4609 rtx reg; 4610 struct reg_flags flags; 4611 int pred; 4612{ 4613 int regno = REGNO (reg); 4614 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); 4615 4616 if (n == 1) 4617 return rws_access_regno (regno, flags, pred); 4618 else 4619 { 4620 int need_barrier = 0; 4621 while (--n >= 0) 4622 need_barrier |= rws_access_regno (regno + n, flags, pred); 4623 return need_barrier; 4624 } 4625} 4626 4627/* Examine X, which is a SET rtx, and update the flags, the predicate, and 4628 the condition, stored in *PFLAGS, *PPRED and *PCOND. */ 4629 4630static void 4631update_set_flags (x, pflags, ppred, pcond) 4632 rtx x; 4633 struct reg_flags *pflags; 4634 int *ppred; 4635 rtx *pcond; 4636{ 4637 rtx src = SET_SRC (x); 4638 4639 *pcond = 0; 4640 4641 switch (GET_CODE (src)) 4642 { 4643 case CALL: 4644 return; 4645 4646 case IF_THEN_ELSE: 4647 if (SET_DEST (x) == pc_rtx) 4648 /* X is a conditional branch. */ 4649 return; 4650 else 4651 { 4652 int is_complemented = 0; 4653 4654 /* X is a conditional move. */ 4655 rtx cond = XEXP (src, 0); 4656 if (GET_CODE (cond) == EQ) 4657 is_complemented = 1; 4658 cond = XEXP (cond, 0); 4659 if (GET_CODE (cond) != REG 4660 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4661 abort (); 4662 *pcond = cond; 4663 if (XEXP (src, 1) == SET_DEST (x) 4664 || XEXP (src, 2) == SET_DEST (x)) 4665 { 4666 /* X is a conditional move that conditionally writes the 4667 destination. */ 4668 4669 /* We need another complement in this case. */ 4670 if (XEXP (src, 1) == SET_DEST (x)) 4671 is_complemented = ! is_complemented; 4672 4673 *ppred = REGNO (cond); 4674 if (is_complemented) 4675 ++*ppred; 4676 } 4677 4678 /* ??? If this is a conditional write to the dest, then this 4679 instruction does not actually read one source. This probably 4680 doesn't matter, because that source is also the dest. */ 4681 /* ??? Multiple writes to predicate registers are allowed 4682 if they are all AND type compares, or if they are all OR 4683 type compares. We do not generate such instructions 4684 currently. */ 4685 } 4686 /* ... fall through ... */ 4687 4688 default: 4689 if (GET_RTX_CLASS (GET_CODE (src)) == '<' 4690 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) 4691 /* Set pflags->is_fp to 1 so that we know we're dealing 4692 with a floating point comparison when processing the 4693 destination of the SET. */ 4694 pflags->is_fp = 1; 4695 4696 /* Discover if this is a parallel comparison. We only handle 4697 and.orcm and or.andcm at present, since we must retain a 4698 strict inverse on the predicate pair. */ 4699 else if (GET_CODE (src) == AND) 4700 pflags->is_and = 1; 4701 else if (GET_CODE (src) == IOR) 4702 pflags->is_or = 1; 4703 4704 break; 4705 } 4706} 4707 4708/* Subroutine of rtx_needs_barrier; this function determines whether the 4709 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 4710 are as in rtx_needs_barrier. COND is an rtx that holds the condition 4711 for this insn. */ 4712 4713static int 4714set_src_needs_barrier (x, flags, pred, cond) 4715 rtx x; 4716 struct reg_flags flags; 4717 int pred; 4718 rtx cond; 4719{ 4720 int need_barrier = 0; 4721 rtx dst; 4722 rtx src = SET_SRC (x); 4723 4724 if (GET_CODE (src) == CALL) 4725 /* We don't need to worry about the result registers that 4726 get written by a subroutine call.
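(E.g. for a value-returning call, (set (reg r8) (call ...)), the write of r8 is deliberately not recorded here; the SET case of rtx_needs_barrier skips the destination when the source is a CALL.)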
*/ 4727 return rtx_needs_barrier (src, flags, pred); 4728 else if (SET_DEST (x) == pc_rtx) 4729 { 4730 /* X is a conditional branch. */ 4731 /* ??? This seems redundant, as the caller sets this bit for 4732 all JUMP_INSNs. */ 4733 flags.is_branch = 1; 4734 return rtx_needs_barrier (src, flags, pred); 4735 } 4736 4737 need_barrier = rtx_needs_barrier (src, flags, pred); 4738 4739 /* This instruction unconditionally uses a predicate register. */ 4740 if (cond) 4741 need_barrier |= rws_access_reg (cond, flags, 0); 4742 4743 dst = SET_DEST (x); 4744 if (GET_CODE (dst) == ZERO_EXTRACT) 4745 { 4746 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 4747 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 4748 dst = XEXP (dst, 0); 4749 } 4750 return need_barrier; 4751} 4752 4753/* Handle an access to rtx X of type FLAGS using predicate register PRED. 4754 Return 1 if this access creates a dependency with an earlier instruction 4755 in the same group. */ 4756 4757static int 4758rtx_needs_barrier (x, flags, pred) 4759 rtx x; 4760 struct reg_flags flags; 4761 int pred; 4762{ 4763 int i, j; 4764 int is_complemented = 0; 4765 int need_barrier = 0; 4766 const char *format_ptr; 4767 struct reg_flags new_flags; 4768 rtx cond = 0; 4769 4770 if (! x) 4771 return 0; 4772 4773 new_flags = flags; 4774 4775 switch (GET_CODE (x)) 4776 { 4777 case SET: 4778 update_set_flags (x, &new_flags, &pred, &cond); 4779 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond); 4780 if (GET_CODE (SET_SRC (x)) != CALL) 4781 { 4782 new_flags.is_write = 1; 4783 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 4784 } 4785 break; 4786 4787 case CALL: 4788 new_flags.is_write = 0; 4789 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 4790 4791 /* Avoid multiple register writes, in case this is a pattern with 4792 multiple CALL rtx. This avoids an abort in rws_access_reg. */ 4793 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count) 4794 { 4795 new_flags.is_write = 1; 4796 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 4797 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 4798 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 4799 } 4800 break; 4801 4802 case COND_EXEC: 4803 /* X is a predicated instruction. */ 4804 4805 cond = COND_EXEC_TEST (x); 4806 if (pred) 4807 abort (); 4808 need_barrier = rtx_needs_barrier (cond, flags, 0); 4809 4810 if (GET_CODE (cond) == EQ) 4811 is_complemented = 1; 4812 cond = XEXP (cond, 0); 4813 if (GET_CODE (cond) != REG 4814 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4815 abort (); 4816 pred = REGNO (cond); 4817 if (is_complemented) 4818 ++pred; 4819 4820 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 4821 return need_barrier; 4822 4823 case CLOBBER: 4824 case USE: 4825 /* Clobber & use are for earlier compiler phases only. */ 4826 break; 4827 4828 case ASM_OPERANDS: 4829 case ASM_INPUT: 4830 /* We always emit stop bits for traditional asms. We emit stop bits 4831 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 4832 if (GET_CODE (x) != ASM_OPERANDS 4833 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 4834 { 4835 /* Avoid writing the register multiple times if we have multiple 4836 asm outputs. This avoids an abort in rws_access_reg. */ 4837 if (!
rws_insn[REG_VOLATILE].write_count) 4838 { 4839 new_flags.is_write = 1; 4840 rws_access_regno (REG_VOLATILE, new_flags, pred); 4841 } 4842 return 1; 4843 } 4844 4845 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 4846 We cannot just fall through here, since then we would be confused 4847 by the ASM_INPUT rtxs inside ASM_OPERANDS, which do not indicate 4848 traditional asms, unlike their normal usage. */ 4849 4850 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 4851 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 4852 need_barrier = 1; 4853 break; 4854 4855 case PARALLEL: 4856 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4857 { 4858 rtx pat = XVECEXP (x, 0, i); 4859 if (GET_CODE (pat) == SET) 4860 { 4861 update_set_flags (pat, &new_flags, &pred, &cond); 4862 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond); 4863 } 4864 else if (GET_CODE (pat) == USE 4865 || GET_CODE (pat) == CALL 4866 || GET_CODE (pat) == ASM_OPERANDS) 4867 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4868 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN) 4869 abort (); 4870 } 4871 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4872 { 4873 rtx pat = XVECEXP (x, 0, i); 4874 if (GET_CODE (pat) == SET) 4875 { 4876 if (GET_CODE (SET_SRC (pat)) != CALL) 4877 { 4878 new_flags.is_write = 1; 4879 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 4880 pred); 4881 } 4882 } 4883 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 4884 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4885 } 4886 break; 4887 4888 case SUBREG: 4889 x = SUBREG_REG (x); 4890 /* FALLTHRU */ 4891 case REG: 4892 if (REGNO (x) == AR_UNAT_REGNUM) 4893 { 4894 for (i = 0; i < 64; ++i) 4895 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 4896 } 4897 else 4898 need_barrier = rws_access_reg (x, flags, pred); 4899 break; 4900 4901 case MEM: 4902 /* Find the regs used in memory address computation. */ 4903 new_flags.is_write = 0; 4904 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4905 break; 4906 4907 case CONST_INT: case CONST_DOUBLE: 4908 case SYMBOL_REF: case LABEL_REF: case CONST: 4909 break; 4910 4911 /* Operators with side-effects. */ 4912 case POST_INC: case POST_DEC: 4913 if (GET_CODE (XEXP (x, 0)) != REG) 4914 abort (); 4915 4916 new_flags.is_write = 0; 4917 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4918 new_flags.is_write = 1; 4919 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4920 break; 4921 4922 case POST_MODIFY: 4923 if (GET_CODE (XEXP (x, 0)) != REG) 4924 abort (); 4925 4926 new_flags.is_write = 0; 4927 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4928 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4929 new_flags.is_write = 1; 4930 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4931 break; 4932 4933 /* Handle common unary and binary ops for efficiency.
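(These simply recurse into the operands; e.g. when scanning the source of a SET, (plus (reg r8) (reg r9)) records reads of r8 and r9 under the current predicate.)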
*/ 4934 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 4935 case MOD: case UDIV: case UMOD: case AND: case IOR: 4936 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 4937 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 4938 case NE: case EQ: case GE: case GT: case LE: 4939 case LT: case GEU: case GTU: case LEU: case LTU: 4940 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4941 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4942 break; 4943 4944 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 4945 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 4946 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 4947 case SQRT: case FFS: 4948 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 4949 break; 4950 4951 case UNSPEC: 4952 switch (XINT (x, 1)) 4953 { 4954 case UNSPEC_LTOFF_DTPMOD: 4955 case UNSPEC_LTOFF_DTPREL: 4956 case UNSPEC_DTPREL: 4957 case UNSPEC_LTOFF_TPREL: 4958 case UNSPEC_TPREL: 4959 case UNSPEC_PRED_REL_MUTEX: 4960 case UNSPEC_PIC_CALL: 4961 case UNSPEC_MF: 4962 case UNSPEC_FETCHADD_ACQ: 4963 case UNSPEC_BSP_VALUE: 4964 case UNSPEC_FLUSHRS: 4965 case UNSPEC_BUNDLE_SELECTOR: 4966 break; 4967 4968 case UNSPEC_GR_SPILL: 4969 case UNSPEC_GR_RESTORE: 4970 { 4971 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 4972 HOST_WIDE_INT bit = (offset >> 3) & 63; 4973 4974 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4975 new_flags.is_write = (XINT (x, 1) == 1); 4976 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 4977 new_flags, pred); 4978 break; 4979 } 4980 4981 case UNSPEC_FR_SPILL: 4982 case UNSPEC_FR_RESTORE: 4983 case UNSPEC_POPCNT: 4984 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4985 break; 4986 4987 case UNSPEC_ADDP4: 4988 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4989 break; 4990 4991 case UNSPEC_FR_RECIP_APPROX: 4992 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4993 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 4994 break; 4995 4996 case UNSPEC_CMPXCHG_ACQ: 4997 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 4998 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 4999 break; 5000 5001 default: 5002 abort (); 5003 } 5004 break; 5005 5006 case UNSPEC_VOLATILE: 5007 switch (XINT (x, 1)) 5008 { 5009 case UNSPECV_ALLOC: 5010 /* Alloc must always be the first instruction of a group. 5011 We force this by always returning true. */ 5012 /* ??? We might get better scheduling if we explicitly check for 5013 input/local/output register dependencies, and modify the 5014 scheduler so that alloc is always reordered to the start of 5015 the current group. We could then eliminate all of the 5016 first_instruction code. 
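(Illustrative: an `alloc r35 = ar.pfs, 2, 3, 2, 0' reads ar.pfs and writes the current frame marker, which is what the two register accesses recorded below model.)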
*/ 5017 rws_access_regno (AR_PFS_REGNUM, flags, pred); 5018 5019 new_flags.is_write = 1; 5020 rws_access_regno (REG_AR_CFM, new_flags, pred); 5021 return 1; 5022 5023 case UNSPECV_SET_BSP: 5024 need_barrier = 1; 5025 break; 5026 5027 case UNSPECV_BLOCKAGE: 5028 case UNSPECV_INSN_GROUP_BARRIER: 5029 case UNSPECV_BREAK: 5030 case UNSPECV_PSAC_ALL: 5031 case UNSPECV_PSAC_NORMAL: 5032 return 0; 5033 5034 default: 5035 abort (); 5036 } 5037 break; 5038 5039 case RETURN: 5040 new_flags.is_write = 0; 5041 need_barrier = rws_access_regno (REG_RP, flags, pred); 5042 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 5043 5044 new_flags.is_write = 1; 5045 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 5046 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 5047 break; 5048 5049 default: 5050 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 5051 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 5052 switch (format_ptr[i]) 5053 { 5054 case '0': /* unused field */ 5055 case 'i': /* integer */ 5056 case 'n': /* note */ 5057 case 'w': /* wide integer */ 5058 case 's': /* pointer to string */ 5059 case 'S': /* optional pointer to string */ 5060 break; 5061 5062 case 'e': 5063 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 5064 need_barrier = 1; 5065 break; 5066 5067 case 'E': 5068 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 5069 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 5070 need_barrier = 1; 5071 break; 5072 5073 default: 5074 abort (); 5075 } 5076 break; 5077 } 5078 return need_barrier; 5079} 5080 5081/* Clear out the state for group_barrier_needed_p at the start of a 5082 sequence of insns. */ 5083 5084static void 5085init_insn_group_barriers () 5086{ 5087 memset (rws_sum, 0, sizeof (rws_sum)); 5088 first_instruction = 1; 5089} 5090 5091/* Given the current state, recorded by previous calls to this function, 5092 determine whether a group barrier (a stop bit) is necessary before INSN. 5093 Return nonzero if so. */ 5094 5095static int 5096group_barrier_needed_p (insn) 5097 rtx insn; 5098{ 5099 rtx pat; 5100 int need_barrier = 0; 5101 struct reg_flags flags; 5102 5103 memset (&flags, 0, sizeof (flags)); 5104 switch (GET_CODE (insn)) 5105 { 5106 case NOTE: 5107 break; 5108 5109 case BARRIER: 5110 /* A barrier doesn't imply an instruction group boundary. */ 5111 break; 5112 5113 case CODE_LABEL: 5114 memset (rws_insn, 0, sizeof (rws_insn)); 5115 return 1; 5116 5117 case CALL_INSN: 5118 flags.is_branch = 1; 5119 flags.is_sibcall = SIBLING_CALL_P (insn); 5120 memset (rws_insn, 0, sizeof (rws_insn)); 5121 5122 /* Don't bundle a call following another call. */ 5123 if ((pat = prev_active_insn (insn)) 5124 && GET_CODE (pat) == CALL_INSN) 5125 { 5126 need_barrier = 1; 5127 break; 5128 } 5129 5130 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 5131 break; 5132 5133 case JUMP_INSN: 5134 flags.is_branch = 1; 5135 5136 /* Don't bundle a jump following a call. */ 5137 if ((pat = prev_active_insn (insn)) 5138 && GET_CODE (pat) == CALL_INSN) 5139 { 5140 need_barrier = 1; 5141 break; 5142 } 5143 /* FALLTHRU */ 5144 5145 case INSN: 5146 if (GET_CODE (PATTERN (insn)) == USE 5147 || GET_CODE (PATTERN (insn)) == CLOBBER) 5148 /* Don't care about USE and CLOBBER "insns"---those are used to 5149 indicate to the optimizer that it shouldn't get rid of 5150 certain operations. */ 5151 break; 5152 5153 pat = PATTERN (insn); 5154 5155 /* Ug. Hack hacks hacked elsewhere. 
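(The cases below unwrap patterns whose RTL was given artificial extra dependencies elsewhere; for dependency-violation analysis we recover a representative part of each such PARALLEL.)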
*/ 5156 switch (recog_memoized (insn)) 5157 { 5158 /* We play dependency tricks with the epilogue in order 5159 to get proper schedules. Undo this for dv analysis. */ 5160 case CODE_FOR_epilogue_deallocate_stack: 5161 case CODE_FOR_prologue_allocate_stack: 5162 pat = XVECEXP (pat, 0, 0); 5163 break; 5164 5165 /* The pattern we use for br.cloop confuses the code above. 5166 The second element of the vector is representative. */ 5167 case CODE_FOR_doloop_end_internal: 5168 pat = XVECEXP (pat, 0, 1); 5169 break; 5170 5171 /* Doesn't generate code. */ 5172 case CODE_FOR_pred_rel_mutex: 5173 case CODE_FOR_prologue_use: 5174 return 0; 5175 5176 default: 5177 break; 5178 } 5179 5180 memset (rws_insn, 0, sizeof (rws_insn)); 5181 need_barrier = rtx_needs_barrier (pat, flags, 0); 5182 5183 /* Check to see if the previous instruction was a volatile 5184 asm. */ 5185 if (! need_barrier) 5186 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 5187 break; 5188 5189 default: 5190 abort (); 5191 } 5192 5193 if (first_instruction) 5194 { 5195 need_barrier = 0; 5196 first_instruction = 0; 5197 } 5198 5199 return need_barrier; 5200} 5201 5202/* Like group_barrier_needed_p, but do not clobber the current state. */ 5203 5204static int 5205safe_group_barrier_needed_p (insn) 5206 rtx insn; 5207{ 5208 struct reg_write_state rws_saved[NUM_REGS]; 5209 int saved_first_instruction; 5210 int t; 5211 5212 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved); 5213 saved_first_instruction = first_instruction; 5214 5215 t = group_barrier_needed_p (insn); 5216 5217 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved); 5218 first_instruction = saved_first_instruction; 5219 5220 return t; 5221} 5222 5223/* INSNS is a chain of instructions. Scan the chain, and insert stop bits 5224 as necessary to eliminate dependencies. This function assumes that 5225 a final instruction scheduling pass has been run which has already 5226 inserted most of the necessary stop bits. This function only inserts 5227 new ones at basic block boundaries, since these are invisible to the 5228 scheduler. */ 5229 5230static void 5231emit_insn_group_barriers (dump, insns) 5232 FILE *dump; 5233 rtx insns; 5234{ 5235 rtx insn; 5236 rtx last_label = 0; 5237 int insns_since_last_label = 0; 5238 5239 init_insn_group_barriers (); 5240 5241 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5242 { 5243 if (GET_CODE (insn) == CODE_LABEL) 5244 { 5245 if (insns_since_last_label) 5246 last_label = insn; 5247 insns_since_last_label = 0; 5248 } 5249 else if (GET_CODE (insn) == NOTE 5250 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 5251 { 5252 if (insns_since_last_label) 5253 last_label = insn; 5254 insns_since_last_label = 0; 5255 } 5256 else if (GET_CODE (insn) == INSN 5257 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 5258 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 5259 { 5260 init_insn_group_barriers (); 5261 last_label = 0; 5262 } 5263 else if (INSN_P (insn)) 5264 { 5265 insns_since_last_label = 1; 5266 5267 if (group_barrier_needed_p (insn)) 5268 { 5269 if (last_label) 5270 { 5271 if (dump) 5272 fprintf (dump, "Emitting stop before label %d\n", 5273 INSN_UID (last_label)); 5274 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 5275 insn = last_label; 5276 5277 init_insn_group_barriers (); 5278 last_label = 0; 5279 } 5280 } 5281 } 5282 } 5283} 5284 5285/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 5286 This function has to emit all necessary group barriers.
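(A group barrier reaches the assembler as a stop bit, written `;;' after an instruction; here it is materialized as an insn_group_barrier insn carrying the operand 3.)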
*/ 5287 5288static void 5289emit_all_insn_group_barriers (dump, insns) 5290 FILE *dump ATTRIBUTE_UNUSED; 5291 rtx insns; 5292{ 5293 rtx insn; 5294 5295 init_insn_group_barriers (); 5296 5297 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5298 { 5299 if (GET_CODE (insn) == BARRIER) 5300 { 5301 rtx last = prev_active_insn (insn); 5302 5303 if (! last) 5304 continue; 5305 if (GET_CODE (last) == JUMP_INSN 5306 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) 5307 last = prev_active_insn (last); 5308 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 5309 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 5310 5311 init_insn_group_barriers (); 5312 } 5313 else if (INSN_P (insn)) 5314 { 5315 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 5316 init_insn_group_barriers (); 5317 else if (group_barrier_needed_p (insn)) 5318 { 5319 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5320 init_insn_group_barriers (); 5321 group_barrier_needed_p (insn); 5322 } 5323 } 5324 } 5325} 5326 5327static int errata_find_address_regs PARAMS ((rtx *, void *)); 5328static void errata_emit_nops PARAMS ((rtx)); 5329static void fixup_errata PARAMS ((void)); 5330 5331/* This structure is used to track some details about the previous insns 5332 groups so we can determine if it may be necessary to insert NOPs to 5333 workaround hardware errata. */ 5334static struct group 5335{ 5336 HARD_REG_SET p_reg_set; 5337 HARD_REG_SET gr_reg_conditionally_set; 5338} last_group[2]; 5339 5340/* Index into the last_group array. */ 5341static int group_idx; 5342 5343/* Called through for_each_rtx; determines if a hard register that was 5344 conditionally set in the previous group is used as an address register. 5345 It ensures that for_each_rtx returns 1 in that case. */ 5346static int 5347errata_find_address_regs (xp, data) 5348 rtx *xp; 5349 void *data ATTRIBUTE_UNUSED; 5350{ 5351 rtx x = *xp; 5352 if (GET_CODE (x) != MEM) 5353 return 0; 5354 x = XEXP (x, 0); 5355 if (GET_CODE (x) == POST_MODIFY) 5356 x = XEXP (x, 0); 5357 if (GET_CODE (x) == REG) 5358 { 5359 struct group *prev_group = last_group + (group_idx ^ 1); 5360 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set, 5361 REGNO (x))) 5362 return 1; 5363 return -1; 5364 } 5365 return 0; 5366} 5367 5368/* Called for each insn; this function keeps track of the state in 5369 last_group and emits additional NOPs if necessary to work around 5370 an Itanium A/B step erratum. */ 5371static void 5372errata_emit_nops (insn) 5373 rtx insn; 5374{ 5375 struct group *this_group = last_group + group_idx; 5376 struct group *prev_group = last_group + (group_idx ^ 1); 5377 rtx pat = PATTERN (insn); 5378 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0; 5379 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat; 5380 enum attr_type type; 5381 rtx set = real_pat; 5382 5383 if (GET_CODE (real_pat) == USE 5384 || GET_CODE (real_pat) == CLOBBER 5385 || GET_CODE (real_pat) == ASM_INPUT 5386 || GET_CODE (real_pat) == ADDR_VEC 5387 || GET_CODE (real_pat) == ADDR_DIFF_VEC 5388 || asm_noperands (PATTERN (insn)) >= 0) 5389 return; 5390 5391 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate 5392 parts of it. 
*/ 5393 5394 if (GET_CODE (set) == PARALLEL) 5395 { 5396 int i; 5397 set = XVECEXP (real_pat, 0, 0); 5398 for (i = 1; i < XVECLEN (real_pat, 0); i++) 5399 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE 5400 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER) 5401 { 5402 set = 0; 5403 break; 5404 } 5405 } 5406 5407 if (set && GET_CODE (set) != SET) 5408 set = 0; 5409 5410 type = get_attr_type (insn); 5411 5412 if (type == TYPE_F 5413 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set)))) 5414 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set))); 5415 5416 if ((type == TYPE_M || type == TYPE_A) && cond && set 5417 && REG_P (SET_DEST (set)) 5418 && GET_CODE (SET_SRC (set)) != PLUS 5419 && GET_CODE (SET_SRC (set)) != MINUS 5420 && (GET_CODE (SET_SRC (set)) != ASHIFT 5421 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode)) 5422 && (GET_CODE (SET_SRC (set)) != MEM 5423 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY) 5424 && GENERAL_REGNO_P (REGNO (SET_DEST (set)))) 5425 { 5426 if (GET_RTX_CLASS (GET_CODE (cond)) != '<' 5427 || ! REG_P (XEXP (cond, 0))) 5428 abort (); 5429 5430 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0)))) 5431 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set))); 5432 } 5433 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL)) 5434 { 5435 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5436 emit_insn_before (gen_nop (), insn); 5437 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5438 group_idx = 0; 5439 memset (last_group, 0, sizeof last_group); 5440 } 5441} 5442 5443/* Emit extra nops if they are required to work around hardware errata. */ 5444 5445static void 5446fixup_errata () 5447{ 5448 rtx insn; 5449 5450 if (! TARGET_B_STEP) 5451 return; 5452 5453 group_idx = 0; 5454 memset (last_group, 0, sizeof last_group); 5455 5456 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 5457 { 5458 if (!INSN_P (insn)) 5459 continue; 5460 5461 if (ia64_safe_type (insn) == TYPE_S) 5462 { 5463 group_idx ^= 1; 5464 memset (last_group + group_idx, 0, sizeof last_group[group_idx]); 5465 } 5466 else 5467 errata_emit_nops (insn); 5468 } 5469} 5470 5471/* Instruction scheduling support. */ 5472/* Describe one bundle. */ 5473 5474struct bundle 5475{ 5476 /* Zero if there's no possibility of a stop in this bundle other than 5477 at the end, otherwise the position of the optional stop bit. */ 5478 int possible_stop; 5479 /* The types of the three slots. */ 5480 enum attr_type t[3]; 5481 /* The pseudo op to be emitted into the assembler output. */ 5482 const char *name; 5483}; 5484 5485#define NR_BUNDLES 10 5486 5487/* A list of all available bundles. */ 5488 5489static const struct bundle bundle[NR_BUNDLES] = 5490{ 5491 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" }, 5492 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" }, 5493 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" }, 5494 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" }, 5495#if NR_BUNDLES == 10 5496 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" }, 5497 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" }, 5498#endif 5499 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" }, 5500 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" }, 5501 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" }, 5502 /* .mfi needs to occur earlier than .mlx, so that we only generate it if 5503 it matches an L type insn. Otherwise we'll try to generate L type 5504 nops. */ 5505 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" } 5506}; 5507 5508/* Describe a packet of instructions. 
Packets consist of two bundles that 5509 are visible to the hardware in one scheduling window. */ 5510 5511struct ia64_packet 5512{ 5513 const struct bundle *t1, *t2; 5514 /* Precomputed value of the first split issue in this packet if a cycle 5515 starts at its beginning. */ 5516 int first_split; 5517 /* For convenience, the insn types are replicated here so we don't have 5518 to go through T1 and T2 all the time. */ 5519 enum attr_type t[6]; 5520}; 5521 5522/* An array containing all possible packets. */ 5523#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES) 5524static struct ia64_packet packets[NR_PACKETS]; 5525 5526/* Map attr_type to a string with the name. */ 5527 5528static const char *const type_names[] = 5529{ 5530 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S" 5531}; 5532 5533/* Nonzero if we should insert stop bits into the schedule. */ 5534int ia64_final_schedule = 0; 5535 5536static int itanium_split_issue PARAMS ((const struct ia64_packet *, int)); 5537static rtx ia64_single_set PARAMS ((rtx)); 5538static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx)); 5539static void ia64_emit_insn_before PARAMS ((rtx, rtx)); 5540static void maybe_rotate PARAMS ((FILE *)); 5541static void finish_last_head PARAMS ((FILE *, int)); 5542static void rotate_one_bundle PARAMS ((FILE *)); 5543static void rotate_two_bundles PARAMS ((FILE *)); 5544static void nop_cycles_until PARAMS ((int, FILE *)); 5545static void cycle_end_fill_slots PARAMS ((FILE *)); 5546static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *)); 5547static int get_split PARAMS ((const struct ia64_packet *, int)); 5548static int find_best_insn PARAMS ((rtx *, enum attr_type *, int, 5549 const struct ia64_packet *, int)); 5550static void find_best_packet PARAMS ((int *, const struct ia64_packet **, 5551 rtx *, enum attr_type *, int)); 5552static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int)); 5553static void dump_current_packet PARAMS ((FILE *)); 5554static void schedule_stop PARAMS ((FILE *)); 5555static rtx gen_nop_type PARAMS ((enum attr_type)); 5556static void ia64_emit_nops PARAMS ((void)); 5557 5558/* Map a bundle number to its pseudo-op. */ 5559 5560const char * 5561get_bundle_name (b) 5562 int b; 5563{ 5564 return bundle[b].name; 5565} 5566 5567/* Compute the slot which will cause a split issue in packet P if the 5568 current cycle begins at slot BEGIN. */ 5569 5570static int 5571itanium_split_issue (p, begin) 5572 const struct ia64_packet *p; 5573 int begin; 5574{ 5575 int type_count[TYPE_S]; 5576 int i; 5577 int split = 6; 5578 5579 if (begin < 3) 5580 { 5581 /* Always split before and after MMF. */ 5582 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F) 5583 return 3; 5584 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F) 5585 return 3; 5586 /* Always split after MBB and BBB. */ 5587 if (p->t[1] == TYPE_B) 5588 return 3; 5589 /* Split after first bundle in MIB BBB combination. */ 5590 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B) 5591 return 3; 5592 } 5593 5594 memset (type_count, 0, sizeof type_count); 5595 for (i = begin; i < split; i++) 5596 { 5597 enum attr_type t0 = p->t[i]; 5598 /* An MLX bundle reserves the same units as an MFI bundle. */ 5599 enum attr_type t = (t0 == TYPE_L ? TYPE_F 5600 : t0 == TYPE_X ? TYPE_I 5601 : t0); 5602 5603 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and 5604 2 integer per cycle. */ 5605 int max = (t == TYPE_B ? 
3 : 2); 5606 if (type_count[t] == max) 5607 return i; 5608 5609 type_count[t]++; 5610 } 5611 return split; 5612} 5613 5614/* Return the maximum number of instructions a cpu can issue. */ 5615 5616static int 5617ia64_issue_rate () 5618{ 5619 return 6; 5620} 5621 5622/* Helper function - like single_set, but look inside COND_EXEC. */ 5623 5624static rtx 5625ia64_single_set (insn) 5626 rtx insn; 5627{ 5628 rtx x = PATTERN (insn), ret; 5629 if (GET_CODE (x) == COND_EXEC) 5630 x = COND_EXEC_CODE (x); 5631 if (GET_CODE (x) == SET) 5632 return x; 5633 5634 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. 5635 Although they are not classical single set, the second set is there just 5636 to protect it from moving past FP-relative stack accesses. */ 5637 switch (recog_memoized (insn)) 5638 { 5639 case CODE_FOR_prologue_allocate_stack: 5640 case CODE_FOR_epilogue_deallocate_stack: 5641 ret = XVECEXP (x, 0, 0); 5642 break; 5643 5644 default: 5645 ret = single_set_2 (insn, x); 5646 break; 5647 } 5648 5649 return ret; 5650} 5651 5652/* Adjust the cost of a scheduling dependency. Return the new cost of 5653 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 5654 5655static int 5656ia64_adjust_cost (insn, link, dep_insn, cost) 5657 rtx insn, link, dep_insn; 5658 int cost; 5659{ 5660 enum attr_type dep_type; 5661 enum attr_itanium_class dep_class; 5662 enum attr_itanium_class insn_class; 5663 rtx dep_set, set, src, addr; 5664 5665 if (GET_CODE (PATTERN (insn)) == CLOBBER 5666 || GET_CODE (PATTERN (insn)) == USE 5667 || GET_CODE (PATTERN (dep_insn)) == CLOBBER 5668 || GET_CODE (PATTERN (dep_insn)) == USE 5669 /* @@@ Not accurate for indirect calls. */ 5670 || GET_CODE (insn) == CALL_INSN 5671 || ia64_safe_type (insn) == TYPE_S) 5672 return 0; 5673 5674 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT 5675 || REG_NOTE_KIND (link) == REG_DEP_ANTI) 5676 return 0; 5677 5678 dep_type = ia64_safe_type (dep_insn); 5679 dep_class = ia64_safe_itanium_class (dep_insn); 5680 insn_class = ia64_safe_itanium_class (insn); 5681 5682 /* Compares that feed a conditional branch can execute in the same 5683 cycle. */ 5684 dep_set = ia64_single_set (dep_insn); 5685 set = ia64_single_set (insn); 5686 5687 if (dep_type != TYPE_F 5688 && dep_set 5689 && GET_CODE (SET_DEST (dep_set)) == REG 5690 && PR_REG (REGNO (SET_DEST (dep_set))) 5691 && GET_CODE (insn) == JUMP_INSN) 5692 return 0; 5693 5694 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM) 5695 { 5696 /* ??? Can't find any information in the documentation about whether 5697 a sequence 5698 st [rx] = ra 5699 ld rb = [ry] 5700 splits issue. Assume it doesn't. */ 5701 return 0; 5702 } 5703 5704 src = set ? SET_SRC (set) : 0; 5705 addr = 0; 5706 if (set) 5707 { 5708 if (GET_CODE (SET_DEST (set)) == MEM) 5709 addr = XEXP (SET_DEST (set), 0); 5710 else if (GET_CODE (SET_DEST (set)) == SUBREG 5711 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM) 5712 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0); 5713 else 5714 { 5715 addr = src; 5716 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0) 5717 addr = XVECEXP (addr, 0, 0); 5718 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) 5719 addr = XEXP (addr, 0); 5720 5721 /* Note that LO_SUM is used for GOT loads.
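(Illustrative RTL: a GOT load can appear as (mem (lo_sum (reg gp) ...)), so stripping one more level here exposes the register used in the address.)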
*/ 5722 if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM) 5723 addr = XEXP (addr, 0); 5724 else 5725 addr = 0; 5726 } 5727 } 5728 5729 if (addr && GET_CODE (addr) == POST_MODIFY) 5730 addr = XEXP (addr, 0); 5731 5732 set = ia64_single_set (dep_insn); 5733 5734 if ((dep_class == ITANIUM_CLASS_IALU 5735 || dep_class == ITANIUM_CLASS_ILOG 5736 || dep_class == ITANIUM_CLASS_LD) 5737 && (insn_class == ITANIUM_CLASS_LD 5738 || insn_class == ITANIUM_CLASS_ST)) 5739 { 5740 if (! addr || ! set) 5741 abort (); 5742 /* This isn't completely correct - an IALU that feeds an address has 5743 a latency of 1 cycle if it's issued in an M slot, but 2 cycles 5744 otherwise. Unfortunately there's no good way to describe this. */ 5745 if (reg_overlap_mentioned_p (SET_DEST (set), addr)) 5746 return cost + 1; 5747 } 5748 5749 if ((dep_class == ITANIUM_CLASS_IALU 5750 || dep_class == ITANIUM_CLASS_ILOG 5751 || dep_class == ITANIUM_CLASS_LD) 5752 && (insn_class == ITANIUM_CLASS_MMMUL 5753 || insn_class == ITANIUM_CLASS_MMSHF 5754 || insn_class == ITANIUM_CLASS_MMSHFI)) 5755 return 3; 5756 5757 if (dep_class == ITANIUM_CLASS_FMAC 5758 && (insn_class == ITANIUM_CLASS_FMISC 5759 || insn_class == ITANIUM_CLASS_FCVTFX 5760 || insn_class == ITANIUM_CLASS_XMPY)) 5761 return 7; 5762 5763 if ((dep_class == ITANIUM_CLASS_FMAC 5764 || dep_class == ITANIUM_CLASS_FMISC 5765 || dep_class == ITANIUM_CLASS_FCVTFX 5766 || dep_class == ITANIUM_CLASS_XMPY) 5767 && insn_class == ITANIUM_CLASS_STF) 5768 return 8; 5769 5770 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, 5771 but HP engineers say any non-MM operation. */ 5772 if ((dep_class == ITANIUM_CLASS_MMMUL 5773 || dep_class == ITANIUM_CLASS_MMSHF 5774 || dep_class == ITANIUM_CLASS_MMSHFI) 5775 && insn_class != ITANIUM_CLASS_MMMUL 5776 && insn_class != ITANIUM_CLASS_MMSHF 5777 && insn_class != ITANIUM_CLASS_MMSHFI) 5778 return 4; 5779 5780 return cost; 5781} 5782 5783/* Describe the current state of the Itanium pipeline. */ 5784static struct 5785{ 5786 /* The first slot that is used in the current cycle. */ 5787 int first_slot; 5788 /* The next slot to fill. */ 5789 int cur; 5790 /* The packet we have selected for the current issue window. */ 5791 const struct ia64_packet *packet; 5792 /* The position of the split issue that occurs due to issue width 5793 limitations (6 if there's no split issue). */ 5794 int split; 5795 /* Record data about the insns scheduled so far in the same issue 5796 window. The elements up to but not including FIRST_SLOT belong 5797 to the previous cycle, the ones starting with FIRST_SLOT belong 5798 to the current cycle. */ 5799 enum attr_type types[6]; 5800 rtx insns[6]; 5801 int stopbit[6]; 5802 /* Nonzero if we decided to schedule a stop bit. */ 5803 int last_was_stop; 5804} sched_data; 5805 5806/* Temporary arrays; they have enough elements to hold all insns that 5807 can be ready at the same time while scheduling the current block. 5808 SCHED_READY can hold ready insns, SCHED_TYPES their types. */ 5809static rtx *sched_ready; 5810static enum attr_type *sched_types; 5811 5812/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT 5813 of packet P.
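   A rough sketch of the rules implemented below: an exact type match
   always fits; a TYPE_A (int ALU) insn fits either an M or an I slot;
   an insn that must execute on unit 0 is rejected if an earlier slot
   in the current cycle already claims that unit; and calls are kept
   out of multiway-branch packets.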
*/ 5814 5815static int 5816insn_matches_slot (p, itype, slot, insn) 5817 const struct ia64_packet *p; 5818 enum attr_type itype; 5819 int slot; 5820 rtx insn; 5821{ 5822 enum attr_itanium_requires_unit0 u0; 5823 enum attr_type stype = p->t[slot]; 5824 5825 if (insn) 5826 { 5827 u0 = ia64_safe_itanium_requires_unit0 (insn); 5828 if (u0 == ITANIUM_REQUIRES_UNIT0_YES) 5829 { 5830 int i; 5831 for (i = sched_data.first_slot; i < slot; i++) 5832 if (p->t[i] == stype 5833 || (stype == TYPE_F && p->t[i] == TYPE_L) 5834 || (stype == TYPE_I && p->t[i] == TYPE_X)) 5835 return 0; 5836 } 5837 if (GET_CODE (insn) == CALL_INSN) 5838 { 5839 /* Reject calls in multiway branch packets. We want to limit 5840 the number of multiway branches we generate (since the branch 5841 predictor is limited), and this seems to work fairly well. 5842 (If we didn't do this, we'd have to add another test here to 5843 force calls into the third slot of the bundle.) */ 5844 if (slot < 3) 5845 { 5846 if (p->t[1] == TYPE_B) 5847 return 0; 5848 } 5849 else 5850 { 5851 if (p->t[4] == TYPE_B) 5852 return 0; 5853 } 5854 } 5855 } 5856 5857 if (itype == stype) 5858 return 1; 5859 if (itype == TYPE_A) 5860 return stype == TYPE_M || stype == TYPE_I; 5861 return 0; 5862} 5863 5864/* Like emit_insn_before, but skip cycle_display notes. 5865 ??? When cycle display notes are implemented, update this. */ 5866 5867static void 5868ia64_emit_insn_before (insn, before) 5869 rtx insn, before; 5870{ 5871 emit_insn_before (insn, before); 5872} 5873 5874/* When rotating a bundle out of the issue window, insert a bundle selector 5875 insn in front of it. DUMP is the scheduling dump file or NULL. START 5876 is either 0 or 3, depending on whether we want to emit a bundle selector 5877 for the first bundle or the second bundle in the current issue window. 5878 5879 The selector insns are emitted this late because the selected packet can 5880 be changed until parts of it get rotated out. */ 5881 5882static void 5883finish_last_head (dump, start) 5884 FILE *dump; 5885 int start; 5886{ 5887 const struct ia64_packet *p = sched_data.packet; 5888 const struct bundle *b = start == 0 ? p->t1 : p->t2; 5889 int bundle_type = b - bundle; 5890 rtx insn; 5891 int i; 5892 5893 if (! ia64_final_schedule) 5894 return; 5895 5896 for (i = start; sched_data.insns[i] == 0; i++) 5897 if (i == start + 3) 5898 abort (); 5899 insn = sched_data.insns[i]; 5900 5901 if (dump) 5902 fprintf (dump, "// Emitting template before %d: %s\n", 5903 INSN_UID (insn), b->name); 5904 5905 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn); 5906} 5907 5908/* We can't schedule more insns this cycle. Fix up the scheduling state 5909 and advance FIRST_SLOT and CUR. 5910 We have to distribute the insns that are currently found between 5911 FIRST_SLOT and CUR into the slots of the packet we have selected. So 5912 far, they are stored successively in the fields starting at FIRST_SLOT; 5913 now they must be moved to the correct slots. 5914 DUMP is the current scheduling dump file, or NULL. 
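   Worked example (illustrative only): with an MFI/MFI packet,
   FIRST_SLOT == 0 and two insns of types M and I stored at positions
   0 and 1, the I insn does not match the F slot, so slot 1 is
   recorded as an empty F placeholder (insn 0) and the I insn is
   moved to slot 2.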
*/ 5915 5916static void 5917cycle_end_fill_slots (dump) 5918 FILE *dump; 5919{ 5920 const struct ia64_packet *packet = sched_data.packet; 5921 int slot, i; 5922 enum attr_type tmp_types[6]; 5923 rtx tmp_insns[6]; 5924 5925 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type)); 5926 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx)); 5927 5928 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++) 5929 { 5930 enum attr_type t = tmp_types[i]; 5931 if (t != ia64_safe_type (tmp_insns[i])) 5932 abort (); 5933 while (! insn_matches_slot (packet, t, slot, tmp_insns[i])) 5934 { 5935 if (slot > sched_data.split) 5936 abort (); 5937 if (dump) 5938 fprintf (dump, "// Packet needs %s, have %s\n", 5939 type_names[packet->t[slot]], type_names[t]); 5940 sched_data.types[slot] = packet->t[slot]; 5941 sched_data.insns[slot] = 0; 5942 sched_data.stopbit[slot] = 0; 5943 5944 /* ??? TYPE_L instructions always fill up two slots, but we don't 5945 support TYPE_L nops. */ 5946 if (packet->t[slot] == TYPE_L) 5947 abort (); 5948 5949 slot++; 5950 } 5951 5952 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the 5953 actual slot type later. */ 5954 sched_data.types[slot] = packet->t[slot]; 5955 sched_data.insns[slot] = tmp_insns[i]; 5956 sched_data.stopbit[slot] = 0; 5957 slot++; 5958 5959 /* TYPE_L instructions always fill up two slots. */ 5960 if (t == TYPE_L) 5961 { 5962 sched_data.types[slot] = packet->t[slot]; 5963 sched_data.insns[slot] = 0; 5964 sched_data.stopbit[slot] = 0; 5965 slot++; 5966 } 5967 } 5968 5969 /* This isn't right - there's no need to pad out until the forced split; 5970 the CPU will automatically split if an insn isn't ready. */ 5971#if 0 5972 while (slot < sched_data.split) 5973 { 5974 sched_data.types[slot] = packet->t[slot]; 5975 sched_data.insns[slot] = 0; 5976 sched_data.stopbit[slot] = 0; 5977 slot++; 5978 } 5979#endif 5980 5981 sched_data.first_slot = sched_data.cur = slot; 5982} 5983 5984/* Bundle rotations, as described in the Itanium optimization manual. 5985 We can rotate either one or both bundles out of the issue window. 5986 DUMP is the current scheduling dump file, or NULL. */ 5987 5988static void 5989rotate_one_bundle (dump) 5990 FILE *dump; 5991{ 5992 if (dump) 5993 fprintf (dump, "// Rotating one bundle.\n"); 5994 5995 finish_last_head (dump, 0); 5996 if (sched_data.cur > 3) 5997 { 5998 sched_data.cur -= 3; 5999 sched_data.first_slot -= 3; 6000 memmove (sched_data.types, 6001 sched_data.types + 3, 6002 sched_data.cur * sizeof *sched_data.types); 6003 memmove (sched_data.stopbit, 6004 sched_data.stopbit + 3, 6005 sched_data.cur * sizeof *sched_data.stopbit); 6006 memmove (sched_data.insns, 6007 sched_data.insns + 3, 6008 sched_data.cur * sizeof *sched_data.insns); 6009 sched_data.packet 6010 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES]; 6011 } 6012 else 6013 { 6014 sched_data.cur = 0; 6015 sched_data.first_slot = 0; 6016 } 6017} 6018 6019static void 6020rotate_two_bundles (dump) 6021 FILE *dump; 6022{ 6023 if (dump) 6024 fprintf (dump, "// Rotating two bundles.\n"); 6025 6026 if (sched_data.cur == 0) 6027 return; 6028 6029 finish_last_head (dump, 0); 6030 if (sched_data.cur > 3) 6031 finish_last_head (dump, 3); 6032 sched_data.cur = 0; 6033 sched_data.first_slot = 0; 6034} 6035 6036/* We're beginning a new block. Initialize data structures as necessary. 
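   On the first call we also build the global packet table: one entry
   for each of the NR_BUNDLES * NR_BUNDLES bundle pairs, stored at
   index B1 * NR_BUNDLES + B2, with each packet's six slot types
   copied from its two bundles and its FIRST_SPLIT precomputed.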
*/ 6037 6038static void 6039ia64_sched_init (dump, sched_verbose, max_ready) 6040 FILE *dump ATTRIBUTE_UNUSED; 6041 int sched_verbose ATTRIBUTE_UNUSED; 6042 int max_ready; 6043{ 6044 static int initialized = 0; 6045 6046 if (! initialized) 6047 { 6048 int b1, b2, i; 6049 6050 initialized = 1; 6051 6052 for (i = b1 = 0; b1 < NR_BUNDLES; b1++) 6053 { 6054 const struct bundle *t1 = bundle + b1; 6055 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++) 6056 { 6057 const struct bundle *t2 = bundle + b2; 6058 6059 packets[i].t1 = t1; 6060 packets[i].t2 = t2; 6061 } 6062 } 6063 for (i = 0; i < NR_PACKETS; i++) 6064 { 6065 int j; 6066 for (j = 0; j < 3; j++) 6067 packets[i].t[j] = packets[i].t1->t[j]; 6068 for (j = 0; j < 3; j++) 6069 packets[i].t[j + 3] = packets[i].t2->t[j]; 6070 packets[i].first_split = itanium_split_issue (packets + i, 0); 6071 } 6072 6073 } 6074 6075 init_insn_group_barriers (); 6076 6077 memset (&sched_data, 0, sizeof sched_data); 6078 sched_types = (enum attr_type *) xmalloc (max_ready 6079 * sizeof (enum attr_type)); 6080 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx)); 6081} 6082 6083/* See if the packet P can match the insns we have already scheduled. Return 6084 nonzero if so. In *PSLOT, we store the first slot that is available for 6085 more instructions if we choose this packet. 6086 SPLIT holds the last slot we can use, there's a split issue after it so 6087 scheduling beyond it would cause us to use more than one cycle. */ 6088 6089static int 6090packet_matches_p (p, split, pslot) 6091 const struct ia64_packet *p; 6092 int split; 6093 int *pslot; 6094{ 6095 int filled = sched_data.cur; 6096 int first = sched_data.first_slot; 6097 int i, slot; 6098 6099 /* First, check if the first of the two bundles must be a specific one (due 6100 to stop bits). */ 6101 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1) 6102 return 0; 6103 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2) 6104 return 0; 6105 6106 for (i = 0; i < first; i++) 6107 if (! insn_matches_slot (p, sched_data.types[i], i, 6108 sched_data.insns[i])) 6109 return 0; 6110 for (i = slot = first; i < filled; i++) 6111 { 6112 while (slot < split) 6113 { 6114 if (insn_matches_slot (p, sched_data.types[i], slot, 6115 sched_data.insns[i])) 6116 break; 6117 slot++; 6118 } 6119 if (slot == split) 6120 return 0; 6121 slot++; 6122 } 6123 6124 if (pslot) 6125 *pslot = slot; 6126 return 1; 6127} 6128 6129/* A frontend for itanium_split_issue. For a packet P and a slot 6130 number FIRST that describes the start of the current clock cycle, 6131 return the slot number of the first split issue. This function 6132 uses the cached number found in P if possible. */ 6133 6134static int 6135get_split (p, first) 6136 const struct ia64_packet *p; 6137 int first; 6138{ 6139 if (first == 0) 6140 return p->first_split; 6141 return itanium_split_issue (p, first); 6142} 6143 6144/* Given N_READY insns in the array READY, whose types are found in the 6145 corresponding array TYPES, return the insn that is best suited to be 6146 scheduled in slot SLOT of packet P. */ 6147 6148static int 6149find_best_insn (ready, types, n_ready, p, slot) 6150 rtx *ready; 6151 enum attr_type *types; 6152 int n_ready; 6153 const struct ia64_packet *p; 6154 int slot; 6155{ 6156 int best = -1; 6157 int best_pri = 0; 6158 while (n_ready-- > 0) 6159 { 6160 rtx insn = ready[n_ready]; 6161 if (! 
insn) 6162 continue; 6163 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri) 6164 break; 6165 /* If we have equally good insns, one of which has a stricter 6166 slot requirement, prefer the one with the stricter requirement. */ 6167 if (best >= 0 && types[n_ready] == TYPE_A) 6168 continue; 6169 if (insn_matches_slot (p, types[n_ready], slot, insn)) 6170 { 6171 best = n_ready; 6172 best_pri = INSN_PRIORITY (ready[best]); 6173 6174 /* If there's no way we could get a stricter requirement, stop 6175 looking now. */ 6176 if (types[n_ready] != TYPE_A 6177 && ia64_safe_itanium_requires_unit0 (ready[n_ready])) 6178 break; 6179 break; 6180 } 6181 } 6182 return best; 6183} 6184 6185/* Select the best packet to use given the current scheduler state and the 6186 current ready list. 6187 READY is an array holding N_READY ready insns; TYPES is a corresponding 6188 array that holds their types. Store the best packet in *PPACKET and the 6189 number of insns that can be scheduled in the current cycle in *PBEST. */ 6190 6191static void 6192find_best_packet (pbest, ppacket, ready, types, n_ready) 6193 int *pbest; 6194 const struct ia64_packet **ppacket; 6195 rtx *ready; 6196 enum attr_type *types; 6197 int n_ready; 6198{ 6199 int first = sched_data.first_slot; 6200 int best = 0; 6201 int lowest_end = 6; 6202 const struct ia64_packet *best_packet = NULL; 6203 int i; 6204 6205 for (i = 0; i < NR_PACKETS; i++) 6206 { 6207 const struct ia64_packet *p = packets + i; 6208 int slot; 6209 int split = get_split (p, first); 6210 int win = 0; 6211 int first_slot, last_slot; 6212 int b_nops = 0; 6213 6214 if (! packet_matches_p (p, split, &first_slot)) 6215 continue; 6216 6217 memcpy (sched_ready, ready, n_ready * sizeof (rtx)); 6218 6219 win = 0; 6220 last_slot = 6; 6221 for (slot = first_slot; slot < split; slot++) 6222 { 6223 int insn_nr; 6224 6225 /* Disallow a degenerate case where the first bundle doesn't 6226 contain anything but NOPs! */ 6227 if (first_slot == 0 && win == 0 && slot == 3) 6228 { 6229 win = -1; 6230 break; 6231 } 6232 6233 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot); 6234 if (insn_nr >= 0) 6235 { 6236 sched_ready[insn_nr] = 0; 6237 last_slot = slot; 6238 win++; 6239 } 6240 else if (p->t[slot] == TYPE_B) 6241 b_nops++; 6242 } 6243 /* We must disallow MBB/BBB packets if any of their B slots would be 6244 filled with nops. */ 6245 if (last_slot < 3) 6246 { 6247 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2)) 6248 win = -1; 6249 } 6250 else 6251 { 6252 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5)) 6253 win = -1; 6254 } 6255 6256 if (win > best 6257 || (win == best && last_slot < lowest_end)) 6258 { 6259 best = win; 6260 lowest_end = last_slot; 6261 best_packet = p; 6262 } 6263 } 6264 *pbest = best; 6265 *ppacket = best_packet; 6266} 6267 6268/* Reorder the ready list so that the insns that can be issued in this cycle 6269 are found in the correct order at the end of the list. 6270 DUMP is the scheduling dump file, or NULL. READY points to the start, 6271 E_READY to the end of the ready list. MAY_FAIL determines what should be 6272 done if no insns can be scheduled in this cycle: if it is zero, we abort, 6273 otherwise we return 0. 6274 Return 1 if any insns can be scheduled in this cycle. 
*/ 6275 6276static int 6277itanium_reorder (dump, ready, e_ready, may_fail) 6278 FILE *dump; 6279 rtx *ready; 6280 rtx *e_ready; 6281 int may_fail; 6282{ 6283 const struct ia64_packet *best_packet; 6284 int n_ready = e_ready - ready; 6285 int first = sched_data.first_slot; 6286 int i, best, best_split, filled; 6287 6288 for (i = 0; i < n_ready; i++) 6289 sched_types[i] = ia64_safe_type (ready[i]); 6290 6291 find_best_packet (&best, &best_packet, ready, sched_types, n_ready); 6292 6293 if (best == 0) 6294 { 6295 if (may_fail) 6296 return 0; 6297 abort (); 6298 } 6299 6300 if (dump) 6301 { 6302 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n", 6303 best_packet->t1->name, 6304 best_packet->t2 ? best_packet->t2->name : NULL, best); 6305 } 6306 6307 best_split = itanium_split_issue (best_packet, first); 6308 packet_matches_p (best_packet, best_split, &filled); 6309 6310 for (i = filled; i < best_split; i++) 6311 { 6312 int insn_nr; 6313 6314 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i); 6315 if (insn_nr >= 0) 6316 { 6317 rtx insn = ready[insn_nr]; 6318 memmove (ready + insn_nr, ready + insn_nr + 1, 6319 (n_ready - insn_nr - 1) * sizeof (rtx)); 6320 memmove (sched_types + insn_nr, sched_types + insn_nr + 1, 6321 (n_ready - insn_nr - 1) * sizeof (enum attr_type)); 6322 ready[--n_ready] = insn; 6323 } 6324 } 6325 6326 sched_data.packet = best_packet; 6327 sched_data.split = best_split; 6328 return 1; 6329} 6330 6331/* Dump information about the current scheduling state to file DUMP. */ 6332 6333static void 6334dump_current_packet (dump) 6335 FILE *dump; 6336{ 6337 int i; 6338 fprintf (dump, "// %d slots filled:", sched_data.cur); 6339 for (i = 0; i < sched_data.first_slot; i++) 6340 { 6341 rtx insn = sched_data.insns[i]; 6342 fprintf (dump, " %s", type_names[sched_data.types[i]]); 6343 if (insn) 6344 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]); 6345 if (sched_data.stopbit[i]) 6346 fprintf (dump, " ;;"); 6347 } 6348 fprintf (dump, " :::"); 6349 for (i = sched_data.first_slot; i < sched_data.cur; i++) 6350 { 6351 rtx insn = sched_data.insns[i]; 6352 enum attr_type t = ia64_safe_type (insn); 6353 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]); 6354 } 6355 fprintf (dump, "\n"); 6356} 6357 6358/* Schedule a stop bit. DUMP is the current scheduling dump file, or 6359 NULL. */ 6360 6361static void 6362schedule_stop (dump) 6363 FILE *dump; 6364{ 6365 const struct ia64_packet *best = sched_data.packet; 6366 int i; 6367 int best_stop = 6; 6368 6369 if (dump) 6370 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur); 6371 6372 if (sched_data.cur == 0) 6373 { 6374 if (dump) 6375 fprintf (dump, "// At start of bundle, so nothing to do.\n"); 6376 6377 rotate_two_bundles (NULL); 6378 return; 6379 } 6380 6381 for (i = -1; i < NR_PACKETS; i++) 6382 { 6383 /* This is a slight hack to give the current packet the first chance. 6384 This is done to avoid e.g. switching from MIB to MBB bundles. */ 6385 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet); 6386 int split = get_split (p, sched_data.first_slot); 6387 const struct bundle *compare; 6388 int next, stoppos; 6389 6390 if (! packet_matches_p (p, split, &next)) 6391 continue; 6392 6393 compare = next > 3 ? 
p->t2 : p->t1; 6394 6395 stoppos = 3; 6396 if (compare->possible_stop) 6397 stoppos = compare->possible_stop; 6398 if (next > 3) 6399 stoppos += 3; 6400 6401 if (stoppos < next || stoppos >= best_stop) 6402 { 6403 if (compare->possible_stop == 0) 6404 continue; 6405 stoppos = (next > 3 ? 6 : 3); 6406 } 6407 if (stoppos < next || stoppos >= best_stop) 6408 continue; 6409 6410 if (dump) 6411 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n", 6412 best->t1->name, best->t2->name, p->t1->name, p->t2->name, 6413 stoppos); 6414 6415 best_stop = stoppos; 6416 best = p; 6417 } 6418 6419 sched_data.packet = best; 6420 cycle_end_fill_slots (dump); 6421 while (sched_data.cur < best_stop) 6422 { 6423 sched_data.types[sched_data.cur] = best->t[sched_data.cur]; 6424 sched_data.insns[sched_data.cur] = 0; 6425 sched_data.stopbit[sched_data.cur] = 0; 6426 sched_data.cur++; 6427 } 6428 sched_data.stopbit[sched_data.cur - 1] = 1; 6429 sched_data.first_slot = best_stop; 6430 6431 if (dump) 6432 dump_current_packet (dump); 6433} 6434 6435/* If necessary, perform one or two rotations on the scheduling state. 6436 This should only be called if we are starting a new cycle. */ 6437 6438static void 6439maybe_rotate (dump) 6440 FILE *dump; 6441{ 6442 cycle_end_fill_slots (dump); 6443 if (sched_data.cur == 6) 6444 rotate_two_bundles (dump); 6445 else if (sched_data.cur >= 3) 6446 rotate_one_bundle (dump); 6447 sched_data.first_slot = sched_data.cur; 6448} 6449 6450/* The clock cycle when ia64_sched_reorder was last called. */ 6451static int prev_cycle; 6452 6453/* The first insn scheduled in the previous cycle. This is the saved 6454 value of sched_data.first_slot. */ 6455static int prev_first; 6456 6457/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to 6458 pad out the delay between MM (shifts, etc.) and integer operations. */ 6459 6460static void 6461nop_cycles_until (clock_var, dump) 6462 int clock_var; 6463 FILE *dump; 6464{ 6465 int prev_clock = prev_cycle; 6466 int cycles_left = clock_var - prev_clock; 6467 bool did_stop = false; 6468 6469 /* Finish the previous cycle; pad it out with NOPs. */ 6470 if (sched_data.cur == 3) 6471 { 6472 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6473 did_stop = true; 6474 maybe_rotate (dump); 6475 } 6476 else if (sched_data.cur > 0) 6477 { 6478 int need_stop = 0; 6479 int split = itanium_split_issue (sched_data.packet, prev_first); 6480 6481 if (sched_data.cur < 3 && split > 3) 6482 { 6483 split = 3; 6484 need_stop = 1; 6485 } 6486 6487 if (split > sched_data.cur) 6488 { 6489 int i; 6490 for (i = sched_data.cur; i < split; i++) 6491 { 6492 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); 6493 sched_data.types[i] = sched_data.packet->t[i]; 6494 sched_data.insns[i] = t; 6495 sched_data.stopbit[i] = 0; 6496 } 6497 sched_data.cur = split; 6498 } 6499 6500 if (! 
need_stop && sched_data.cur > 0 && sched_data.cur < 6 6501 && cycles_left > 1) 6502 { 6503 int i; 6504 for (i = sched_data.cur; i < 6; i++) 6505 { 6506 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); 6507 sched_data.types[i] = sched_data.packet->t[i]; 6508 sched_data.insns[i] = t; 6509 sched_data.stopbit[i] = 0; 6510 } 6511 sched_data.cur = 6; 6512 cycles_left--; 6513 need_stop = 1; 6514 } 6515 6516 if (need_stop || sched_data.cur == 6) 6517 { 6518 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6519 did_stop = true; 6520 } 6521 maybe_rotate (dump); 6522 } 6523 6524 cycles_left--; 6525 while (cycles_left > 0) 6526 { 6527 sched_emit_insn (gen_bundle_selector (GEN_INT (0))); 6528 sched_emit_insn (gen_nop_type (TYPE_M)); 6529 sched_emit_insn (gen_nop_type (TYPE_I)); 6530 if (cycles_left > 1) 6531 { 6532 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2))); 6533 cycles_left--; 6534 } 6535 sched_emit_insn (gen_nop_type (TYPE_I)); 6536 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6537 did_stop = true; 6538 cycles_left--; 6539 } 6540 6541 if (did_stop) 6542 init_insn_group_barriers (); 6543} 6544 6545/* We are about to begin issuing insns for this clock cycle. 6546 Override the default sort algorithm to better slot instructions. */ 6547 6548static int 6549ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready, 6550 reorder_type, clock_var) 6551 FILE *dump ATTRIBUTE_UNUSED; 6552 int sched_verbose ATTRIBUTE_UNUSED; 6553 rtx *ready; 6554 int *pn_ready; 6555 int reorder_type, clock_var; 6556{ 6557 int n_asms; 6558 int n_ready = *pn_ready; 6559 rtx *e_ready = ready + n_ready; 6560 rtx *insnp; 6561 6562 if (sched_verbose) 6563 { 6564 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type); 6565 dump_current_packet (dump); 6566 } 6567 6568 /* Work around the pipeline flush that will occur if the results of 6569 an MM instruction are accessed before the result is ready. Intel 6570 documentation says this only happens with IALU, ISHF, ILOG, LD, 6571 and ST consumers, but experimental evidence shows that *any* non-MM 6572 type instruction will incur the flush. */ 6573 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule) 6574 { 6575 for (insnp = ready; insnp < e_ready; insnp++) 6576 { 6577 rtx insn = *insnp, link; 6578 enum attr_itanium_class t = ia64_safe_itanium_class (insn); 6579 6580 if (t == ITANIUM_CLASS_MMMUL 6581 || t == ITANIUM_CLASS_MMSHF 6582 || t == ITANIUM_CLASS_MMSHFI) 6583 continue; 6584 6585 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) 6586 if (REG_NOTE_KIND (link) == 0) 6587 { 6588 rtx other = XEXP (link, 0); 6589 enum attr_itanium_class t0 = ia64_safe_itanium_class (other); 6590 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL) 6591 { 6592 nop_cycles_until (clock_var, sched_verbose ? dump : NULL); 6593 goto out; 6594 } 6595 } 6596 } 6597 } 6598 out: 6599 6600 prev_first = sched_data.first_slot; 6601 prev_cycle = clock_var; 6602 6603 if (reorder_type == 0) 6604 maybe_rotate (sched_verbose ? dump : NULL); 6605 6606 /* First, move all USEs, CLOBBERs and other crud out of the way.
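   Insns whose type we cannot determine (TYPE_UNKNOWN) are dealt with
   here: asm-like insns are collected at the front of the ready list
   so the normal insns behind them can still be bundled, while any
   other unknown insn is moved to the very end of the list so that it
   is issued next, preceded by a stop bit if it needs one.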
*/ 6607 n_asms = 0; 6608 for (insnp = ready; insnp < e_ready; insnp++) 6609 if (insnp < e_ready) 6610 { 6611 rtx insn = *insnp; 6612 enum attr_type t = ia64_safe_type (insn); 6613 if (t == TYPE_UNKNOWN) 6614 { 6615 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6616 || asm_noperands (PATTERN (insn)) >= 0) 6617 { 6618 rtx lowest = ready[n_asms]; 6619 ready[n_asms] = insn; 6620 *insnp = lowest; 6621 n_asms++; 6622 } 6623 else 6624 { 6625 rtx highest = ready[n_ready - 1]; 6626 ready[n_ready - 1] = insn; 6627 *insnp = highest; 6628 if (ia64_final_schedule && group_barrier_needed_p (insn)) 6629 { 6630 schedule_stop (sched_verbose ? dump : NULL); 6631 sched_data.last_was_stop = 1; 6632 maybe_rotate (sched_verbose ? dump : NULL); 6633 } 6634 6635 return 1; 6636 } 6637 } 6638 } 6639 if (n_asms < n_ready) 6640 { 6641 /* Some normal insns to process. Skip the asms. */ 6642 ready += n_asms; 6643 n_ready -= n_asms; 6644 } 6645 else if (n_ready > 0) 6646 { 6647 /* Only asm insns left. */ 6648 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1])) 6649 { 6650 schedule_stop (sched_verbose ? dump : NULL); 6651 sched_data.last_was_stop = 1; 6652 maybe_rotate (sched_verbose ? dump : NULL); 6653 } 6654 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6655 return 1; 6656 } 6657 6658 if (ia64_final_schedule) 6659 { 6660 int nr_need_stop = 0; 6661 6662 for (insnp = ready; insnp < e_ready; insnp++) 6663 if (safe_group_barrier_needed_p (*insnp)) 6664 nr_need_stop++; 6665 6666 /* Schedule a stop bit if 6667 - all insns require a stop bit, or 6668 - we are starting a new cycle and _any_ insns require a stop bit. 6669 The reason for the latter is that if our schedule is accurate, then 6670 the additional stop won't decrease performance at this point (since 6671 there's a split issue at this point anyway), but it gives us more 6672 freedom when scheduling the currently ready insns. */ 6673 if ((reorder_type == 0 && nr_need_stop) 6674 || (reorder_type == 1 && n_ready == nr_need_stop)) 6675 { 6676 schedule_stop (sched_verbose ? dump : NULL); 6677 sched_data.last_was_stop = 1; 6678 maybe_rotate (sched_verbose ? dump : NULL); 6679 if (reorder_type == 1) 6680 return 0; 6681 } 6682 else 6683 { 6684 int deleted = 0; 6685 insnp = e_ready; 6686 /* Move down everything that needs a stop bit, preserving relative 6687 order. */ 6688 while (insnp-- > ready + deleted) 6689 while (insnp >= ready + deleted) 6690 { 6691 rtx insn = *insnp; 6692 if (! safe_group_barrier_needed_p (insn)) 6693 break; 6694 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 6695 *ready = insn; 6696 deleted++; 6697 } 6698 n_ready -= deleted; 6699 ready += deleted; 6700 if (deleted != nr_need_stop) 6701 abort (); 6702 } 6703 } 6704 6705 return itanium_reorder (sched_verbose ? dump : NULL, 6706 ready, e_ready, reorder_type == 1); 6707} 6708 6709static int 6710ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) 6711 FILE *dump; 6712 int sched_verbose; 6713 rtx *ready; 6714 int *pn_ready; 6715 int clock_var; 6716{ 6717 return ia64_internal_sched_reorder (dump, sched_verbose, ready, 6718 pn_ready, 0, clock_var); 6719} 6720 6721/* Like ia64_sched_reorder, but called after issuing each insn. 6722 Override the default sort algorithm to better slot instructions. 
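   Here the internal reorder runs with reorder_type == 1, which makes
   it legal for no insn to be schedulable; in that case we simply end
   the current cycle instead of aborting.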
*/ 6723 6724static int 6725ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) 6726 FILE *dump ATTRIBUTE_UNUSED; 6727 int sched_verbose ATTRIBUTE_UNUSED; 6728 rtx *ready; 6729 int *pn_ready; 6730 int clock_var; 6731{ 6732 if (sched_data.last_was_stop) 6733 return 0; 6734 6735 /* Detect one special case and try to optimize it. 6736 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs, 6737 then we can get better code by transforming this to 1.MFB;; 2.MIx. */ 6738 if (sched_data.first_slot == 1 6739 && sched_data.stopbit[0] 6740 && ((sched_data.cur == 4 6741 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A) 6742 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A) 6743 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A)) 6744 || (sched_data.cur == 3 6745 && (sched_data.types[1] == TYPE_M 6746 || sched_data.types[1] == TYPE_A) 6747 && (sched_data.types[2] != TYPE_M 6748 && sched_data.types[2] != TYPE_I 6749 && sched_data.types[2] != TYPE_A)))) 6750 6751 { 6752 int i, best; 6753 rtx stop = sched_data.insns[1]; 6754 6755 /* Search backward for the stop bit that must be there. */ 6756 while (1) 6757 { 6758 int insn_code; 6759 6760 stop = PREV_INSN (stop); 6761 if (GET_CODE (stop) != INSN) 6762 abort (); 6763 insn_code = recog_memoized (stop); 6764 6765 /* Ignore .pred.rel.mutex. 6766 6767 ??? Update this to ignore cycle display notes too 6768 ??? once those are implemented */ 6769 if (insn_code == CODE_FOR_pred_rel_mutex 6770 || insn_code == CODE_FOR_prologue_use) 6771 continue; 6772 6773 if (insn_code == CODE_FOR_insn_group_barrier) 6774 break; 6775 abort (); 6776 } 6777 6778 /* Adjust the stop bit's slot selector. */ 6779 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1) 6780 abort (); 6781 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3); 6782 6783 sched_data.stopbit[0] = 0; 6784 sched_data.stopbit[2] = 1; 6785 6786 sched_data.types[5] = sched_data.types[3]; 6787 sched_data.types[4] = sched_data.types[2]; 6788 sched_data.types[3] = sched_data.types[1]; 6789 sched_data.insns[5] = sched_data.insns[3]; 6790 sched_data.insns[4] = sched_data.insns[2]; 6791 sched_data.insns[3] = sched_data.insns[1]; 6792 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0; 6793 sched_data.cur += 2; 6794 sched_data.first_slot = 3; 6795 for (i = 0; i < NR_PACKETS; i++) 6796 { 6797 const struct ia64_packet *p = packets + i; 6798 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B) 6799 { 6800 sched_data.packet = p; 6801 break; 6802 } 6803 } 6804 rotate_one_bundle (sched_verbose ? dump : NULL); 6805 6806 best = 6; 6807 for (i = 0; i < NR_PACKETS; i++) 6808 { 6809 const struct ia64_packet *p = packets + i; 6810 int split = get_split (p, sched_data.first_slot); 6811 int next; 6812 6813 /* Disallow multiway branches here. */ 6814 if (p->t[1] == TYPE_B) 6815 continue; 6816 6817 if (packet_matches_p (p, split, &next) && next < best) 6818 { 6819 best = next; 6820 sched_data.packet = p; 6821 sched_data.split = split; 6822 } 6823 } 6824 if (best == 6) 6825 abort (); 6826 } 6827 6828 if (*pn_ready > 0) 6829 { 6830 int more = ia64_internal_sched_reorder (dump, sched_verbose, 6831 ready, pn_ready, 1, 6832 clock_var); 6833 if (more) 6834 return more; 6835 /* Did we schedule a stop? If so, finish this cycle. 
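   (schedule_stop advances FIRST_SLOT to the stop position, so
   CUR == FIRST_SLOT here means a stop bit just ended the cycle.)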
*/ 6836 if (sched_data.cur == sched_data.first_slot) 6837 return 0; 6838 } 6839 6840 if (sched_verbose) 6841 fprintf (dump, "// Can't issue more this cycle; updating type array.\n"); 6842 6843 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6844 if (sched_verbose) 6845 dump_current_packet (dump); 6846 return 0; 6847} 6848 6849/* We are about to issue INSN. Return the number of insns left on the 6850 ready queue that can be issued this cycle. */ 6851 6852static int 6853ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) 6854 FILE *dump; 6855 int sched_verbose; 6856 rtx insn; 6857 int can_issue_more ATTRIBUTE_UNUSED; 6858{ 6859 enum attr_type t = ia64_safe_type (insn); 6860 6861 if (sched_data.last_was_stop) 6862 { 6863 int t = sched_data.first_slot; 6864 if (t == 0) 6865 t = 3; 6866 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn); 6867 init_insn_group_barriers (); 6868 sched_data.last_was_stop = 0; 6869 } 6870 6871 if (t == TYPE_UNKNOWN) 6872 { 6873 if (sched_verbose) 6874 fprintf (dump, "// Ignoring type %s\n", type_names[t]); 6875 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6876 || asm_noperands (PATTERN (insn)) >= 0) 6877 { 6878 /* This must be some kind of asm. Clear the scheduling state. */ 6879 rotate_two_bundles (sched_verbose ? dump : NULL); 6880 if (ia64_final_schedule) 6881 group_barrier_needed_p (insn); 6882 } 6883 return 1; 6884 } 6885 6886 /* This is _not_ just a sanity check. group_barrier_needed_p will update 6887 important state info. Don't delete this test. */ 6888 if (ia64_final_schedule 6889 && group_barrier_needed_p (insn)) 6890 abort (); 6891 6892 sched_data.stopbit[sched_data.cur] = 0; 6893 sched_data.insns[sched_data.cur] = insn; 6894 sched_data.types[sched_data.cur] = t; 6895 6896 sched_data.cur++; 6897 if (sched_verbose) 6898 fprintf (dump, "// Scheduling insn %d of type %s\n", 6899 INSN_UID (insn), type_names[t]); 6900 6901 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule) 6902 { 6903 schedule_stop (sched_verbose ? dump : NULL); 6904 sched_data.last_was_stop = 1; 6905 } 6906 6907 return 1; 6908} 6909 6910/* Free data allocated by ia64_sched_init. */ 6911 6912static void 6913ia64_sched_finish (dump, sched_verbose) 6914 FILE *dump; 6915 int sched_verbose; 6916{ 6917 if (sched_verbose) 6918 fprintf (dump, "// Finishing schedule.\n"); 6919 rotate_two_bundles (NULL); 6920 free (sched_types); 6921 free (sched_ready); 6922} 6923 6924/* Emit pseudo-ops for the assembler to describe predicate relations. 6925 At present this assumes that we only consider predicate pairs to 6926 be mutex, and that the assembler can deduce proper values from 6927 straight-line code. */ 6928 6929static void 6930emit_predicate_relation_info () 6931{ 6932 basic_block bb; 6933 6934 FOR_EACH_BB_REVERSE (bb) 6935 { 6936 int r; 6937 rtx head = bb->head; 6938 6939 /* We only need such notes at code labels. */ 6940 if (GET_CODE (head) != CODE_LABEL) 6941 continue; 6942 if (GET_CODE (NEXT_INSN (head)) == NOTE 6943 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK) 6944 head = NEXT_INSN (head); 6945 6946 for (r = PR_REG (0); r < PR_REG (64); r += 2) 6947 if (REGNO_REG_SET_P (bb->global_live_at_start, r)) 6948 { 6949 rtx p = gen_rtx_REG (BImode, r); 6950 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head); 6951 if (head == bb->end) 6952 bb->end = n; 6953 head = n; 6954 } 6955 } 6956 6957 /* Look for conditional calls that do not return, and protect predicate 6958 relations around them. 
Otherwise the assembler will assume the call 6959 returns, and complain about uses of call-clobbered predicates after 6960 the call. */ 6961 FOR_EACH_BB_REVERSE (bb) 6962 { 6963 rtx insn = bb->head; 6964 6965 while (1) 6966 { 6967 if (GET_CODE (insn) == CALL_INSN 6968 && GET_CODE (PATTERN (insn)) == COND_EXEC 6969 && find_reg_note (insn, REG_NORETURN, NULL_RTX)) 6970 { 6971 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn); 6972 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn); 6973 if (bb->head == insn) 6974 bb->head = b; 6975 if (bb->end == insn) 6976 bb->end = a; 6977 } 6978 6979 if (insn == bb->end) 6980 break; 6981 insn = NEXT_INSN (insn); 6982 } 6983 } 6984} 6985 6986/* Generate a NOP instruction of type T. We will never generate L type 6987 nops. */ 6988 6989static rtx 6990gen_nop_type (t) 6991 enum attr_type t; 6992{ 6993 switch (t) 6994 { 6995 case TYPE_M: 6996 return gen_nop_m (); 6997 case TYPE_I: 6998 return gen_nop_i (); 6999 case TYPE_B: 7000 return gen_nop_b (); 7001 case TYPE_F: 7002 return gen_nop_f (); 7003 case TYPE_X: 7004 return gen_nop_x (); 7005 default: 7006 abort (); 7007 } 7008} 7009 7010/* After the last scheduling pass, fill in NOPs. It's easier to do this 7011 here than while scheduling. */ 7012 7013static void 7014ia64_emit_nops () 7015{ 7016 rtx insn; 7017 const struct bundle *b = 0; 7018 int bundle_pos = 0; 7019 7020 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7021 { 7022 rtx pat; 7023 enum attr_type t; 7024 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx; 7025 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) 7026 continue; 7027 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR) 7028 || GET_CODE (insn) == CODE_LABEL) 7029 { 7030 if (b) 7031 while (bundle_pos < 3) 7032 { 7033 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7034 bundle_pos++; 7035 } 7036 if (GET_CODE (insn) != CODE_LABEL) 7037 b = bundle + INTVAL (XVECEXP (pat, 0, 0)); 7038 else 7039 b = 0; 7040 bundle_pos = 0; 7041 continue; 7042 } 7043 else if (GET_CODE (pat) == UNSPEC_VOLATILE 7044 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER) 7045 { 7046 int t = INTVAL (XVECEXP (pat, 0, 0)); 7047 if (b) 7048 while (bundle_pos < t) 7049 { 7050 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7051 bundle_pos++; 7052 } 7053 continue; 7054 } 7055 7056 if (bundle_pos == 3) 7057 b = 0; 7058 7059 if (b && INSN_P (insn)) 7060 { 7061 t = ia64_safe_type (insn); 7062 if (asm_noperands (PATTERN (insn)) >= 0 7063 || GET_CODE (PATTERN (insn)) == ASM_INPUT) 7064 { 7065 while (bundle_pos < 3) 7066 { 7067 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7068 bundle_pos++; 7069 } 7070 continue; 7071 } 7072 7073 if (t == TYPE_UNKNOWN) 7074 continue; 7075 while (bundle_pos < 3) 7076 { 7077 if (t == b->t[bundle_pos] 7078 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M 7079 || b->t[bundle_pos] == TYPE_I))) 7080 break; 7081 7082 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7083 bundle_pos++; 7084 } 7085 if (bundle_pos < 3) 7086 bundle_pos++; 7087 } 7088 } 7089} 7090 7091/* Perform machine dependent operations on the rtl chain INSNS. */ 7092 7093void 7094ia64_reorg (insns) 7095 rtx insns; 7096{ 7097 /* We are freeing block_for_insn in the toplev to keep compatibility 7098 with old MDEP_REORGS that are not CFG based. Recompute it now. */ 7099 compute_bb_for_insn (); 7100 7101 /* If optimizing, we'll have split before scheduling. */ 7102 if (optimize == 0) 7103 split_all_insns (0); 7104 7105 /* ??? 
update_life_info_in_dirty_blocks fails to terminate during 7106 non-optimizing bootstrap. */ 7107 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES); 7108 7109 if (ia64_flag_schedule_insns2) 7110 { 7111 timevar_push (TV_SCHED2); 7112 ia64_final_schedule = 1; 7113 schedule_ebbs (rtl_dump_file); 7114 ia64_final_schedule = 0; 7115 timevar_pop (TV_SCHED2); 7116 7117 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same 7118 place as they were during scheduling. */ 7119 emit_insn_group_barriers (rtl_dump_file, insns); 7120 ia64_emit_nops (); 7121 } 7122 else 7123 emit_all_insn_group_barriers (rtl_dump_file, insns); 7124 7125 /* A call must not be the last instruction in a function, so that the 7126 return address is still within the function, so that unwinding works 7127 properly. Note that IA-64 differs from dwarf2 on this point. */ 7128 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7129 { 7130 rtx insn; 7131 int saw_stop = 0; 7132 7133 insn = get_last_insn (); 7134 if (! INSN_P (insn)) 7135 insn = prev_active_insn (insn); 7136 if (GET_CODE (insn) == INSN 7137 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 7138 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 7139 { 7140 saw_stop = 1; 7141 insn = prev_active_insn (insn); 7142 } 7143 if (GET_CODE (insn) == CALL_INSN) 7144 { 7145 if (! saw_stop) 7146 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7147 emit_insn (gen_break_f ()); 7148 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7149 } 7150 } 7151 7152 fixup_errata (); 7153 emit_predicate_relation_info (); 7154} 7155 7156/* Return true if REGNO is used by the epilogue. */ 7157 7158int 7159ia64_epilogue_uses (regno) 7160 int regno; 7161{ 7162 switch (regno) 7163 { 7164 case R_GR (1): 7165 /* When a function makes a call through a function descriptor, we 7166 will write a (potentially) new value to "gp". After returning 7167 from such a call, we need to make sure the function restores the 7168 original gp-value, even if the function itself does not use the 7169 gp anymore. */ 7170 return (TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC)); 7171 7172 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): 7173 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): 7174 /* For functions defined with the syscall_linkage attribute, all 7175 input registers are marked as live at all function exits. This 7176 prevents the register allocator from using the input registers, 7177 which in turn makes it possible to restart a system call after 7178 an interrupt without having to save/restore the input registers. 7179 This also prevents kernel data from leaking to application code. */ 7180 return lookup_attribute ("syscall_linkage", 7181 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; 7182 7183 case R_BR (0): 7184 /* Conditional return patterns can't represent the use of `b0' as 7185 the return address, so we force the value live this way. */ 7186 return 1; 7187 7188 case AR_PFS_REGNUM: 7189 /* Likewise for ar.pfs, which is used by br.ret. */ 7190 return 1; 7191 7192 default: 7193 return 0; 7194 } 7195} 7196 7197/* Return true if REGNO is used by the frame unwinder. */ 7198 7199int 7200ia64_eh_uses (regno) 7201 int regno; 7202{ 7203 if (! 
reload_completed) 7204 return 0; 7205 7206 if (current_frame_info.reg_save_b0 7207 && regno == current_frame_info.reg_save_b0) 7208 return 1; 7209 if (current_frame_info.reg_save_pr 7210 && regno == current_frame_info.reg_save_pr) 7211 return 1; 7212 if (current_frame_info.reg_save_ar_pfs 7213 && regno == current_frame_info.reg_save_ar_pfs) 7214 return 1; 7215 if (current_frame_info.reg_save_ar_unat 7216 && regno == current_frame_info.reg_save_ar_unat) 7217 return 1; 7218 if (current_frame_info.reg_save_ar_lc 7219 && regno == current_frame_info.reg_save_ar_lc) 7220 return 1; 7221 7222 return 0; 7223} 7224 7225/* For ia64, SYMBOL_REF_FLAG set means that it is a function. 7226 7227 We add @ to the name if this goes in small data/bss. We can only put 7228 a variable in small data/bss if it is defined in this module or a module 7229 that we are statically linked with. We can't check the second condition, 7230 but TREE_STATIC gives us the first one. */ 7231 7232/* ??? If we had IPA, we could check the second condition. We could support 7233 programmer-added section attributes if the variable is not defined in this 7234 module. */ 7235 7236/* ??? See the v850 port for a cleaner way to do this. */ 7237 7238/* ??? We could also support our own long data here. Generating movl/add/ld8 7239 instead of addl,ld8/ld8. This makes the code bigger, but should make the 7240 code faster because there is one less load. This also includes incomplete 7241 types which can't go in sdata/sbss. */ 7242 7243static bool 7244ia64_in_small_data_p (exp) 7245 tree exp; 7246{ 7247 if (TARGET_NO_SDATA) 7248 return false; 7249 7250 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 7251 { 7252 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); 7253 if (strcmp (section, ".sdata") == 0 7254 || strcmp (section, ".sbss") == 0) 7255 return true; 7256 } 7257 else 7258 { 7259 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 7260 7261 /* If this is an incomplete type with size 0, then we can't put it 7262 in sdata because it might be too big when completed. */ 7263 if (size > 0 && size <= ia64_section_threshold) 7264 return true; 7265 } 7266 7267 return false; 7268} 7269 7270static void 7271ia64_encode_section_info (decl, first) 7272 tree decl; 7273 int first ATTRIBUTE_UNUSED; 7274{ 7275 const char *symbol_str; 7276 bool is_local; 7277 rtx symbol; 7278 char encoding = 0; 7279 7280 if (TREE_CODE (decl) == FUNCTION_DECL) 7281 { 7282 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; 7283 return; 7284 } 7285 7286 /* Careful not to prod global register variables. */ 7287 if (TREE_CODE (decl) != VAR_DECL 7288 || GET_CODE (DECL_RTL (decl)) != MEM 7289 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) 7290 return; 7291 7292 symbol = XEXP (DECL_RTL (decl), 0); 7293 symbol_str = XSTR (symbol, 0); 7294 7295 is_local = (*targetm.binds_local_p) (decl); 7296 7297 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl)) 7298 encoding = " GLil"[decl_tls_model (decl)]; 7299 /* Determine if DECL will wind up in .sdata/.sbss. */ 7300 else if (is_local && ia64_in_small_data_p (decl)) 7301 encoding = 's'; 7302 7303 /* Finally, encode this into the symbol string. */ 7304 if (encoding) 7305 { 7306 char *newstr; 7307 size_t len; 7308 7309 if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR) 7310 { 7311 if (encoding == symbol_str[1]) 7312 return; 7313 /* ??? Sdata became thread or thread became not thread. Lose.
*/ 7314 abort (); 7315 } 7316 7317 len = strlen (symbol_str); 7318 newstr = alloca (len + 3); 7319 newstr[0] = ENCODE_SECTION_INFO_CHAR; 7320 newstr[1] = encoding; 7321 memcpy (newstr + 2, symbol_str, len + 1); 7322 7323 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2); 7324 } 7325 7326 /* This decl is marked as being in small data/bss but it shouldn't be; 7327 one likely explanation for this is that the decl has been moved into 7328 a different section from the one it was in when encode_section_info 7329 was first called. Remove the encoding. */ 7330 else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR) 7331 XSTR (symbol, 0) = ggc_strdup (symbol_str + 2); 7332} 7333 7334static const char * 7335ia64_strip_name_encoding (str) 7336 const char *str; 7337{ 7338 if (str[0] == ENCODE_SECTION_INFO_CHAR) 7339 str += 2; 7340 if (str[0] == '*') 7341 str++; 7342 return str; 7343} 7344 7345/* True if it is OK to do sibling call optimization for the specified 7346 call expression EXP. DECL will be the called function, or NULL if 7347 this is an indirect call. */ 7348bool 7349ia64_function_ok_for_sibcall (decl) 7350 tree decl; 7351{ 7352 /* Direct calls are always ok. */ 7353 if (decl) 7354 return true; 7355 7356 /* If TARGET_CONST_GP is in effect, then our caller expects us to 7357 return with our current GP. This means that we'll always have 7358 a GP reload after an indirect call. */ 7359 return !ia64_epilogue_uses (R_GR (1)); 7360} 7361 7362/* Output assembly directives for prologue regions. */ 7363 7364/* The current basic block number. */ 7365 7366static bool last_block; 7367 7368/* True if we need a copy_state command at the start of the next block. */ 7369 7370static bool need_copy_state; 7371 7372/* The function emits unwind directives for the start of an epilogue. */ 7373 7374static void 7375process_epilogue () 7376{ 7377 /* If this isn't the last block of the function, then we need to label the 7378 current state, and copy it back in at the start of the next block. */ 7379 7380 if (!last_block) 7381 { 7382 fprintf (asm_out_file, "\t.label_state 1\n"); 7383 need_copy_state = true; 7384 } 7385 7386 fprintf (asm_out_file, "\t.restore sp\n"); 7387} 7388 7389/* This function processes a SET pattern looking for specific patterns 7390 which result in emitting an assembly directive required for unwinding. */ 7391 7392static int 7393process_set (asm_out_file, pat) 7394 FILE *asm_out_file; 7395 rtx pat; 7396{ 7397 rtx src = SET_SRC (pat); 7398 rtx dest = SET_DEST (pat); 7399 int src_regno, dest_regno; 7400 7401 /* Look for the ALLOC insn. */ 7402 if (GET_CODE (src) == UNSPEC_VOLATILE 7403 && XINT (src, 1) == UNSPECV_ALLOC 7404 && GET_CODE (dest) == REG) 7405 { 7406 dest_regno = REGNO (dest); 7407 7408 /* If this isn't the final destination for ar.pfs, the alloc 7409 shouldn't have been marked frame related. */ 7410 if (dest_regno != current_frame_info.reg_save_ar_pfs) 7411 abort (); 7412 7413 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", 7414 ia64_dbx_register_number (dest_regno)); 7415 return 1; 7416 } 7417 7418 /* Look for SP = .... 
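   A prologue decrement (sp = sp + <negative constant>) yields the
   .fframe directive; an increment of sp, or copying the hard frame
   pointer back into sp, marks the start of an epilogue.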
*/ 7419 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM) 7420 { 7421 if (GET_CODE (src) == PLUS) 7422 { 7423 rtx op0 = XEXP (src, 0); 7424 rtx op1 = XEXP (src, 1); 7425 if (op0 == dest && GET_CODE (op1) == CONST_INT) 7426 { 7427 if (INTVAL (op1) < 0) 7428 { 7429 fputs ("\t.fframe ", asm_out_file); 7430 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 7431 -INTVAL (op1)); 7432 fputc ('\n', asm_out_file); 7433 } 7434 else 7435 process_epilogue (); 7436 } 7437 else 7438 abort (); 7439 } 7440 else if (GET_CODE (src) == REG 7441 && REGNO (src) == HARD_FRAME_POINTER_REGNUM) 7442 process_epilogue (); 7443 else 7444 abort (); 7445 7446 return 1; 7447 } 7448 7449 /* Register move we need to look at. */ 7450 if (GET_CODE (dest) == REG && GET_CODE (src) == REG) 7451 { 7452 src_regno = REGNO (src); 7453 dest_regno = REGNO (dest); 7454 7455 switch (src_regno) 7456 { 7457 case BR_REG (0): 7458 /* Saving return address pointer. */ 7459 if (dest_regno != current_frame_info.reg_save_b0) 7460 abort (); 7461 fprintf (asm_out_file, "\t.save rp, r%d\n", 7462 ia64_dbx_register_number (dest_regno)); 7463 return 1; 7464 7465 case PR_REG (0): 7466 if (dest_regno != current_frame_info.reg_save_pr) 7467 abort (); 7468 fprintf (asm_out_file, "\t.save pr, r%d\n", 7469 ia64_dbx_register_number (dest_regno)); 7470 return 1; 7471 7472 case AR_UNAT_REGNUM: 7473 if (dest_regno != current_frame_info.reg_save_ar_unat) 7474 abort (); 7475 fprintf (asm_out_file, "\t.save ar.unat, r%d\n", 7476 ia64_dbx_register_number (dest_regno)); 7477 return 1; 7478 7479 case AR_LC_REGNUM: 7480 if (dest_regno != current_frame_info.reg_save_ar_lc) 7481 abort (); 7482 fprintf (asm_out_file, "\t.save ar.lc, r%d\n", 7483 ia64_dbx_register_number (dest_regno)); 7484 return 1; 7485 7486 case STACK_POINTER_REGNUM: 7487 if (dest_regno != HARD_FRAME_POINTER_REGNUM 7488 || ! frame_pointer_needed) 7489 abort (); 7490 fprintf (asm_out_file, "\t.vframe r%d\n", 7491 ia64_dbx_register_number (dest_regno)); 7492 return 1; 7493 7494 default: 7495 /* Everything else should indicate being stored to memory. */ 7496 abort (); 7497 } 7498 } 7499 7500 /* Memory store we need to look at. 
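   The address must be a bare register or register + constant offset.
   Saves relative to the hard frame pointer become .savepsp (with the
   offset negated); saves relative to the stack pointer become
   .savesp.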
*/ 7501 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG) 7502 { 7503 long off; 7504 rtx base; 7505 const char *saveop; 7506 7507 if (GET_CODE (XEXP (dest, 0)) == REG) 7508 { 7509 base = XEXP (dest, 0); 7510 off = 0; 7511 } 7512 else if (GET_CODE (XEXP (dest, 0)) == PLUS 7513 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT) 7514 { 7515 base = XEXP (XEXP (dest, 0), 0); 7516 off = INTVAL (XEXP (XEXP (dest, 0), 1)); 7517 } 7518 else 7519 abort (); 7520 7521 if (base == hard_frame_pointer_rtx) 7522 { 7523 saveop = ".savepsp"; 7524 off = - off; 7525 } 7526 else if (base == stack_pointer_rtx) 7527 saveop = ".savesp"; 7528 else 7529 abort (); 7530 7531 src_regno = REGNO (src); 7532 switch (src_regno) 7533 { 7534 case BR_REG (0): 7535 if (current_frame_info.reg_save_b0 != 0) 7536 abort (); 7537 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off); 7538 return 1; 7539 7540 case PR_REG (0): 7541 if (current_frame_info.reg_save_pr != 0) 7542 abort (); 7543 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off); 7544 return 1; 7545 7546 case AR_LC_REGNUM: 7547 if (current_frame_info.reg_save_ar_lc != 0) 7548 abort (); 7549 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off); 7550 return 1; 7551 7552 case AR_PFS_REGNUM: 7553 if (current_frame_info.reg_save_ar_pfs != 0) 7554 abort (); 7555 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off); 7556 return 1; 7557 7558 case AR_UNAT_REGNUM: 7559 if (current_frame_info.reg_save_ar_unat != 0) 7560 abort (); 7561 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off); 7562 return 1; 7563 7564 case GR_REG (4): 7565 case GR_REG (5): 7566 case GR_REG (6): 7567 case GR_REG (7): 7568 fprintf (asm_out_file, "\t.save.g 0x%x\n", 7569 1 << (src_regno - GR_REG (4))); 7570 return 1; 7571 7572 case BR_REG (1): 7573 case BR_REG (2): 7574 case BR_REG (3): 7575 case BR_REG (4): 7576 case BR_REG (5): 7577 fprintf (asm_out_file, "\t.save.b 0x%x\n", 7578 1 << (src_regno - BR_REG (1))); 7579 return 1; 7580 7581 case FR_REG (2): 7582 case FR_REG (3): 7583 case FR_REG (4): 7584 case FR_REG (5): 7585 fprintf (asm_out_file, "\t.save.f 0x%x\n", 7586 1 << (src_regno - FR_REG (2))); 7587 return 1; 7588 7589 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): 7590 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): 7591 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): 7592 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): 7593 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 7594 1 << (src_regno - FR_REG (12))); 7595 return 1; 7596 7597 default: 7598 return 0; 7599 } 7600 } 7601 7602 return 0; 7603} 7604 7605 7606/* This function looks at a single insn and emits any directives 7607 required to unwind this insn. */ 7608void 7609process_for_unwind_directive (asm_out_file, insn) 7610 FILE *asm_out_file; 7611 rtx insn; 7612{ 7613 if (flag_unwind_tables 7614 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7615 { 7616 rtx pat; 7617 7618 if (GET_CODE (insn) == NOTE 7619 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 7620 { 7621 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR; 7622 7623 /* Restore unwind state from immediately before the epilogue. */ 7624 if (need_copy_state) 7625 { 7626 fprintf (asm_out_file, "\t.body\n"); 7627 fprintf (asm_out_file, "\t.copy_state 1\n"); 7628 need_copy_state = false; 7629 } 7630 } 7631 7632 if (GET_CODE (insn) == NOTE || ! 
RTX_FRAME_RELATED_P (insn)) 7633 return; 7634 7635 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); 7636 if (pat) 7637 pat = XEXP (pat, 0); 7638 else 7639 pat = PATTERN (insn); 7640 7641 switch (GET_CODE (pat)) 7642 { 7643 case SET: 7644 process_set (asm_out_file, pat); 7645 break; 7646 7647 case PARALLEL: 7648 { 7649 int par_index; 7650 int limit = XVECLEN (pat, 0); 7651 for (par_index = 0; par_index < limit; par_index++) 7652 { 7653 rtx x = XVECEXP (pat, 0, par_index); 7654 if (GET_CODE (x) == SET) 7655 process_set (asm_out_file, x); 7656 } 7657 break; 7658 } 7659 7660 default: 7661 abort (); 7662 } 7663 } 7664} 7665 7666 7667void 7668ia64_init_builtins () 7669{ 7670 tree psi_type_node = build_pointer_type (integer_type_node); 7671 tree pdi_type_node = build_pointer_type (long_integer_type_node); 7672 7673 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ 7674 tree si_ftype_psi_si_si 7675 = build_function_type_list (integer_type_node, 7676 psi_type_node, integer_type_node, 7677 integer_type_node, NULL_TREE); 7678 7679 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ 7680 tree di_ftype_pdi_di_di 7681 = build_function_type_list (long_integer_type_node, 7682 pdi_type_node, long_integer_type_node, 7683 long_integer_type_node, NULL_TREE); 7684 /* __sync_synchronize */ 7685 tree void_ftype_void 7686 = build_function_type (void_type_node, void_list_node); 7687 7688 /* __sync_lock_test_and_set_si */ 7689 tree si_ftype_psi_si 7690 = build_function_type_list (integer_type_node, 7691 psi_type_node, integer_type_node, NULL_TREE); 7692 7693 /* __sync_lock_test_and_set_di */ 7694 tree di_ftype_pdi_di 7695 = build_function_type_list (long_integer_type_node, 7696 pdi_type_node, long_integer_type_node, 7697 NULL_TREE); 7698 7699 /* __sync_lock_release_si */ 7700 tree void_ftype_psi 7701 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE); 7702 7703 /* __sync_lock_release_di */ 7704 tree void_ftype_pdi 7705 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE); 7706 7707#define def_builtin(name, type, code) \ 7708 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE) 7709 7710 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, 7711 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); 7712 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, 7713 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); 7714 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, 7715 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); 7716 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, 7717 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); 7718 7719 def_builtin ("__sync_synchronize", void_ftype_void, 7720 IA64_BUILTIN_SYNCHRONIZE); 7721 7722 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, 7723 IA64_BUILTIN_LOCK_TEST_AND_SET_SI); 7724 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, 7725 IA64_BUILTIN_LOCK_TEST_AND_SET_DI); 7726 def_builtin ("__sync_lock_release_si", void_ftype_psi, 7727 IA64_BUILTIN_LOCK_RELEASE_SI); 7728 def_builtin ("__sync_lock_release_di", void_ftype_pdi, 7729 IA64_BUILTIN_LOCK_RELEASE_DI); 7730 7731 def_builtin ("__builtin_ia64_bsp", 7732 build_function_type (ptr_type_node, void_list_node), 7733 IA64_BUILTIN_BSP); 7734 7735 def_builtin ("__builtin_ia64_flushrs", 7736 build_function_type (void_type_node, void_list_node), 7737 IA64_BUILTIN_FLUSHRS); 7738 7739 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si, 7740 IA64_BUILTIN_FETCH_AND_ADD_SI); 7741 

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
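
/* For example, __sync_fetch_and_add_si (p, 1) has an addend accepted
   by fetchadd_operand and so takes the fast path above, becoming a
   single fetchadd4.acq; an addend such as 3 is rejected and falls
   through to the generic cmpxchg loop.  (Illustrative; the actual
   instruction text comes from the fetchadd_acq_si pattern in the
   machine description.)  */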

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
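
/* Note the NAND special case in both loops above: the *_nand_*
   builtins are expanded with one_cmpl_optab, so the value computed in
   the loop body is ~old & value, not ~(old & value).  */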

/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (mode, boolp, arglist, target)
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  if (mode == DImode)
    emit_move_insn (ccv, old);
  else
    {
      rtx ccvtmp = gen_reg_rtx (DImode);
      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
      emit_move_insn (ccv, ccvtmp);
    }
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
	target = gen_reg_rtx (mode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}

/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}
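
/* A rough mapping from the builtins expanded above to the core ia64
   instructions, shown for the SImode forms (the DImode forms use the
   8-byte variants; illustrative only, the exact text comes from the
   corresponding machine-description patterns):

       __sync_val_compare_and_swap_si (p, o, n)
	   mov ar.ccv = o ;; mf ;; cmpxchg4.acq r = [p], n, ar.ccv
       __sync_lock_test_and_set_si (p, n)
	   xchg4 r = [p], n
*/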

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}

rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  /* First derive the operand mode from the _si/_di suffix of the
     builtin...  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  /* ...then dispatch to the appropriate expander.  */
  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
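
/* The two register-stack builtins dispatched above take no arguments;
   a hypothetical use (the variable name is made up):

       void *bsp = __builtin_ia64_bsp ();   -- current backing store pointer
       __builtin_ia64_flushrs ();           -- flush dirty stacked registers

   The former expands through the bsp_value pattern, the latter emits a
   flushrs instruction.  */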

/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return ((mode == BLKmode
	   ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	      && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	   : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	  ? downward : upward);
}
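
/* Worked example (assuming a 64-bit PARM_BOUNDARY and the big-endian
   HP-UX ABI): a 5-byte struct satisfies the aggregate test above and
   returns `upward', so its bytes occupy the low-address -- most
   significant -- end of its 8-byte slot.  A plain `int' falls through
   to the generic expression, where 32 < 64 yields `downward' and the
   value sits in the least significant bits of the slot.  */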

/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next; /* next external */
  char *name; /* name of the external */
} *extern_func_head = 0;

static void
ia64_hpux_add_extern_decl (name)
     const char *name;
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

void
ia64_hpux_asm_file_end (file)
     FILE *file;
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
	{
	  if (decl)
	    TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
	  fprintf (file, "%s", TYPE_ASM_OP);
	  assemble_name (file, extern_func_head->name);
	  putc (',', file);
	  fprintf (file, TYPE_OPERAND_FMT, "function");
	  putc ('\n', file);
	}
      extern_func_head = extern_func_head->next;
    }
}


/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (decl, name, reloc)
     tree decl;
     const char *name;
     int reloc;
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
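
/* The rwreloc variants above all pass `true' as the trailing argument
   of the default_* routines, or force flag_pic, so that the defaults
   behave as they would for a shared-library build (this matches the
   `Pretend flag_pic is always set' comment above): data that needs
   load-time relocation is then placed in writable sections rather
   than read-only ones.  */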

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx this, insn, funexp;

  reload_completed = 1;
  no_new_pseudos = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NULL, NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (!CONST_OK_FOR_J (vcall_offset))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	  emit_move_insn (tmp2, vcall_offset_rtx);
	  vcall_offset_rtx = tmp2;
	}
      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn = get_insns ();
  emit_all_insn_group_barriers (NULL, insn);
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  no_new_pseudos = 0;
}

#include "gt-ia64.h"