/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
   David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "langhooks.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;
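/* For reference, both option strings arrive verbatim from the command
   line; e.g. "-mfixed-range=f32-f127" reserves the rotating FP
   registers, and "-mtls-size=14" selects "adds"-sized TLS offsets
   instead of the default 22-bit "addl" form (64-bit offsets use
   "movl").  The strings are parsed later, by fix_range and the option
   override code; neither parser appears in this excerpt.  */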
/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode, int,
						 tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					  HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
						     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;
/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,              0, 0, false, false, false, NULL }
};

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}
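/* A note on the small-data machinery used below: objects no larger than
   ia64_section_threshold bytes are placed in .sdata/.sbss, where they
   can be addressed gp-relative in one instruction, e.g.

	addl	r2 = @gprel(small_var), gp
	;;
	ld8	r3 = [r2]

   instead of going through the GOT.  sdata_symbolic_operand recognizes
   such symbols by the section-info encoding that ia64_encode_section_info
   (declared above, defined elsewhere in this file) prepends to their
   names.  small_var here is just an illustrative name.  */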
/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}
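/* Worked example of the prefix stripping above: for "__sigsetjmp" the
   name is advanced past "__", leaving "sigsetjmp", which matches the
   name[1] == 'i' arm, so the call is treated as setjmp-like.  "vfork"
   matches directly; "setjmpx" matches nothing, since strcmp demands an
   exact match after the prefix is removed.  */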
/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}
/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
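/* For reference, the CONST_OK_FOR_* macros used above map to the ia64
   constraint letters, which in the ia64.h of this vintage are roughly:

	I	14-bit signed immediate ("adds")
	J	22-bit signed immediate ("addl")
	K	8-bit signed immediate (logical insns)
	L	8-bit "adjusted" immediate (compare pseudo-ops)
	M	6-bit unsigned immediate (shift counts 0..63)
	N	9-bit signed immediate (load/store post-increment)

   ia64.h remains the authoritative definition of each range.  */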
/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4  || INTVAL (op) == -1 ||
	      INTVAL (op) == 1   || INTVAL (op) == 4  ||
	      INTVAL (op) == 8   || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}
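/* The normal/adjusted split above reflects how compare immediates are
   encoded: a comparison like "r < 129" can be rewritten as "r <= 128",
   so LT/GE/LTU/GEU are usable when the adjusted immediate
   (CONST_OK_FOR_L) fits in 8 bits, while EQ/NE/GT/LE/GTU/LEU use the
   plain 8-bit range (CONST_OK_FOR_K).  When the final condition is not
   known at operand-matching time, an immediate must satisfy both
   ranges, which is what gr_reg_or_8bit_and_adjusted_operand checks.
   (This is a sketch of the intent; the constraint definitions in
   ia64.h are authoritative.)  */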
/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode)
	  && REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}
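/* The constants accepted by ia64_move_ok for a store are exactly those
   the architecture provides for free: GR r0 is hardwired to 0, and FR
   f0 and f1 are hardwired to +0.0 and +1.0, so for example

	st8	[r32] = r0	// store integer zero
	stfd	[r33] = f1	// store 1.0

   need no constant materialization.  CONST_DOUBLE_OK_FOR_G accepts
   just those floating-point values.  */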
/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of GNAT c++/6685.  */

int
addp4_optimize_ok (op1, op2)
     rtx op1, op2;
{
  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE (op1))
	  != basereg_operand (op2, GET_MODE (op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
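/* Worked example: for ROP = 0xff0 and RSHIFT = 4, the shift leaves
   op = 0xff; op + 1 = 0x100 is a power of two, and exact_log2 returns
   8, the width of the deposited field.  For a non-contiguous mask such
   as 0xf70, op + 1 is not a power of two and exact_log2 returns -1,
   signalling failure as the comment above promises.  */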
/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    {
      if (! scratch || ! register_operand (scratch, DImode))
	temp = gen_reg_rtx (DImode);
      else
	temp = scratch;
    }
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
#ifdef POINTERS_EXTEND_UNSIGNED
      if (GET_MODE (temp) != GET_MODE (src))
	src = convert_memory_address (GET_MODE (temp), src);
#endif
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    {
      if (GET_MODE (dest) != GET_MODE (temp))
	temp = convert_to_mode (GET_MODE (dest), temp, 0);
      emit_move_insn (dest, temp);
    }
}
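/* Worked example of the offset split: for ofs = 0x12345,

	lo = ((0x12345 & 0x3fff) ^ 0x2000) - 0x2000 = -0x1cbb
	hi = 0x12345 - (-0x1cbb)                    = 0x14000

   hi has its low 14 bits clear (so the symbol+hi address is acceptable
   to got_symbolic_operand above), and lo fits the 14-bit signed
   immediate of the final gen_adddi3.  */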
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    {
      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
    }
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      emit_libcall_block (insns, op0, tga_ret, op1);
	      return NULL_RTX;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (register_operand (op0, Pmode))
		tga_ret = op0;
	      else
		tga_ret = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (tga_ret, op1));
		  emit_insn (gen_adddi3 (tga_ret, tmp, tga_ret));
		}
	      else
		emit_insn (gen_add_dtprel (tga_ret, tmp, op1));
	      if (tga_ret == op0)
		return NULL_RTX;
	      op1 = tga_ret;
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (register_operand (op0, Pmode))
		op1 = op0;
	      else
		op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op1, tmp, gen_thread_pointer ()));
	      if (op1 == op0)
		return NULL_RTX;
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (register_operand (op0, Pmode))
		tmp = op0;
	      else
		tmp = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (tmp, op1));
		  emit_insn (gen_adddi3 (tmp, gen_thread_pointer (), tmp));
		}
	      else
		emit_insn (gen_add_tprel (tmp, gen_thread_pointer (), op1));
	      if (tmp == op0)
		return NULL_RTX;
	      op1 = tmp;
	      break;

	    default:
	      abort ();
	    }
	}
      else if (!TARGET_NO_PIC
	       && (symbolic_operand (op1, Pmode)
		   || symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    {
	      if (GET_MODE (op1) == DImode)
		emit_insn (gen_movdi_symbolic (op0, op1));
	      else
		emit_insn (gen_movsi_symbolic (op0, op1));
	    }
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}
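/* In outline, and per the standard IA-64 TLS ABI, the four models above
   expand to:

	global-dynamic: __tls_get_addr (@dtpmod(x), @dtprel(x))
	local-dynamic:  base = __tls_get_addr (@dtpmod(x), 0);
			addr = base + @dtprel(x)
	initial-exec:   addr = tp + (load of @tprel(x) from the GOT)
	local-exec:     addr = tp + @tprel(x)

   where tp is the thread pointer in r13 (see gen_thread_pointer), and
   the @... forms stand for the corresponding relocations, reached
   through @ltoff GOT slots where a load is involved.  TARGET_TLS64
   forces the 64-bit "movl" form for the dtprel/tprel displacement.  */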
/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
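/* Worked example: splitting (mem:TI (post_inc:DI (reg r20))) with
   scratch r21 produces

	out[0] = (mem:DI (post_modify (reg r20) (plus (reg r20) 16)))
	out[1] = (mem:DI (reg r21))

   together with the returned insn "r21 = r20 + 8", which the caller is
   expected to emit before either memory reference is used; the
   POST_MODIFY preserves the full 16-byte bump that the original
   POST_INC implied for TImode.  */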
/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg ATTRIBUTE_UNUSED;
     int sibcall_p;
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

void
ia64_reload_gp ()
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
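/* Background for the splitter below: under the IA-64 software
   conventions, the "address" of a function reached indirectly is really
   the address of a two-slot function descriptor:

	descriptor[0] = entry point address
	descriptor[1] = the callee's gp

   So a call through a general register must load both slots, switching
   gp to the callee's value, while a direct call leaves gp alone.  This
   is why ia64_reload_gp above exists, and why ia64_split_call below
   reloads gp after any call that may have changed it.  */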
void
ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
		 noreturn_p, sibcall_p)
     rtx retval, addr, retaddr, scratch_r, scratch_b;
     int noreturn_p, sibcall_p;
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
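/* Example of the directive emitted above: with the usual ia64
   call-used partition, in which p6-p15 are scratch and the remaining
   predicates are preserved, the loop prints

	.pred.safe_across_calls p1-p5,p16-p63

   advertising to the assembler which predicate registers survive
   calls.  (The exact ranges depend on call_used_regs, so this line is
   illustrative rather than guaranteed.)  */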
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Fill in current_frame_info with the frame layout and register save
   information for the current function.  SIZE is the number of bytes
   of space needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */
  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}

      if (regs_ever_live[AR_PFS_REGNUM])
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
	  if (current_frame_info.reg_save_ar_pfs == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocate it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
*/ 1951 if (spilled_fr_p) 1952 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size); 1953 else 1954 pretend_args_size = current_function_pretend_args_size; 1955 1956 total_size = (spill_size + extra_spill_size + size + pretend_args_size 1957 + current_function_outgoing_args_size); 1958 total_size = IA64_STACK_ALIGN (total_size); 1959 1960 /* We always use the 16-byte scratch area provided by the caller, but 1961 if we are a leaf function, there's no one to which we need to provide 1962 a scratch area. */ 1963 if (current_function_is_leaf) 1964 total_size = MAX (0, total_size - 16); 1965 1966 current_frame_info.total_size = total_size; 1967 current_frame_info.spill_cfa_off = pretend_args_size - 16; 1968 current_frame_info.spill_size = spill_size; 1969 current_frame_info.extra_spill_size = extra_spill_size; 1970 COPY_HARD_REG_SET (current_frame_info.mask, mask); 1971 current_frame_info.n_spilled = n_spilled; 1972 current_frame_info.initialized = reload_completed; 1973} 1974 1975/* Compute the initial difference between the specified pair of registers. */ 1976 1977HOST_WIDE_INT 1978ia64_initial_elimination_offset (from, to) 1979 int from, to; 1980{ 1981 HOST_WIDE_INT offset; 1982 1983 ia64_compute_frame_size (get_frame_size ()); 1984 switch (from) 1985 { 1986 case FRAME_POINTER_REGNUM: 1987 if (to == HARD_FRAME_POINTER_REGNUM) 1988 { 1989 if (current_function_is_leaf) 1990 offset = -current_frame_info.total_size; 1991 else 1992 offset = -(current_frame_info.total_size 1993 - current_function_outgoing_args_size - 16); 1994 } 1995 else if (to == STACK_POINTER_REGNUM) 1996 { 1997 if (current_function_is_leaf) 1998 offset = 0; 1999 else 2000 offset = 16 + current_function_outgoing_args_size; 2001 } 2002 else 2003 abort (); 2004 break; 2005 2006 case ARG_POINTER_REGNUM: 2007 /* Arguments start above the 16 byte save area, unless stdarg 2008 in which case we store through the 16 byte save area. */ 2009 if (to == HARD_FRAME_POINTER_REGNUM) 2010 offset = 16 - current_function_pretend_args_size; 2011 else if (to == STACK_POINTER_REGNUM) 2012 offset = (current_frame_info.total_size 2013 + 16 - current_function_pretend_args_size); 2014 else 2015 abort (); 2016 break; 2017 2018 case RETURN_ADDRESS_POINTER_REGNUM: 2019 offset = 0; 2020 break; 2021 2022 default: 2023 abort (); 2024 } 2025 2026 return offset; 2027} 2028 2029/* If there are more than a trivial number of register spills, we use 2030 two interleaved iterators so that we can get two memory references 2031 per insn group. 2032 2033 In order to simplify things in the prologue and epilogue expanders, 2034 we use helper functions to fix up the memory references after the 2035 fact with the appropriate offsets to a POST_MODIFY memory mode. 2036 The following data structure tracks the state of the two iterators 2037 while insns are being emitted. 
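   As an illustration of the intended effect (flavor only, not the
   literal generated assembly), the stores alternate base registers,
   e.g. st8 [r2] = rX, 16 next to st8 [r3] = rY, 16, so that both
   memory units can be used within one group.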
*/ 2038 2039struct spill_fill_data 2040{ 2041 rtx init_after; /* point at which to emit initializations */ 2042 rtx init_reg[2]; /* initial base register */ 2043 rtx iter_reg[2]; /* the iterator registers */ 2044 rtx *prev_addr[2]; /* address of last memory use */ 2045 rtx prev_insn[2]; /* the insn corresponding to prev_addr */ 2046 HOST_WIDE_INT prev_off[2]; /* last offset */ 2047 int n_iter; /* number of iterators in use */ 2048 int next_iter; /* next iterator to use */ 2049 unsigned int save_gr_used_mask; 2050}; 2051 2052static struct spill_fill_data spill_fill_data; 2053 2054static void 2055setup_spill_pointers (n_spills, init_reg, cfa_off) 2056 int n_spills; 2057 rtx init_reg; 2058 HOST_WIDE_INT cfa_off; 2059{ 2060 int i; 2061 2062 spill_fill_data.init_after = get_last_insn (); 2063 spill_fill_data.init_reg[0] = init_reg; 2064 spill_fill_data.init_reg[1] = init_reg; 2065 spill_fill_data.prev_addr[0] = NULL; 2066 spill_fill_data.prev_addr[1] = NULL; 2067 spill_fill_data.prev_insn[0] = NULL; 2068 spill_fill_data.prev_insn[1] = NULL; 2069 spill_fill_data.prev_off[0] = cfa_off; 2070 spill_fill_data.prev_off[1] = cfa_off; 2071 spill_fill_data.next_iter = 0; 2072 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; 2073 2074 spill_fill_data.n_iter = 1 + (n_spills > 2); 2075 for (i = 0; i < spill_fill_data.n_iter; ++i) 2076 { 2077 int regno = next_scratch_gr_reg (); 2078 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); 2079 current_frame_info.gr_used_mask |= 1 << regno; 2080 } 2081} 2082 2083static void 2084finish_spill_pointers () 2085{ 2086 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; 2087} 2088 2089static rtx 2090spill_restore_mem (reg, cfa_off) 2091 rtx reg; 2092 HOST_WIDE_INT cfa_off; 2093{ 2094 int iter = spill_fill_data.next_iter; 2095 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; 2096 rtx disp_rtx = GEN_INT (disp); 2097 rtx mem; 2098 2099 if (spill_fill_data.prev_addr[iter]) 2100 { 2101 if (CONST_OK_FOR_N (disp)) 2102 { 2103 *spill_fill_data.prev_addr[iter] 2104 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], 2105 gen_rtx_PLUS (DImode, 2106 spill_fill_data.iter_reg[iter], 2107 disp_rtx)); 2108 REG_NOTES (spill_fill_data.prev_insn[iter]) 2109 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter], 2110 REG_NOTES (spill_fill_data.prev_insn[iter])); 2111 } 2112 else 2113 { 2114 /* ??? Could use register post_modify for loads. */ 2115 if (! CONST_OK_FOR_I (disp)) 2116 { 2117 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2118 emit_move_insn (tmp, disp_rtx); 2119 disp_rtx = tmp; 2120 } 2121 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2122 spill_fill_data.iter_reg[iter], disp_rtx)); 2123 } 2124 } 2125 /* Micro-optimization: if we've created a frame pointer, it's at 2126 CFA 0, which may allow the real iterator to be initialized lower, 2127 slightly increasing parallelism. Also, if there are few saves 2128 it may eliminate the iterator entirely. */ 2129 else if (disp == 0 2130 && spill_fill_data.init_reg[iter] == stack_pointer_rtx 2131 && frame_pointer_needed) 2132 { 2133 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); 2134 set_mem_alias_set (mem, get_varargs_alias_set ()); 2135 return mem; 2136 } 2137 else 2138 { 2139 rtx seq, insn; 2140 2141 if (disp == 0) 2142 seq = gen_movdi (spill_fill_data.iter_reg[iter], 2143 spill_fill_data.init_reg[iter]); 2144 else 2145 { 2146 start_sequence (); 2147 2148 if (! 
CONST_OK_FOR_I (disp)) 2149 { 2150 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2151 emit_move_insn (tmp, disp_rtx); 2152 disp_rtx = tmp; 2153 } 2154 2155 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2156 spill_fill_data.init_reg[iter], 2157 disp_rtx)); 2158 2159 seq = get_insns (); 2160 end_sequence (); 2161 } 2162 2163 /* Careful for being the first insn in a sequence. */ 2164 if (spill_fill_data.init_after) 2165 insn = emit_insn_after (seq, spill_fill_data.init_after); 2166 else 2167 { 2168 rtx first = get_insns (); 2169 if (first) 2170 insn = emit_insn_before (seq, first); 2171 else 2172 insn = emit_insn (seq); 2173 } 2174 spill_fill_data.init_after = insn; 2175 2176 /* If DISP is 0, we may or may not have a further adjustment 2177 afterward. If we do, then the load/store insn may be modified 2178 to be a post-modify. If we don't, then this copy may be 2179 eliminated by copyprop_hardreg_forward, which makes this 2180 insn garbage, which runs afoul of the sanity check in 2181 propagate_one_insn. So mark this insn as legal to delete. */ 2182 if (disp == 0) 2183 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, 2184 REG_NOTES (insn)); 2185 } 2186 2187 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); 2188 2189 /* ??? Not all of the spills are for varargs, but some of them are. 2190 The rest of the spills belong in an alias set of their own. But 2191 it doesn't actually hurt to include them here. */ 2192 set_mem_alias_set (mem, get_varargs_alias_set ()); 2193 2194 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); 2195 spill_fill_data.prev_off[iter] = cfa_off; 2196 2197 if (++iter >= spill_fill_data.n_iter) 2198 iter = 0; 2199 spill_fill_data.next_iter = iter; 2200 2201 return mem; 2202} 2203 2204static void 2205do_spill (move_fn, reg, cfa_off, frame_reg) 2206 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2207 rtx reg, frame_reg; 2208 HOST_WIDE_INT cfa_off; 2209{ 2210 int iter = spill_fill_data.next_iter; 2211 rtx mem, insn; 2212 2213 mem = spill_restore_mem (reg, cfa_off); 2214 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); 2215 spill_fill_data.prev_insn[iter] = insn; 2216 2217 if (frame_reg) 2218 { 2219 rtx base; 2220 HOST_WIDE_INT off; 2221 2222 RTX_FRAME_RELATED_P (insn) = 1; 2223 2224 /* Don't even pretend that the unwind code can intuit its way 2225 through a pair of interleaved post_modify iterators. Just 2226 provide the correct answer. */ 2227 2228 if (frame_pointer_needed) 2229 { 2230 base = hard_frame_pointer_rtx; 2231 off = - cfa_off; 2232 } 2233 else 2234 { 2235 base = stack_pointer_rtx; 2236 off = current_frame_info.total_size - cfa_off; 2237 } 2238 2239 REG_NOTES (insn) 2240 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2241 gen_rtx_SET (VOIDmode, 2242 gen_rtx_MEM (GET_MODE (reg), 2243 plus_constant (base, off)), 2244 frame_reg), 2245 REG_NOTES (insn)); 2246 } 2247} 2248 2249static void 2250do_restore (move_fn, reg, cfa_off) 2251 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2252 rtx reg; 2253 HOST_WIDE_INT cfa_off; 2254{ 2255 int iter = spill_fill_data.next_iter; 2256 rtx insn; 2257 2258 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), 2259 GEN_INT (cfa_off))); 2260 spill_fill_data.prev_insn[iter] = insn; 2261} 2262 2263/* Wrapper functions that discard the CONST_INT spill offset. These 2264 exist so that we can give gr_spill/gr_fill the offset they need and 2265 use a consistent function interface.
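   All three thereby share the rtx (*) (rtx, rtx, rtx) shape that
   do_spill and do_restore expect for their MOVE_FN argument.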
*/ 2266 2267static rtx 2268gen_movdi_x (dest, src, offset) 2269 rtx dest, src; 2270 rtx offset ATTRIBUTE_UNUSED; 2271{ 2272 return gen_movdi (dest, src); 2273} 2274 2275static rtx 2276gen_fr_spill_x (dest, src, offset) 2277 rtx dest, src; 2278 rtx offset ATTRIBUTE_UNUSED; 2279{ 2280 return gen_fr_spill (dest, src); 2281} 2282 2283static rtx 2284gen_fr_restore_x (dest, src, offset) 2285 rtx dest, src; 2286 rtx offset ATTRIBUTE_UNUSED; 2287{ 2288 return gen_fr_restore (dest, src); 2289} 2290 2291/* Called after register allocation to add any instructions needed for the 2292 prologue. Using a prologue insn is favored compared to putting all of the 2293 instructions in output_function_prologue(), since it allows the scheduler 2294 to intermix instructions with the saves of the caller saved registers. In 2295 some cases, it might be necessary to emit a barrier instruction as the last 2296 insn to prevent such scheduling. 2297 2298 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 2299 so that the debug info generation code can handle them properly. 2300 2301 The register save area is laid out like so: 2302 cfa+16 2303 [ varargs spill area ] 2304 [ fr register spill area ] 2305 [ br register spill area ] 2306 [ ar register spill area ] 2307 [ pr register spill area ] 2308 [ gr register spill area ] */ 2309 2310/* ??? We get inefficient code when the frame size is larger than can fit in an 2311 adds instruction. */ 2312 2313void 2314ia64_expand_prologue () 2315{ 2316 rtx insn, ar_pfs_save_reg, ar_unat_save_reg; 2317 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; 2318 rtx reg, alt_reg; 2319 2320 ia64_compute_frame_size (get_frame_size ()); 2321 last_scratch_gr_reg = 15; 2322 2323 /* If there is no epilogue, then we don't need some prologue insns. 2324 We need to avoid emitting the dead prologue insns, because flow 2325 will complain about them. */ 2326 if (optimize) 2327 { 2328 edge e; 2329 2330 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) 2331 if ((e->flags & EDGE_FAKE) == 0 2332 && (e->flags & EDGE_FALLTHRU) != 0) 2333 break; 2334 epilogue_p = (e != NULL); 2335 } 2336 else 2337 epilogue_p = 1; 2338 2339 /* Set the local, input, and output register names. We need to do this 2340 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in 2341 half. If we use in/loc/out register names, then we get assembler errors 2342 in crtn.S because there is no alloc insn or regstk directive in there. */ 2343 if (! TARGET_REG_NAMES) 2344 { 2345 int inputs = current_frame_info.n_input_regs; 2346 int locals = current_frame_info.n_local_regs; 2347 int outputs = current_frame_info.n_output_regs; 2348 2349 for (i = 0; i < inputs; i++) 2350 reg_names[IN_REG (i)] = ia64_reg_numbers[i]; 2351 for (i = 0; i < locals; i++) 2352 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; 2353 for (i = 0; i < outputs; i++) 2354 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; 2355 } 2356 2357 /* Set the frame pointer register name. The regnum is logically loc79, 2358 but of course we'll not have allocated that many locals. Rather than 2359 worrying about renumbering the existing rtxs, we adjust the name. */ 2360 /* ??? This code means that we can never use one local register when 2361 there is a frame pointer. loc79 gets wasted in this case, as it is 2362 renamed to a register that will never be used. See also the try_locals 2363 code in find_gr_spill.
*/ 2364 if (current_frame_info.reg_fp) 2365 { 2366 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 2367 reg_names[HARD_FRAME_POINTER_REGNUM] 2368 = reg_names[current_frame_info.reg_fp]; 2369 reg_names[current_frame_info.reg_fp] = tmp; 2370 } 2371 2372 /* Fix up the return address placeholder. */ 2373 /* ??? We can fail if __builtin_return_address is used, and we didn't 2374 allocate a register in which to save b0. I can't think of a way to 2375 eliminate RETURN_ADDRESS_POINTER_REGNUM to a local register and 2376 then be sure that I got the right one. Further, reload doesn't seem 2377 to care if an eliminable register isn't used, and "eliminates" it 2378 anyway. */ 2379 if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM] 2380 && current_frame_info.reg_save_b0 != 0) 2381 XINT (return_address_pointer_rtx, 0) = current_frame_info.reg_save_b0; 2382 2383 /* We don't need an alloc instruction if we've used no outputs or locals. */ 2384 if (current_frame_info.n_local_regs == 0 2385 && current_frame_info.n_output_regs == 0 2386 && current_frame_info.n_input_regs <= current_function_args_info.int_regs 2387 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 2388 { 2389 /* If there is no alloc, but there are input registers used, then we 2390 need a .regstk directive. */ 2391 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 2392 ar_pfs_save_reg = NULL_RTX; 2393 } 2394 else 2395 { 2396 current_frame_info.need_regstk = 0; 2397 2398 if (current_frame_info.reg_save_ar_pfs) 2399 regno = current_frame_info.reg_save_ar_pfs; 2400 else 2401 regno = next_scratch_gr_reg (); 2402 ar_pfs_save_reg = gen_rtx_REG (DImode, regno); 2403 2404 insn = emit_insn (gen_alloc (ar_pfs_save_reg, 2405 GEN_INT (current_frame_info.n_input_regs), 2406 GEN_INT (current_frame_info.n_local_regs), 2407 GEN_INT (current_frame_info.n_output_regs), 2408 GEN_INT (current_frame_info.n_rotate_regs))); 2409 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0); 2410 } 2411 2412 /* Set up frame pointer, stack pointer, and spill iterators. */ 2413 2414 n_varargs = cfun->machine->n_varargs; 2415 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 2416 stack_pointer_rtx, 0); 2417 2418 if (frame_pointer_needed) 2419 { 2420 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 2421 RTX_FRAME_RELATED_P (insn) = 1; 2422 } 2423 2424 if (current_frame_info.total_size != 0) 2425 { 2426 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 2427 rtx offset; 2428 2429 if (CONST_OK_FOR_I (- current_frame_info.total_size)) 2430 offset = frame_size_rtx; 2431 else 2432 { 2433 regno = next_scratch_gr_reg (); 2434 offset = gen_rtx_REG (DImode, regno); 2435 emit_move_insn (offset, frame_size_rtx); 2436 } 2437 2438 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 2439 stack_pointer_rtx, offset)); 2440 2441 if (! frame_pointer_needed) 2442 { 2443 RTX_FRAME_RELATED_P (insn) = 1; 2444 if (GET_CODE (offset) != CONST_INT) 2445 { 2446 REG_NOTES (insn) 2447 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2448 gen_rtx_SET (VOIDmode, 2449 stack_pointer_rtx, 2450 gen_rtx_PLUS (DImode, 2451 stack_pointer_rtx, 2452 frame_size_rtx)), 2453 REG_NOTES (insn)); 2454 } 2455 } 2456 2457 /* ??? At this point we must generate a magic insn that appears to 2458 modify the stack pointer, the frame pointer, and all spill 2459 iterators. This would allow the most scheduling freedom. For 2460 now, just hard stop. 
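      (The blockage pattern is a volatile unspec, which the scheduler
      treats as a barrier it will not move memory references across.)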
*/ 2461 emit_insn (gen_blockage ()); 2462 } 2463 2464 /* Must copy out ar.unat before doing any integer spills. */ 2465 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2466 { 2467 if (current_frame_info.reg_save_ar_unat) 2468 ar_unat_save_reg 2469 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2470 else 2471 { 2472 alt_regno = next_scratch_gr_reg (); 2473 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2474 current_frame_info.gr_used_mask |= 1 << alt_regno; 2475 } 2476 2477 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2478 insn = emit_move_insn (ar_unat_save_reg, reg); 2479 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0); 2480 2481 /* Even if we're not going to generate an epilogue, we still 2482 need to save the register so that EH works. */ 2483 if (! epilogue_p && current_frame_info.reg_save_ar_unat) 2484 emit_insn (gen_prologue_use (ar_unat_save_reg)); 2485 } 2486 else 2487 ar_unat_save_reg = NULL_RTX; 2488 2489 /* Spill all varargs registers. Do this before spilling any GR registers, 2490 since we want the UNAT bits for the GR registers to override the UNAT 2491 bits from varargs, which we don't care about. */ 2492 2493 cfa_off = -16; 2494 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 2495 { 2496 reg = gen_rtx_REG (DImode, regno); 2497 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 2498 } 2499 2500 /* Locate the bottom of the register save area. */ 2501 cfa_off = (current_frame_info.spill_cfa_off 2502 + current_frame_info.spill_size 2503 + current_frame_info.extra_spill_size); 2504 2505 /* Save the predicate register block either in a register or in memory. */ 2506 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2507 { 2508 reg = gen_rtx_REG (DImode, PR_REG (0)); 2509 if (current_frame_info.reg_save_pr != 0) 2510 { 2511 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2512 insn = emit_move_insn (alt_reg, reg); 2513 2514 /* ??? Denote pr spill/fill by a DImode move that modifies all 2515 64 hard registers. */ 2516 RTX_FRAME_RELATED_P (insn) = 1; 2517 REG_NOTES (insn) 2518 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2519 gen_rtx_SET (VOIDmode, alt_reg, reg), 2520 REG_NOTES (insn)); 2521 2522 /* Even if we're not going to generate an epilogue, we still 2523 need to save the register so that EH works. */ 2524 if (! epilogue_p) 2525 emit_insn (gen_prologue_use (alt_reg)); 2526 } 2527 else 2528 { 2529 alt_regno = next_scratch_gr_reg (); 2530 alt_reg = gen_rtx_REG (DImode, alt_regno); 2531 insn = emit_move_insn (alt_reg, reg); 2532 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2533 cfa_off -= 8; 2534 } 2535 } 2536 2537 /* Handle AR regs in numerical order. All of them get special handling. */ 2538 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 2539 && current_frame_info.reg_save_ar_unat == 0) 2540 { 2541 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2542 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 2543 cfa_off -= 8; 2544 } 2545 2546 /* The alloc insn already copied ar.pfs into a general register. The 2547 only thing we have to do now is copy that register to a stack slot 2548 if we'd not allocated a local register for the job. 
*/ 2549 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 2550 && current_frame_info.reg_save_ar_pfs == 0) 2551 { 2552 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2553 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 2554 cfa_off -= 8; 2555 } 2556 2557 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2558 { 2559 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2560 if (current_frame_info.reg_save_ar_lc != 0) 2561 { 2562 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2563 insn = emit_move_insn (alt_reg, reg); 2564 RTX_FRAME_RELATED_P (insn) = 1; 2565 2566 /* Even if we're not going to generate an epilogue, we still 2567 need to save the register so that EH works. */ 2568 if (! epilogue_p) 2569 emit_insn (gen_prologue_use (alt_reg)); 2570 } 2571 else 2572 { 2573 alt_regno = next_scratch_gr_reg (); 2574 alt_reg = gen_rtx_REG (DImode, alt_regno); 2575 emit_move_insn (alt_reg, reg); 2576 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2577 cfa_off -= 8; 2578 } 2579 } 2580 2581 if (current_frame_info.reg_save_gp) 2582 { 2583 insn = emit_move_insn (gen_rtx_REG (DImode, 2584 current_frame_info.reg_save_gp), 2585 pic_offset_table_rtx); 2586 /* We don't know for sure yet if this is actually needed, since 2587 we've not split the PIC call patterns. If all of the calls 2588 are indirect, and not followed by any uses of the gp, then 2589 this save is dead. Allow it to go away. */ 2590 REG_NOTES (insn) 2591 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn)); 2592 } 2593 2594 /* We should now be at the base of the gr/br/fr spill area. */ 2595 if (cfa_off != (current_frame_info.spill_cfa_off 2596 + current_frame_info.spill_size)) 2597 abort (); 2598 2599 /* Spill all general registers. */ 2600 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 2601 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2602 { 2603 reg = gen_rtx_REG (DImode, regno); 2604 do_spill (gen_gr_spill, reg, cfa_off, reg); 2605 cfa_off -= 8; 2606 } 2607 2608 /* Handle BR0 specially -- it may be getting stored permanently in 2609 some GR register. */ 2610 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2611 { 2612 reg = gen_rtx_REG (DImode, BR_REG (0)); 2613 if (current_frame_info.reg_save_b0 != 0) 2614 { 2615 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2616 insn = emit_move_insn (alt_reg, reg); 2617 RTX_FRAME_RELATED_P (insn) = 1; 2618 2619 /* Even if we're not going to generate an epilogue, we still 2620 need to save the register so that EH works. */ 2621 if (! epilogue_p) 2622 emit_insn (gen_prologue_use (alt_reg)); 2623 } 2624 else 2625 { 2626 alt_regno = next_scratch_gr_reg (); 2627 alt_reg = gen_rtx_REG (DImode, alt_regno); 2628 emit_move_insn (alt_reg, reg); 2629 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2630 cfa_off -= 8; 2631 } 2632 } 2633 2634 /* Spill the rest of the BR registers. */ 2635 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2636 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2637 { 2638 alt_regno = next_scratch_gr_reg (); 2639 alt_reg = gen_rtx_REG (DImode, alt_regno); 2640 reg = gen_rtx_REG (DImode, regno); 2641 emit_move_insn (alt_reg, reg); 2642 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2643 cfa_off -= 8; 2644 } 2645 2646 /* Align the frame and spill all FR registers. 
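     Each TFmode slot is 16 bytes wide, so CFA_OFF must be a multiple of
     16 on every iteration of the loop below; the abort checks exactly
     that invariant.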
*/ 2647 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2648 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2649 { 2650 if (cfa_off & 15) 2651 abort (); 2652 reg = gen_rtx_REG (TFmode, regno); 2653 do_spill (gen_fr_spill_x, reg, cfa_off, reg); 2654 cfa_off -= 16; 2655 } 2656 2657 if (cfa_off != current_frame_info.spill_cfa_off) 2658 abort (); 2659 2660 finish_spill_pointers (); 2661} 2662 2663/* Called after register allocation to add any instructions needed for the 2664 epilogue. Using an epilogue insn is favored compared to putting all of the 2665 instructions in output_function_epilogue(), since it allows the scheduler 2666 to intermix instructions with the restores of the caller saved registers. In 2667 some cases, it might be necessary to emit a barrier instruction as the last 2668 insn to prevent such scheduling. */ 2669 2670void 2671ia64_expand_epilogue (sibcall_p) 2672 int sibcall_p; 2673{ 2674 rtx insn, reg, alt_reg, ar_unat_save_reg; 2675 int regno, alt_regno, cfa_off; 2676 2677 ia64_compute_frame_size (get_frame_size ()); 2678 2679 /* If there is a frame pointer, then we use it instead of the stack 2680 pointer, so that the stack pointer does not need to be valid when 2681 the epilogue starts. See EXIT_IGNORE_STACK. */ 2682 if (frame_pointer_needed) 2683 setup_spill_pointers (current_frame_info.n_spilled, 2684 hard_frame_pointer_rtx, 0); 2685 else 2686 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 2687 current_frame_info.total_size); 2688 2689 if (current_frame_info.total_size != 0) 2690 { 2691 /* ??? At this point we must generate a magic insn that appears to 2692 modify the spill iterators and the frame pointer. This would 2693 allow the most scheduling freedom. For now, just hard stop. */ 2694 emit_insn (gen_blockage ()); 2695 } 2696 2697 /* Locate the bottom of the register save area. */ 2698 cfa_off = (current_frame_info.spill_cfa_off 2699 + current_frame_info.spill_size 2700 + current_frame_info.extra_spill_size); 2701 2702 /* Restore the predicate registers. */ 2703 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2704 { 2705 if (current_frame_info.reg_save_pr != 0) 2706 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2707 else 2708 { 2709 alt_regno = next_scratch_gr_reg (); 2710 alt_reg = gen_rtx_REG (DImode, alt_regno); 2711 do_restore (gen_movdi_x, alt_reg, cfa_off); 2712 cfa_off -= 8; 2713 } 2714 reg = gen_rtx_REG (DImode, PR_REG (0)); 2715 emit_move_insn (reg, alt_reg); 2716 } 2717 2718 /* Restore the application registers. */ 2719 2720 /* Load the saved unat from the stack, but do not restore it until 2721 after the GRs have been restored.
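     (The ld8.fill insns that restore the GRs read their NaT bits from
     whatever ar.unat holds at that point, so the caller's value loaded
     here may only be copied into ar.unat once those fills are done.)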
*/ 2722 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2723 { 2724 if (current_frame_info.reg_save_ar_unat != 0) 2725 ar_unat_save_reg 2726 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2727 else 2728 { 2729 alt_regno = next_scratch_gr_reg (); 2730 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2731 current_frame_info.gr_used_mask |= 1 << alt_regno; 2732 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 2733 cfa_off -= 8; 2734 } 2735 } 2736 else 2737 ar_unat_save_reg = NULL_RTX; 2738 2739 if (current_frame_info.reg_save_ar_pfs != 0) 2740 { 2741 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs); 2742 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2743 emit_move_insn (reg, alt_reg); 2744 } 2745 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 2746 { 2747 alt_regno = next_scratch_gr_reg (); 2748 alt_reg = gen_rtx_REG (DImode, alt_regno); 2749 do_restore (gen_movdi_x, alt_reg, cfa_off); 2750 cfa_off -= 8; 2751 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2752 emit_move_insn (reg, alt_reg); 2753 } 2754 2755 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2756 { 2757 if (current_frame_info.reg_save_ar_lc != 0) 2758 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2759 else 2760 { 2761 alt_regno = next_scratch_gr_reg (); 2762 alt_reg = gen_rtx_REG (DImode, alt_regno); 2763 do_restore (gen_movdi_x, alt_reg, cfa_off); 2764 cfa_off -= 8; 2765 } 2766 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2767 emit_move_insn (reg, alt_reg); 2768 } 2769 2770 /* We should now be at the base of the gr/br/fr spill area. */ 2771 if (cfa_off != (current_frame_info.spill_cfa_off 2772 + current_frame_info.spill_size)) 2773 abort (); 2774 2775 /* The GP may be stored on the stack in the prologue, but it's 2776 never restored in the epilogue. Skip the stack slot. */ 2777 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 2778 cfa_off -= 8; 2779 2780 /* Restore all general registers. */ 2781 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 2782 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2783 { 2784 reg = gen_rtx_REG (DImode, regno); 2785 do_restore (gen_gr_restore, reg, cfa_off); 2786 cfa_off -= 8; 2787 } 2788 2789 /* Restore the branch registers. Handle B0 specially, as it may 2790 have gotten stored in some GR register. */ 2791 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2792 { 2793 if (current_frame_info.reg_save_b0 != 0) 2794 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2795 else 2796 { 2797 alt_regno = next_scratch_gr_reg (); 2798 alt_reg = gen_rtx_REG (DImode, alt_regno); 2799 do_restore (gen_movdi_x, alt_reg, cfa_off); 2800 cfa_off -= 8; 2801 } 2802 reg = gen_rtx_REG (DImode, BR_REG (0)); 2803 emit_move_insn (reg, alt_reg); 2804 } 2805 2806 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2807 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2808 { 2809 alt_regno = next_scratch_gr_reg (); 2810 alt_reg = gen_rtx_REG (DImode, alt_regno); 2811 do_restore (gen_movdi_x, alt_reg, cfa_off); 2812 cfa_off -= 8; 2813 reg = gen_rtx_REG (DImode, regno); 2814 emit_move_insn (reg, alt_reg); 2815 } 2816 2817 /* Restore floating point registers. 
*/ 2818 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2819 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2820 { 2821 if (cfa_off & 15) 2822 abort (); 2823 reg = gen_rtx_REG (TFmode, regno); 2824 do_restore (gen_fr_restore_x, reg, cfa_off); 2825 cfa_off -= 16; 2826 } 2827 2828 /* Restore ar.unat for real. */ 2829 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2830 { 2831 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2832 emit_move_insn (reg, ar_unat_save_reg); 2833 } 2834 2835 if (cfa_off != current_frame_info.spill_cfa_off) 2836 abort (); 2837 2838 finish_spill_pointers (); 2839 2840 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp) 2841 { 2842 /* ??? At this point we must generate a magic insn that appears to 2843 modify the spill iterators, the stack pointer, and the frame 2844 pointer. This would allow the most scheduling freedom. For now, 2845 just hard stop. */ 2846 emit_insn (gen_blockage ()); 2847 } 2848 2849 if (cfun->machine->ia64_eh_epilogue_sp) 2850 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 2851 else if (frame_pointer_needed) 2852 { 2853 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 2854 RTX_FRAME_RELATED_P (insn) = 1; 2855 } 2856 else if (current_frame_info.total_size) 2857 { 2858 rtx offset, frame_size_rtx; 2859 2860 frame_size_rtx = GEN_INT (current_frame_info.total_size); 2861 if (CONST_OK_FOR_I (current_frame_info.total_size)) 2862 offset = frame_size_rtx; 2863 else 2864 { 2865 regno = next_scratch_gr_reg (); 2866 offset = gen_rtx_REG (DImode, regno); 2867 emit_move_insn (offset, frame_size_rtx); 2868 } 2869 2870 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 2871 offset)); 2872 2873 RTX_FRAME_RELATED_P (insn) = 1; 2874 if (GET_CODE (offset) != CONST_INT) 2875 { 2876 REG_NOTES (insn) 2877 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2878 gen_rtx_SET (VOIDmode, 2879 stack_pointer_rtx, 2880 gen_rtx_PLUS (DImode, 2881 stack_pointer_rtx, 2882 frame_size_rtx)), 2883 REG_NOTES (insn)); 2884 } 2885 } 2886 2887 if (cfun->machine->ia64_eh_epilogue_bsp) 2888 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 2889 2890 if (! sibcall_p) 2891 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 2892 else 2893 { 2894 int fp = GR_REG (2); 2895 /* We need a throwaway register here; r0 and r1 are reserved, so r2 is the 2896 first available call clobbered register. If there was a frame_pointer 2897 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, 2898 so we have to make sure we're using the string "r2" when emitting 2899 the register name for the assembler. */ 2900 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2)) 2901 fp = HARD_FRAME_POINTER_REGNUM; 2902 2903 /* We must emit an alloc to force the input registers to become output 2904 registers. Otherwise, if the callee tries to pass its parameters 2905 through to another call without an intervening alloc, then these 2906 values get lost. */ 2907 /* ??? We don't need to preserve all input registers. We only need to 2908 preserve those input registers used as arguments to the sibling call. 2909 It is unclear how to compute that number here.
*/ 2910 if (current_frame_info.n_input_regs != 0) 2911 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 2912 GEN_INT (0), GEN_INT (0), 2913 GEN_INT (current_frame_info.n_input_regs), 2914 GEN_INT (0))); 2915 } 2916} 2917 2918/* Return 1 if br.ret can do all the work required to return from a 2919 function. */ 2920 2921int 2922ia64_direct_return () 2923{ 2924 if (reload_completed && ! frame_pointer_needed) 2925 { 2926 ia64_compute_frame_size (get_frame_size ()); 2927 2928 return (current_frame_info.total_size == 0 2929 && current_frame_info.n_spilled == 0 2930 && current_frame_info.reg_save_b0 == 0 2931 && current_frame_info.reg_save_pr == 0 2932 && current_frame_info.reg_save_ar_pfs == 0 2933 && current_frame_info.reg_save_ar_unat == 0 2934 && current_frame_info.reg_save_ar_lc == 0); 2935 } 2936 return 0; 2937} 2938 2939int 2940ia64_hard_regno_rename_ok (from, to) 2941 int from; 2942 int to; 2943{ 2944 /* Don't clobber any of the registers we reserved for the prologue. */ 2945 if (to == current_frame_info.reg_fp 2946 || to == current_frame_info.reg_save_b0 2947 || to == current_frame_info.reg_save_pr 2948 || to == current_frame_info.reg_save_ar_pfs 2949 || to == current_frame_info.reg_save_ar_unat 2950 || to == current_frame_info.reg_save_ar_lc) 2951 return 0; 2952 2953 if (from == current_frame_info.reg_fp 2954 || from == current_frame_info.reg_save_b0 2955 || from == current_frame_info.reg_save_pr 2956 || from == current_frame_info.reg_save_ar_pfs 2957 || from == current_frame_info.reg_save_ar_unat 2958 || from == current_frame_info.reg_save_ar_lc) 2959 return 0; 2960 2961 /* Don't use output registers outside the register frame. */ 2962 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 2963 return 0; 2964 2965 /* Retain even/oddness on predicate register pairs. */ 2966 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 2967 return (from & 1) == (to & 1); 2968 2969 return 1; 2970} 2971 2972/* Target hook for assembling integer objects. Handle word-sized 2973 aligned objects and detect the cases when @fptr is needed. */ 2974 2975static bool 2976ia64_assemble_integer (x, size, aligned_p) 2977 rtx x; 2978 unsigned int size; 2979 int aligned_p; 2980{ 2981 if (size == (TARGET_ILP32 ? 4 : 8) 2982 && aligned_p 2983 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 2984 && GET_CODE (x) == SYMBOL_REF 2985 && SYMBOL_REF_FLAG (x)) 2986 { 2987 if (TARGET_ILP32) 2988 fputs ("\tdata4\t@fptr(", asm_out_file); 2989 else 2990 fputs ("\tdata8\t@fptr(", asm_out_file); 2991 output_addr_const (asm_out_file, x); 2992 fputs (")\n", asm_out_file); 2993 return true; 2994 } 2995 return default_assemble_integer (x, size, aligned_p); 2996} 2997 2998/* Emit the function prologue. */ 2999 3000static void 3001ia64_output_function_prologue (file, size) 3002 FILE *file; 3003 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3004{ 3005 int mask, grsave, grsave_prev; 3006 3007 if (current_frame_info.need_regstk) 3008 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 3009 current_frame_info.n_input_regs, 3010 current_frame_info.n_local_regs, 3011 current_frame_info.n_output_regs, 3012 current_frame_info.n_rotate_regs); 3013 3014 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3015 return; 3016 3017 /* Emit the .prologue directive. 
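   As computed below, MASK has bit 8 set when b0 (rp) is saved in a GR,
   bit 4 for ar.pfs, bit 2 for the frame pointer and bit 1 for pr, with
   GRSAVE naming the first register of the consecutive run; saving b0
   and ar.pfs in adjacent GRs, for example, yields ".prologue 12, <grsave>".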
*/ 3018 3019 mask = 0; 3020 grsave = grsave_prev = 0; 3021 if (current_frame_info.reg_save_b0 != 0) 3022 { 3023 mask |= 8; 3024 grsave = grsave_prev = current_frame_info.reg_save_b0; 3025 } 3026 if (current_frame_info.reg_save_ar_pfs != 0 3027 && (grsave_prev == 0 3028 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1)) 3029 { 3030 mask |= 4; 3031 if (grsave_prev == 0) 3032 grsave = current_frame_info.reg_save_ar_pfs; 3033 grsave_prev = current_frame_info.reg_save_ar_pfs; 3034 } 3035 if (current_frame_info.reg_fp != 0 3036 && (grsave_prev == 0 3037 || current_frame_info.reg_fp == grsave_prev + 1)) 3038 { 3039 mask |= 2; 3040 if (grsave_prev == 0) 3041 grsave = HARD_FRAME_POINTER_REGNUM; 3042 grsave_prev = current_frame_info.reg_fp; 3043 } 3044 if (current_frame_info.reg_save_pr != 0 3045 && (grsave_prev == 0 3046 || current_frame_info.reg_save_pr == grsave_prev + 1)) 3047 { 3048 mask |= 1; 3049 if (grsave_prev == 0) 3050 grsave = current_frame_info.reg_save_pr; 3051 } 3052 3053 if (mask) 3054 fprintf (file, "\t.prologue %d, %d\n", mask, 3055 ia64_dbx_register_number (grsave)); 3056 else 3057 fputs ("\t.prologue\n", file); 3058 3059 /* Emit a .spill directive, if necessary, to relocate the base of 3060 the register spill area. */ 3061 if (current_frame_info.spill_cfa_off != -16) 3062 fprintf (file, "\t.spill %ld\n", 3063 (long) (current_frame_info.spill_cfa_off 3064 + current_frame_info.spill_size)); 3065} 3066 3067/* Emit the .body directive at the scheduled end of the prologue. */ 3068 3069static void 3070ia64_output_function_end_prologue (file) 3071 FILE *file; 3072{ 3073 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3074 return; 3075 3076 fputs ("\t.body\n", file); 3077} 3078 3079/* Emit the function epilogue. */ 3080 3081static void 3082ia64_output_function_epilogue (file, size) 3083 FILE *file ATTRIBUTE_UNUSED; 3084 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3085{ 3086 int i; 3087 3088 /* Reset from the function's potential modifications. */ 3089 XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM; 3090 3091 if (current_frame_info.reg_fp) 3092 { 3093 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3094 reg_names[HARD_FRAME_POINTER_REGNUM] 3095 = reg_names[current_frame_info.reg_fp]; 3096 reg_names[current_frame_info.reg_fp] = tmp; 3097 } 3098 if (! TARGET_REG_NAMES) 3099 { 3100 for (i = 0; i < current_frame_info.n_input_regs; i++) 3101 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 3102 for (i = 0; i < current_frame_info.n_local_regs; i++) 3103 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 3104 for (i = 0; i < current_frame_info.n_output_regs; i++) 3105 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 3106 } 3107 3108 current_frame_info.initialized = 0; 3109} 3110 3111int 3112ia64_dbx_register_number (regno) 3113 int regno; 3114{ 3115 /* In ia64_expand_prologue we quite literally renamed the frame pointer 3116 from its home at loc79 to something inside the register frame. We 3117 must perform the same renumbering here for the debug info. 
*/ 3118 if (current_frame_info.reg_fp) 3119 { 3120 if (regno == HARD_FRAME_POINTER_REGNUM) 3121 regno = current_frame_info.reg_fp; 3122 else if (regno == current_frame_info.reg_fp) 3123 regno = HARD_FRAME_POINTER_REGNUM; 3124 } 3125 3126 if (IN_REGNO_P (regno)) 3127 return 32 + regno - IN_REG (0); 3128 else if (LOC_REGNO_P (regno)) 3129 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 3130 else if (OUT_REGNO_P (regno)) 3131 return (32 + current_frame_info.n_input_regs 3132 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 3133 else 3134 return regno; 3135} 3136 3137void 3138ia64_initialize_trampoline (addr, fnaddr, static_chain) 3139 rtx addr, fnaddr, static_chain; 3140{ 3141 rtx addr_reg, eight = GEN_INT (8); 3142 3143 /* Load up our iterator. */ 3144 addr_reg = gen_reg_rtx (Pmode); 3145 emit_move_insn (addr_reg, addr); 3146 3147 /* The first two words are the fake descriptor: 3148 __ia64_trampoline, ADDR+16. */ 3149 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3150 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline")); 3151 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3152 3153 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3154 copy_to_reg (plus_constant (addr, 16))); 3155 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3156 3157 /* The third word is the target descriptor. */ 3158 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr); 3159 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3160 3161 /* The fourth word is the static chain. */ 3162 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain); 3163} 3164 3165/* Do any needed setup for a variadic function. CUM has not been updated 3166 for the last named argument which has type TYPE and mode MODE. 3167 3168 We generate the actual spill instructions during prologue generation. */ 3169 3170void 3171ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) 3172 CUMULATIVE_ARGS cum; 3173 int int_mode; 3174 tree type; 3175 int * pretend_size; 3176 int second_time ATTRIBUTE_UNUSED; 3177{ 3178 /* Skip the current argument. */ 3179 ia64_function_arg_advance (&cum, int_mode, type, 1); 3180 3181 if (cum.words < MAX_ARGUMENT_SLOTS) 3182 { 3183 int n = MAX_ARGUMENT_SLOTS - cum.words; 3184 *pretend_size = n * UNITS_PER_WORD; 3185 cfun->machine->n_varargs = n; 3186 } 3187} 3188 3189/* Check whether TYPE is a homogeneous floating point aggregate. If 3190 it is, return the mode of the floating point type that appears 3191 in all leaves. If it is not, return VOIDmode. 3192 3193 An aggregate is a homogeneous floating point aggregate if all 3194 fields/elements in it have the same floating point type (e.g., 3195 SFmode). 128-bit quad-precision floats are excluded. */ 3196 3197static enum machine_mode 3198hfa_element_mode (type, nested) 3199 tree type; 3200 int nested; 3201{ 3202 enum machine_mode element_mode = VOIDmode; 3203 enum machine_mode mode; 3204 enum tree_code code = TREE_CODE (type); 3205 int know_element_mode = 0; 3206 tree t; 3207 3208 switch (code) 3209 { 3210 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: 3211 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: 3212 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: 3213 case FILE_TYPE: case SET_TYPE: case LANG_TYPE: 3214 case FUNCTION_TYPE: 3215 return VOIDmode; 3216 3217 /* Fortran complex types are supposed to be HFAs, so we need to handle 3218 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex 3219 types though.
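     For example, a _Complex float has SCmode with a 4 byte unit size,
     so mode_for_size below hands back SFmode; a _Complex int has class
     MODE_COMPLEX_INT and falls into the VOIDmode path.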
*/ 3220 case COMPLEX_TYPE: 3221 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT 3222 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT)) 3223 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) 3224 * BITS_PER_UNIT, MODE_FLOAT, 0); 3225 else 3226 return VOIDmode; 3227 3228 case REAL_TYPE: 3229 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual 3230 mode if this is contained within an aggregate. */ 3231 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT)) 3232 return TYPE_MODE (type); 3233 else 3234 return VOIDmode; 3235 3236 case ARRAY_TYPE: 3237 return hfa_element_mode (TREE_TYPE (type), 1); 3238 3239 case RECORD_TYPE: 3240 case UNION_TYPE: 3241 case QUAL_UNION_TYPE: 3242 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) 3243 { 3244 if (TREE_CODE (t) != FIELD_DECL) 3245 continue; 3246 3247 mode = hfa_element_mode (TREE_TYPE (t), 1); 3248 if (know_element_mode) 3249 { 3250 if (mode != element_mode) 3251 return VOIDmode; 3252 } 3253 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 3254 return VOIDmode; 3255 else 3256 { 3257 know_element_mode = 1; 3258 element_mode = mode; 3259 } 3260 } 3261 return element_mode; 3262 3263 default: 3264 /* If we reach here, we probably have some front-end specific type 3265 that the backend doesn't know about. This can happen via the 3266 aggregate_value_p call in init_function_start. All we can do is 3267 ignore unknown tree types. */ 3268 return VOIDmode; 3269 } 3270 3271 return VOIDmode; 3272} 3273 3274/* Return rtx for register where argument is passed, or zero if it is passed 3275 on the stack. */ 3276 3277/* ??? 128-bit quad-precision floats are always passed in general 3278 registers. */ 3279 3280rtx 3281ia64_function_arg (cum, mode, type, named, incoming) 3282 CUMULATIVE_ARGS *cum; 3283 enum machine_mode mode; 3284 tree type; 3285 int named; 3286 int incoming; 3287{ 3288 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 3289 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3290 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3291 / UNITS_PER_WORD); 3292 int offset = 0; 3293 enum machine_mode hfa_mode = VOIDmode; 3294 3295 /* Integer and float arguments larger than 8 bytes start at the next even 3296 boundary. Aggregates larger than 8 bytes start at the next even boundary 3297 if the aggregate has 16 byte alignment. Net effect is that types with 3298 alignment greater than 8 start at the next even boundary. */ 3299 /* ??? The ABI does not specify how to handle aggregates with alignment from 3300 9 to 15 bytes, or greater than 16. We handle them all as if they had 3301 16 byte alignment. Such aggregates can occur only if gcc extensions are 3302 used. */ 3303 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3304 : (words > 1)) 3305 && (cum->words & 1)) 3306 offset = 1; 3307 3308 /* If all argument slots are used, then it must go on the stack. */ 3309 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3310 return 0; 3311 3312 /* Check for and handle homogeneous FP aggregates. */ 3313 if (type) 3314 hfa_mode = hfa_element_mode (type, 0); 3315 3316 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3317 and unprototyped hfas are passed specially. */ 3318 if (hfa_mode != VOIDmode && (! 
cum->prototype || named)) 3319 { 3320 rtx loc[16]; 3321 int i = 0; 3322 int fp_regs = cum->fp_regs; 3323 int int_regs = cum->words + offset; 3324 int hfa_size = GET_MODE_SIZE (hfa_mode); 3325 int byte_size; 3326 int args_byte_size; 3327 3328 /* If prototyped, pass it in FR regs then GR regs. 3329 If not prototyped, pass it in both FR and GR regs. 3330 3331 If this is an SFmode aggregate, then it is possible to run out of 3332 FR regs while GR regs are still left. In that case, we pass the 3333 remaining part in the GR regs. */ 3334 3335 /* Fill the FP regs. We do this always. We stop if we reach the end 3336 of the argument, the last FP register, or the last argument slot. */ 3337 3338 byte_size = ((mode == BLKmode) 3339 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3340 args_byte_size = int_regs * UNITS_PER_WORD; 3341 offset = 0; 3342 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3343 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 3344 { 3345 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3346 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 3347 + fp_regs)), 3348 GEN_INT (offset)); 3349 offset += hfa_size; 3350 args_byte_size += hfa_size; 3351 fp_regs++; 3352 } 3353 3354 /* If no prototype, then the whole thing must go in GR regs. */ 3355 if (! cum->prototype) 3356 offset = 0; 3357 /* If this is an SFmode aggregate, then we might have some left over 3358 that needs to go in GR regs. */ 3359 else if (byte_size != offset) 3360 int_regs += offset / UNITS_PER_WORD; 3361 3362 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ 3363 3364 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 3365 { 3366 enum machine_mode gr_mode = DImode; 3367 3368 /* If we have an odd 4 byte hunk because we ran out of FR regs, 3369 then this goes in a GR reg left adjusted/little endian, right 3370 adjusted/big endian. */ 3371 /* ??? Currently this is handled wrong, because 4-byte hunks are 3372 always right adjusted/little endian. */ 3373 if (offset & 0x4) 3374 gr_mode = SImode; 3375 /* If we have an even 4 byte hunk because the aggregate is a 3376 multiple of 4 bytes in size, then this goes in a GR reg right 3377 adjusted/little endian. */ 3378 else if (byte_size - offset == 4) 3379 gr_mode = SImode; 3380 /* Complex floats need to have float mode. */ 3381 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 3382 gr_mode = hfa_mode; 3383 3384 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3385 gen_rtx_REG (gr_mode, (basereg 3386 + int_regs)), 3387 GEN_INT (offset)); 3388 offset += GET_MODE_SIZE (gr_mode); 3389 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD 3390 ? 1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD; 3391 } 3392 3393 /* If we ended up using just one location, just return that one loc. */ 3394 if (i == 1) 3395 return XEXP (loc[0], 0); 3396 else 3397 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3398 } 3399 3400 /* Integral and aggregates go in general registers. If we have run out of 3401 FR registers, then FP values must also go in general registers. This can 3402 happen when we have a SFmode HFA. */ 3403 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT) 3404 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 3405 { 3406 int byte_size = ((mode == BLKmode) 3407 ? 
int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3408 if (BYTES_BIG_ENDIAN 3409 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3410 && byte_size < UNITS_PER_WORD 3411 && byte_size > 0) 3412 { 3413 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3414 gen_rtx_REG (DImode, 3415 (basereg + cum->words 3416 + offset)), 3417 const0_rtx); 3418 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 3419 } 3420 else 3421 return gen_rtx_REG (mode, basereg + cum->words + offset); 3422 3423 } 3424 3425 /* If there is a prototype, then FP values go in a FR register when 3426 named, and in a GR register when unnamed. */ 3427 else if (cum->prototype) 3428 { 3429 if (! named) 3430 return gen_rtx_REG (mode, basereg + cum->words + offset); 3431 else 3432 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); 3433 } 3434 /* If there is no prototype, then FP values go in both FR and GR 3435 registers. */ 3436 else 3437 { 3438 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, 3439 gen_rtx_REG (mode, (FR_ARG_FIRST 3440 + cum->fp_regs)), 3441 const0_rtx); 3442 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3443 gen_rtx_REG (mode, 3444 (basereg + cum->words 3445 + offset)), 3446 const0_rtx); 3447 3448 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); 3449 } 3450} 3451 3452/* Return number of words, at the beginning of the argument, that must be 3453 put in registers. 0 if the argument is entirely in registers or entirely 3454 in memory. */ 3455 3456int 3457ia64_function_arg_partial_nregs (cum, mode, type, named) 3458 CUMULATIVE_ARGS *cum; 3459 enum machine_mode mode; 3460 tree type; 3461 int named ATTRIBUTE_UNUSED; 3462{ 3463 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3464 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3465 / UNITS_PER_WORD); 3466 int offset = 0; 3467 3468 /* Arguments with alignment larger than 8 bytes start at the next even 3469 boundary. */ 3470 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3471 : (words > 1)) 3472 && (cum->words & 1)) 3473 offset = 1; 3474 3475 /* If all argument slots are used, then it must go on the stack. */ 3476 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3477 return 0; 3478 3479 /* It doesn't matter whether the argument goes in FR or GR regs. If 3480 it fits within the 8 argument slots, then it goes entirely in 3481 registers. If it extends past the last argument slot, then the rest 3482 goes on the stack. */ 3483 3484 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) 3485 return 0; 3486 3487 return MAX_ARGUMENT_SLOTS - cum->words - offset; 3488} 3489 3490/* Update CUM to point after this argument. This is patterned after 3491 ia64_function_arg. */ 3492 3493void 3494ia64_function_arg_advance (cum, mode, type, named) 3495 CUMULATIVE_ARGS *cum; 3496 enum machine_mode mode; 3497 tree type; 3498 int named; 3499{ 3500 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3501 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3502 / UNITS_PER_WORD); 3503 int offset = 0; 3504 enum machine_mode hfa_mode = VOIDmode; 3505 3506 /* If all arg slots are already full, then there is nothing to do. */ 3507 if (cum->words >= MAX_ARGUMENT_SLOTS) 3508 return; 3509 3510 /* Arguments with alignment larger than 8 bytes start at the next even 3511 boundary. */ 3512 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3513 : (words > 1)) 3514 && (cum->words & 1)) 3515 offset = 1; 3516 3517 cum->words += words + offset; 3518 3519 /* Check for and handle homogeneous FP aggregates.
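     E.g. struct { float x, y, z; } comes back from hfa_element_mode as
     SFmode, while any mix of element types comes back as VOIDmode.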
*/ 3520 if (type) 3521 hfa_mode = hfa_element_mode (type, 0); 3522 3523 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3524 and unprototyped hfas are passed specially. */ 3525 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 3526 { 3527 int fp_regs = cum->fp_regs; 3528 /* This is the original value of cum->words + offset. */ 3529 int int_regs = cum->words - words; 3530 int hfa_size = GET_MODE_SIZE (hfa_mode); 3531 int byte_size; 3532 int args_byte_size; 3533 3534 /* If prototyped, pass it in FR regs then GR regs. 3535 If not prototyped, pass it in both FR and GR regs. 3536 3537 If this is an SFmode aggregate, then it is possible to run out of 3538 FR regs while GR regs are still left. In that case, we pass the 3539 remaining part in the GR regs. */ 3540 3541 /* Fill the FP regs. We do this always. We stop if we reach the end 3542 of the argument, the last FP register, or the last argument slot. */ 3543 3544 byte_size = ((mode == BLKmode) 3545 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3546 args_byte_size = int_regs * UNITS_PER_WORD; 3547 offset = 0; 3548 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3549 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) 3550 { 3551 offset += hfa_size; 3552 args_byte_size += hfa_size; 3553 fp_regs++; 3554 } 3555 3556 cum->fp_regs = fp_regs; 3557 } 3558 3559 /* Integral and aggregates go in general registers. If we have run out of 3560 FR registers, then FP values must also go in general registers. This can 3561 happen when we have a SFmode HFA. */ 3562 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) 3563 cum->int_regs = cum->words; 3564 3565 /* If there is a prototype, then FP values go in a FR register when 3566 named, and in a GR register when unnamed. */ 3567 else if (cum->prototype) 3568 { 3569 if (! named) 3570 cum->int_regs = cum->words; 3571 else 3572 /* ??? Complex types should not reach here. */ 3573 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 3574 } 3575 /* If there is no prototype, then FP values go in both FR and GR 3576 registers. */ 3577 else 3578 { 3579 /* ??? Complex types should not reach here. */ 3580 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 3581 cum->int_regs = cum->words; 3582 } 3583} 3584 3585/* Variable sized types are passed by reference. */ 3586/* ??? At present this is a GCC extension to the IA-64 ABI. */ 3587 3588int 3589ia64_function_arg_pass_by_reference (cum, mode, type, named) 3590 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; 3591 enum machine_mode mode ATTRIBUTE_UNUSED; 3592 tree type; 3593 int named ATTRIBUTE_UNUSED; 3594{ 3595 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 3596} 3597 3598 3599/* Implement va_arg. */ 3600 3601rtx 3602ia64_va_arg (valist, type) 3603 tree valist, type; 3604{ 3605 tree t; 3606 3607 /* Variable sized types are passed by reference. */ 3608 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 3609 { 3610 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type)); 3611 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr)); 3612 } 3613 3614 /* Arguments with alignment larger than 8 bytes start at the next even 3615 boundary.
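     Concretely, the code below rounds the va_list pointer up with
     valist = (valist + 15) & -16, since 2 * UNITS_PER_WORD is 16 here.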
*/ 3616 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3617 { 3618 t = build (PLUS_EXPR, TREE_TYPE (valist), valist, 3619 build_int_2 (2 * UNITS_PER_WORD - 1, 0)); 3620 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, 3621 build_int_2 (-2 * UNITS_PER_WORD, -1)); 3622 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); 3623 TREE_SIDE_EFFECTS (t) = 1; 3624 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 3625 } 3626 3627 return std_expand_builtin_va_arg (valist, type); 3628} 3629 3630/* Return 1 if the function return value is returned in memory. Return 0 if it 3631 is in a register. */ 3632 3633int 3634ia64_return_in_memory (valtype) 3635 tree valtype; 3636{ 3637 enum machine_mode mode; 3638 enum machine_mode hfa_mode; 3639 HOST_WIDE_INT byte_size; 3640 3641 mode = TYPE_MODE (valtype); 3642 byte_size = GET_MODE_SIZE (mode); 3643 if (mode == BLKmode) 3644 { 3645 byte_size = int_size_in_bytes (valtype); 3646 if (byte_size < 0) 3647 return 1; 3648 } 3649 3650 /* HFAs with up to 8 elements are returned in the FP argument registers. */ 3651 3652 hfa_mode = hfa_element_mode (valtype, 0); 3653 if (hfa_mode != VOIDmode) 3654 { 3655 int hfa_size = GET_MODE_SIZE (hfa_mode); 3656 3657 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) 3658 return 1; 3659 else 3660 return 0; 3661 } 3662 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) 3663 return 1; 3664 else 3665 return 0; 3666} 3667 3668/* Return rtx for register that holds the function return value. */ 3669 3670rtx 3671ia64_function_value (valtype, func) 3672 tree valtype; 3673 tree func ATTRIBUTE_UNUSED; 3674{ 3675 enum machine_mode mode; 3676 enum machine_mode hfa_mode; 3677 3678 mode = TYPE_MODE (valtype); 3679 hfa_mode = hfa_element_mode (valtype, 0); 3680 3681 if (hfa_mode != VOIDmode) 3682 { 3683 rtx loc[8]; 3684 int i; 3685 int hfa_size; 3686 int byte_size; 3687 int offset; 3688 3689 hfa_size = GET_MODE_SIZE (hfa_mode); 3690 byte_size = ((mode == BLKmode) 3691 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); 3692 offset = 0; 3693 for (i = 0; offset < byte_size; i++) 3694 { 3695 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3696 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), 3697 GEN_INT (offset)); 3698 offset += hfa_size; 3699 } 3700 3701 if (i == 1) 3702 return XEXP (loc[0], 0); 3703 else 3704 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3705 } 3706 else if (FLOAT_TYPE_P (valtype) && 3707 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT)) 3708 return gen_rtx_REG (mode, FR_ARG_FIRST); 3709 else 3710 { 3711 if (BYTES_BIG_ENDIAN 3712 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) 3713 { 3714 rtx loc[8]; 3715 int offset; 3716 int bytesize; 3717 int i; 3718 3719 offset = 0; 3720 bytesize = int_size_in_bytes (valtype); 3721 for (i = 0; offset < bytesize; i++) 3722 { 3723 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3724 gen_rtx_REG (DImode, 3725 GR_RET_FIRST + i), 3726 GEN_INT (offset)); 3727 offset += UNITS_PER_WORD; 3728 } 3729 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3730 } 3731 else 3732 return gen_rtx_REG (mode, GR_RET_FIRST); 3733 } 3734} 3735 3736/* Print a memory address as an operand to reference that memory location. */ 3737 3738/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps 3739 also call this from ia64_print_operand for memory addresses. */ 3740 3741void 3742ia64_print_operand_address (stream, address) 3743 FILE * stream ATTRIBUTE_UNUSED; 3744 rtx address ATTRIBUTE_UNUSED; 3745{ 3746} 3747 3748/* Print an operand to an assembler instruction.
3749 C Swap and print a comparison operator. 3750 D Print an FP comparison operator. 3751 E Print 32 - constant, for SImode shifts as extract. 3752 e Print 64 - constant, for DImode rotates. 3753 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 3754 a floating point register emitted normally. 3755 I Invert a predicate register by adding 1. 3756 J Select the proper predicate register for a condition. 3757 j Select the inverse predicate register for a condition. 3758 O Append .acq for volatile load. 3759 P Postincrement of a MEM. 3760 Q Append .rel for volatile store. 3761 S Shift amount for shladd instruction. 3762 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 3763 for Intel assembler. 3764 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 3765 for Intel assembler. 3766 r Print register name, or constant 0 as r0. HP compatibility for 3767 Linux kernel. */ 3768void 3769ia64_print_operand (file, x, code) 3770 FILE * file; 3771 rtx x; 3772 int code; 3773{ 3774 const char *str; 3775 3776 switch (code) 3777 { 3778 case 0: 3779 /* Handled below. */ 3780 break; 3781 3782 case 'C': 3783 { 3784 enum rtx_code c = swap_condition (GET_CODE (x)); 3785 fputs (GET_RTX_NAME (c), file); 3786 return; 3787 } 3788 3789 case 'D': 3790 switch (GET_CODE (x)) 3791 { 3792 case NE: 3793 str = "neq"; 3794 break; 3795 case UNORDERED: 3796 str = "unord"; 3797 break; 3798 case ORDERED: 3799 str = "ord"; 3800 break; 3801 default: 3802 str = GET_RTX_NAME (GET_CODE (x)); 3803 break; 3804 } 3805 fputs (str, file); 3806 return; 3807 3808 case 'E': 3809 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 3810 return; 3811 3812 case 'e': 3813 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 3814 return; 3815 3816 case 'F': 3817 if (x == CONST0_RTX (GET_MODE (x))) 3818 str = reg_names [FR_REG (0)]; 3819 else if (x == CONST1_RTX (GET_MODE (x))) 3820 str = reg_names [FR_REG (1)]; 3821 else if (GET_CODE (x) == REG) 3822 str = reg_names [REGNO (x)]; 3823 else 3824 abort (); 3825 fputs (str, file); 3826 return; 3827 3828 case 'I': 3829 fputs (reg_names [REGNO (x) + 1], file); 3830 return; 3831 3832 case 'J': 3833 case 'j': 3834 { 3835 unsigned int regno = REGNO (XEXP (x, 0)); 3836 if (GET_CODE (x) == EQ) 3837 regno += 1; 3838 if (code == 'j') 3839 regno ^= 1; 3840 fputs (reg_names [regno], file); 3841 } 3842 return; 3843 3844 case 'O': 3845 if (MEM_VOLATILE_P (x)) 3846 fputs(".acq", file); 3847 return; 3848 3849 case 'P': 3850 { 3851 HOST_WIDE_INT value; 3852 3853 switch (GET_CODE (XEXP (x, 0))) 3854 { 3855 default: 3856 return; 3857 3858 case POST_MODIFY: 3859 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 3860 if (GET_CODE (x) == CONST_INT) 3861 value = INTVAL (x); 3862 else if (GET_CODE (x) == REG) 3863 { 3864 fprintf (file, ", %s", reg_names[REGNO (x)]); 3865 return; 3866 } 3867 else 3868 abort (); 3869 break; 3870 3871 case POST_INC: 3872 value = GET_MODE_SIZE (GET_MODE (x)); 3873 break; 3874 3875 case POST_DEC: 3876 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 3877 break; 3878 } 3879 3880 putc (',', file); 3881 putc (' ', file); 3882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value); 3883 return; 3884 } 3885 3886 case 'Q': 3887 if (MEM_VOLATILE_P (x)) 3888 fputs(".rel", file); 3889 return; 3890 3891 case 'S': 3892 fprintf (file, "%d", exact_log2 (INTVAL (x))); 3893 return; 3894 3895 case 'T': 3896 if (! 
TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3897 { 3898 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 3899 return; 3900 } 3901 break; 3902 3903 case 'U': 3904 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3905 { 3906 const char *prefix = "0x"; 3907 if (INTVAL (x) & 0x80000000) 3908 { 3909 fprintf (file, "0xffffffff"); 3910 prefix = ""; 3911 } 3912 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 3913 return; 3914 } 3915 break; 3916 3917 case 'r': 3918 /* If this operand is the constant zero, write it as register zero. 3919 Any register, zero, or CONST_INT value is OK here. */ 3920 if (GET_CODE (x) == REG) 3921 fputs (reg_names[REGNO (x)], file); 3922 else if (x == CONST0_RTX (GET_MODE (x))) 3923 fputs ("r0", file); 3924 else if (GET_CODE (x) == CONST_INT) 3925 output_addr_const (file, x); 3926 else 3927 output_operand_lossage ("invalid %%r value"); 3928 return; 3929 3930 case '+': 3931 { 3932 const char *which; 3933 3934 /* For conditional branches, returns or calls, substitute 3935 sptk, dptk, dpnt, or spnt for %s. */ 3936 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 3937 if (x) 3938 { 3939 int pred_val = INTVAL (XEXP (x, 0)); 3940 3941 /* Guess top and bottom 10% statically predicted. */ 3942 if (pred_val < REG_BR_PROB_BASE / 50) 3943 which = ".spnt"; 3944 else if (pred_val < REG_BR_PROB_BASE / 2) 3945 which = ".dpnt"; 3946 else if (pred_val < REG_BR_PROB_BASE / 100 * 98) 3947 which = ".dptk"; 3948 else 3949 which = ".sptk"; 3950 } 3951 else if (GET_CODE (current_output_insn) == CALL_INSN) 3952 which = ".sptk"; 3953 else 3954 which = ".dptk"; 3955 3956 fputs (which, file); 3957 return; 3958 } 3959 3960 case ',': 3961 x = current_insn_predicate; 3962 if (x) 3963 { 3964 unsigned int regno = REGNO (XEXP (x, 0)); 3965 if (GET_CODE (x) == EQ) 3966 regno += 1; 3967 fprintf (file, "(%s) ", reg_names [regno]); 3968 } 3969 return; 3970 3971 default: 3972 output_operand_lossage ("ia64_print_operand: unknown code"); 3973 return; 3974 } 3975 3976 switch (GET_CODE (x)) 3977 { 3978 /* This happens for the spill/restore instructions. */ 3979 case POST_INC: 3980 case POST_DEC: 3981 case POST_MODIFY: 3982 x = XEXP (x, 0); 3983 /* ... fall through ... */ 3984 3985 case REG: 3986 fputs (reg_names [REGNO (x)], file); 3987 break; 3988 3989 case MEM: 3990 { 3991 rtx addr = XEXP (x, 0); 3992 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a') 3993 addr = XEXP (addr, 0); 3994 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 3995 break; 3996 } 3997 3998 default: 3999 output_addr_const (file, x); 4000 break; 4001 } 4002 4003 return; 4004} 4005 4006/* Calculate the cost of moving data from a register in class FROM to 4007 one in class TO, using MODE. */ 4008 4009int 4010ia64_register_move_cost (mode, from, to) 4011 enum machine_mode mode; 4012 enum reg_class from, to; 4013{ 4014 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 4015 if (to == ADDL_REGS) 4016 to = GR_REGS; 4017 if (from == ADDL_REGS) 4018 from = GR_REGS; 4019 4020 /* All costs are symmetric, so reduce cases by putting the 4021 lower-numbered class as the destination. */ 4022 if (from < to) 4023 { 4024 enum reg_class tmp = to; 4025 to = from, from = tmp; 4026 } 4027 4028 /* Moving from FR<->GR in TFmode must be more expensive than 2, 4029 so that we get secondary memory reloads. Between FR_REGS, 4030 we have to make this at least as expensive as MEMORY_MOVE_COST 4031 to avoid spectacularly poor register class preferencing.
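   For instance, with the test below a TFmode copy between FR_REGS and GR_REGS reports MEMORY_MOVE_COST, steering reload toward a secondary memory location, while a TFmode copy entirely within GR_REGS costs 3.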
*/ 4032 if (mode == TFmode) 4033 { 4034 if (to != GR_REGS || from != GR_REGS) 4035 return MEMORY_MOVE_COST (mode, to, 0); 4036 else 4037 return 3; 4038 } 4039 4040 switch (to) 4041 { 4042 case PR_REGS: 4043 /* Moving between PR registers takes two insns. */ 4044 if (from == PR_REGS) 4045 return 3; 4046 /* Moving between PR and anything but GR is impossible. */ 4047 if (from != GR_REGS) 4048 return MEMORY_MOVE_COST (mode, to, 0); 4049 break; 4050 4051 case BR_REGS: 4052 /* Moving between BR and anything but GR is impossible. */ 4053 if (from != GR_REGS && from != GR_AND_BR_REGS) 4054 return MEMORY_MOVE_COST (mode, to, 0); 4055 break; 4056 4057 case AR_I_REGS: 4058 case AR_M_REGS: 4059 /* Moving between AR and anything but GR is impossible. */ 4060 if (from != GR_REGS) 4061 return MEMORY_MOVE_COST (mode, to, 0); 4062 break; 4063 4064 case GR_REGS: 4065 case FR_REGS: 4066 case GR_AND_FR_REGS: 4067 case GR_AND_BR_REGS: 4068 case ALL_REGS: 4069 break; 4070 4071 default: 4072 abort (); 4073 } 4074 4075 return 2; 4076} 4077 4078/* This function returns the register class required for a secondary 4079 register when copying between one of the registers in CLASS, and X, 4080 using MODE. A return value of NO_REGS means that no secondary register 4081 is required. */ 4082 4083enum reg_class 4084ia64_secondary_reload_class (class, mode, x) 4085 enum reg_class class; 4086 enum machine_mode mode ATTRIBUTE_UNUSED; 4087 rtx x; 4088{ 4089 int regno = -1; 4090 4091 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 4092 regno = true_regnum (x); 4093 4094 switch (class) 4095 { 4096 case BR_REGS: 4097 case AR_M_REGS: 4098 case AR_I_REGS: 4099 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 4100 interaction. We end up with two pseudos with overlapping lifetimes 4101 both of which are equiv to the same constant, and both of which need 4102 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 4103 changes depending on the path length, which means the qty_first_reg 4104 check in make_regs_eqv can give different answers at different times. 4105 At some point I'll probably need a reload_indi pattern to handle 4106 this. 4107 4108 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 4109 wound up with an FP register from GR_AND_FR_REGS. Extend that to all 4110 non-general registers for good measure. */ 4111 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 4112 return GR_REGS; 4113 4114 /* This is needed if a pseudo used as a call_operand gets spilled to a 4115 stack slot. */ 4116 if (GET_CODE (x) == MEM) 4117 return GR_REGS; 4118 break; 4119 4120 case FR_REGS: 4121 /* Need to go through general registers to get to other class regs. */ 4122 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) 4123 return GR_REGS; 4124 4125 /* This can happen when a paradoxical subreg is an operand to the 4126 muldi3 pattern. */ 4127 /* ??? This shouldn't be necessary after instruction scheduling is 4128 enabled, because paradoxical subregs are not accepted by 4129 register_operand when INSN_SCHEDULING is defined. Or alternatively, 4130 stop the paradoxical subreg stupidity in the *_operand functions 4131 in recog.c. */ 4132 if (GET_CODE (x) == MEM 4133 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode 4134 || GET_MODE (x) == QImode)) 4135 return GR_REGS; 4136 4137 /* This can happen because of the ior/and/etc patterns that accept FP 4138 registers as operands. If the third operand is a constant, then it 4139 needs to be reloaded into an FP register.
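   For example (illustrative), an insn such as (set (reg:DI f7) (ior:DI (reg:DI f6) (const_int 3))) cannot be emitted as-is: a constant can only reach an FR register by way of a general register, hence the GR_REGS returned below.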
*/ 4140 if (GET_CODE (x) == CONST_INT) 4141 return GR_REGS; 4142 4143 /* This can happen because of register elimination in a muldi3 insn. 4144 E.g. `26107 * (unsigned long)&u'. */ 4145 if (GET_CODE (x) == PLUS) 4146 return GR_REGS; 4147 break; 4148 4149 case PR_REGS: 4150 /* ??? This happens if we cse/gcse a BImode value across a call, 4151 and the function has a nonlocal goto. This is because global 4152 does not allocate call crossing pseudos to hard registers when 4153 current_function_has_nonlocal_goto is true. This is relatively 4154 common for C++ programs that use exceptions. To reproduce, 4155 return NO_REGS and compile libstdc++. */ 4156 if (GET_CODE (x) == MEM) 4157 return GR_REGS; 4158 4159 /* This can happen when we take a BImode subreg of a DImode value, 4160 and that DImode value winds up in some non-GR register. */ 4161 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) 4162 return GR_REGS; 4163 break; 4164 4165 case GR_REGS: 4166 /* Since we have no offsettable memory addresses, we need a temporary 4167 to hold the address of the second word. */ 4168 if (mode == TImode) 4169 return GR_REGS; 4170 break; 4171 4172 default: 4173 break; 4174 } 4175 4176 return NO_REGS; 4177} 4178 4179/* Emit text to declare externally defined variables and functions, because 4180 the Intel assembler does not support undefined externals. */ 4181 4182void 4183ia64_asm_output_external (file, decl, name) 4184 FILE *file; 4185 tree decl; 4186 const char *name; 4187{ 4188 int save_referenced; 4189 4190 /* GNU as does not need anything here, but the HP linker does need 4191 something for external functions. */ 4192 4193 if (TARGET_GNU_AS 4194 && (!TARGET_HPUX_LD 4195 || TREE_CODE (decl) != FUNCTION_DECL 4196 || strstr(name, "__builtin_") == name)) 4197 return; 4198 4199 /* ??? The Intel assembler creates a reference that needs to be satisfied by 4200 the linker when we do this, so we need to be careful not to do this for 4201 builtin functions which have no library equivalent. Unfortunately, we 4202 can't tell here whether or not a function will actually be called by 4203 expand_expr, so we pull in library functions even if we may not need 4204 them later. */ 4205 if (! strcmp (name, "__builtin_next_arg") 4206 || ! strcmp (name, "alloca") 4207 || ! strcmp (name, "__builtin_constant_p") 4208 || ! strcmp (name, "__builtin_args_info")) 4209 return; 4210 4211 if (TARGET_HPUX_LD) 4212 ia64_hpux_add_extern_decl (name); 4213 else 4214 { 4215 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and 4216 restore it. */ 4217 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); 4218 if (TREE_CODE (decl) == FUNCTION_DECL) 4219 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 4220 (*targetm.asm_out.globalize_label) (file, name); 4221 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; 4222 } 4223} 4224 4225/* Parse the -mfixed-range= option string. */ 4226 4227static void 4228fix_range (const_str) 4229 const char *const_str; 4230{ 4231 int i, first, last; 4232 char *str, *dash, *comma; 4233 4234 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and 4235 REG2 are either register names or register numbers. The effect 4236 of this option is to mark the registers in the range from REG1 to 4237 REG2 as ``fixed'' so they won't be used by the compiler. This is 4238 used, e.g., to ensure that kernel mode code doesn't use f32-f127.
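   For example, -mfixed-range=f32-f127 reserves the entire upper floating-point register file, and several ranges may be given separated by commas, as in the illustrative -mfixed-range=f12-f15,f32-f127.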
*/ 4239 4240 i = strlen (const_str); 4241 str = (char *) alloca (i + 1); 4242 memcpy (str, const_str, i + 1); 4243 4244 while (1) 4245 { 4246 dash = strchr (str, '-'); 4247 if (!dash) 4248 { 4249 warning ("value of -mfixed-range must have form REG1-REG2"); 4250 return; 4251 } 4252 *dash = '\0'; 4253 4254 comma = strchr (dash + 1, ','); 4255 if (comma) 4256 *comma = '\0'; 4257 4258 first = decode_reg_name (str); 4259 if (first < 0) 4260 { 4261 warning ("unknown register name: %s", str); 4262 return; 4263 } 4264 4265 last = decode_reg_name (dash + 1); 4266 if (last < 0) 4267 { 4268 warning ("unknown register name: %s", dash + 1); 4269 return; 4270 } 4271 4272 *dash = '-'; 4273 4274 if (first > last) 4275 { 4276 warning ("%s-%s is an empty range", str, dash + 1); 4277 return; 4278 } 4279 4280 for (i = first; i <= last; ++i) 4281 fixed_regs[i] = call_used_regs[i] = 1; 4282 4283 if (!comma) 4284 break; 4285 4286 *comma = ','; 4287 str = comma + 1; 4288 } 4289} 4290 4291static struct machine_function * 4292ia64_init_machine_status () 4293{ 4294 return ggc_alloc_cleared (sizeof (struct machine_function)); 4295} 4296 4297/* Handle TARGET_OPTIONS switches. */ 4298 4299void 4300ia64_override_options () 4301{ 4302 if (TARGET_AUTO_PIC) 4303 target_flags |= MASK_CONST_GP; 4304 4305 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR) 4306 { 4307 warning ("cannot optimize floating point division for both latency and throughput"); 4308 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR; 4309 } 4310 4311 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR) 4312 { 4313 warning ("cannot optimize integer division for both latency and throughput"); 4314 target_flags &= ~MASK_INLINE_INT_DIV_THR; 4315 } 4316 4317 if (ia64_fixed_range_string) 4318 fix_range (ia64_fixed_range_string); 4319 4320 if (ia64_tls_size_string) 4321 { 4322 char *end; 4323 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10); 4324 if (*end || (tmp != 14 && tmp != 22 && tmp != 64)) 4325 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string); 4326 else 4327 ia64_tls_size = tmp; 4328 } 4329 4330 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; 4331 flag_schedule_insns_after_reload = 0; 4332 4333 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; 4334 4335 init_machine_status = ia64_init_machine_status; 4336 4337 /* Tell the compiler which flavor of TFmode we're using. 
*/ 4338 if (INTEL_EXTENDED_IEEE_FORMAT) 4339 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; 4340} 4341 4342static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); 4343static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); 4344static enum attr_type ia64_safe_type PARAMS((rtx)); 4345 4346static enum attr_itanium_requires_unit0 4347ia64_safe_itanium_requires_unit0 (insn) 4348 rtx insn; 4349{ 4350 if (recog_memoized (insn) >= 0) 4351 return get_attr_itanium_requires_unit0 (insn); 4352 else 4353 return ITANIUM_REQUIRES_UNIT0_NO; 4354} 4355 4356static enum attr_itanium_class 4357ia64_safe_itanium_class (insn) 4358 rtx insn; 4359{ 4360 if (recog_memoized (insn) >= 0) 4361 return get_attr_itanium_class (insn); 4362 else 4363 return ITANIUM_CLASS_UNKNOWN; 4364} 4365 4366static enum attr_type 4367ia64_safe_type (insn) 4368 rtx insn; 4369{ 4370 if (recog_memoized (insn) >= 0) 4371 return get_attr_type (insn); 4372 else 4373 return TYPE_UNKNOWN; 4374} 4375 4376/* The following collection of routines emit instruction group stop bits as 4377 necessary to avoid dependencies. */ 4378 4379/* Need to track some additional registers as far as serialization is 4380 concerned so we can properly handle br.call and br.ret. We could 4381 make these registers visible to gcc, but since these registers are 4382 never explicitly used in gcc generated code, it seems wasteful to 4383 do so (plus it would make the call and return patterns needlessly 4384 complex). */ 4385#define REG_GP (GR_REG (1)) 4386#define REG_RP (BR_REG (0)) 4387#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 4388/* This is used for volatile asms which may require a stop bit immediately 4389 before and after them. */ 4390#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 4391#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 4392#define NUM_REGS (AR_UNAT_BIT_0 + 64) 4393 4394/* For each register, we keep track of how it has been written in the 4395 current instruction group. 4396 4397 If a register is written unconditionally (no qualifying predicate), 4398 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 4399 4400 If a register is written if its qualifying predicate P is true, we 4401 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register 4402 may be written again by the complement of P (P^1) and when this happens, 4403 WRITE_COUNT gets set to 2. 4404 4405 The result of this is that whenever an insn attempts to write a register 4406 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 4407 4408 If a predicate register is written by a floating-point insn, we set 4409 WRITTEN_BY_FP to true. 4410 4411 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 4412 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 4413 4414struct reg_write_state 4415{ 4416 unsigned int write_count : 2; 4417 unsigned int first_pred : 16; 4418 unsigned int written_by_fp : 1; 4419 unsigned int written_by_and : 1; 4420 unsigned int written_by_or : 1; 4421}; 4422 4423/* Cumulative info for the current instruction group. */ 4424struct reg_write_state rws_sum[NUM_REGS]; 4425/* Info for the current instruction. This gets copied to rws_sum after a 4426 stop bit is emitted. */ 4427struct reg_write_state rws_insn[NUM_REGS]; 4428 4429/* Indicates whether this is the first instruction after a stop bit, 4430 in which case we don't need another stop bit. Without this, we hit 4431 the abort in ia64_variable_issue when scheduling an alloc. 
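   (An alloc must be the first insn of its group; the UNSPECV_ALLOC case in rtx_needs_barrier enforces that by always requesting a stop bit, so without this flag we would ask for a second, redundant stop when the alloc directly follows one.)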
*/ 4432static int first_instruction; 4433 4434/* Misc flags needed to compute RAW/WAW dependencies while we are traversing 4435 RTL for one instruction. */ 4436struct reg_flags 4437{ 4438 unsigned int is_write : 1; /* Is register being written? */ 4439 unsigned int is_fp : 1; /* Is register used as part of an fp op? */ 4440 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 4441 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 4442 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 4443 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ 4444}; 4445 4446static void rws_update PARAMS ((struct reg_write_state *, int, 4447 struct reg_flags, int)); 4448static int rws_access_regno PARAMS ((int, struct reg_flags, int)); 4449static int rws_access_reg PARAMS ((rtx, struct reg_flags, int)); 4450static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *)); 4451static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx)); 4452static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int)); 4453static void init_insn_group_barriers PARAMS ((void)); 4454static int group_barrier_needed_p PARAMS ((rtx)); 4455static int safe_group_barrier_needed_p PARAMS ((rtx)); 4456 4457/* Update *RWS for REGNO, which is being written by the current instruction, 4458 with predicate PRED, and associated register flags in FLAGS. */ 4459 4460static void 4461rws_update (rws, regno, flags, pred) 4462 struct reg_write_state *rws; 4463 int regno; 4464 struct reg_flags flags; 4465 int pred; 4466{ 4467 if (pred) 4468 rws[regno].write_count++; 4469 else 4470 rws[regno].write_count = 2; 4471 rws[regno].written_by_fp |= flags.is_fp; 4472 /* ??? Not tracking and/or across differing predicates. */ 4473 rws[regno].written_by_and = flags.is_and; 4474 rws[regno].written_by_or = flags.is_or; 4475 rws[regno].first_pred = pred; 4476} 4477 4478/* Handle an access to register REGNO of type FLAGS using predicate register 4479 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates 4480 a dependency with an earlier instruction in the same group. */ 4481 4482static int 4483rws_access_regno (regno, flags, pred) 4484 int regno; 4485 struct reg_flags flags; 4486 int pred; 4487{ 4488 int need_barrier = 0; 4489 4490 if (regno >= NUM_REGS) 4491 abort (); 4492 4493 if (! PR_REGNO_P (regno)) 4494 flags.is_and = flags.is_or = 0; 4495 4496 if (flags.is_write) 4497 { 4498 int write_count; 4499 4500 /* One insn writes same reg multiple times? */ 4501 if (rws_insn[regno].write_count > 0) 4502 abort (); 4503 4504 /* Update info for current instruction. */ 4505 rws_update (rws_insn, regno, flags, pred); 4506 write_count = rws_sum[regno].write_count; 4507 4508 switch (write_count) 4509 { 4510 case 0: 4511 /* The register has not been written yet. */ 4512 rws_update (rws_sum, regno, flags, pred); 4513 break; 4514 4515 case 1: 4516 /* The register has been written via a predicate. If this is 4517 not a complementary predicate, then we need a barrier. */ 4518 /* ??? This assumes that P and P+1 are always complementary 4519 predicates for P even. */ 4520 if (flags.is_and && rws_sum[regno].written_by_and) 4521 ; 4522 else if (flags.is_or && rws_sum[regno].written_by_or) 4523 ; 4524 else if ((rws_sum[regno].first_pred ^ 1) != pred) 4525 need_barrier = 1; 4526 rws_update (rws_sum, regno, flags, pred); 4527 break; 4528 4529 case 2: 4530 /* The register has been unconditionally written already. We 4531 need a barrier. 
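   For example (illustrative), after an unconditional mov r14 = r15 earlier in the group, any further write to r14 -- even a predicated one such as (p6) mov r14 = r16 -- requires a stop bit first; the only exceptions, tested below, are predicate registers written entirely by AND-type or entirely by OR-type parallel compares.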
*/ 4532 if (flags.is_and && rws_sum[regno].written_by_and) 4533 ; 4534 else if (flags.is_or && rws_sum[regno].written_by_or) 4535 ; 4536 else 4537 need_barrier = 1; 4538 rws_sum[regno].written_by_and = flags.is_and; 4539 rws_sum[regno].written_by_or = flags.is_or; 4540 break; 4541 4542 default: 4543 abort (); 4544 } 4545 } 4546 else 4547 { 4548 if (flags.is_branch) 4549 { 4550 /* Branches have several RAW exceptions that allow us to avoid 4551 barriers. */ 4552 4553 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) 4554 /* RAW dependencies on branch regs are permissible as long 4555 as the writer is a non-branch instruction. Since we 4556 never generate code that uses a branch register written 4557 by a branch instruction, handling this case is 4558 easy. */ 4559 return 0; 4560 4561 if (REGNO_REG_CLASS (regno) == PR_REGS 4562 && ! rws_sum[regno].written_by_fp) 4563 /* The predicates of a branch are available within the 4564 same insn group as long as the predicate was written by 4565 something other than a floating-point instruction. */ 4566 return 0; 4567 } 4568 4569 if (flags.is_and && rws_sum[regno].written_by_and) 4570 return 0; 4571 if (flags.is_or && rws_sum[regno].written_by_or) 4572 return 0; 4573 4574 switch (rws_sum[regno].write_count) 4575 { 4576 case 0: 4577 /* The register has not been written yet. */ 4578 break; 4579 4580 case 1: 4581 /* The register has been written via a predicate. If this is 4582 not a complementary predicate, then we need a barrier. */ 4583 /* ??? This assumes that P and P+1 are always complementary 4584 predicates for P even. */ 4585 if ((rws_sum[regno].first_pred ^ 1) != pred) 4586 need_barrier = 1; 4587 break; 4588 4589 case 2: 4590 /* The register has been unconditionally written already. We 4591 need a barrier. */ 4592 need_barrier = 1; 4593 break; 4594 4595 default: 4596 abort (); 4597 } 4598 } 4599 4600 return need_barrier; 4601} 4602 4603static int 4604rws_access_reg (reg, flags, pred) 4605 rtx reg; 4606 struct reg_flags flags; 4607 int pred; 4608{ 4609 int regno = REGNO (reg); 4610 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); 4611 4612 if (n == 1) 4613 return rws_access_regno (regno, flags, pred); 4614 else 4615 { 4616 int need_barrier = 0; 4617 while (--n >= 0) 4618 need_barrier |= rws_access_regno (regno + n, flags, pred); 4619 return need_barrier; 4620 } 4621} 4622 4623/* Examine X, which is a SET rtx, and update the flags, the predicate, and 4624 the condition, stored in *PFLAGS, *PPRED and *PCOND. */ 4625 4626static void 4627update_set_flags (x, pflags, ppred, pcond) 4628 rtx x; 4629 struct reg_flags *pflags; 4630 int *ppred; 4631 rtx *pcond; 4632{ 4633 rtx src = SET_SRC (x); 4634 4635 *pcond = 0; 4636 4637 switch (GET_CODE (src)) 4638 { 4639 case CALL: 4640 return; 4641 4642 case IF_THEN_ELSE: 4643 if (SET_DEST (x) == pc_rtx) 4644 /* X is a conditional branch. */ 4645 return; 4646 else 4647 { 4648 int is_complemented = 0; 4649 4650 /* X is a conditional move. */ 4651 rtx cond = XEXP (src, 0); 4652 if (GET_CODE (cond) == EQ) 4653 is_complemented = 1; 4654 cond = XEXP (cond, 0); 4655 if (GET_CODE (cond) != REG 4656 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4657 abort (); 4658 *pcond = cond; 4659 if (XEXP (src, 1) == SET_DEST (x) 4660 || XEXP (src, 2) == SET_DEST (x)) 4661 { 4662 /* X is a conditional move that conditionally writes the 4663 destination. */ 4664 4665 /* We need another complement in this case. */ 4666 if (XEXP (src, 1) == SET_DEST (x)) 4667 is_complemented = !
is_complemented; 4668 4669 *ppred = REGNO (cond); 4670 if (is_complemented) 4671 ++*ppred; 4672 } 4673 4674 /* ??? If this is a conditional write to the dest, then this 4675 instruction does not actually read one source. This probably 4676 doesn't matter, because that source is also the dest. */ 4677 /* ??? Multiple writes to predicate registers are allowed 4678 if they are all AND type compares, or if they are all OR 4679 type compares. We do not generate such instructions 4680 currently. */ 4681 } 4682 /* ... fall through ... */ 4683 4684 default: 4685 if (GET_RTX_CLASS (GET_CODE (src)) == '<' 4686 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) 4687 /* Set pflags->is_fp to 1 so that we know we're dealing 4688 with a floating point comparison when processing the 4689 destination of the SET. */ 4690 pflags->is_fp = 1; 4691 4692 /* Discover if this is a parallel comparison. We only handle 4693 and.orcm and or.andcm at present, since we must retain a 4694 strict inverse on the predicate pair. */ 4695 else if (GET_CODE (src) == AND) 4696 pflags->is_and = 1; 4697 else if (GET_CODE (src) == IOR) 4698 pflags->is_or = 1; 4699 4700 break; 4701 } 4702} 4703 4704/* Subroutine of rtx_needs_barrier; this function determines whether the 4705 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 4706 are as in rtx_needs_barrier. COND is an rtx that holds the condition 4707 for this insn. */ 4708 4709static int 4710set_src_needs_barrier (x, flags, pred, cond) 4711 rtx x; 4712 struct reg_flags flags; 4713 int pred; 4714 rtx cond; 4715{ 4716 int need_barrier = 0; 4717 rtx dst; 4718 rtx src = SET_SRC (x); 4719 4720 if (GET_CODE (src) == CALL) 4721 /* We don't need to worry about the result registers that 4722 get written by a subroutine call. */ 4723 return rtx_needs_barrier (src, flags, pred); 4724 else if (SET_DEST (x) == pc_rtx) 4725 { 4726 /* X is a conditional branch. */ 4727 /* ??? This seems redundant, as the caller sets this bit for 4728 all JUMP_INSNs. */ 4729 flags.is_branch = 1; 4730 return rtx_needs_barrier (src, flags, pred); 4731 } 4732 4733 need_barrier = rtx_needs_barrier (src, flags, pred); 4734 4735 /* This instruction unconditionally uses a predicate register. */ 4736 if (cond) 4737 need_barrier |= rws_access_reg (cond, flags, 0); 4738 4739 dst = SET_DEST (x); 4740 if (GET_CODE (dst) == ZERO_EXTRACT) 4741 { 4742 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 4743 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 4744 dst = XEXP (dst, 0); 4745 } 4746 return need_barrier; 4747} 4748 4749/* Handle an access to rtx X of type FLAGS using predicate register PRED. 4750 Return 1 if this access creates a dependency with an earlier instruction 4751 in the same group. */ 4752 4753static int 4754rtx_needs_barrier (x, flags, pred) 4755 rtx x; 4756 struct reg_flags flags; 4757 int pred; 4758{ 4759 int i, j; 4760 int is_complemented = 0; 4761 int need_barrier = 0; 4762 const char *format_ptr; 4763 struct reg_flags new_flags; 4764 rtx cond = 0; 4765 4766 if (!
x) 4767 return 0; 4768 4769 new_flags = flags; 4770 4771 switch (GET_CODE (x)) 4772 { 4773 case SET: 4774 update_set_flags (x, &new_flags, &pred, &cond); 4775 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond); 4776 if (GET_CODE (SET_SRC (x)) != CALL) 4777 { 4778 new_flags.is_write = 1; 4779 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 4780 } 4781 break; 4782 4783 case CALL: 4784 new_flags.is_write = 0; 4785 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 4786 4787 /* Avoid multiple register writes, in case this is a pattern with 4788 multiple CALL rtx. This avoids an abort in rws_access_reg. */ 4789 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count) 4790 { 4791 new_flags.is_write = 1; 4792 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 4793 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 4794 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 4795 } 4796 break; 4797 4798 case COND_EXEC: 4799 /* X is a predicated instruction. */ 4800 4801 cond = COND_EXEC_TEST (x); 4802 if (pred) 4803 abort (); 4804 need_barrier = rtx_needs_barrier (cond, flags, 0); 4805 4806 if (GET_CODE (cond) == EQ) 4807 is_complemented = 1; 4808 cond = XEXP (cond, 0); 4809 if (GET_CODE (cond) != REG 4810 && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4811 abort (); 4812 pred = REGNO (cond); 4813 if (is_complemented) 4814 ++pred; 4815 4816 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 4817 return need_barrier; 4818 4819 case CLOBBER: 4820 case USE: 4821 /* Clobber & use are for earlier compiler-phases only. */ 4822 break; 4823 4824 case ASM_OPERANDS: 4825 case ASM_INPUT: 4826 /* We always emit stop bits for traditional asms. We emit stop bits 4827 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 4828 if (GET_CODE (x) != ASM_OPERANDS 4829 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 4830 { 4831 /* Avoid writing the register multiple times if we have multiple 4832 asm outputs. This avoids an abort in rws_access_reg. */ 4833 if (! rws_insn[REG_VOLATILE].write_count) 4834 { 4835 new_flags.is_write = 1; 4836 rws_access_regno (REG_VOLATILE, new_flags, pred); 4837 } 4838 return 1; 4839 } 4840 4841 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 4842 We can not just fall through here since then we would be confused 4843 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate 4844 traditional asms unlike their normal usage. 
*/ 4845 4846 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 4847 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 4848 need_barrier = 1; 4849 break; 4850 4851 case PARALLEL: 4852 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4853 { 4854 rtx pat = XVECEXP (x, 0, i); 4855 if (GET_CODE (pat) == SET) 4856 { 4857 update_set_flags (pat, &new_flags, &pred, &cond); 4858 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond); 4859 } 4860 else if (GET_CODE (pat) == USE 4861 || GET_CODE (pat) == CALL 4862 || GET_CODE (pat) == ASM_OPERANDS) 4863 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4864 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN) 4865 abort (); 4866 } 4867 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4868 { 4869 rtx pat = XVECEXP (x, 0, i); 4870 if (GET_CODE (pat) == SET) 4871 { 4872 if (GET_CODE (SET_SRC (pat)) != CALL) 4873 { 4874 new_flags.is_write = 1; 4875 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 4876 pred); 4877 } 4878 } 4879 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 4880 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4881 } 4882 break; 4883 4884 case SUBREG: 4885 x = SUBREG_REG (x); 4886 /* FALLTHRU */ 4887 case REG: 4888 if (REGNO (x) == AR_UNAT_REGNUM) 4889 { 4890 for (i = 0; i < 64; ++i) 4891 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 4892 } 4893 else 4894 need_barrier = rws_access_reg (x, flags, pred); 4895 break; 4896 4897 case MEM: 4898 /* Find the regs used in memory address computation. */ 4899 new_flags.is_write = 0; 4900 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4901 break; 4902 4903 case CONST_INT: case CONST_DOUBLE: 4904 case SYMBOL_REF: case LABEL_REF: case CONST: 4905 break; 4906 4907 /* Operators with side-effects. */ 4908 case POST_INC: case POST_DEC: 4909 if (GET_CODE (XEXP (x, 0)) != REG) 4910 abort (); 4911 4912 new_flags.is_write = 0; 4913 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4914 new_flags.is_write = 1; 4915 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4916 break; 4917 4918 case POST_MODIFY: 4919 if (GET_CODE (XEXP (x, 0)) != REG) 4920 abort (); 4921 4922 new_flags.is_write = 0; 4923 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4924 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4925 new_flags.is_write = 1; 4926 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4927 break; 4928 4929 /* Handle common unary and binary ops for efficiency. 
*/ 4930 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 4931 case MOD: case UDIV: case UMOD: case AND: case IOR: 4932 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 4933 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 4934 case NE: case EQ: case GE: case GT: case LE: 4935 case LT: case GEU: case GTU: case LEU: case LTU: 4936 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4937 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4938 break; 4939 4940 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 4941 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 4942 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 4943 case SQRT: case FFS: 4944 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 4945 break; 4946 4947 case UNSPEC: 4948 switch (XINT (x, 1)) 4949 { 4950 case UNSPEC_LTOFF_DTPMOD: 4951 case UNSPEC_LTOFF_DTPREL: 4952 case UNSPEC_DTPREL: 4953 case UNSPEC_LTOFF_TPREL: 4954 case UNSPEC_TPREL: 4955 case UNSPEC_PRED_REL_MUTEX: 4956 case UNSPEC_PIC_CALL: 4957 case UNSPEC_MF: 4958 case UNSPEC_FETCHADD_ACQ: 4959 case UNSPEC_BSP_VALUE: 4960 case UNSPEC_FLUSHRS: 4961 case UNSPEC_BUNDLE_SELECTOR: 4962 break; 4963 4964 case UNSPEC_GR_SPILL: 4965 case UNSPEC_GR_RESTORE: 4966 { 4967 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 4968 HOST_WIDE_INT bit = (offset >> 3) & 63; 4969 4970 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4971 new_flags.is_write = (XINT (x, 1) == 1); 4972 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 4973 new_flags, pred); 4974 break; 4975 } 4976 4977 case UNSPEC_FR_SPILL: 4978 case UNSPEC_FR_RESTORE: 4979 case UNSPEC_POPCNT: 4980 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4981 break; 4982 4983 case UNSPEC_ADDP4: 4984 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4985 break; 4986 4987 case UNSPEC_FR_RECIP_APPROX: 4988 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 4989 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 4990 break; 4991 4992 case UNSPEC_CMPXCHG_ACQ: 4993 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 4994 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 4995 break; 4996 4997 default: 4998 abort (); 4999 } 5000 break; 5001 5002 case UNSPEC_VOLATILE: 5003 switch (XINT (x, 1)) 5004 { 5005 case UNSPECV_ALLOC: 5006 /* Alloc must always be the first instruction of a group. 5007 We force this by always returning true. */ 5008 /* ??? We might get better scheduling if we explicitly check for 5009 input/local/output register dependencies, and modify the 5010 scheduler so that alloc is always reordered to the start of 5011 the current group. We could then eliminate all of the 5012 first_instruction code. 
*/ 5013 rws_access_regno (AR_PFS_REGNUM, flags, pred); 5014 5015 new_flags.is_write = 1; 5016 rws_access_regno (REG_AR_CFM, new_flags, pred); 5017 return 1; 5018 5019 case UNSPECV_SET_BSP: 5020 need_barrier = 1; 5021 break; 5022 5023 case UNSPECV_BLOCKAGE: 5024 case UNSPECV_INSN_GROUP_BARRIER: 5025 case UNSPECV_BREAK: 5026 case UNSPECV_PSAC_ALL: 5027 case UNSPECV_PSAC_NORMAL: 5028 return 0; 5029 5030 default: 5031 abort (); 5032 } 5033 break; 5034 5035 case RETURN: 5036 new_flags.is_write = 0; 5037 need_barrier = rws_access_regno (REG_RP, flags, pred); 5038 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 5039 5040 new_flags.is_write = 1; 5041 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 5042 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 5043 break; 5044 5045 default: 5046 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 5047 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 5048 switch (format_ptr[i]) 5049 { 5050 case '0': /* unused field */ 5051 case 'i': /* integer */ 5052 case 'n': /* note */ 5053 case 'w': /* wide integer */ 5054 case 's': /* pointer to string */ 5055 case 'S': /* optional pointer to string */ 5056 break; 5057 5058 case 'e': 5059 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 5060 need_barrier = 1; 5061 break; 5062 5063 case 'E': 5064 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 5065 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 5066 need_barrier = 1; 5067 break; 5068 5069 default: 5070 abort (); 5071 } 5072 break; 5073 } 5074 return need_barrier; 5075} 5076 5077/* Clear out the state for group_barrier_needed_p at the start of a 5078 sequence of insns. */ 5079 5080static void 5081init_insn_group_barriers () 5082{ 5083 memset (rws_sum, 0, sizeof (rws_sum)); 5084 first_instruction = 1; 5085} 5086 5087/* Given the current state, recorded by previous calls to this function, 5088 determine whether a group barrier (a stop bit) is necessary before INSN. 5089 Return nonzero if so. */ 5090 5091static int 5092group_barrier_needed_p (insn) 5093 rtx insn; 5094{ 5095 rtx pat; 5096 int need_barrier = 0; 5097 struct reg_flags flags; 5098 5099 memset (&flags, 0, sizeof (flags)); 5100 switch (GET_CODE (insn)) 5101 { 5102 case NOTE: 5103 break; 5104 5105 case BARRIER: 5106 /* A barrier doesn't imply an instruction group boundary. */ 5107 break; 5108 5109 case CODE_LABEL: 5110 memset (rws_insn, 0, sizeof (rws_insn)); 5111 return 1; 5112 5113 case CALL_INSN: 5114 flags.is_branch = 1; 5115 flags.is_sibcall = SIBLING_CALL_P (insn); 5116 memset (rws_insn, 0, sizeof (rws_insn)); 5117 5118 /* Don't bundle a call following another call. */ 5119 if ((pat = prev_active_insn (insn)) 5120 && GET_CODE (pat) == CALL_INSN) 5121 { 5122 need_barrier = 1; 5123 break; 5124 } 5125 5126 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 5127 break; 5128 5129 case JUMP_INSN: 5130 flags.is_branch = 1; 5131 5132 /* Don't bundle a jump following a call. */ 5133 if ((pat = prev_active_insn (insn)) 5134 && GET_CODE (pat) == CALL_INSN) 5135 { 5136 need_barrier = 1; 5137 break; 5138 } 5139 /* FALLTHRU */ 5140 5141 case INSN: 5142 if (GET_CODE (PATTERN (insn)) == USE 5143 || GET_CODE (PATTERN (insn)) == CLOBBER) 5144 /* Don't care about USE and CLOBBER "insns"---those are used to 5145 indicate to the optimizer that it shouldn't get rid of 5146 certain operations. */ 5147 break; 5148 5149 pat = PATTERN (insn); 5150 5151 /* Ug. Hack hacks hacked elsewhere. 
*/ 5152 switch (recog_memoized (insn)) 5153 { 5154 /* We play dependency tricks with the epilogue in order 5155 to get proper schedules. Undo this for dv analysis. */ 5156 case CODE_FOR_epilogue_deallocate_stack: 5157 case CODE_FOR_prologue_allocate_stack: 5158 pat = XVECEXP (pat, 0, 0); 5159 break; 5160 5161 /* The pattern we use for br.cloop confuses the code above. 5162 The second element of the vector is representative. */ 5163 case CODE_FOR_doloop_end_internal: 5164 pat = XVECEXP (pat, 0, 1); 5165 break; 5166 5167 /* Doesn't generate code. */ 5168 case CODE_FOR_pred_rel_mutex: 5169 case CODE_FOR_prologue_use: 5170 return 0; 5171 5172 default: 5173 break; 5174 } 5175 5176 memset (rws_insn, 0, sizeof (rws_insn)); 5177 need_barrier = rtx_needs_barrier (pat, flags, 0); 5178 5179 /* Check to see if the previous instruction was a volatile 5180 asm. */ 5181 if (! need_barrier) 5182 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 5183 break; 5184 5185 default: 5186 abort (); 5187 } 5188 5189 if (first_instruction) 5190 { 5191 need_barrier = 0; 5192 first_instruction = 0; 5193 } 5194 5195 return need_barrier; 5196} 5197 5198/* Like group_barrier_needed_p, but do not clobber the current state. */ 5199 5200static int 5201safe_group_barrier_needed_p (insn) 5202 rtx insn; 5203{ 5204 struct reg_write_state rws_saved[NUM_REGS]; 5205 int saved_first_instruction; 5206 int t; 5207 5208 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved); 5209 saved_first_instruction = first_instruction; 5210 5211 t = group_barrier_needed_p (insn); 5212 5213 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved); 5214 first_instruction = saved_first_instruction; 5215 5216 return t; 5217} 5218 5219/* INSNS is a chain of instructions. Scan the chain, and insert stop bits 5220 as necessary to eliminate dependencies. This function assumes that 5221 a final instruction scheduling pass has been run which has already 5222 inserted most of the necessary stop bits. This function only inserts 5223 new ones at basic block boundaries, since these are invisible to the 5224 scheduler. */ 5225 5226static void 5227emit_insn_group_barriers (dump, insns) 5228 FILE *dump; 5229 rtx insns; 5230{ 5231 rtx insn; 5232 rtx last_label = 0; 5233 int insns_since_last_label = 0; 5234 5235 init_insn_group_barriers (); 5236 5237 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5238 { 5239 if (GET_CODE (insn) == CODE_LABEL) 5240 { 5241 if (insns_since_last_label) 5242 last_label = insn; 5243 insns_since_last_label = 0; 5244 } 5245 else if (GET_CODE (insn) == NOTE 5246 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 5247 { 5248 if (insns_since_last_label) 5249 last_label = insn; 5250 insns_since_last_label = 0; 5251 } 5252 else if (GET_CODE (insn) == INSN 5253 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 5254 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 5255 { 5256 init_insn_group_barriers (); 5257 last_label = 0; 5258 } 5259 else if (INSN_P (insn)) 5260 { 5261 insns_since_last_label = 1; 5262 5263 if (group_barrier_needed_p (insn)) 5264 { 5265 if (last_label) 5266 { 5267 if (dump) 5268 fprintf (dump, "Emitting stop before label %d\n", 5269 INSN_UID (last_label)); 5270 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 5271 insn = last_label; 5272 5273 init_insn_group_barriers (); 5274 last_label = 0; 5275 } 5276 } 5277 } 5278 } 5279} 5280 5281/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 5282 This function has to emit all necessary group barriers.
*/ 5283 5284static void 5285emit_all_insn_group_barriers (dump, insns) 5286 FILE *dump ATTRIBUTE_UNUSED; 5287 rtx insns; 5288{ 5289 rtx insn; 5290 5291 init_insn_group_barriers (); 5292 5293 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5294 { 5295 if (GET_CODE (insn) == BARRIER) 5296 { 5297 rtx last = prev_active_insn (insn); 5298 5299 if (! last) 5300 continue; 5301 if (GET_CODE (last) == JUMP_INSN 5302 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) 5303 last = prev_active_insn (last); 5304 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 5305 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 5306 5307 init_insn_group_barriers (); 5308 } 5309 else if (INSN_P (insn)) 5310 { 5311 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 5312 init_insn_group_barriers (); 5313 else if (group_barrier_needed_p (insn)) 5314 { 5315 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5316 init_insn_group_barriers (); 5317 group_barrier_needed_p (insn); 5318 } 5319 } 5320 } 5321} 5322 5323static int errata_find_address_regs PARAMS ((rtx *, void *)); 5324static void errata_emit_nops PARAMS ((rtx)); 5325static void fixup_errata PARAMS ((void)); 5326 5327/* This structure is used to track some details about the previous insn 5328 groups so we can determine if it may be necessary to insert NOPs to 5329 work around hardware errata. */ 5330static struct group 5331{ 5332 HARD_REG_SET p_reg_set; 5333 HARD_REG_SET gr_reg_conditionally_set; 5334} last_group[2]; 5335 5336/* Index into the last_group array. */ 5337static int group_idx; 5338 5339/* Called through for_each_rtx; determines if a hard register that was 5340 conditionally set in the previous group is used as an address register. 5341 It ensures that for_each_rtx returns 1 in that case. */ 5342static int 5343errata_find_address_regs (xp, data) 5344 rtx *xp; 5345 void *data ATTRIBUTE_UNUSED; 5346{ 5347 rtx x = *xp; 5348 if (GET_CODE (x) != MEM) 5349 return 0; 5350 x = XEXP (x, 0); 5351 if (GET_CODE (x) == POST_MODIFY) 5352 x = XEXP (x, 0); 5353 if (GET_CODE (x) == REG) 5354 { 5355 struct group *prev_group = last_group + (group_idx ^ 1); 5356 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set, 5357 REGNO (x))) 5358 return 1; 5359 return -1; 5360 } 5361 return 0; 5362} 5363 5364/* Called for each insn; this function keeps track of the state in 5365 last_group and emits additional NOPs if necessary to work around 5366 an Itanium A/B step erratum. */ 5367static void 5368errata_emit_nops (insn) 5369 rtx insn; 5370{ 5371 struct group *this_group = last_group + group_idx; 5372 struct group *prev_group = last_group + (group_idx ^ 1); 5373 rtx pat = PATTERN (insn); 5374 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0; 5375 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat; 5376 enum attr_type type; 5377 rtx set = real_pat; 5378 5379 if (GET_CODE (real_pat) == USE 5380 || GET_CODE (real_pat) == CLOBBER 5381 || GET_CODE (real_pat) == ASM_INPUT 5382 || GET_CODE (real_pat) == ADDR_VEC 5383 || GET_CODE (real_pat) == ADDR_DIFF_VEC 5384 || asm_noperands (PATTERN (insn)) >= 0) 5385 return; 5386 5387 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate 5388 parts of it.
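   For example (illustrative), a predicated insn has the form (cond_exec (ne (reg:BI p6) (const_int 0)) (set ...)), possibly with the body wrapped in a PARALLEL of one SET plus USEs and CLOBBERs; the COND_EXEC was stripped just above, and the loop below digs the lone SET out of such a PARALLEL by hand.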
*/ 5389 5390 if (GET_CODE (set) == PARALLEL) 5391 { 5392 int i; 5393 set = XVECEXP (real_pat, 0, 0); 5394 for (i = 1; i < XVECLEN (real_pat, 0); i++) 5395 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE 5396 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER) 5397 { 5398 set = 0; 5399 break; 5400 } 5401 } 5402 5403 if (set && GET_CODE (set) != SET) 5404 set = 0; 5405 5406 type = get_attr_type (insn); 5407 5408 if (type == TYPE_F 5409 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set)))) 5410 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set))); 5411 5412 if ((type == TYPE_M || type == TYPE_A) && cond && set 5413 && REG_P (SET_DEST (set)) 5414 && GET_CODE (SET_SRC (set)) != PLUS 5415 && GET_CODE (SET_SRC (set)) != MINUS 5416 && (GET_CODE (SET_SRC (set)) != ASHIFT 5417 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode)) 5418 && (GET_CODE (SET_SRC (set)) != MEM 5419 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY) 5420 && GENERAL_REGNO_P (REGNO (SET_DEST (set)))) 5421 { 5422 if (GET_RTX_CLASS (GET_CODE (cond)) != '<' 5423 || ! REG_P (XEXP (cond, 0))) 5424 abort (); 5425 5426 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0)))) 5427 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set))); 5428 } 5429 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL)) 5430 { 5431 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5432 emit_insn_before (gen_nop (), insn); 5433 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5434 group_idx = 0; 5435 memset (last_group, 0, sizeof last_group); 5436 } 5437} 5438 5439/* Emit extra nops if they are required to work around hardware errata. */ 5440 5441static void 5442fixup_errata () 5443{ 5444 rtx insn; 5445 5446 if (! TARGET_B_STEP) 5447 return; 5448 5449 group_idx = 0; 5450 memset (last_group, 0, sizeof last_group); 5451 5452 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 5453 { 5454 if (!INSN_P (insn)) 5455 continue; 5456 5457 if (ia64_safe_type (insn) == TYPE_S) 5458 { 5459 group_idx ^= 1; 5460 memset (last_group + group_idx, 0, sizeof last_group[group_idx]); 5461 } 5462 else 5463 errata_emit_nops (insn); 5464 } 5465} 5466 5467/* Instruction scheduling support. */ 5468/* Describe one bundle. */ 5469 5470struct bundle 5471{ 5472 /* Zero if there's no possibility of a stop in this bundle other than 5473 at the end, otherwise the position of the optional stop bit. */ 5474 int possible_stop; 5475 /* The types of the three slots. */ 5476 enum attr_type t[3]; 5477 /* The pseudo op to be emitted into the assembler output. */ 5478 const char *name; 5479}; 5480 5481#define NR_BUNDLES 10 5482 5483/* A list of all available bundles. */ 5484 5485static const struct bundle bundle[NR_BUNDLES] = 5486{ 5487 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" }, 5488 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" }, 5489 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" }, 5490 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" }, 5491#if NR_BUNDLES == 10 5492 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" }, 5493 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" }, 5494#endif 5495 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" }, 5496 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" }, 5497 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" }, 5498 /* .mfi needs to occur earlier than .mlx, so that we only generate it if 5499 it matches an L type insn. Otherwise we'll try to generate L type 5500 nops. */ 5501 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" } 5502}; 5503 5504/* Describe a packet of instructions. 
Packets consist of two bundles that 5505 are visible to the hardware in one scheduling window. */ 5506 5507struct ia64_packet 5508{ 5509 const struct bundle *t1, *t2; 5510 /* Precomputed value of the first split issue in this packet if a cycle 5511 starts at its beginning. */ 5512 int first_split; 5513 /* For convenience, the insn types are replicated here so we don't have 5514 to go through T1 and T2 all the time. */ 5515 enum attr_type t[6]; 5516}; 5517 5518/* An array containing all possible packets. */ 5519#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES) 5520static struct ia64_packet packets[NR_PACKETS]; 5521 5522/* Map attr_type to a string with the name. */ 5523 5524static const char *const type_names[] = 5525{ 5526 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S" 5527}; 5528 5529/* Nonzero if we should insert stop bits into the schedule. */ 5530int ia64_final_schedule = 0; 5531 5532static int itanium_split_issue PARAMS ((const struct ia64_packet *, int)); 5533static rtx ia64_single_set PARAMS ((rtx)); 5534static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx)); 5535static void ia64_emit_insn_before PARAMS ((rtx, rtx)); 5536static void maybe_rotate PARAMS ((FILE *)); 5537static void finish_last_head PARAMS ((FILE *, int)); 5538static void rotate_one_bundle PARAMS ((FILE *)); 5539static void rotate_two_bundles PARAMS ((FILE *)); 5540static void nop_cycles_until PARAMS ((int, FILE *)); 5541static void cycle_end_fill_slots PARAMS ((FILE *)); 5542static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *)); 5543static int get_split PARAMS ((const struct ia64_packet *, int)); 5544static int find_best_insn PARAMS ((rtx *, enum attr_type *, int, 5545 const struct ia64_packet *, int)); 5546static void find_best_packet PARAMS ((int *, const struct ia64_packet **, 5547 rtx *, enum attr_type *, int)); 5548static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int)); 5549static void dump_current_packet PARAMS ((FILE *)); 5550static void schedule_stop PARAMS ((FILE *)); 5551static rtx gen_nop_type PARAMS ((enum attr_type)); 5552static void ia64_emit_nops PARAMS ((void)); 5553 5554/* Map a bundle number to its pseudo-op. */ 5555 5556const char * 5557get_bundle_name (b) 5558 int b; 5559{ 5560 return bundle[b].name; 5561} 5562 5563/* Compute the slot which will cause a split issue in packet P if the 5564 current cycle begins at slot BEGIN. */ 5565 5566static int 5567itanium_split_issue (p, begin) 5568 const struct ia64_packet *p; 5569 int begin; 5570{ 5571 int type_count[TYPE_S]; 5572 int i; 5573 int split = 6; 5574 5575 if (begin < 3) 5576 { 5577 /* Always split before and after MMF. */ 5578 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F) 5579 return 3; 5580 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F) 5581 return 3; 5582 /* Always split after MBB and BBB. */ 5583 if (p->t[1] == TYPE_B) 5584 return 3; 5585 /* Split after first bundle in MIB BBB combination. */ 5586 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B) 5587 return 3; 5588 } 5589 5590 memset (type_count, 0, sizeof type_count); 5591 for (i = begin; i < split; i++) 5592 { 5593 enum attr_type t0 = p->t[i]; 5594 /* An MLX bundle reserves the same units as an MFI bundle. */ 5595 enum attr_type t = (t0 == TYPE_L ? TYPE_F 5596 : t0 == TYPE_X ? TYPE_I 5597 : t0); 5598 5599 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and 5600 2 integer per cycle. */ 5601 int max = (t == TYPE_B ? 
3 : 2); 5602 if (type_count[t] == max) 5603 return i; 5604 5605 type_count[t]++; 5606 } 5607 return split; 5608} 5609 5610/* Return the maximum number of instructions a cpu can issue. */ 5611 5612static int 5613ia64_issue_rate () 5614{ 5615 return 6; 5616} 5617 5618/* Helper function - like single_set, but look inside COND_EXEC. */ 5619 5620static rtx 5621ia64_single_set (insn) 5622 rtx insn; 5623{ 5624 rtx x = PATTERN (insn), ret; 5625 if (GET_CODE (x) == COND_EXEC) 5626 x = COND_EXEC_CODE (x); 5627 if (GET_CODE (x) == SET) 5628 return x; 5629 5630 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. 5631 Although they are not classical single set, the second set is there just 5632 to protect it from moving past FP-relative stack accesses. */ 5633 switch (recog_memoized (insn)) 5634 { 5635 case CODE_FOR_prologue_allocate_stack: 5636 case CODE_FOR_epilogue_deallocate_stack: 5637 ret = XVECEXP (x, 0, 0); 5638 break; 5639 5640 default: 5641 ret = single_set_2 (insn, x); 5642 break; 5643 } 5644 5645 return ret; 5646} 5647 5648/* Adjust the cost of a scheduling dependency. Return the new cost of 5649 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 5650 5651static int 5652ia64_adjust_cost (insn, link, dep_insn, cost) 5653 rtx insn, link, dep_insn; 5654 int cost; 5655{ 5656 enum attr_type dep_type; 5657 enum attr_itanium_class dep_class; 5658 enum attr_itanium_class insn_class; 5659 rtx dep_set, set, src, addr; 5660 5661 if (GET_CODE (PATTERN (insn)) == CLOBBER 5662 || GET_CODE (PATTERN (insn)) == USE 5663 || GET_CODE (PATTERN (dep_insn)) == CLOBBER 5664 || GET_CODE (PATTERN (dep_insn)) == USE 5665 /* @@@ Not accurate for indirect calls. */ 5666 || GET_CODE (insn) == CALL_INSN 5667 || ia64_safe_type (insn) == TYPE_S) 5668 return 0; 5669 5670 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT 5671 || REG_NOTE_KIND (link) == REG_DEP_ANTI) 5672 return 0; 5673 5674 dep_type = ia64_safe_type (dep_insn); 5675 dep_class = ia64_safe_itanium_class (dep_insn); 5676 insn_class = ia64_safe_itanium_class (insn); 5677 5678 /* Compares that feed a conditional branch can execute in the same 5679 cycle. */ 5680 dep_set = ia64_single_set (dep_insn); 5681 set = ia64_single_set (insn); 5682 5683 if (dep_type != TYPE_F 5684 && dep_set 5685 && GET_CODE (SET_DEST (dep_set)) == REG 5686 && PR_REG (REGNO (SET_DEST (dep_set))) 5687 && GET_CODE (insn) == JUMP_INSN) 5688 return 0; 5689 5690 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM) 5691 { 5692 /* ??? Can't find any information in the documentation about whether 5693 a sequence 5694 st [rx] = ra 5695 ld rb = [ry] 5696 splits issue. Assume it doesn't. */ 5697 return 0; 5698 } 5699 5700 src = set ? SET_SRC (set) : 0; 5701 addr = 0; 5702 if (set) 5703 { 5704 if (GET_CODE (SET_DEST (set)) == MEM) 5705 addr = XEXP (SET_DEST (set), 0); 5706 else if (GET_CODE (SET_DEST (set)) == SUBREG 5707 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM) 5708 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0); 5709 else 5710 { 5711 addr = src; 5712 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0) 5713 addr = XVECEXP (addr, 0, 0); 5714 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) 5715 addr = XEXP (addr, 0); 5716 5717 /* Note that LO_SUM is used for GOT loads.
*/ 5718 if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM) 5719 addr = XEXP (addr, 0); 5720 else 5721 addr = 0; 5722 } 5723 } 5724 5725 if (addr && GET_CODE (addr) == POST_MODIFY) 5726 addr = XEXP (addr, 0); 5727 5728 set = ia64_single_set (dep_insn); 5729 5730 if ((dep_class == ITANIUM_CLASS_IALU 5731 || dep_class == ITANIUM_CLASS_ILOG 5732 || dep_class == ITANIUM_CLASS_LD) 5733 && (insn_class == ITANIUM_CLASS_LD 5734 || insn_class == ITANIUM_CLASS_ST)) 5735 { 5736 if (! addr || ! set) 5737 abort (); 5738 /* This isn't completely correct - an IALU that feeds an address has 5739 a latency of 1 cycle if it's issued in an M slot, but 2 cycles 5740 otherwise. Unfortunately there's no good way to describe this. */ 5741 if (reg_overlap_mentioned_p (SET_DEST (set), addr)) 5742 return cost + 1; 5743 } 5744 5745 if ((dep_class == ITANIUM_CLASS_IALU 5746 || dep_class == ITANIUM_CLASS_ILOG 5747 || dep_class == ITANIUM_CLASS_LD) 5748 && (insn_class == ITANIUM_CLASS_MMMUL 5749 || insn_class == ITANIUM_CLASS_MMSHF 5750 || insn_class == ITANIUM_CLASS_MMSHFI)) 5751 return 3; 5752 5753 if (dep_class == ITANIUM_CLASS_FMAC 5754 && (insn_class == ITANIUM_CLASS_FMISC 5755 || insn_class == ITANIUM_CLASS_FCVTFX 5756 || insn_class == ITANIUM_CLASS_XMPY)) 5757 return 7; 5758 5759 if ((dep_class == ITANIUM_CLASS_FMAC 5760 || dep_class == ITANIUM_CLASS_FMISC 5761 || dep_class == ITANIUM_CLASS_FCVTFX 5762 || dep_class == ITANIUM_CLASS_XMPY) 5763 && insn_class == ITANIUM_CLASS_STF) 5764 return 8; 5765 5766 /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, 5767 but HP engineers say any non-MM operation. */ 5768 if ((dep_class == ITANIUM_CLASS_MMMUL 5769 || dep_class == ITANIUM_CLASS_MMSHF 5770 || dep_class == ITANIUM_CLASS_MMSHFI) 5771 && insn_class != ITANIUM_CLASS_MMMUL 5772 && insn_class != ITANIUM_CLASS_MMSHF 5773 && insn_class != ITANIUM_CLASS_MMSHFI) 5774 return 4; 5775 5776 return cost; 5777} 5778 5779/* Describe the current state of the Itanium pipeline. */ 5780static struct 5781{ 5782 /* The first slot that is used in the current cycle. */ 5783 int first_slot; 5784 /* The next slot to fill. */ 5785 int cur; 5786 /* The packet we have selected for the current issue window. */ 5787 const struct ia64_packet *packet; 5788 /* The position of the split issue that occurs due to issue width 5789 limitations (6 if there's no split issue). */ 5790 int split; 5791 /* Record data about the insns scheduled so far in the same issue 5792 window. The elements up to but not including FIRST_SLOT belong 5793 to the previous cycle, the ones starting with FIRST_SLOT belong 5794 to the current cycle. */ 5795 enum attr_type types[6]; 5796 rtx insns[6]; 5797 int stopbit[6]; 5798 /* Nonzero if we decided to schedule a stop bit. */ 5799 int last_was_stop; 5800} sched_data; 5801 5802/* Temporary arrays; they have enough elements to hold all insns that 5803 can be ready at the same time while scheduling the current block. 5804 SCHED_READY can hold ready insns, SCHED_TYPES their types. */ 5805static rtx *sched_ready; 5806static enum attr_type *sched_types; 5807 5808/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT 5809 of packet P.
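   For example, an MII/MFI packet has slot types { M, I, I, M, F, I };
   an A-type insn (a simple add) may go in any of the M or I slots,
   while an F-type insn fits only slot 4.  That is the plain type test
   at the end of this function; the unit0 and multiway-branch checks
   below restrict the choice further.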
*/ 5810 5811static int 5812insn_matches_slot (p, itype, slot, insn) 5813 const struct ia64_packet *p; 5814 enum attr_type itype; 5815 int slot; 5816 rtx insn; 5817{ 5818 enum attr_itanium_requires_unit0 u0; 5819 enum attr_type stype = p->t[slot]; 5820 5821 if (insn) 5822 { 5823 u0 = ia64_safe_itanium_requires_unit0 (insn); 5824 if (u0 == ITANIUM_REQUIRES_UNIT0_YES) 5825 { 5826 int i; 5827 for (i = sched_data.first_slot; i < slot; i++) 5828 if (p->t[i] == stype 5829 || (stype == TYPE_F && p->t[i] == TYPE_L) 5830 || (stype == TYPE_I && p->t[i] == TYPE_X)) 5831 return 0; 5832 } 5833 if (GET_CODE (insn) == CALL_INSN) 5834 { 5835 /* Reject calls in multiway branch packets. We want to limit 5836 the number of multiway branches we generate (since the branch 5837 predictor is limited), and this seems to work fairly well. 5838 (If we didn't do this, we'd have to add another test here to 5839 force calls into the third slot of the bundle.) */ 5840 if (slot < 3) 5841 { 5842 if (p->t[1] == TYPE_B) 5843 return 0; 5844 } 5845 else 5846 { 5847 if (p->t[4] == TYPE_B) 5848 return 0; 5849 } 5850 } 5851 } 5852 5853 if (itype == stype) 5854 return 1; 5855 if (itype == TYPE_A) 5856 return stype == TYPE_M || stype == TYPE_I; 5857 return 0; 5858} 5859 5860/* Like emit_insn_before, but skip cycle_display notes. 5861 ??? When cycle display notes are implemented, update this. */ 5862 5863static void 5864ia64_emit_insn_before (insn, before) 5865 rtx insn, before; 5866{ 5867 emit_insn_before (insn, before); 5868} 5869 5870/* When rotating a bundle out of the issue window, insert a bundle selector 5871 insn in front of it. DUMP is the scheduling dump file or NULL. START 5872 is either 0 or 3, depending on whether we want to emit a bundle selector 5873 for the first bundle or the second bundle in the current issue window. 5874 5875 The selector insns are emitted this late because the selected packet can 5876 be changed until parts of it get rotated out. */ 5877 5878static void 5879finish_last_head (dump, start) 5880 FILE *dump; 5881 int start; 5882{ 5883 const struct ia64_packet *p = sched_data.packet; 5884 const struct bundle *b = start == 0 ? p->t1 : p->t2; 5885 int bundle_type = b - bundle; 5886 rtx insn; 5887 int i; 5888 5889 if (! ia64_final_schedule) 5890 return; 5891 5892 for (i = start; sched_data.insns[i] == 0; i++) 5893 if (i == start + 3) 5894 abort (); 5895 insn = sched_data.insns[i]; 5896 5897 if (dump) 5898 fprintf (dump, "// Emitting template before %d: %s\n", 5899 INSN_UID (insn), b->name); 5900 5901 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn); 5902} 5903 5904/* We can't schedule more insns this cycle. Fix up the scheduling state 5905 and advance FIRST_SLOT and CUR. 5906 We have to distribute the insns that are currently found between 5907 FIRST_SLOT and CUR into the slots of the packet we have selected. So 5908 far, they are stored successively in the fields starting at FIRST_SLOT; 5909 now they must be moved to the correct slots. 5910 DUMP is the current scheduling dump file, or NULL. 
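   For example (hypothetical), if the selected packet is MFI/MII and the
   cycle contains an M insn followed by an I insn, the I insn cannot
   stay in slot 1 (the F slot); it is moved to slot 2 and slot 1 is left
   empty, to be filled with an F nop when ia64_emit_nops runs.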
*/ 5911 5912static void 5913cycle_end_fill_slots (dump) 5914 FILE *dump; 5915{ 5916 const struct ia64_packet *packet = sched_data.packet; 5917 int slot, i; 5918 enum attr_type tmp_types[6]; 5919 rtx tmp_insns[6]; 5920 5921 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type)); 5922 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx)); 5923 5924 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++) 5925 { 5926 enum attr_type t = tmp_types[i]; 5927 if (t != ia64_safe_type (tmp_insns[i])) 5928 abort (); 5929 while (! insn_matches_slot (packet, t, slot, tmp_insns[i])) 5930 { 5931 if (slot > sched_data.split) 5932 abort (); 5933 if (dump) 5934 fprintf (dump, "// Packet needs %s, have %s\n", 5935 type_names[packet->t[slot]], type_names[t]); 5936 sched_data.types[slot] = packet->t[slot]; 5937 sched_data.insns[slot] = 0; 5938 sched_data.stopbit[slot] = 0; 5939 5940 /* ??? TYPE_L instructions always fill up two slots, but we don't 5941 support TYPE_L nops. */ 5942 if (packet->t[slot] == TYPE_L) 5943 abort (); 5944 5945 slot++; 5946 } 5947 5948 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the 5949 actual slot type later. */ 5950 sched_data.types[slot] = packet->t[slot]; 5951 sched_data.insns[slot] = tmp_insns[i]; 5952 sched_data.stopbit[slot] = 0; 5953 slot++; 5954 5955 /* TYPE_L instructions always fill up two slots. */ 5956 if (t == TYPE_L) 5957 { 5958 sched_data.types[slot] = packet->t[slot]; 5959 sched_data.insns[slot] = 0; 5960 sched_data.stopbit[slot] = 0; 5961 slot++; 5962 } 5963 } 5964 5965 /* This isn't right - there's no need to pad out until the forced split; 5966 the CPU will automatically split if an insn isn't ready. */ 5967#if 0 5968 while (slot < sched_data.split) 5969 { 5970 sched_data.types[slot] = packet->t[slot]; 5971 sched_data.insns[slot] = 0; 5972 sched_data.stopbit[slot] = 0; 5973 slot++; 5974 } 5975#endif 5976 5977 sched_data.first_slot = sched_data.cur = slot; 5978} 5979 5980/* Bundle rotations, as described in the Itanium optimization manual. 5981 We can rotate either one or both bundles out of the issue window. 5982 DUMP is the current scheduling dump file, or NULL. */ 5983 5984static void 5985rotate_one_bundle (dump) 5986 FILE *dump; 5987{ 5988 if (dump) 5989 fprintf (dump, "// Rotating one bundle.\n"); 5990 5991 finish_last_head (dump, 0); 5992 if (sched_data.cur > 3) 5993 { 5994 sched_data.cur -= 3; 5995 sched_data.first_slot -= 3; 5996 memmove (sched_data.types, 5997 sched_data.types + 3, 5998 sched_data.cur * sizeof *sched_data.types); 5999 memmove (sched_data.stopbit, 6000 sched_data.stopbit + 3, 6001 sched_data.cur * sizeof *sched_data.stopbit); 6002 memmove (sched_data.insns, 6003 sched_data.insns + 3, 6004 sched_data.cur * sizeof *sched_data.insns); 6005 sched_data.packet 6006 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES]; 6007 } 6008 else 6009 { 6010 sched_data.cur = 0; 6011 sched_data.first_slot = 0; 6012 } 6013} 6014 6015static void 6016rotate_two_bundles (dump) 6017 FILE *dump; 6018{ 6019 if (dump) 6020 fprintf (dump, "// Rotating two bundles.\n"); 6021 6022 if (sched_data.cur == 0) 6023 return; 6024 6025 finish_last_head (dump, 0); 6026 if (sched_data.cur > 3) 6027 finish_last_head (dump, 3); 6028 sched_data.cur = 0; 6029 sched_data.first_slot = 0; 6030} 6031 6032/* We're beginning a new block. Initialize data structures as necessary. 
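   On the first call we also build the packet table: every bundle is
   paired with every other, so the packet for bundles (b1, b2) lives at
   index b1 * NR_BUNDLES + b2.  rotate_one_bundle above depends on this
   layout when it selects &packets[(packet->t2 - bundle) * NR_BUNDLES].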
*/ 6033 6034static void 6035ia64_sched_init (dump, sched_verbose, max_ready) 6036 FILE *dump ATTRIBUTE_UNUSED; 6037 int sched_verbose ATTRIBUTE_UNUSED; 6038 int max_ready; 6039{ 6040 static int initialized = 0; 6041 6042 if (! initialized) 6043 { 6044 int b1, b2, i; 6045 6046 initialized = 1; 6047 6048 for (i = b1 = 0; b1 < NR_BUNDLES; b1++) 6049 { 6050 const struct bundle *t1 = bundle + b1; 6051 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++) 6052 { 6053 const struct bundle *t2 = bundle + b2; 6054 6055 packets[i].t1 = t1; 6056 packets[i].t2 = t2; 6057 } 6058 } 6059 for (i = 0; i < NR_PACKETS; i++) 6060 { 6061 int j; 6062 for (j = 0; j < 3; j++) 6063 packets[i].t[j] = packets[i].t1->t[j]; 6064 for (j = 0; j < 3; j++) 6065 packets[i].t[j + 3] = packets[i].t2->t[j]; 6066 packets[i].first_split = itanium_split_issue (packets + i, 0); 6067 } 6068 6069 } 6070 6071 init_insn_group_barriers (); 6072 6073 memset (&sched_data, 0, sizeof sched_data); 6074 sched_types = (enum attr_type *) xmalloc (max_ready 6075 * sizeof (enum attr_type)); 6076 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx)); 6077} 6078 6079/* See if the packet P can match the insns we have already scheduled. Return 6080 nonzero if so. In *PSLOT, we store the first slot that is available for 6081 more instructions if we choose this packet. 6082 SPLIT holds the first slot at which a split issue occurs; we must stay 6083 below it, since scheduling into or past it would use more than one cycle. */ 6084 6085static int 6086packet_matches_p (p, split, pslot) 6087 const struct ia64_packet *p; 6088 int split; 6089 int *pslot; 6090{ 6091 int filled = sched_data.cur; 6092 int first = sched_data.first_slot; 6093 int i, slot; 6094 6095 /* First, check if the first of the two bundles must be a specific one (due 6096 to stop bits). */ 6097 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1) 6098 return 0; 6099 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2) 6100 return 0; 6101 6102 for (i = 0; i < first; i++) 6103 if (! insn_matches_slot (p, sched_data.types[i], i, 6104 sched_data.insns[i])) 6105 return 0; 6106 for (i = slot = first; i < filled; i++) 6107 { 6108 while (slot < split) 6109 { 6110 if (insn_matches_slot (p, sched_data.types[i], slot, 6111 sched_data.insns[i])) 6112 break; 6113 slot++; 6114 } 6115 if (slot == split) 6116 return 0; 6117 slot++; 6118 } 6119 6120 if (pslot) 6121 *pslot = slot; 6122 return 1; 6123} 6124 6125/* A frontend for itanium_split_issue. For a packet P and a slot 6126 number FIRST that describes the start of the current clock cycle, 6127 return the slot number of the first split issue. This function 6128 uses the cached number found in P if possible. */ 6129 6130static int 6131get_split (p, first) 6132 const struct ia64_packet *p; 6133 int first; 6134{ 6135 if (first == 0) 6136 return p->first_split; 6137 return itanium_split_issue (p, first); 6138} 6139 6140/* Given N_READY insns in the array READY, whose types are found in the 6141 corresponding array TYPES, return the insn that is best suited to be 6142 scheduled in slot SLOT of packet P. */ 6143 6144static int 6145find_best_insn (ready, types, n_ready, p, slot) 6146 rtx *ready; 6147 enum attr_type *types; 6148 int n_ready; 6149 const struct ia64_packet *p; 6150 int slot; 6151{ 6152 int best = -1; 6153 int best_pri = 0; 6154 while (n_ready-- > 0) 6155 { 6156 rtx insn = ready[n_ready]; 6157 if (!
insn) 6158 continue; 6159 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri) 6160 break; 6161 /* If we have equally good insns, one of which has a stricter 6162 slot requirement, prefer the one with the stricter requirement. */ 6163 if (best >= 0 && types[n_ready] == TYPE_A) 6164 continue; 6165 if (insn_matches_slot (p, types[n_ready], slot, insn)) 6166 { 6167 best = n_ready; 6168 best_pri = INSN_PRIORITY (ready[best]); 6169 6170 /* If there's no way we could get a stricter requirement, stop 6171 looking now. */ 6172 if (types[n_ready] != TYPE_A 6173 && ia64_safe_itanium_requires_unit0 (ready[n_ready])) 6174 break; 6175 break; 6176 } 6177 } 6178 return best; 6179} 6180 6181/* Select the best packet to use given the current scheduler state and the 6182 current ready list. 6183 READY is an array holding N_READY ready insns; TYPES is a corresponding 6184 array that holds their types. Store the best packet in *PPACKET and the 6185 number of insns that can be scheduled in the current cycle in *PBEST. */ 6186 6187static void 6188find_best_packet (pbest, ppacket, ready, types, n_ready) 6189 int *pbest; 6190 const struct ia64_packet **ppacket; 6191 rtx *ready; 6192 enum attr_type *types; 6193 int n_ready; 6194{ 6195 int first = sched_data.first_slot; 6196 int best = 0; 6197 int lowest_end = 6; 6198 const struct ia64_packet *best_packet = NULL; 6199 int i; 6200 6201 for (i = 0; i < NR_PACKETS; i++) 6202 { 6203 const struct ia64_packet *p = packets + i; 6204 int slot; 6205 int split = get_split (p, first); 6206 int win = 0; 6207 int first_slot, last_slot; 6208 int b_nops = 0; 6209 6210 if (! packet_matches_p (p, split, &first_slot)) 6211 continue; 6212 6213 memcpy (sched_ready, ready, n_ready * sizeof (rtx)); 6214 6215 win = 0; 6216 last_slot = 6; 6217 for (slot = first_slot; slot < split; slot++) 6218 { 6219 int insn_nr; 6220 6221 /* Disallow a degenerate case where the first bundle doesn't 6222 contain anything but NOPs! */ 6223 if (first_slot == 0 && win == 0 && slot == 3) 6224 { 6225 win = -1; 6226 break; 6227 } 6228 6229 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot); 6230 if (insn_nr >= 0) 6231 { 6232 sched_ready[insn_nr] = 0; 6233 last_slot = slot; 6234 win++; 6235 } 6236 else if (p->t[slot] == TYPE_B) 6237 b_nops++; 6238 } 6239 /* We must disallow MBB/BBB packets if any of their B slots would be 6240 filled with nops. */ 6241 if (last_slot < 3) 6242 { 6243 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2)) 6244 win = -1; 6245 } 6246 else 6247 { 6248 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5)) 6249 win = -1; 6250 } 6251 6252 if (win > best 6253 || (win == best && last_slot < lowest_end)) 6254 { 6255 best = win; 6256 lowest_end = last_slot; 6257 best_packet = p; 6258 } 6259 } 6260 *pbest = best; 6261 *ppacket = best_packet; 6262} 6263 6264/* Reorder the ready list so that the insns that can be issued in this cycle 6265 are found in the correct order at the end of the list. 6266 DUMP is the scheduling dump file, or NULL. READY points to the start, 6267 E_READY to the end of the ready list. MAY_FAIL determines what should be 6268 done if no insns can be scheduled in this cycle: if it is zero, we abort, 6269 otherwise we return 0. 6270 Return 1 if any insns can be scheduled in this cycle. 
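   A hypothetical example: with ready list { add, ld, br } and two free
   M/I slots in the chosen packet, the ld is picked for the M slot and
   the add for the I slot, leaving the array as { br, add, ld }; since
   the scheduler issues from the back of the list, the ld goes first.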
*/ 6271 6272static int 6273itanium_reorder (dump, ready, e_ready, may_fail) 6274 FILE *dump; 6275 rtx *ready; 6276 rtx *e_ready; 6277 int may_fail; 6278{ 6279 const struct ia64_packet *best_packet; 6280 int n_ready = e_ready - ready; 6281 int first = sched_data.first_slot; 6282 int i, best, best_split, filled; 6283 6284 for (i = 0; i < n_ready; i++) 6285 sched_types[i] = ia64_safe_type (ready[i]); 6286 6287 find_best_packet (&best, &best_packet, ready, sched_types, n_ready); 6288 6289 if (best == 0) 6290 { 6291 if (may_fail) 6292 return 0; 6293 abort (); 6294 } 6295 6296 if (dump) 6297 { 6298 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n", 6299 best_packet->t1->name, 6300 best_packet->t2 ? best_packet->t2->name : NULL, best); 6301 } 6302 6303 best_split = itanium_split_issue (best_packet, first); 6304 packet_matches_p (best_packet, best_split, &filled); 6305 6306 for (i = filled; i < best_split; i++) 6307 { 6308 int insn_nr; 6309 6310 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i); 6311 if (insn_nr >= 0) 6312 { 6313 rtx insn = ready[insn_nr]; 6314 memmove (ready + insn_nr, ready + insn_nr + 1, 6315 (n_ready - insn_nr - 1) * sizeof (rtx)); 6316 memmove (sched_types + insn_nr, sched_types + insn_nr + 1, 6317 (n_ready - insn_nr - 1) * sizeof (enum attr_type)); 6318 ready[--n_ready] = insn; 6319 } 6320 } 6321 6322 sched_data.packet = best_packet; 6323 sched_data.split = best_split; 6324 return 1; 6325} 6326 6327/* Dump information about the current scheduling state to file DUMP. */ 6328 6329static void 6330dump_current_packet (dump) 6331 FILE *dump; 6332{ 6333 int i; 6334 fprintf (dump, "// %d slots filled:", sched_data.cur); 6335 for (i = 0; i < sched_data.first_slot; i++) 6336 { 6337 rtx insn = sched_data.insns[i]; 6338 fprintf (dump, " %s", type_names[sched_data.types[i]]); 6339 if (insn) 6340 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]); 6341 if (sched_data.stopbit[i]) 6342 fprintf (dump, " ;;"); 6343 } 6344 fprintf (dump, " :::"); 6345 for (i = sched_data.first_slot; i < sched_data.cur; i++) 6346 { 6347 rtx insn = sched_data.insns[i]; 6348 enum attr_type t = ia64_safe_type (insn); 6349 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]); 6350 } 6351 fprintf (dump, "\n"); 6352} 6353 6354/* Schedule a stop bit. DUMP is the current scheduling dump file, or 6355 NULL. */ 6356 6357static void 6358schedule_stop (dump) 6359 FILE *dump; 6360{ 6361 const struct ia64_packet *best = sched_data.packet; 6362 int i; 6363 int best_stop = 6; 6364 6365 if (dump) 6366 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur); 6367 6368 if (sched_data.cur == 0) 6369 { 6370 if (dump) 6371 fprintf (dump, "// At start of bundle, so nothing to do.\n"); 6372 6373 rotate_two_bundles (NULL); 6374 return; 6375 } 6376 6377 for (i = -1; i < NR_PACKETS; i++) 6378 { 6379 /* This is a slight hack to give the current packet the first chance. 6380 This is done to avoid e.g. switching from MIB to MBB bundles. */ 6381 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet); 6382 int split = get_split (p, sched_data.first_slot); 6383 const struct bundle *compare; 6384 int next, stoppos; 6385 6386 if (! packet_matches_p (p, split, &next)) 6387 continue; 6388 6389 compare = next > 3 ? 
p->t2 : p->t1; 6390 6391 stoppos = 3; 6392 if (compare->possible_stop) 6393 stoppos = compare->possible_stop; 6394 if (next > 3) 6395 stoppos += 3; 6396 6397 if (stoppos < next || stoppos >= best_stop) 6398 { 6399 if (compare->possible_stop == 0) 6400 continue; 6401 stoppos = (next > 3 ? 6 : 3); 6402 } 6403 if (stoppos < next || stoppos >= best_stop) 6404 continue; 6405 6406 if (dump) 6407 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n", 6408 best->t1->name, best->t2->name, p->t1->name, p->t2->name, 6409 stoppos); 6410 6411 best_stop = stoppos; 6412 best = p; 6413 } 6414 6415 sched_data.packet = best; 6416 cycle_end_fill_slots (dump); 6417 while (sched_data.cur < best_stop) 6418 { 6419 sched_data.types[sched_data.cur] = best->t[sched_data.cur]; 6420 sched_data.insns[sched_data.cur] = 0; 6421 sched_data.stopbit[sched_data.cur] = 0; 6422 sched_data.cur++; 6423 } 6424 sched_data.stopbit[sched_data.cur - 1] = 1; 6425 sched_data.first_slot = best_stop; 6426 6427 if (dump) 6428 dump_current_packet (dump); 6429} 6430 6431/* If necessary, perform one or two rotations on the scheduling state. 6432 This should only be called if we are starting a new cycle. */ 6433 6434static void 6435maybe_rotate (dump) 6436 FILE *dump; 6437{ 6438 cycle_end_fill_slots (dump); 6439 if (sched_data.cur == 6) 6440 rotate_two_bundles (dump); 6441 else if (sched_data.cur >= 3) 6442 rotate_one_bundle (dump); 6443 sched_data.first_slot = sched_data.cur; 6444} 6445 6446/* The clock cycle when ia64_sched_reorder was last called. */ 6447static int prev_cycle; 6448 6449/* The first insn scheduled in the previous cycle. This is the saved 6450 value of sched_data.first_slot. */ 6451static int prev_first; 6452 6453/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to 6454 pad out the delay between MM (shifts, etc.) and integer operations. */ 6455 6456static void 6457nop_cycles_until (clock_var, dump) 6458 int clock_var; 6459 FILE *dump; 6460{ 6461 int prev_clock = prev_cycle; 6462 int cycles_left = clock_var - prev_clock; 6463 bool did_stop = false; 6464 6465 /* Finish the previous cycle; pad it out with NOPs. */ 6466 if (sched_data.cur == 3) 6467 { 6468 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6469 did_stop = true; 6470 maybe_rotate (dump); 6471 } 6472 else if (sched_data.cur > 0) 6473 { 6474 int need_stop = 0; 6475 int split = itanium_split_issue (sched_data.packet, prev_first); 6476 6477 if (sched_data.cur < 3 && split > 3) 6478 { 6479 split = 3; 6480 need_stop = 1; 6481 } 6482 6483 if (split > sched_data.cur) 6484 { 6485 int i; 6486 for (i = sched_data.cur; i < split; i++) 6487 { 6488 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); 6489 sched_data.types[i] = sched_data.packet->t[i]; 6490 sched_data.insns[i] = t; 6491 sched_data.stopbit[i] = 0; 6492 } 6493 sched_data.cur = split; 6494 } 6495 6496 if (! 
need_stop && sched_data.cur > 0 && sched_data.cur < 6 6497 && cycles_left > 1) 6498 { 6499 int i; 6500 for (i = sched_data.cur; i < 6; i++) 6501 { 6502 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); 6503 sched_data.types[i] = sched_data.packet->t[i]; 6504 sched_data.insns[i] = t; 6505 sched_data.stopbit[i] = 0; 6506 } 6507 sched_data.cur = 6; 6508 cycles_left--; 6509 need_stop = 1; 6510 } 6511 6512 if (need_stop || sched_data.cur == 6) 6513 { 6514 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6515 did_stop = true; 6516 } 6517 maybe_rotate (dump); 6518 } 6519 6520 cycles_left--; 6521 while (cycles_left > 0) 6522 { 6523 sched_emit_insn (gen_bundle_selector (GEN_INT (0))); 6524 sched_emit_insn (gen_nop_type (TYPE_M)); 6525 sched_emit_insn (gen_nop_type (TYPE_I)); 6526 if (cycles_left > 1) 6527 { 6528 sched_emit_insn (gen_insn_group_barrier (GEN_INT (2))); 6529 cycles_left--; 6530 } 6531 sched_emit_insn (gen_nop_type (TYPE_I)); 6532 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6533 did_stop = true; 6534 cycles_left--; 6535 } 6536 6537 if (did_stop) 6538 init_insn_group_barriers (); 6539} 6540 6541/* We are about to begin issuing insns for this clock cycle. 6542 Override the default sort algorithm to better slot instructions. */ 6543 6544static int 6545ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready, 6546 reorder_type, clock_var) 6547 FILE *dump ATTRIBUTE_UNUSED; 6548 int sched_verbose ATTRIBUTE_UNUSED; 6549 rtx *ready; 6550 int *pn_ready; 6551 int reorder_type, clock_var; 6552{ 6553 int n_asms; 6554 int n_ready = *pn_ready; 6555 rtx *e_ready = ready + n_ready; 6556 rtx *insnp; 6557 6558 if (sched_verbose) 6559 { 6560 fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type); 6561 dump_current_packet (dump); 6562 } 6563 6564 /* Work around the pipeline flush that will occur if the results of 6565 an MM instruction are accessed before the result is ready. Intel 6566 documentation says this only happens with IALU, ISHF, ILOG, LD, 6567 and ST consumers, but experimental evidence shows that *any* non-MM 6568 type instruction will incur the flush. */ 6569 if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule) 6570 { 6571 for (insnp = ready; insnp < e_ready; insnp++) 6572 { 6573 rtx insn = *insnp, link; 6574 enum attr_itanium_class t = ia64_safe_itanium_class (insn); 6575 6576 if (t == ITANIUM_CLASS_MMMUL 6577 || t == ITANIUM_CLASS_MMSHF 6578 || t == ITANIUM_CLASS_MMSHFI) 6579 continue; 6580 6581 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) 6582 if (REG_NOTE_KIND (link) == 0) 6583 { 6584 rtx other = XEXP (link, 0); 6585 enum attr_itanium_class t0 = ia64_safe_itanium_class (other); 6586 if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL) 6587 { 6588 nop_cycles_until (clock_var, sched_verbose ? dump : NULL); 6589 goto out; 6590 } 6591 } 6592 } 6593 } 6594 out: 6595 6596 prev_first = sched_data.first_slot; 6597 prev_cycle = clock_var; 6598 6599 if (reorder_type == 0) 6600 maybe_rotate (sched_verbose ? dump : NULL); 6601 6602 /* First, move all USEs, CLOBBERs and other crud out of the way.
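   Inline asms are collected at the front of the ready list so that the
   remaining insns can be bundled normally; any other TYPE_UNKNOWN insn
   is moved to the very end and issued immediately, by itself.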
*/ 6603 n_asms = 0; 6604 for (insnp = ready; insnp < e_ready; insnp++) 6605 if (insnp < e_ready) 6606 { 6607 rtx insn = *insnp; 6608 enum attr_type t = ia64_safe_type (insn); 6609 if (t == TYPE_UNKNOWN) 6610 { 6611 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6612 || asm_noperands (PATTERN (insn)) >= 0) 6613 { 6614 rtx lowest = ready[n_asms]; 6615 ready[n_asms] = insn; 6616 *insnp = lowest; 6617 n_asms++; 6618 } 6619 else 6620 { 6621 rtx highest = ready[n_ready - 1]; 6622 ready[n_ready - 1] = insn; 6623 *insnp = highest; 6624 if (ia64_final_schedule && group_barrier_needed_p (insn)) 6625 { 6626 schedule_stop (sched_verbose ? dump : NULL); 6627 sched_data.last_was_stop = 1; 6628 maybe_rotate (sched_verbose ? dump : NULL); 6629 } 6630 6631 return 1; 6632 } 6633 } 6634 } 6635 if (n_asms < n_ready) 6636 { 6637 /* Some normal insns to process. Skip the asms. */ 6638 ready += n_asms; 6639 n_ready -= n_asms; 6640 } 6641 else if (n_ready > 0) 6642 { 6643 /* Only asm insns left. */ 6644 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1])) 6645 { 6646 schedule_stop (sched_verbose ? dump : NULL); 6647 sched_data.last_was_stop = 1; 6648 maybe_rotate (sched_verbose ? dump : NULL); 6649 } 6650 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6651 return 1; 6652 } 6653 6654 if (ia64_final_schedule) 6655 { 6656 int nr_need_stop = 0; 6657 6658 for (insnp = ready; insnp < e_ready; insnp++) 6659 if (safe_group_barrier_needed_p (*insnp)) 6660 nr_need_stop++; 6661 6662 /* Schedule a stop bit if 6663 - all insns require a stop bit, or 6664 - we are starting a new cycle and _any_ insns require a stop bit. 6665 The reason for the latter is that if our schedule is accurate, then 6666 the additional stop won't decrease performance at this point (since 6667 there's a split issue at this point anyway), but it gives us more 6668 freedom when scheduling the currently ready insns. */ 6669 if ((reorder_type == 0 && nr_need_stop) 6670 || (reorder_type == 1 && n_ready == nr_need_stop)) 6671 { 6672 schedule_stop (sched_verbose ? dump : NULL); 6673 sched_data.last_was_stop = 1; 6674 maybe_rotate (sched_verbose ? dump : NULL); 6675 if (reorder_type == 1) 6676 return 0; 6677 } 6678 else 6679 { 6680 int deleted = 0; 6681 insnp = e_ready; 6682 /* Move down everything that needs a stop bit, preserving relative 6683 order. */ 6684 while (insnp-- > ready + deleted) 6685 while (insnp >= ready + deleted) 6686 { 6687 rtx insn = *insnp; 6688 if (! safe_group_barrier_needed_p (insn)) 6689 break; 6690 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 6691 *ready = insn; 6692 deleted++; 6693 } 6694 n_ready -= deleted; 6695 ready += deleted; 6696 if (deleted != nr_need_stop) 6697 abort (); 6698 } 6699 } 6700 6701 return itanium_reorder (sched_verbose ? dump : NULL, 6702 ready, e_ready, reorder_type == 1); 6703} 6704 6705static int 6706ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) 6707 FILE *dump; 6708 int sched_verbose; 6709 rtx *ready; 6710 int *pn_ready; 6711 int clock_var; 6712{ 6713 return ia64_internal_sched_reorder (dump, sched_verbose, ready, 6714 pn_ready, 0, clock_var); 6715} 6716 6717/* Like ia64_sched_reorder, but called after issuing each insn. 6718 Override the default sort algorithm to better slot instructions. 
*/ 6719 6720static int 6721ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) 6722 FILE *dump ATTRIBUTE_UNUSED; 6723 int sched_verbose ATTRIBUTE_UNUSED; 6724 rtx *ready; 6725 int *pn_ready; 6726 int clock_var; 6727{ 6728 if (sched_data.last_was_stop) 6729 return 0; 6730 6731 /* Detect one special case and try to optimize it. 6732 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs, 6733 then we can get better code by transforming this to 1.MFB;; 2.MIx. */ 6734 if (sched_data.first_slot == 1 6735 && sched_data.stopbit[0] 6736 && ((sched_data.cur == 4 6737 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A) 6738 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A) 6739 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A)) 6740 || (sched_data.cur == 3 6741 && (sched_data.types[1] == TYPE_M 6742 || sched_data.types[1] == TYPE_A) 6743 && (sched_data.types[2] != TYPE_M 6744 && sched_data.types[2] != TYPE_I 6745 && sched_data.types[2] != TYPE_A)))) 6746 6747 { 6748 int i, best; 6749 rtx stop = sched_data.insns[1]; 6750 6751 /* Search backward for the stop bit that must be there. */ 6752 while (1) 6753 { 6754 int insn_code; 6755 6756 stop = PREV_INSN (stop); 6757 if (GET_CODE (stop) != INSN) 6758 abort (); 6759 insn_code = recog_memoized (stop); 6760 6761 /* Ignore .pred.rel.mutex. 6762 6763 ??? Update this to ignore cycle display notes too 6764 ??? once those are implemented */ 6765 if (insn_code == CODE_FOR_pred_rel_mutex 6766 || insn_code == CODE_FOR_prologue_use) 6767 continue; 6768 6769 if (insn_code == CODE_FOR_insn_group_barrier) 6770 break; 6771 abort (); 6772 } 6773 6774 /* Adjust the stop bit's slot selector. */ 6775 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1) 6776 abort (); 6777 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3); 6778 6779 sched_data.stopbit[0] = 0; 6780 sched_data.stopbit[2] = 1; 6781 6782 sched_data.types[5] = sched_data.types[3]; 6783 sched_data.types[4] = sched_data.types[2]; 6784 sched_data.types[3] = sched_data.types[1]; 6785 sched_data.insns[5] = sched_data.insns[3]; 6786 sched_data.insns[4] = sched_data.insns[2]; 6787 sched_data.insns[3] = sched_data.insns[1]; 6788 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0; 6789 sched_data.cur += 2; 6790 sched_data.first_slot = 3; 6791 for (i = 0; i < NR_PACKETS; i++) 6792 { 6793 const struct ia64_packet *p = packets + i; 6794 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B) 6795 { 6796 sched_data.packet = p; 6797 break; 6798 } 6799 } 6800 rotate_one_bundle (sched_verbose ? dump : NULL); 6801 6802 best = 6; 6803 for (i = 0; i < NR_PACKETS; i++) 6804 { 6805 const struct ia64_packet *p = packets + i; 6806 int split = get_split (p, sched_data.first_slot); 6807 int next; 6808 6809 /* Disallow multiway branches here. */ 6810 if (p->t[1] == TYPE_B) 6811 continue; 6812 6813 if (packet_matches_p (p, split, &next) && next < best) 6814 { 6815 best = next; 6816 sched_data.packet = p; 6817 sched_data.split = split; 6818 } 6819 } 6820 if (best == 6) 6821 abort (); 6822 } 6823 6824 if (*pn_ready > 0) 6825 { 6826 int more = ia64_internal_sched_reorder (dump, sched_verbose, 6827 ready, pn_ready, 1, 6828 clock_var); 6829 if (more) 6830 return more; 6831 /* Did we schedule a stop? If so, finish this cycle. 
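	 (A stop leaves sched_data.cur equal to sched_data.first_slot,
	 which is what the test below checks for.)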
*/ 6832 if (sched_data.cur == sched_data.first_slot) 6833 return 0; 6834 } 6835 6836 if (sched_verbose) 6837 fprintf (dump, "// Can't issue more this cycle; updating type array.\n"); 6838 6839 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6840 if (sched_verbose) 6841 dump_current_packet (dump); 6842 return 0; 6843} 6844 6845/* We are about to issue INSN. Return the number of insns left on the 6846 ready queue that can be issued this cycle. */ 6847 6848static int 6849ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) 6850 FILE *dump; 6851 int sched_verbose; 6852 rtx insn; 6853 int can_issue_more ATTRIBUTE_UNUSED; 6854{ 6855 enum attr_type t = ia64_safe_type (insn); 6856 6857 if (sched_data.last_was_stop) 6858 { 6859 int t = sched_data.first_slot; 6860 if (t == 0) 6861 t = 3; 6862 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn); 6863 init_insn_group_barriers (); 6864 sched_data.last_was_stop = 0; 6865 } 6866 6867 if (t == TYPE_UNKNOWN) 6868 { 6869 if (sched_verbose) 6870 fprintf (dump, "// Ignoring type %s\n", type_names[t]); 6871 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6872 || asm_noperands (PATTERN (insn)) >= 0) 6873 { 6874 /* This must be some kind of asm. Clear the scheduling state. */ 6875 rotate_two_bundles (sched_verbose ? dump : NULL); 6876 if (ia64_final_schedule) 6877 group_barrier_needed_p (insn); 6878 } 6879 return 1; 6880 } 6881 6882 /* This is _not_ just a sanity check. group_barrier_needed_p will update 6883 important state info. Don't delete this test. */ 6884 if (ia64_final_schedule 6885 && group_barrier_needed_p (insn)) 6886 abort (); 6887 6888 sched_data.stopbit[sched_data.cur] = 0; 6889 sched_data.insns[sched_data.cur] = insn; 6890 sched_data.types[sched_data.cur] = t; 6891 6892 sched_data.cur++; 6893 if (sched_verbose) 6894 fprintf (dump, "// Scheduling insn %d of type %s\n", 6895 INSN_UID (insn), type_names[t]); 6896 6897 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule) 6898 { 6899 schedule_stop (sched_verbose ? dump : NULL); 6900 sched_data.last_was_stop = 1; 6901 } 6902 6903 return 1; 6904} 6905 6906/* Free data allocated by ia64_sched_init. */ 6907 6908static void 6909ia64_sched_finish (dump, sched_verbose) 6910 FILE *dump; 6911 int sched_verbose; 6912{ 6913 if (sched_verbose) 6914 fprintf (dump, "// Finishing schedule.\n"); 6915 rotate_two_bundles (NULL); 6916 free (sched_types); 6917 free (sched_ready); 6918} 6919 6920/* Emit pseudo-ops for the assembler to describe predicate relations. 6921 At present this assumes that we only consider predicate pairs to 6922 be mutex, and that the assembler can deduce proper values from 6923 straight-line code. */ 6924 6925static void 6926emit_predicate_relation_info () 6927{ 6928 basic_block bb; 6929 6930 FOR_EACH_BB_REVERSE (bb) 6931 { 6932 int r; 6933 rtx head = bb->head; 6934 6935 /* We only need such notes at code labels. */ 6936 if (GET_CODE (head) != CODE_LABEL) 6937 continue; 6938 if (GET_CODE (NEXT_INSN (head)) == NOTE 6939 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK) 6940 head = NEXT_INSN (head); 6941 6942 for (r = PR_REG (0); r < PR_REG (64); r += 2) 6943 if (REGNO_REG_SET_P (bb->global_live_at_start, r)) 6944 { 6945 rtx p = gen_rtx_REG (BImode, r); 6946 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head); 6947 if (head == bb->end) 6948 bb->end = n; 6949 head = n; 6950 } 6951 } 6952 6953 /* Look for conditional calls that do not return, and protect predicate 6954 relations around them. 
Otherwise the assembler will assume the call 6955 returns, and complain about uses of call-clobbered predicates after 6956 the call. */ 6957 FOR_EACH_BB_REVERSE (bb) 6958 { 6959 rtx insn = bb->head; 6960 6961 while (1) 6962 { 6963 if (GET_CODE (insn) == CALL_INSN 6964 && GET_CODE (PATTERN (insn)) == COND_EXEC 6965 && find_reg_note (insn, REG_NORETURN, NULL_RTX)) 6966 { 6967 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn); 6968 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn); 6969 if (bb->head == insn) 6970 bb->head = b; 6971 if (bb->end == insn) 6972 bb->end = a; 6973 } 6974 6975 if (insn == bb->end) 6976 break; 6977 insn = NEXT_INSN (insn); 6978 } 6979 } 6980} 6981 6982/* Generate a NOP instruction of type T. We will never generate L type 6983 nops. */ 6984 6985static rtx 6986gen_nop_type (t) 6987 enum attr_type t; 6988{ 6989 switch (t) 6990 { 6991 case TYPE_M: 6992 return gen_nop_m (); 6993 case TYPE_I: 6994 return gen_nop_i (); 6995 case TYPE_B: 6996 return gen_nop_b (); 6997 case TYPE_F: 6998 return gen_nop_f (); 6999 case TYPE_X: 7000 return gen_nop_x (); 7001 default: 7002 abort (); 7003 } 7004} 7005 7006/* After the last scheduling pass, fill in NOPs. It's easier to do this 7007 here than while scheduling. */ 7008 7009static void 7010ia64_emit_nops () 7011{ 7012 rtx insn; 7013 const struct bundle *b = 0; 7014 int bundle_pos = 0; 7015 7016 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7017 { 7018 rtx pat; 7019 enum attr_type t; 7020 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx; 7021 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) 7022 continue; 7023 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR) 7024 || GET_CODE (insn) == CODE_LABEL) 7025 { 7026 if (b) 7027 while (bundle_pos < 3) 7028 { 7029 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7030 bundle_pos++; 7031 } 7032 if (GET_CODE (insn) != CODE_LABEL) 7033 b = bundle + INTVAL (XVECEXP (pat, 0, 0)); 7034 else 7035 b = 0; 7036 bundle_pos = 0; 7037 continue; 7038 } 7039 else if (GET_CODE (pat) == UNSPEC_VOLATILE 7040 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER) 7041 { 7042 int t = INTVAL (XVECEXP (pat, 0, 0)); 7043 if (b) 7044 while (bundle_pos < t) 7045 { 7046 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7047 bundle_pos++; 7048 } 7049 continue; 7050 } 7051 7052 if (bundle_pos == 3) 7053 b = 0; 7054 7055 if (b && INSN_P (insn)) 7056 { 7057 t = ia64_safe_type (insn); 7058 if (asm_noperands (PATTERN (insn)) >= 0 7059 || GET_CODE (PATTERN (insn)) == ASM_INPUT) 7060 { 7061 while (bundle_pos < 3) 7062 { 7063 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7064 bundle_pos++; 7065 } 7066 continue; 7067 } 7068 7069 if (t == TYPE_UNKNOWN) 7070 continue; 7071 while (bundle_pos < 3) 7072 { 7073 if (t == b->t[bundle_pos] 7074 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M 7075 || b->t[bundle_pos] == TYPE_I))) 7076 break; 7077 7078 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7079 bundle_pos++; 7080 } 7081 if (bundle_pos < 3) 7082 bundle_pos++; 7083 } 7084 } 7085} 7086 7087/* Perform machine dependent operations on the rtl chain INSNS. */ 7088 7089void 7090ia64_reorg (insns) 7091 rtx insns; 7092{ 7093 /* We are freeing block_for_insn in the toplev to keep compatibility 7094 with old MDEP_REORGS that are not CFG based. Recompute it now. */ 7095 compute_bb_for_insn (); 7096 7097 /* If optimizing, we'll have split before scheduling. */ 7098 if (optimize == 0) 7099 split_all_insns (0); 7100 7101 /* ??? 
update_life_info_in_dirty_blocks fails to terminate during 7102 non-optimizing bootstrap. */ 7103 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES); 7104 7105 if (ia64_flag_schedule_insns2) 7106 { 7107 timevar_push (TV_SCHED2); 7108 ia64_final_schedule = 1; 7109 schedule_ebbs (rtl_dump_file); 7110 ia64_final_schedule = 0; 7111 timevar_pop (TV_SCHED2); 7112 7113 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same 7114 place as they were during scheduling. */ 7115 emit_insn_group_barriers (rtl_dump_file, insns); 7116 ia64_emit_nops (); 7117 } 7118 else 7119 emit_all_insn_group_barriers (rtl_dump_file, insns); 7120 7121 /* A call must not be the last instruction in a function, so that the 7122 return address is still within the function, so that unwinding works 7123 properly. Note that IA-64 differs from dwarf2 on this point. */ 7124 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7125 { 7126 rtx insn; 7127 int saw_stop = 0; 7128 7129 insn = get_last_insn (); 7130 if (! INSN_P (insn)) 7131 insn = prev_active_insn (insn); 7132 if (GET_CODE (insn) == INSN 7133 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 7134 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 7135 { 7136 saw_stop = 1; 7137 insn = prev_active_insn (insn); 7138 } 7139 if (GET_CODE (insn) == CALL_INSN) 7140 { 7141 if (! saw_stop) 7142 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7143 emit_insn (gen_break_f ()); 7144 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7145 } 7146 } 7147 7148 fixup_errata (); 7149 emit_predicate_relation_info (); 7150} 7151 7152/* Return true if REGNO is used by the epilogue. */ 7153 7154int 7155ia64_epilogue_uses (regno) 7156 int regno; 7157{ 7158 switch (regno) 7159 { 7160 case R_GR (1): 7161 /* With a call to a function in another module, we will write a new 7162 value to "gp". After returning from such a call, we need to make 7163 sure the function restores the original gp-value, even if the 7164 function itself does not use the gp anymore. */ 7165 return !(TARGET_AUTO_PIC || TARGET_NO_PIC); 7166 7167 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): 7168 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): 7169 /* For functions defined with the syscall_linkage attribute, all 7170 input registers are marked as live at all function exits. This 7171 prevents the register allocator from using the input registers, 7172 which in turn makes it possible to restart a system call after 7173 an interrupt without having to save/restore the input registers. 7174 This also prevents kernel data from leaking to application code. */ 7175 return lookup_attribute ("syscall_linkage", 7176 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; 7177 7178 case R_BR (0): 7179 /* Conditional return patterns can't represent the use of `b0' as 7180 the return address, so we force the value live this way. */ 7181 return 1; 7182 7183 case AR_PFS_REGNUM: 7184 /* Likewise for ar.pfs, which is used by br.ret. */ 7185 return 1; 7186 7187 default: 7188 return 0; 7189 } 7190} 7191 7192/* Return true if REGNO is used by the frame unwinder. */ 7193 7194int 7195ia64_eh_uses (regno) 7196 int regno; 7197{ 7198 if (! 
reload_completed) 7199 return 0; 7200 7201 if (current_frame_info.reg_save_b0 7202 && regno == current_frame_info.reg_save_b0) 7203 return 1; 7204 if (current_frame_info.reg_save_pr 7205 && regno == current_frame_info.reg_save_pr) 7206 return 1; 7207 if (current_frame_info.reg_save_ar_pfs 7208 && regno == current_frame_info.reg_save_ar_pfs) 7209 return 1; 7210 if (current_frame_info.reg_save_ar_unat 7211 && regno == current_frame_info.reg_save_ar_unat) 7212 return 1; 7213 if (current_frame_info.reg_save_ar_lc 7214 && regno == current_frame_info.reg_save_ar_lc) 7215 return 1; 7216 7217 return 0; 7218} 7219 7220/* For ia64, SYMBOL_REF_FLAG set means that it is a function. 7221 7222 We add @ to the name if this goes in small data/bss. We can only put 7223 a variable in small data/bss if it is defined in this module or a module 7224 that we are statically linked with. We can't check the second condition, 7225 but TREE_STATIC gives us the first one. */ 7226 7227/* ??? If we had IPA, we could check the second condition. We could support 7228 programmer-added section attributes if the variable is not defined in this 7229 module. */ 7230 7231/* ??? See the v850 port for a cleaner way to do this. */ 7232 7233/* ??? We could also support our own long data here. Generating movl/add/ld8 7234 instead of addl,ld8/ld8. This makes the code bigger, but should make the 7235 code faster because there is one less load. This also includes incomplete 7236 types which can't go in sdata/sbss. */ 7237 7238static bool 7239ia64_in_small_data_p (exp) 7240 tree exp; 7241{ 7242 if (TARGET_NO_SDATA) 7243 return false; 7244 7245 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 7246 { 7247 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); 7248 if (strcmp (section, ".sdata") == 0 7249 || strcmp (section, ".sbss") == 0) 7250 return true; 7251 } 7252 else 7253 { 7254 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 7255 7256 /* If this is an incomplete type with size 0, then we can't put it 7257 in sdata because it might be too big when completed. */ 7258 if (size > 0 && size <= ia64_section_threshold) 7259 return true; 7260 } 7261 7262 return false; 7263} 7264 7265static void 7266ia64_encode_section_info (decl, first) 7267 tree decl; 7268 int first ATTRIBUTE_UNUSED; 7269{ 7270 const char *symbol_str; 7271 bool is_local; 7272 rtx symbol; 7273 char encoding = 0; 7274 7275 if (TREE_CODE (decl) == FUNCTION_DECL) 7276 { 7277 SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; 7278 return; 7279 } 7280 7281 /* Careful not to prod global register variables. */ 7282 if (TREE_CODE (decl) != VAR_DECL 7283 || GET_CODE (DECL_RTL (decl)) != MEM 7284 || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) 7285 return; 7286 7287 symbol = XEXP (DECL_RTL (decl), 0); 7288 symbol_str = XSTR (symbol, 0); 7289 7290 is_local = (*targetm.binds_local_p) (decl); 7291 7292 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl)) 7293 encoding = " GLil"[decl_tls_model (decl)]; 7294 /* Determine if DECL will wind up in .sdata/.sbss. */ 7295 else if (is_local && ia64_in_small_data_p (decl)) 7296 encoding = 's'; 7297 7298 /* Finally, encode this into the symbol string. */ 7299 if (encoding) 7300 { 7301 char *newstr; 7302 size_t len; 7303 7304 if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR) 7305 { 7306 if (encoding == symbol_str[1]) 7307 return; 7308 /* ??? Sdata became thread or thread became not thread. Lose.
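	     (The encoded form is ENCODE_SECTION_INFO_CHAR, then the
	     encoding letter, then the original name - which is why
	     ia64_strip_name_encoding below skips two characters.)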
*/ 7309 abort (); 7310 } 7311 7312 len = strlen (symbol_str); 7313 newstr = alloca (len + 3); 7314 newstr[0] = ENCODE_SECTION_INFO_CHAR; 7315 newstr[1] = encoding; 7316 memcpy (newstr + 2, symbol_str, len + 1); 7317 7318 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2); 7319 } 7320 7321 /* This decl is marked as being in small data/bss but it shouldn't be; 7322 one likely explanation for this is that the decl has been moved into 7323 a different section from the one it was in when encode_section_info 7324 was first called. Remove the encoding. */ 7325 else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR) 7326 XSTR (symbol, 0) = ggc_strdup (symbol_str + 2); 7327} 7328 7329static const char * 7330ia64_strip_name_encoding (str) 7331 const char *str; 7332{ 7333 if (str[0] == ENCODE_SECTION_INFO_CHAR) 7334 str += 2; 7335 if (str[0] == '*') 7336 str++; 7337 return str; 7338} 7339 7340/* True if it is OK to do sibling call optimization for the specified 7341 call expression EXP. DECL will be the called function, or NULL if 7342 this is an indirect call. */ 7343bool 7344ia64_function_ok_for_sibcall (decl) 7345 tree decl; 7346{ 7347 /* We must always return with our current GP. This means we can 7348 only sibcall to functions defined in the current module. */ 7349 return decl && (*targetm.binds_local_p) (decl); 7350} 7351 7352/* Output assembly directives for prologue regions. */ 7353 7354/* True if the current basic block is the last block in the function. */ 7355 7356static bool last_block; 7357 7358/* True if we need a copy_state command at the start of the next block. */ 7359 7360static bool need_copy_state; 7361 7362/* The function emits unwind directives for the start of an epilogue. */ 7363 7364static void 7365process_epilogue () 7366{ 7367 /* If this isn't the last block of the function, then we need to label the 7368 current state, and copy it back in at the start of the next block. */ 7369 7370 if (!last_block) 7371 { 7372 fprintf (asm_out_file, "\t.label_state 1\n"); 7373 need_copy_state = true; 7374 } 7375 7376 fprintf (asm_out_file, "\t.restore sp\n"); 7377} 7378 7379/* This function processes a SET pattern looking for specific patterns 7380 which result in emitting an assembly directive required for unwinding. */ 7381 7382static int 7383process_set (asm_out_file, pat) 7384 FILE *asm_out_file; 7385 rtx pat; 7386{ 7387 rtx src = SET_SRC (pat); 7388 rtx dest = SET_DEST (pat); 7389 int src_regno, dest_regno; 7390 7391 /* Look for the ALLOC insn. */ 7392 if (GET_CODE (src) == UNSPEC_VOLATILE 7393 && XINT (src, 1) == UNSPECV_ALLOC 7394 && GET_CODE (dest) == REG) 7395 { 7396 dest_regno = REGNO (dest); 7397 7398 /* If this isn't the final destination for ar.pfs, the alloc 7399 shouldn't have been marked frame related. */ 7400 if (dest_regno != current_frame_info.reg_save_ar_pfs) 7401 abort (); 7402 7403 fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", 7404 ia64_dbx_register_number (dest_regno)); 7405 return 1; 7406 } 7407 7408 /* Look for SP = ....
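       For example, a prologue adjustment sp = sp - 64 turns into the
       unwind directive ".fframe 64", while sp = sp + 64 or copying the
       hard frame pointer back into sp in an epilogue turns into
       ".restore sp".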
*/ 7409 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM) 7410 { 7411 if (GET_CODE (src) == PLUS) 7412 { 7413 rtx op0 = XEXP (src, 0); 7414 rtx op1 = XEXP (src, 1); 7415 if (op0 == dest && GET_CODE (op1) == CONST_INT) 7416 { 7417 if (INTVAL (op1) < 0) 7418 { 7419 fputs ("\t.fframe ", asm_out_file); 7420 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 7421 -INTVAL (op1)); 7422 fputc ('\n', asm_out_file); 7423 } 7424 else 7425 process_epilogue (); 7426 } 7427 else 7428 abort (); 7429 } 7430 else if (GET_CODE (src) == REG 7431 && REGNO (src) == HARD_FRAME_POINTER_REGNUM) 7432 process_epilogue (); 7433 else 7434 abort (); 7435 7436 return 1; 7437 } 7438 7439 /* Register move we need to look at. */ 7440 if (GET_CODE (dest) == REG && GET_CODE (src) == REG) 7441 { 7442 src_regno = REGNO (src); 7443 dest_regno = REGNO (dest); 7444 7445 switch (src_regno) 7446 { 7447 case BR_REG (0): 7448 /* Saving return address pointer. */ 7449 if (dest_regno != current_frame_info.reg_save_b0) 7450 abort (); 7451 fprintf (asm_out_file, "\t.save rp, r%d\n", 7452 ia64_dbx_register_number (dest_regno)); 7453 return 1; 7454 7455 case PR_REG (0): 7456 if (dest_regno != current_frame_info.reg_save_pr) 7457 abort (); 7458 fprintf (asm_out_file, "\t.save pr, r%d\n", 7459 ia64_dbx_register_number (dest_regno)); 7460 return 1; 7461 7462 case AR_UNAT_REGNUM: 7463 if (dest_regno != current_frame_info.reg_save_ar_unat) 7464 abort (); 7465 fprintf (asm_out_file, "\t.save ar.unat, r%d\n", 7466 ia64_dbx_register_number (dest_regno)); 7467 return 1; 7468 7469 case AR_LC_REGNUM: 7470 if (dest_regno != current_frame_info.reg_save_ar_lc) 7471 abort (); 7472 fprintf (asm_out_file, "\t.save ar.lc, r%d\n", 7473 ia64_dbx_register_number (dest_regno)); 7474 return 1; 7475 7476 case STACK_POINTER_REGNUM: 7477 if (dest_regno != HARD_FRAME_POINTER_REGNUM 7478 || ! frame_pointer_needed) 7479 abort (); 7480 fprintf (asm_out_file, "\t.vframe r%d\n", 7481 ia64_dbx_register_number (dest_regno)); 7482 return 1; 7483 7484 default: 7485 /* Everything else should indicate being stored to memory. */ 7486 abort (); 7487 } 7488 } 7489 7490 /* Memory store we need to look at. 
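       For example, spilling b1 produces ".save.b 0x1", and storing
       ar.unat at sp+16 produces ".savesp ar.unat, 16"; stores relative
       to the frame pointer use ".savepsp" with the offset negated.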
*/ 7491 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG) 7492 { 7493 long off; 7494 rtx base; 7495 const char *saveop; 7496 7497 if (GET_CODE (XEXP (dest, 0)) == REG) 7498 { 7499 base = XEXP (dest, 0); 7500 off = 0; 7501 } 7502 else if (GET_CODE (XEXP (dest, 0)) == PLUS 7503 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT) 7504 { 7505 base = XEXP (XEXP (dest, 0), 0); 7506 off = INTVAL (XEXP (XEXP (dest, 0), 1)); 7507 } 7508 else 7509 abort (); 7510 7511 if (base == hard_frame_pointer_rtx) 7512 { 7513 saveop = ".savepsp"; 7514 off = - off; 7515 } 7516 else if (base == stack_pointer_rtx) 7517 saveop = ".savesp"; 7518 else 7519 abort (); 7520 7521 src_regno = REGNO (src); 7522 switch (src_regno) 7523 { 7524 case BR_REG (0): 7525 if (current_frame_info.reg_save_b0 != 0) 7526 abort (); 7527 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off); 7528 return 1; 7529 7530 case PR_REG (0): 7531 if (current_frame_info.reg_save_pr != 0) 7532 abort (); 7533 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off); 7534 return 1; 7535 7536 case AR_LC_REGNUM: 7537 if (current_frame_info.reg_save_ar_lc != 0) 7538 abort (); 7539 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off); 7540 return 1; 7541 7542 case AR_PFS_REGNUM: 7543 if (current_frame_info.reg_save_ar_pfs != 0) 7544 abort (); 7545 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off); 7546 return 1; 7547 7548 case AR_UNAT_REGNUM: 7549 if (current_frame_info.reg_save_ar_unat != 0) 7550 abort (); 7551 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off); 7552 return 1; 7553 7554 case GR_REG (4): 7555 case GR_REG (5): 7556 case GR_REG (6): 7557 case GR_REG (7): 7558 fprintf (asm_out_file, "\t.save.g 0x%x\n", 7559 1 << (src_regno - GR_REG (4))); 7560 return 1; 7561 7562 case BR_REG (1): 7563 case BR_REG (2): 7564 case BR_REG (3): 7565 case BR_REG (4): 7566 case BR_REG (5): 7567 fprintf (asm_out_file, "\t.save.b 0x%x\n", 7568 1 << (src_regno - BR_REG (1))); 7569 return 1; 7570 7571 case FR_REG (2): 7572 case FR_REG (3): 7573 case FR_REG (4): 7574 case FR_REG (5): 7575 fprintf (asm_out_file, "\t.save.f 0x%x\n", 7576 1 << (src_regno - FR_REG (2))); 7577 return 1; 7578 7579 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): 7580 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): 7581 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): 7582 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): 7583 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 7584 1 << (src_regno - FR_REG (12))); 7585 return 1; 7586 7587 default: 7588 return 0; 7589 } 7590 } 7591 7592 return 0; 7593} 7594 7595 7596/* This function looks at a single insn and emits any directives 7597 required to unwind this insn. */ 7598void 7599process_for_unwind_directive (asm_out_file, insn) 7600 FILE *asm_out_file; 7601 rtx insn; 7602{ 7603 if (flag_unwind_tables 7604 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7605 { 7606 rtx pat; 7607 7608 if (GET_CODE (insn) == NOTE 7609 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 7610 { 7611 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR; 7612 7613 /* Restore unwind state from immediately before the epilogue. */ 7614 if (need_copy_state) 7615 { 7616 fprintf (asm_out_file, "\t.body\n"); 7617 fprintf (asm_out_file, "\t.copy_state 1\n"); 7618 need_copy_state = false; 7619 } 7620 } 7621 7622 if (GET_CODE (insn) == NOTE || ! 
RTX_FRAME_RELATED_P (insn)) 7623 return; 7624 7625 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); 7626 if (pat) 7627 pat = XEXP (pat, 0); 7628 else 7629 pat = PATTERN (insn); 7630 7631 switch (GET_CODE (pat)) 7632 { 7633 case SET: 7634 process_set (asm_out_file, pat); 7635 break; 7636 7637 case PARALLEL: 7638 { 7639 int par_index; 7640 int limit = XVECLEN (pat, 0); 7641 for (par_index = 0; par_index < limit; par_index++) 7642 { 7643 rtx x = XVECEXP (pat, 0, par_index); 7644 if (GET_CODE (x) == SET) 7645 process_set (asm_out_file, x); 7646 } 7647 break; 7648 } 7649 7650 default: 7651 abort (); 7652 } 7653 } 7654} 7655 7656 7657void 7658ia64_init_builtins () 7659{ 7660 tree psi_type_node = build_pointer_type (integer_type_node); 7661 tree pdi_type_node = build_pointer_type (long_integer_type_node); 7662 7663 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ 7664 tree si_ftype_psi_si_si 7665 = build_function_type_list (integer_type_node, 7666 psi_type_node, integer_type_node, 7667 integer_type_node, NULL_TREE); 7668 7669 /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ 7670 tree di_ftype_pdi_di_di 7671 = build_function_type_list (long_integer_type_node, 7672 pdi_type_node, long_integer_type_node, 7673 long_integer_type_node, NULL_TREE); 7674 /* __sync_synchronize */ 7675 tree void_ftype_void 7676 = build_function_type (void_type_node, void_list_node); 7677 7678 /* __sync_lock_test_and_set_si */ 7679 tree si_ftype_psi_si 7680 = build_function_type_list (integer_type_node, 7681 psi_type_node, integer_type_node, NULL_TREE); 7682 7683 /* __sync_lock_test_and_set_di */ 7684 tree di_ftype_pdi_di 7685 = build_function_type_list (long_integer_type_node, 7686 pdi_type_node, long_integer_type_node, 7687 NULL_TREE); 7688 7689 /* __sync_lock_release_si */ 7690 tree void_ftype_psi 7691 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE); 7692 7693 /* __sync_lock_release_di */ 7694 tree void_ftype_pdi 7695 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE); 7696 7697#define def_builtin(name, type, code) \ 7698 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE) 7699 7700 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, 7701 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); 7702 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, 7703 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); 7704 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, 7705 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); 7706 def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, 7707 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); 7708 7709 def_builtin ("__sync_synchronize", void_ftype_void, 7710 IA64_BUILTIN_SYNCHRONIZE); 7711 7712 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, 7713 IA64_BUILTIN_LOCK_TEST_AND_SET_SI); 7714 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, 7715 IA64_BUILTIN_LOCK_TEST_AND_SET_DI); 7716 def_builtin ("__sync_lock_release_si", void_ftype_psi, 7717 IA64_BUILTIN_LOCK_RELEASE_SI); 7718 def_builtin ("__sync_lock_release_di", void_ftype_pdi, 7719 IA64_BUILTIN_LOCK_RELEASE_DI); 7720 7721 def_builtin ("__builtin_ia64_bsp", 7722 build_function_type (ptr_type_node, void_list_node), 7723 IA64_BUILTIN_BSP); 7724 7725 def_builtin ("__builtin_ia64_flushrs", 7726 build_function_type (void_type_node, void_list_node), 7727 IA64_BUILTIN_FLUSHRS); 7728 7729 def_builtin ("__sync_fetch_and_add_si", si_ftype_psi_si, 7730 IA64_BUILTIN_FETCH_AND_ADD_SI); 7731 
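  /* A hypothetical use of the fetch-and-op builtins defined here:

	int counter;
	int old = __sync_fetch_and_add_si (&counter, 1);

     This expands through ia64_expand_fetch_and_op below into the
     mf/cmpxchg loop described there, or into a single fetchadd4.acq,
     since 1 happens to be a legal fetchadd increment.  */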
def_builtin ("__sync_fetch_and_sub_si", si_ftype_psi_si, 7732 IA64_BUILTIN_FETCH_AND_SUB_SI); 7733 def_builtin ("__sync_fetch_and_or_si", si_ftype_psi_si, 7734 IA64_BUILTIN_FETCH_AND_OR_SI); 7735 def_builtin ("__sync_fetch_and_and_si", si_ftype_psi_si, 7736 IA64_BUILTIN_FETCH_AND_AND_SI); 7737 def_builtin ("__sync_fetch_and_xor_si", si_ftype_psi_si, 7738 IA64_BUILTIN_FETCH_AND_XOR_SI); 7739 def_builtin ("__sync_fetch_and_nand_si", si_ftype_psi_si, 7740 IA64_BUILTIN_FETCH_AND_NAND_SI); 7741 7742 def_builtin ("__sync_add_and_fetch_si", si_ftype_psi_si, 7743 IA64_BUILTIN_ADD_AND_FETCH_SI); 7744 def_builtin ("__sync_sub_and_fetch_si", si_ftype_psi_si, 7745 IA64_BUILTIN_SUB_AND_FETCH_SI); 7746 def_builtin ("__sync_or_and_fetch_si", si_ftype_psi_si, 7747 IA64_BUILTIN_OR_AND_FETCH_SI); 7748 def_builtin ("__sync_and_and_fetch_si", si_ftype_psi_si, 7749 IA64_BUILTIN_AND_AND_FETCH_SI); 7750 def_builtin ("__sync_xor_and_fetch_si", si_ftype_psi_si, 7751 IA64_BUILTIN_XOR_AND_FETCH_SI); 7752 def_builtin ("__sync_nand_and_fetch_si", si_ftype_psi_si, 7753 IA64_BUILTIN_NAND_AND_FETCH_SI); 7754 7755 def_builtin ("__sync_fetch_and_add_di", di_ftype_pdi_di, 7756 IA64_BUILTIN_FETCH_AND_ADD_DI); 7757 def_builtin ("__sync_fetch_and_sub_di", di_ftype_pdi_di, 7758 IA64_BUILTIN_FETCH_AND_SUB_DI); 7759 def_builtin ("__sync_fetch_and_or_di", di_ftype_pdi_di, 7760 IA64_BUILTIN_FETCH_AND_OR_DI); 7761 def_builtin ("__sync_fetch_and_and_di", di_ftype_pdi_di, 7762 IA64_BUILTIN_FETCH_AND_AND_DI); 7763 def_builtin ("__sync_fetch_and_xor_di", di_ftype_pdi_di, 7764 IA64_BUILTIN_FETCH_AND_XOR_DI); 7765 def_builtin ("__sync_fetch_and_nand_di", di_ftype_pdi_di, 7766 IA64_BUILTIN_FETCH_AND_NAND_DI); 7767 7768 def_builtin ("__sync_add_and_fetch_di", di_ftype_pdi_di, 7769 IA64_BUILTIN_ADD_AND_FETCH_DI); 7770 def_builtin ("__sync_sub_and_fetch_di", di_ftype_pdi_di, 7771 IA64_BUILTIN_SUB_AND_FETCH_DI); 7772 def_builtin ("__sync_or_and_fetch_di", di_ftype_pdi_di, 7773 IA64_BUILTIN_OR_AND_FETCH_DI); 7774 def_builtin ("__sync_and_and_fetch_di", di_ftype_pdi_di, 7775 IA64_BUILTIN_AND_AND_FETCH_DI); 7776 def_builtin ("__sync_xor_and_fetch_di", di_ftype_pdi_di, 7777 IA64_BUILTIN_XOR_AND_FETCH_DI); 7778 def_builtin ("__sync_nand_and_fetch_di", di_ftype_pdi_di, 7779 IA64_BUILTIN_NAND_AND_FETCH_DI); 7780 7781#undef def_builtin 7782} 7783 7784/* Expand fetch_and_op intrinsics. The basic code sequence is: 7785 7786 mf 7787 tmp = [ptr]; 7788 do { 7789 ret = tmp; 7790 ar.ccv = tmp; 7791 tmp <op>= value; 7792 cmpxchgsz.acq tmp = [ptr], tmp 7793 } while (tmp != ret) 7794*/ 7795 7796static rtx 7797ia64_expand_fetch_and_op (binoptab, mode, arglist, target) 7798 optab binoptab; 7799 enum machine_mode mode; 7800 tree arglist; 7801 rtx target; 7802{ 7803 rtx ret, label, tmp, ccv, insn, mem, value; 7804 tree arg0, arg1; 7805 7806 arg0 = TREE_VALUE (arglist); 7807 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 7808 mem = expand_expr (arg0, NULL_RTX, Pmode, 0); 7809#ifdef POINTERS_EXTEND_UNSIGNED 7810 if (GET_MODE(mem) != Pmode) 7811 mem = convert_memory_address (Pmode, mem); 7812#endif 7813 value = expand_expr (arg1, NULL_RTX, mode, 0); 7814 7815 mem = gen_rtx_MEM (mode, force_reg (Pmode, mem)); 7816 MEM_VOLATILE_P (mem) = 1; 7817 7818 if (target && register_operand (target, mode)) 7819 ret = target; 7820 else 7821 ret = gen_reg_rtx (mode); 7822 7823 emit_insn (gen_mf ()); 7824 7825 /* Special case for fetchadd instructions. 
/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
	insn = gen_fetchadd_acq_si (ret, mem, value);
      else
	insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}

/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
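/* Example (illustrative; `cmpxchg_acq' and `OP' are stand-ins for the
   hardware compare-and-swap, which returns the prior contents of *ptr,
   and for the requested operation): at the C level the loop above
   behaves as

       int
       fetch_and_op (int *ptr, int value)
       {
	 int tmp = *ptr, ret;
	 do
	   {
	     ret = tmp;
	     tmp = cmpxchg_acq (ptr, ret, ret OP value);
	   }
	 while (tmp != ret);
	 return ret;
       }

   For the NAND variants the operation applied is `~tmp & value',
   matching the documented semantics of __sync_fetch_and_nand.  */
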
7943*/ 7944 7945static rtx 7946ia64_expand_compare_and_swap (mode, boolp, arglist, target) 7947 enum machine_mode mode; 7948 int boolp; 7949 tree arglist; 7950 rtx target; 7951{ 7952 tree arg0, arg1, arg2; 7953 rtx mem, old, new, ccv, tmp, insn; 7954 7955 arg0 = TREE_VALUE (arglist); 7956 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 7957 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); 7958 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0); 7959 old = expand_expr (arg1, NULL_RTX, mode, 0); 7960 new = expand_expr (arg2, NULL_RTX, mode, 0); 7961 7962 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem)); 7963 MEM_VOLATILE_P (mem) = 1; 7964 7965 if (! register_operand (old, mode)) 7966 old = copy_to_mode_reg (mode, old); 7967 if (! register_operand (new, mode)) 7968 new = copy_to_mode_reg (mode, new); 7969 7970 if (! boolp && target && register_operand (target, mode)) 7971 tmp = target; 7972 else 7973 tmp = gen_reg_rtx (mode); 7974 7975 ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM); 7976 if (mode == DImode) 7977 emit_move_insn (ccv, old); 7978 else 7979 { 7980 rtx ccvtmp = gen_reg_rtx (DImode); 7981 emit_insn (gen_zero_extendsidi2 (ccvtmp, old)); 7982 emit_move_insn (ccv, ccvtmp); 7983 } 7984 emit_insn (gen_mf ()); 7985 if (mode == SImode) 7986 insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv); 7987 else 7988 insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv); 7989 emit_insn (insn); 7990 7991 if (boolp) 7992 { 7993 if (! target) 7994 target = gen_reg_rtx (mode); 7995 return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1); 7996 } 7997 else 7998 return tmp; 7999} 8000 8001/* Expand lock_test_and_set. I.e. `xchgsz ret = [ptr], new'. */ 8002 8003static rtx 8004ia64_expand_lock_test_and_set (mode, arglist, target) 8005 enum machine_mode mode; 8006 tree arglist; 8007 rtx target; 8008{ 8009 tree arg0, arg1; 8010 rtx mem, new, ret, insn; 8011 8012 arg0 = TREE_VALUE (arglist); 8013 arg1 = TREE_VALUE (TREE_CHAIN (arglist)); 8014 mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0); 8015 new = expand_expr (arg1, NULL_RTX, mode, 0); 8016 8017 mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem)); 8018 MEM_VOLATILE_P (mem) = 1; 8019 if (! register_operand (new, mode)) 8020 new = copy_to_mode_reg (mode, new); 8021 8022 if (target && register_operand (target, mode)) 8023 ret = target; 8024 else 8025 ret = gen_reg_rtx (mode); 8026 8027 if (mode == SImode) 8028 insn = gen_xchgsi (ret, mem, new); 8029 else 8030 insn = gen_xchgdi (ret, mem, new); 8031 emit_insn (insn); 8032 8033 return ret; 8034} 8035 8036/* Expand lock_release. I.e. `stsz.rel [ptr] = r0'. 
/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
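/* Example (illustrative): `__sync_lock_release_si (&lock)' therefore
   compiles to a single release store along the lines of
   `st4.rel [r14] = r0' (register allocation may differ), not to a
   cmpxchg loop; marking the MEM volatile is what makes the store come
   out in the .rel form.  */
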
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 1, arglist, target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (mode, 0, arglist, target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}

/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case: small structures/unions/etc. are
     padded upward.  */
  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */
  return ((mode == BLKmode
	   ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	      && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
	   : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
	  ? downward : upward);
}
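/* Example (illustrative): under HP-UX's big-endian IA64 ABI, a 3-byte
   struct argument is padded upward by the function above, so its data
   occupies the most significant bytes of its slot; a 2-byte (HImode)
   scalar falls through to the standard rule and is padded downward.  */
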
/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next;	/* next external */
  char *name;				/* name of the external */
} *extern_func_head = 0;

static void
ia64_hpux_add_extern_decl (name)
     const char *name;
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

void
ia64_hpux_asm_file_end (file)
     FILE *file;
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (*targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
	  || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
	{
	  if (decl)
	    TREE_ASM_WRITTEN (decl) = 1;
	  (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
	  fprintf (file, "%s", TYPE_ASM_OP);
	  assemble_name (file, extern_func_head->name);
	  putc (',', file);
	  fprintf (file, TYPE_OPERAND_FMT, "function");
	  putc ('\n', file);
	}
      extern_func_head = extern_func_head->next;
    }
}


/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (decl, name, reloc)
     tree decl;
     const char *name;
     int reloc;
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}
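/* Example (illustrative): for a referenced external function `bar',
   ia64_hpux_asm_file_end above appends something like

	.global bar
	.type	bar,@function

   to the end of the assembly file; the exact spelling comes from the
   globalize_label hook, TYPE_ASM_OP and TYPE_OPERAND_FMT.  */
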
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx this, insn, funexp;

  reload_completed = 1;
  no_new_pseudos = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NULL, NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (!CONST_OK_FOR_J (vcall_offset))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	  emit_move_insn (tmp2, vcall_offset_rtx);
	  vcall_offset_rtx = tmp2;
	}
      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn = get_insns ();
  emit_all_insn_group_barriers (NULL, insn);
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  no_new_pseudos = 0;
}

#include "gt-ia64.h"
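/* Example (illustrative; DELTA and VCALL_OFFSET refer to the arguments
   of ia64_output_mi_thunk above): the emitted thunk is equivalent to

       void
       thunk (char *this)
       {
	 this += DELTA;
	 if (VCALL_OFFSET)
	   this += *(long *) (*(char **) this + VCALL_OFFSET);
	 target_function (this);	/* tail call, `this' in in0 */
       }

   with no frame or prologue of its own.  */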