ia64.c revision 122180
/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com> and
		  David Mosberger <davidm@hpl.hp.com>.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "tm_p.h"
#include "langhooks.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Determines whether we use adds, addl, or movl to generate our
   TLS immediate offsets.  */
int ia64_tls_size = 22;

/* String used with the -mtls-size= option.  */
const char *ia64_tls_size_string;
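
/* A sketch of how the -mtls-size settings map onto IA-64 immediate
   forms (illustrative only; the real patterns live in ia64.md, and the
   operand syntax below is not literal assembler output):

	14	adds  rX = @tprel(sym), r13	; 14-bit immediate
	22	addl  rX = @tprel(sym), r13	; 22-bit immediate
	64	movl  rX = @tprel(sym)		; 64-bit immediate
		add   rX = rX, r13

   The default of 22 keeps the offset in a single instruction while
   allowing a larger thread-local segment than the 14-bit form; only
   the 64-bit form costs an extra add.  */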

/* Determines whether we run our final scheduling pass or not.  We always
   avoid the normal second scheduling pass.  */
static int ia64_flag_schedule_insns2;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int reg_fp;			/* register for fp.  */
  int reg_save_b0;		/* save register for b0.  */
  int reg_save_pr;		/* save register for prs.  */
  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
  int reg_save_ar_unat;		/* save register for ar.unat.  */
  int reg_save_ar_lc;		/* save register for ar.lc.  */
  int reg_save_gp;		/* save register for gp.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;

static rtx gen_tls_get_addr PARAMS ((void));
static rtx gen_thread_pointer PARAMS ((void));
static int find_gr_spill PARAMS ((int));
static int next_scratch_gr_reg PARAMS ((void));
static void mark_reg_gr_used_mask PARAMS ((rtx, void *));
static void ia64_compute_frame_size PARAMS ((HOST_WIDE_INT));
static void setup_spill_pointers PARAMS ((int, rtx, HOST_WIDE_INT));
static void finish_spill_pointers PARAMS ((void));
static rtx spill_restore_mem PARAMS ((rtx, HOST_WIDE_INT));
static void do_spill PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx));
static void do_restore PARAMS ((rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT));
static rtx gen_movdi_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_spill_x PARAMS ((rtx, rtx, rtx));
static rtx gen_fr_restore_x PARAMS ((rtx, rtx, rtx));

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static struct machine_function * ia64_init_machine_status PARAMS ((void));
static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_all_insn_group_barriers PARAMS ((FILE *, rtx));
static void emit_predicate_relation_info PARAMS ((void));
static bool ia64_in_small_data_p PARAMS ((tree));
static void ia64_encode_section_info PARAMS ((tree, int));
static const char *ia64_strip_name_encoding PARAMS ((const char *));
static void process_epilogue PARAMS ((void));
static int process_set PARAMS ((FILE *, rtx));

static rtx ia64_expand_fetch_and_op PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_op_and_fetch PARAMS ((optab, enum machine_mode,
					     tree, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum machine_mode,
						 enum machine_mode,
						 int, tree, rtx));
static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
						  tree, rtx));
static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));
static bool ia64_assemble_integer PARAMS ((rtx, unsigned int, int));
static void ia64_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ia64_output_function_end_prologue PARAMS ((FILE *));

static int ia64_issue_rate PARAMS ((void));
static int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ia64_sched_init PARAMS ((FILE *, int, int));
static void ia64_sched_finish PARAMS ((FILE *, int));
static int ia64_internal_sched_reorder PARAMS ((FILE *, int, rtx *,
						int *, int, int));
static int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
static int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));

static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
					  HOST_WIDE_INT, tree));

static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
					     unsigned HOST_WIDE_INT));
static void ia64_rwreloc_select_section PARAMS ((tree, int,
						 unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_unique_section PARAMS ((tree, int))
     ATTRIBUTE_UNUSED;
static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
						     unsigned HOST_WIDE_INT))
     ATTRIBUTE_UNUSED;
static unsigned int ia64_rwreloc_section_type_flags
     PARAMS ((tree, const char *, int))
     ATTRIBUTE_UNUSED;

static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
     ATTRIBUTE_UNUSED;

/* Table of valid machine attributes.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "syscall_linkage", 0, 0, false, true,  true,  NULL },
  { NULL,	       0, 0, false, false, false, NULL }
};
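
/* As an illustrative (hypothetical) use of the attribute above:

	extern long sys_entry (long) __attribute__ ((syscall_linkage));

   ia64_compute_frame_size treats a function carrying this attribute
   specially, marking all eight input registers as in use so that
   locals aren't visible to the caller.  */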

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING ia64_strip_name_encoding

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op) && mode != VOIDmode)
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	{
	  const char *str = XSTR (op, 0);
	  return (str[0] == ENCODE_SECTION_INFO_CHAR && str[1] == 's');
	}

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided
	 with the entire offset during emission, which makes it very
	 hard to split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
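
/* Note that the early "return 1" in the CONST case above makes the
   checks that follow it unreachable, so every symbol-plus-offset CONST
   is currently accepted; the TARGET_NO_PIC and low-14-bit tests below
   it appear to be deliberately disabled rather than removed.  */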

/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return tls_model if OP refers to a TLS symbol.  */

int
tls_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *str;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  str = XSTR (op, 0);
  if (str[0] != ENCODE_SECTION_INFO_CHAR)
    return 0;
  switch (str[1])
    {
    case 'G':
      return TLS_MODEL_GLOBAL_DYNAMIC;
    case 'L':
      return TLS_MODEL_LOCAL_DYNAMIC;
    case 'i':
      return TLS_MODEL_INITIAL_EXEC;
    case 'l':
      return TLS_MODEL_LOCAL_EXEC;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF
   from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand that is (or could be) a GR reg.  */

int
gr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) an FR reg.  */

int
fr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a register operand that is (or could be) a GR/FR reg.  */

int
grfr_register_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! register_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) a GR reg.  */

int
gr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is (or could be) an FR reg.  */

int
fr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a nonimmediate operand that is a GR/FR reg.  */

int
grfr_nonimmediate_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);
  if (GET_CODE (op) == REG)
    {
      unsigned int regno = REGNO (op);
      if (regno < FIRST_PSEUDO_REGISTER)
	return GENERAL_REGNO_P (regno) || FR_REGNO_P (regno);
    }
  return 1;
}

/* Return 1 if OP is a GR register operand, or zero.  */

int
gr_reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 5 bit immediate operand.  */

int
gr_reg_or_5bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or a 6 bit immediate operand.  */

int
gr_reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR register operand, or an 8 bit immediate operand.  */

int
gr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a GR/FR register operand, or an 8 bit immediate.  */

int
grfr_reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || grfr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
gr_reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
gr_reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
gr_reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
gr_reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || gr_register_operand (op, mode));
}
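
/* For reference, the CONST_OK_FOR_* letters used by the predicates
   above pair up with immediate widths as their comments suggest:
   K is the 8-bit immediate, L the 8-bit adjusted immediate, I the
   14-bit immediate, J the 22-bit immediate, and M (used below) the
   6-bit shift count.  The authoritative definitions are in ia64.h.  */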

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8 ||
	      INTVAL (op) == -4 || INTVAL (op) == -1 ||
	      INTVAL (op) == 1 || INTVAL (op) == 4 ||
	      INTVAL (op) == 8 || INTVAL (op) == 16));
}
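
/* Background for the two predicates above: shladd encodes a shift
   count of 1 to 4, so the multiplier visible in rtl must be 2, 4, 8,
   or 16, and the fetchadd instruction can encode only the eight
   increments listed.  Illustrative assembler, not compiler output:

	shladd r4 = r5, 3, r6		// r4 = (r5 << 3) + r6
	fetchadd8.acq r8 = [r9], 16	// atomically add 16 to [r9]  */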

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
fr_reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || fr_register_operand (op, mode));
}
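
/* The G constraint accepts exactly 0.0 and 1.0 because the hardware
   provides them for free: f0 always reads as +0.0 and f1 as +1.0, so
   these "constants" are really register operands in disguise.  */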

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Like memory_operand, but don't allow post-increments.  */

int
not_postinc_memory_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (memory_operand (op, mode)
	  && GET_RTX_CLASS (GET_CODE (XEXP (op, 0))) != 'a');
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if this is a signed inequality operator.  */

int
signed_inequality_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == GE || code == GT
	      || code == LE || code == LT));
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this operator can be used in a conditional operation.  */

int
condop_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == PLUS || code == MINUS || code == AND
	      || code == IOR || code == XOR));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if this is the ar.ccv register.  */

int
ar_ccv_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_CCV_REGNUM);
}

/* Return 1 if this is the ar.pfs register.  */

int
ar_pfs_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_PFS_REGNUM);
}

/* Like general_operand, but don't allow (mem (addressof)).  */

int
general_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! general_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
destination_tfmode_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! destination_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF)
    return 0;
  return 1;
}

/* Similarly.  */

int
tfreg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == SUBREG)
    return 0;
  return fr_reg_or_fp01_operand (op, mode);
}

/* Return 1 if OP is valid as a base register in a reg + offset address.  */

int
basereg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  /* ??? Should I copy the flag_omit_frame_pointer and cse_not_expected
     checks from pa.c basereg_operand as well?  Seems to be OK without them
     in test runs.  */

  return (register_operand (op, mode) &&
	  REG_POINTER ((GET_CODE (op) == SUBREG) ? SUBREG_REG (op) : op));
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and then only 0, 0.0, or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Return 0 if we are doing C++ code.  This optimization fails with
   C++ because of GNAT c++/6685.  */

int
addp4_optimize_ok (op1, op2)
     rtx op1, op2;
{
  if (!strcmp (lang_hooks.name, "GNU C++"))
    return 0;

  return (basereg_operand (op1, GET_MODE(op1)) !=
	  basereg_operand (op2, GET_MODE(op2)));
}

/* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
   Return the length of the field, or <= 0 on failure.  */

int
ia64_depz_field_mask (rop, rshift)
     rtx rop, rshift;
{
  unsigned HOST_WIDE_INT op = INTVAL (rop);
  unsigned HOST_WIDE_INT shift = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  */
  op >>= shift;

  /* We must now have a solid block of 1's at bit 0.  */
  return exact_log2 (op + 1);
}
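
/* A worked example for ia64_depz_field_mask: for
   (and (ashift X (const_int 8)) (const_int 0xff00)), ROP is 0xff00 and
   RSHIFT is 8; 0xff00 >> 8 == 0xff, and exact_log2 (0xff + 1) == 8, so
   the dep.z field is 8 bits wide.  A mask such as 0xf0f00 fails:
   0xf0f00 >> 8 == 0xf0f, 0xf0f + 1 is not a power of two, and
   exact_log2 returns -1.  */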

/* Expand a symbolic constant load.  */
/* ??? Should generalize this, so that we can also support 32 bit pointers.  */

void
ia64_expand_load_address (dest, src, scratch)
     rtx dest, src, scratch;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    {
      if (! scratch || ! register_operand (scratch, DImode))
	temp = gen_reg_rtx (DImode);
      else
	temp = scratch;
    }
  else
    temp = dest;

  if (tls_symbolic_operand (src, Pmode))
    abort ();

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if ((GET_MODE (src) == Pmode || GET_MODE (src) == ptr_mode)
	   && sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;
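
      /* For example, OFS == 0x6000 splits as LO == -0x2000
	 ((0x2000 ^ 0x2000) - 0x2000) and HI == 0x8000, while
	 OFS == 0x1234 splits as LO == 0x1234 and HI == 0.  In all
	 cases HI is a multiple of 0x4000 and HI + LO == OFS.  */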

      if (! scratch)
	scratch = no_new_pseudos ? subtarget : gen_reg_rtx (DImode);

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi),
				  scratch));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    {
      rtx insn;
      if (! scratch)
	scratch = no_new_pseudos ? temp : gen_reg_rtx (DImode);

      insn = emit_insn (gen_load_symptr (temp, src, scratch));
#ifdef POINTERS_EXTEND_UNSIGNED
      if (GET_MODE (temp) != GET_MODE (src))
	src = convert_memory_address (GET_MODE (temp), src);
#endif
      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, src, REG_NOTES (insn));
    }

  if (temp != dest)
    {
      if (GET_MODE (dest) != GET_MODE (temp))
	temp = convert_to_mode (GET_MODE (dest), temp, 0);
      emit_move_insn (dest, temp);
    }
}

static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr ()
{
  if (!gen_tls_tga)
    {
      gen_tls_tga = init_one_libfunc ("__tls_get_addr");
    }
  return gen_tls_tga;
}

static GTY(()) rtx thread_pointer_rtx;
static rtx
gen_thread_pointer ()
{
  if (!thread_pointer_rtx)
    {
      thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
      RTX_UNCHANGING_P (thread_pointer_rtx) = 1;
    }
  return thread_pointer_rtx;
}
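
/* In the IA-64 software conventions, r13 is the thread pointer, which
   is why gen_thread_pointer above hands back hard register 13; the
   local-exec and initial-exec sequences below simply add a tprel
   offset to it.  */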

rtx
ia64_expand_move (op0, op1)
     rtx op0, op1;
{
  enum machine_mode mode = GET_MODE (op0);

  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if (mode == Pmode || mode == ptr_mode)
    {
      enum tls_model tls_kind;
      if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
	{
	  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp, insns;
	  rtx orig_op0 = op0;

	  switch (tls_kind)
	    {
	    case TLS_MODEL_GLOBAL_DYNAMIC:
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtprel (tga_op2, op1));
	      tga_op2 = gen_rtx_MEM (Pmode, tga_op2);
	      RTX_UNCHANGING_P (tga_op2) = 1;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (op0) != Pmode)
		op0 = tga_ret;
	      emit_libcall_block (insns, op0, tga_ret, op1);
	      break;

	    case TLS_MODEL_LOCAL_DYNAMIC:
	      /* ??? This isn't the completely proper way to do local-dynamic.
		 If the call to __tls_get_addr is used only by a single symbol,
		 then we should (somehow) move the dtprel to the second arg
		 to avoid the extra add.  */
	      start_sequence ();

	      tga_op1 = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_dtpmod (tga_op1, op1));
	      tga_op1 = gen_rtx_MEM (Pmode, tga_op1);
	      RTX_UNCHANGING_P (tga_op1) = 1;

	      tga_op2 = const0_rtx;

	      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
						 LCT_CONST, Pmode, 2, tga_op1,
						 Pmode, tga_op2, Pmode);

	      insns = get_insns ();
	      end_sequence ();

	      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
					UNSPEC_LD_BASE);
	      tmp = gen_reg_rtx (Pmode);
	      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_dtprel (op0, op1));
		  emit_insn (gen_adddi3 (op0, tmp, op0));
		}
	      else
		emit_insn (gen_add_dtprel (op0, tmp, op1));
	      break;

	    case TLS_MODEL_INITIAL_EXEC:
	      tmp = gen_reg_rtx (Pmode);
	      emit_insn (gen_load_ltoff_tprel (tmp, op1));
	      tmp = gen_rtx_MEM (Pmode, tmp);
	      RTX_UNCHANGING_P (tmp) = 1;
	      tmp = force_reg (Pmode, tmp);

	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
	      break;

	    case TLS_MODEL_LOCAL_EXEC:
	      if (!register_operand (op0, Pmode))
		op0 = gen_reg_rtx (Pmode);
	      if (TARGET_TLS64)
		{
		  emit_insn (gen_load_tprel (op0, op1));
		  emit_insn (gen_adddi3 (op0, gen_thread_pointer (), op0));
		}
	      else
		emit_insn (gen_add_tprel (op0, gen_thread_pointer (), op1));
	      break;

	    default:
	      abort ();
	    }

	  if (orig_op0 == op0)
	    return NULL_RTX;
	  if (GET_MODE (orig_op0) == Pmode)
	    return op0;
	  return gen_lowpart (GET_MODE (orig_op0), op0);
	}
      else if (!TARGET_NO_PIC &&
	       (symbolic_operand (op1, Pmode) ||
		symbolic_operand (op1, ptr_mode)))
	{
	  /* Before optimization starts, delay committing to any particular
	     type of PIC address load.  If this function gets deferred, we
	     may acquire information that changes the value of the
	     sdata_symbolic_operand predicate.

	     But don't delay for function pointers.  Loading a function address
	     actually loads the address of the descriptor not the function.
	     If we represent these as SYMBOL_REFs, then they get cse'd with
	     calls, and we end up with calls to the descriptor address instead
	     of calls to the function address.  Functions are not candidates
	     for sdata anyway.

	     Don't delay for LABEL_REF because the splitter loses REG_LABEL
	     notes.  Don't delay for pool addresses on general principles;
	     they'll never become non-local behind our back.  */

	  if (rtx_equal_function_value_matters
	      && GET_CODE (op1) != LABEL_REF
	      && ! (GET_CODE (op1) == SYMBOL_REF
		    && (SYMBOL_REF_FLAG (op1)
			|| CONSTANT_POOL_ADDRESS_P (op1)
			|| STRING_POOL_ADDRESS_P (op1))))
	    {
	      if (GET_MODE (op1) == DImode)
		emit_insn (gen_movdi_symbolic (op0, op1));
	      else
		emit_insn (gen_movsi_symbolic (op0, op1));
	    }
	  else
	    ia64_expand_load_address (op0, op1, NULL_RTX);
	  return NULL_RTX;
	}
    }

  return op1;
}

/* Split a post-reload TImode reference into two DImode components.  */

rtx
ia64_split_timode (out, in, scratch)
     rtx out[2];
     rtx in, scratch;
{
  switch (GET_CODE (in))
    {
    case REG:
      out[0] = gen_rtx_REG (DImode, REGNO (in));
      out[1] = gen_rtx_REG (DImode, REGNO (in) + 1);
      return NULL_RTX;

    case MEM:
      {
	rtx base = XEXP (in, 0);

	switch (GET_CODE (base))
	  {
	  case REG:
	    out[0] = adjust_address (in, DImode, 0);
	    break;
	  case POST_MODIFY:
	    base = XEXP (base, 0);
	    out[0] = adjust_address (in, DImode, 0);
	    break;

	  /* Since we're changing the mode, we need to change to POST_MODIFY
	     as well to preserve the size of the increment.  Either that or
	     do the update in two steps, but we've already got this scratch
	     register handy so let's use it.  */
	  case POST_INC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, 16)));
	    break;
	  case POST_DEC:
	    base = XEXP (base, 0);
	    out[0]
	      = change_address (in, DImode,
				gen_rtx_POST_MODIFY
				(Pmode, base, plus_constant (base, -16)));
	    break;
	  default:
	    abort ();
	  }

	if (scratch == NULL_RTX)
	  abort ();
	out[1] = change_address (in, DImode, scratch);
	return gen_adddi3 (scratch, base, GEN_INT (8));
      }

    case CONST_INT:
    case CONST_DOUBLE:
      split_double (in, &out[0], &out[1]);
      return NULL_RTX;

    default:
      abort ();
    }
}
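
/* Two quick examples of the splitting above: (reg:TI r32) becomes the
   pair (reg:DI r32) and (reg:DI r33); a (mem:TI (post_inc (reg B)))
   becomes a POST_MODIFY reference B += 16 for word 0 plus a
   scratch-addressed (mem:DI scratch) for word 1, with the returned
   adddi3 insn computing scratch = B + 8.  */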

/* ??? Fixing GR->FR TFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:TF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.  */

rtx
spill_tfmode_operand (in, force)
     rtx in;
     int force;
{
  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (force && GET_CODE (in) == REG)
    {
      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
      return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
    }
  else if (GET_CODE (in) == MEM
	   && GET_CODE (XEXP (in, 0)) == ADDRESSOF)
    return change_address (in, TFmode, copy_to_reg (XEXP (in, 0)));
  else
    return in;
}

/* Emit comparison instruction if necessary, returning the expression
   that holds the compare result in the proper mode.  */

rtx
ia64_expand_compare (code, mode)
     enum rtx_code code;
     enum machine_mode mode;
{
  rtx op0 = ia64_compare_op0, op1 = ia64_compare_op1;
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (op0) == BImode)
    {
      if ((code == NE || code == EQ) && op1 == const0_rtx)
	cmp = op0;
      else
	abort ();
    }
  else
    {
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (VOIDmode, cmp,
			      gen_rtx_fmt_ee (code, BImode, op0, op1)));
      code = NE;
    }

  return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx);
}
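
/* A sketch of the usual two-step compare idiom: the cmp expanders in
   ia64.md only record their operands in ia64_compare_op0 and
   ia64_compare_op1; the subsequent branch or scc expander then calls
   ia64_expand_compare, which emits the compare into a BImode predicate
   register and returns something like (ne cmp (const_int 0)) for the
   consumer pattern to use.  */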

/* Emit the appropriate sequence for a call.  */

void
ia64_expand_call (retval, addr, nextarg, sibcall_p)
     rtx retval;
     rtx addr;
     rtx nextarg ATTRIBUTE_UNUSED;
     int sibcall_p;
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
}

void
ia64_reload_gp ()
{
  rtx tmp;

  if (current_frame_info.reg_save_gp)
    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
  else
    {
      HOST_WIDE_INT offset;

      offset = (current_frame_info.spill_cfa_off
		+ current_frame_info.spill_size);
      if (frame_pointer_needed)
	{
	  tmp = hard_frame_pointer_rtx;
	  offset = -offset;
	}
      else
	{
	  tmp = stack_pointer_rtx;
	  offset = current_frame_info.total_size - offset;
	}

      if (CONST_OK_FOR_I (offset))
	emit_insn (gen_adddi3 (pic_offset_table_rtx,
			       tmp, GEN_INT (offset)));
      else
	{
	  emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
	  emit_insn (gen_adddi3 (pic_offset_table_rtx,
				 pic_offset_table_rtx, tmp));
	}

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}

void
ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
		 noreturn_p, sibcall_p)
     rtx retval, addr, retaddr, scratch_r, scratch_b;
     int noreturn_p, sibcall_p;
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}

/* Begin the assembly file.  */

void
emit_safe_across_calls (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}
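
/* For instance, if p6 through p9 were the only call-used predicates in
   the target description, the loop above would emit a line such as

	.pred.safe_across_calls p1-p5,p10-p63

   (illustrative output; the actual ranges depend on call_used_regs).  */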

/* Helper function for ia64_compute_frame_size: find an appropriate general
   register to spill some special register to.  SPECIAL_SPILL_MASK contains
   bits in GR0 to GR31 that have already been allocated by this routine.
   TRY_LOCALS is true if we should attempt to locate a local regnum.  */

static int
find_gr_spill (try_locals)
     int try_locals;
{
  int regno;

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (current_function_is_leaf)
    {
      for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
	if (! regs_ever_live[regno]
	    && call_used_regs[regno]
	    && ! fixed_regs[regno]
	    && ! global_regs[regno]
	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	  {
	    current_frame_info.gr_used_mask |= 1 << regno;
	    return regno;
	  }
    }

  if (try_locals)
    {
      regno = current_frame_info.n_local_regs;
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      if (regno < (80 - frame_pointer_needed))
	{
	  current_frame_info.n_local_regs = regno + 1;
	  return LOC_REG (0) + regno;
	}
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}

/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

static int last_scratch_gr_reg;

static int
next_scratch_gr_reg ()
{
  int i, regno;

  for (i = 0; i < 32; ++i)
    {
      regno = (last_scratch_gr_reg + i + 1) & 31;
      if (call_used_regs[regno]
	  && ! fixed_regs[regno]
	  && ! global_regs[regno]
	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
	{
	  last_scratch_gr_reg = regno;
	  return regno;
	}
    }

  /* There must be _something_ available.  */
  abort ();
}

/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */

static void
mark_reg_gr_used_mask (reg, data)
     rtx reg;
     void *data ATTRIBUTE_UNUSED;
{
  unsigned int regno = REGNO (reg);
  if (regno < 32)
    {
      unsigned int i, n = HARD_REGNO_NREGS (regno, GET_MODE (reg));
      for (i = 0; i < n; ++i)
	current_frame_info.gr_used_mask |= 1 << (regno + i);
    }
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

static void
ia64_compute_frame_size (size)
     HOST_WIDE_INT size;
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int i;

  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (regs_ever_live[regno])
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (regs_ever_live[regno])
      break;
  i = regno - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (current_function_profile)
    i = MAX (i, 1);
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.reg_fp = find_gr_spill (1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.reg_fp == 0)
	{
	  current_frame_info.reg_fp = LOC_REG (79);
	  current_frame_info.n_local_regs++;
	}
    }

  if (! current_function_is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.reg_save_b0 = find_gr_spill (1);
      if (current_frame_info.reg_save_b0 == 0)
	{
	  spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
      if (current_frame_info.reg_save_ar_pfs == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.reg_save_gp
	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
      if (current_frame_info.reg_save_gp == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      if (regs_ever_live[BR_REG (0)] && ! call_used_regs[BR_REG (0)])
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  spill_size += 8;
	  n_spilled += 1;
	}

      if (regs_ever_live[AR_PFS_REGNUM])
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
	  if (current_frame_info.reg_save_ar_pfs == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order.  However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.  */
  if (current_frame_info.reg_fp != 0
      && current_frame_info.reg_save_b0 == current_frame_info.reg_fp + 1
      && current_frame_info.reg_save_ar_pfs == current_frame_info.reg_fp + 2)
    {
      current_frame_info.reg_save_b0 = current_frame_info.reg_fp;
      current_frame_info.reg_save_ar_pfs = current_frame_info.reg_fp + 1;
      current_frame_info.reg_fp = current_frame_info.reg_fp + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.reg_save_pr = find_gr_spill (1);
      if (current_frame_info.reg_save_pr == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	regs_ever_live[regno] = 1;
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || regs_ever_live[AR_UNAT_REGNUM])
    {
      regs_ever_live[AR_UNAT_REGNUM] = 1;
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.reg_save_ar_unat = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_unat == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.reg_save_ar_lc = find_gr_spill (spill_size == 0);
      if (current_frame_info.reg_save_ar_lc == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size);
  else
    pretend_args_size = current_function_pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ current_function_outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  */
  if (current_function_is_leaf)
    total_size = MAX (0, total_size - 16);

  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.n_spilled = n_spilled;
  current_frame_info.initialized = reload_completed;
}
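
/* The size computation above boils down to

	total_size = IA64_STACK_ALIGN (spill_size + extra_spill_size
				       + locals + pretend_args_size
				       + outgoing_args_size)

   minus the caller-provided 16-byte scratch area (clamped at zero) for
   a leaf function, which has no callee to pass a scratch area to.  */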

/* Compute the initial difference between the specified pair of registers.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (from, to)
     int from, to;
{
  HOST_WIDE_INT offset;

  ia64_compute_frame_size (get_frame_size ());
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      if (to == HARD_FRAME_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = -current_frame_info.total_size;
	  else
	    offset = -(current_frame_info.total_size
		       - current_function_outgoing_args_size - 16);
	}
      else if (to == STACK_POINTER_REGNUM)
	{
	  if (current_function_is_leaf)
	    offset = 0;
	  else
	    offset = 16 + current_function_outgoing_args_size;
	}
      else
	abort ();
      break;

    case ARG_POINTER_REGNUM:
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	offset = 16 - current_function_pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	offset = (current_frame_info.total_size
		  + 16 - current_function_pretend_args_size);
      else
	abort ();
      break;

    default:
      abort ();
    }

  return offset;
}

/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.
   The following data structure tracks the state of the two iterators
   while insns are being emitted.  */

struct spill_fill_data
{
  rtx init_after;	/* point at which to emit initializations */
  rtx init_reg[2];	/* initial base register */
  rtx iter_reg[2];	/* the iterator registers */
  rtx *prev_addr[2];	/* address of last memory use */
  rtx prev_insn[2];	/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;		/* number of iterators in use */
  int next_iter;	/* next iterator to use */
  unsigned int save_gr_used_mask;
};

static struct spill_fill_data spill_fill_data;
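
/* As a sketch (illustrative, not literal compiler output), with two
   iterators the prologue spills pair up roughly like

	mov  r2 = r12
	adds r3 = 8, r12
	;;
	st8.spill [r2] = r4, 16
	st8.spill [r3] = r5, 16
	;;

   so each insn group issues two memory references, with the
   POST_MODIFY fix-ups below keeping each iterator at its next slot.  */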

static void
setup_spill_pointers (n_spills, init_reg, cfa_off)
     int n_spills;
     rtx init_reg;
     HOST_WIDE_INT cfa_off;
{
  int i;

  spill_fill_data.init_after = get_last_insn ();
  spill_fill_data.init_reg[0] = init_reg;
  spill_fill_data.init_reg[1] = init_reg;
  spill_fill_data.prev_addr[0] = NULL;
  spill_fill_data.prev_addr[1] = NULL;
  spill_fill_data.prev_insn[0] = NULL;
  spill_fill_data.prev_insn[1] = NULL;
  spill_fill_data.prev_off[0] = cfa_off;
  spill_fill_data.prev_off[1] = cfa_off;
  spill_fill_data.next_iter = 0;
  spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;

  spill_fill_data.n_iter = 1 + (n_spills > 2);
  for (i = 0; i < spill_fill_data.n_iter; ++i)
    {
      int regno = next_scratch_gr_reg ();
      spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
      current_frame_info.gr_used_mask |= 1 << regno;
    }
}

static void
finish_spill_pointers ()
{
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}

static rtx
spill_restore_mem (reg, cfa_off)
     rtx reg;
     HOST_WIDE_INT cfa_off;
{
  int iter = spill_fill_data.next_iter;
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      if (CONST_OK_FOR_N (disp))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  REG_NOTES (spill_fill_data.prev_insn[iter])
	    = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter],
				 REG_NOTES (spill_fill_data.prev_insn[iter]));
	}
      else
	{
	  /* ??? Could use register post_modify for loads.  */
	  if (! CONST_OK_FOR_I (disp))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
*/ 1947 if (spilled_fr_p) 1948 pretend_args_size = IA64_STACK_ALIGN (current_function_pretend_args_size); 1949 else 1950 pretend_args_size = current_function_pretend_args_size; 1951 1952 total_size = (spill_size + extra_spill_size + size + pretend_args_size 1953 + current_function_outgoing_args_size); 1954 total_size = IA64_STACK_ALIGN (total_size); 1955 1956 /* We always use the 16-byte scratch area provided by the caller, but 1957 if we are a leaf function, there's no one to which we need to provide 1958 a scratch area. */ 1959 if (current_function_is_leaf) 1960 total_size = MAX (0, total_size - 16); 1961 1962 current_frame_info.total_size = total_size; 1963 current_frame_info.spill_cfa_off = pretend_args_size - 16; 1964 current_frame_info.spill_size = spill_size; 1965 current_frame_info.extra_spill_size = extra_spill_size; 1966 COPY_HARD_REG_SET (current_frame_info.mask, mask); 1967 current_frame_info.n_spilled = n_spilled; 1968 current_frame_info.initialized = reload_completed; 1969} 1970 1971/* Compute the initial difference between the specified pair of registers. */ 1972 1973HOST_WIDE_INT 1974ia64_initial_elimination_offset (from, to) 1975 int from, to; 1976{ 1977 HOST_WIDE_INT offset; 1978 1979 ia64_compute_frame_size (get_frame_size ()); 1980 switch (from) 1981 { 1982 case FRAME_POINTER_REGNUM: 1983 if (to == HARD_FRAME_POINTER_REGNUM) 1984 { 1985 if (current_function_is_leaf) 1986 offset = -current_frame_info.total_size; 1987 else 1988 offset = -(current_frame_info.total_size 1989 - current_function_outgoing_args_size - 16); 1990 } 1991 else if (to == STACK_POINTER_REGNUM) 1992 { 1993 if (current_function_is_leaf) 1994 offset = 0; 1995 else 1996 offset = 16 + current_function_outgoing_args_size; 1997 } 1998 else 1999 abort (); 2000 break; 2001 2002 case ARG_POINTER_REGNUM: 2003 /* Arguments start above the 16 byte save area, unless stdarg 2004 in which case we store through the 16 byte save area. */ 2005 if (to == HARD_FRAME_POINTER_REGNUM) 2006 offset = 16 - current_function_pretend_args_size; 2007 else if (to == STACK_POINTER_REGNUM) 2008 offset = (current_frame_info.total_size 2009 + 16 - current_function_pretend_args_size); 2010 else 2011 abort (); 2012 break; 2013 2014 default: 2015 abort (); 2016 } 2017 2018 return offset; 2019} 2020 2021/* If there are more than a trivial number of register spills, we use 2022 two interleaved iterators so that we can get two memory references 2023 per insn group. 2024 2025 In order to simplify things in the prologue and epilogue expanders, 2026 we use helper functions to fix up the memory references after the 2027 fact with the appropriate offsets to a POST_MODIFY memory mode. 2028 The following data structure tracks the state of the two iterators 2029 while insns are being emitted. 
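As a sketch of the code shape this is after (register numbers invented, not taken from any real dump), the two iterators give consecutive spills independent address registers so that two stores can share an insn group:

      st8 [r2] = r16, 16    // iterator 0, post-modify advances r2
      st8 [r3] = r17, 16    // iterator 1, no dependence on r2
      st8 [r2] = r18, 16    // iterator 0 again

   With one iterator, each store would have to wait on the previous
   post-increment of the same base register.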
*/ 2030 2031struct spill_fill_data 2032{ 2033 rtx init_after; /* point at which to emit initializations */ 2034 rtx init_reg[2]; /* initial base register */ 2035 rtx iter_reg[2]; /* the iterator registers */ 2036 rtx *prev_addr[2]; /* address of last memory use */ 2037 rtx prev_insn[2]; /* the insn corresponding to prev_addr */ 2038 HOST_WIDE_INT prev_off[2]; /* last offset */ 2039 int n_iter; /* number of iterators in use */ 2040 int next_iter; /* next iterator to use */ 2041 unsigned int save_gr_used_mask; 2042}; 2043 2044static struct spill_fill_data spill_fill_data; 2045 2046static void 2047setup_spill_pointers (n_spills, init_reg, cfa_off) 2048 int n_spills; 2049 rtx init_reg; 2050 HOST_WIDE_INT cfa_off; 2051{ 2052 int i; 2053 2054 spill_fill_data.init_after = get_last_insn (); 2055 spill_fill_data.init_reg[0] = init_reg; 2056 spill_fill_data.init_reg[1] = init_reg; 2057 spill_fill_data.prev_addr[0] = NULL; 2058 spill_fill_data.prev_addr[1] = NULL; 2059 spill_fill_data.prev_insn[0] = NULL; 2060 spill_fill_data.prev_insn[1] = NULL; 2061 spill_fill_data.prev_off[0] = cfa_off; 2062 spill_fill_data.prev_off[1] = cfa_off; 2063 spill_fill_data.next_iter = 0; 2064 spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask; 2065 2066 spill_fill_data.n_iter = 1 + (n_spills > 2); 2067 for (i = 0; i < spill_fill_data.n_iter; ++i) 2068 { 2069 int regno = next_scratch_gr_reg (); 2070 spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno); 2071 current_frame_info.gr_used_mask |= 1 << regno; 2072 } 2073} 2074 2075static void 2076finish_spill_pointers () 2077{ 2078 current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask; 2079} 2080 2081static rtx 2082spill_restore_mem (reg, cfa_off) 2083 rtx reg; 2084 HOST_WIDE_INT cfa_off; 2085{ 2086 int iter = spill_fill_data.next_iter; 2087 HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off; 2088 rtx disp_rtx = GEN_INT (disp); 2089 rtx mem; 2090 2091 if (spill_fill_data.prev_addr[iter]) 2092 { 2093 if (CONST_OK_FOR_N (disp)) 2094 { 2095 *spill_fill_data.prev_addr[iter] 2096 = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter], 2097 gen_rtx_PLUS (DImode, 2098 spill_fill_data.iter_reg[iter], 2099 disp_rtx)); 2100 REG_NOTES (spill_fill_data.prev_insn[iter]) 2101 = gen_rtx_EXPR_LIST (REG_INC, spill_fill_data.iter_reg[iter], 2102 REG_NOTES (spill_fill_data.prev_insn[iter])); 2103 } 2104 else 2105 { 2106 /* ??? Could use register post_modify for loads. */ 2107 if (! CONST_OK_FOR_I (disp)) 2108 { 2109 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2110 emit_move_insn (tmp, disp_rtx); 2111 disp_rtx = tmp; 2112 } 2113 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2114 spill_fill_data.iter_reg[iter], disp_rtx)); 2115 } 2116 } 2117 /* Micro-optimization: if we've created a frame pointer, it's at 2118 CFA 0, which may allow the real iterator to be initialized lower, 2119 slightly increasing parallelism. Also, if there are few saves 2120 it may eliminate the iterator entirely. */ 2121 else if (disp == 0 2122 && spill_fill_data.init_reg[iter] == stack_pointer_rtx 2123 && frame_pointer_needed) 2124 { 2125 mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx); 2126 set_mem_alias_set (mem, get_varargs_alias_set ()); 2127 return mem; 2128 } 2129 else 2130 { 2131 rtx seq, insn; 2132 2133 if (disp == 0) 2134 seq = gen_movdi (spill_fill_data.iter_reg[iter], 2135 spill_fill_data.init_reg[iter]); 2136 else 2137 { 2138 start_sequence (); 2139 2140 if (! 
CONST_OK_FOR_I (disp)) 2141 { 2142 rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ()); 2143 emit_move_insn (tmp, disp_rtx); 2144 disp_rtx = tmp; 2145 } 2146 2147 emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter], 2148 spill_fill_data.init_reg[iter], 2149 disp_rtx)); 2150 2151 seq = get_insns (); 2152 end_sequence (); 2153 } 2154 2155 /* Careful for being the first insn in a sequence. */ 2156 if (spill_fill_data.init_after) 2157 insn = emit_insn_after (seq, spill_fill_data.init_after); 2158 else 2159 { 2160 rtx first = get_insns (); 2161 if (first) 2162 insn = emit_insn_before (seq, first); 2163 else 2164 insn = emit_insn (seq); 2165 } 2166 spill_fill_data.init_after = insn; 2167 2168 /* If DISP is 0, we may or may not have a further adjustment 2169 afterward. If we do, then the load/store insn may be modified 2170 to be a post-modify. If we don't, then this copy may be 2171 eliminated by copyprop_hardreg_forward, which makes this 2172 insn garbage, which runs afoul of the sanity check in 2173 propagate_one_insn. So mark this insn as legal to delete. */ 2174 if (disp == 0) 2175 REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, 2176 REG_NOTES (insn)); 2177 } 2178 2179 mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]); 2180 2181 /* ??? Not all of the spills are for varargs, but some of them are. 2182 The rest of the spills belong in an alias set of their own. But 2183 it doesn't actually hurt to include them here. */ 2184 set_mem_alias_set (mem, get_varargs_alias_set ()); 2185 2186 spill_fill_data.prev_addr[iter] = &XEXP (mem, 0); 2187 spill_fill_data.prev_off[iter] = cfa_off; 2188 2189 if (++iter >= spill_fill_data.n_iter) 2190 iter = 0; 2191 spill_fill_data.next_iter = iter; 2192 2193 return mem; 2194} 2195 2196static void 2197do_spill (move_fn, reg, cfa_off, frame_reg) 2198 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2199 rtx reg, frame_reg; 2200 HOST_WIDE_INT cfa_off; 2201{ 2202 int iter = spill_fill_data.next_iter; 2203 rtx mem, insn; 2204 2205 mem = spill_restore_mem (reg, cfa_off); 2206 insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))); 2207 spill_fill_data.prev_insn[iter] = insn; 2208 2209 if (frame_reg) 2210 { 2211 rtx base; 2212 HOST_WIDE_INT off; 2213 2214 RTX_FRAME_RELATED_P (insn) = 1; 2215 2216 /* Don't even pretend that the unwind code can intuit its way 2217 through a pair of interleaved post_modify iterators. Just 2218 provide the correct answer. */ 2219 2220 if (frame_pointer_needed) 2221 { 2222 base = hard_frame_pointer_rtx; 2223 off = - cfa_off; 2224 } 2225 else 2226 { 2227 base = stack_pointer_rtx; 2228 off = current_frame_info.total_size - cfa_off; 2229 } 2230 2231 REG_NOTES (insn) 2232 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2233 gen_rtx_SET (VOIDmode, 2234 gen_rtx_MEM (GET_MODE (reg), 2235 plus_constant (base, off)), 2236 frame_reg), 2237 REG_NOTES (insn)); 2238 } 2239} 2240 2241static void 2242do_restore (move_fn, reg, cfa_off) 2243 rtx (*move_fn) PARAMS ((rtx, rtx, rtx)); 2244 rtx reg; 2245 HOST_WIDE_INT cfa_off; 2246{ 2247 int iter = spill_fill_data.next_iter; 2248 rtx insn; 2249 2250 insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off), 2251 GEN_INT (cfa_off))); 2252 spill_fill_data.prev_insn[iter] = insn; 2253} 2254 2255/* Wrapper functions that discard the CONST_INT spill offset. These 2256 exist so that we can give gr_spill/gr_fill the offset they need and 2257 use a consistent function interface.
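The uniform interface matters only at the single call sites above, emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off))) and its restore twin: gen_gr_spill and gen_gr_fill really use the third operand (the offset travels with the pattern, presumably for unwind bookkeeping), while the three wrappers below simply drop it.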
*/ 2258 2259static rtx 2260gen_movdi_x (dest, src, offset) 2261 rtx dest, src; 2262 rtx offset ATTRIBUTE_UNUSED; 2263{ 2264 return gen_movdi (dest, src); 2265} 2266 2267static rtx 2268gen_fr_spill_x (dest, src, offset) 2269 rtx dest, src; 2270 rtx offset ATTRIBUTE_UNUSED; 2271{ 2272 return gen_fr_spill (dest, src); 2273} 2274 2275static rtx 2276gen_fr_restore_x (dest, src, offset) 2277 rtx dest, src; 2278 rtx offset ATTRIBUTE_UNUSED; 2279{ 2280 return gen_fr_restore (dest, src); 2281} 2282 2283/* Called after register allocation to add any instructions needed for the 2284 prologue. Using a prologue insn is favored compared to putting all of the 2285 instructions in output_function_prologue(), since it allows the scheduler 2286 to intermix instructions with the saves of the caller saved registers. In 2287 some cases, it might be necessary to emit a barrier instruction as the last 2288 insn to prevent such scheduling. 2289 2290 Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 2291 so that the debug info generation code can handle them properly. 2292 2293 The register save area is laid out like so: 2294 cfa+16 2295 [ varargs spill area ] 2296 [ fr register spill area ] 2297 [ br register spill area ] 2298 [ ar register spill area ] 2299 [ pr register spill area ] 2300 [ gr register spill area ] */ 2301 2302/* ??? Get inefficient code when the frame size is larger than can fit in an 2303 adds instruction. */ 2304 2305void 2306ia64_expand_prologue () 2307{ 2308 rtx insn, ar_pfs_save_reg, ar_unat_save_reg; 2309 int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs; 2310 rtx reg, alt_reg; 2311 2312 ia64_compute_frame_size (get_frame_size ()); 2313 last_scratch_gr_reg = 15; 2314 2315 /* If there is no epilogue, then we don't need some prologue insns. 2316 We need to avoid emitting the dead prologue insns, because flow 2317 will complain about them. */ 2318 if (optimize) 2319 { 2320 edge e; 2321 2322 for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) 2323 if ((e->flags & EDGE_FAKE) == 0 2324 && (e->flags & EDGE_FALLTHRU) != 0) 2325 break; 2326 epilogue_p = (e != NULL); 2327 } 2328 else 2329 epilogue_p = 1; 2330 2331 /* Set the local, input, and output register names. We need to do this 2332 for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in 2333 half. If we use in/loc/out register names, then we get assembler errors 2334 in crtn.S because there is no alloc insn or regstk directive in there. */ 2335 if (! TARGET_REG_NAMES) 2336 { 2337 int inputs = current_frame_info.n_input_regs; 2338 int locals = current_frame_info.n_local_regs; 2339 int outputs = current_frame_info.n_output_regs; 2340 2341 for (i = 0; i < inputs; i++) 2342 reg_names[IN_REG (i)] = ia64_reg_numbers[i]; 2343 for (i = 0; i < locals; i++) 2344 reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; 2345 for (i = 0; i < outputs; i++) 2346 reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; 2347 } 2348 2349 /* Set the frame pointer register name. The regnum is logically loc79, 2350 but of course we'll not have allocated that many locals. Rather than 2351 worrying about renumbering the existing rtxs, we adjust the name. */ 2352 /* ??? This code means that we can never use one local register when 2353 there is a frame pointer. loc79 gets wasted in this case, as it is 2354 renamed to a register that will never be used. See also the try_locals 2355 code in find_gr_spill.
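For example (register choice invented): had find_gr_spill returned loc2 for the frame pointer, the swap below makes the rtx for HARD_FRAME_POINTER_REGNUM print under loc2's name while loc2's regno prints under the old loc79 name, and ia64_dbx_register_number later performs the matching renumbering so the debug info agrees with the assembly.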
*/ 2356 if (current_frame_info.reg_fp) 2357 { 2358 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 2359 reg_names[HARD_FRAME_POINTER_REGNUM] 2360 = reg_names[current_frame_info.reg_fp]; 2361 reg_names[current_frame_info.reg_fp] = tmp; 2362 } 2363 2364 /* We don't need an alloc instruction if we've used no outputs or locals. */ 2365 if (current_frame_info.n_local_regs == 0 2366 && current_frame_info.n_output_regs == 0 2367 && current_frame_info.n_input_regs <= current_function_args_info.int_regs 2368 && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 2369 { 2370 /* If there is no alloc, but there are input registers used, then we 2371 need a .regstk directive. */ 2372 current_frame_info.need_regstk = (TARGET_REG_NAMES != 0); 2373 ar_pfs_save_reg = NULL_RTX; 2374 } 2375 else 2376 { 2377 current_frame_info.need_regstk = 0; 2378 2379 if (current_frame_info.reg_save_ar_pfs) 2380 regno = current_frame_info.reg_save_ar_pfs; 2381 else 2382 regno = next_scratch_gr_reg (); 2383 ar_pfs_save_reg = gen_rtx_REG (DImode, regno); 2384 2385 insn = emit_insn (gen_alloc (ar_pfs_save_reg, 2386 GEN_INT (current_frame_info.n_input_regs), 2387 GEN_INT (current_frame_info.n_local_regs), 2388 GEN_INT (current_frame_info.n_output_regs), 2389 GEN_INT (current_frame_info.n_rotate_regs))); 2390 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_pfs != 0); 2391 } 2392 2393 /* Set up frame pointer, stack pointer, and spill iterators. */ 2394 2395 n_varargs = cfun->machine->n_varargs; 2396 setup_spill_pointers (current_frame_info.n_spilled + n_varargs, 2397 stack_pointer_rtx, 0); 2398 2399 if (frame_pointer_needed) 2400 { 2401 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 2402 RTX_FRAME_RELATED_P (insn) = 1; 2403 } 2404 2405 if (current_frame_info.total_size != 0) 2406 { 2407 rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size); 2408 rtx offset; 2409 2410 if (CONST_OK_FOR_I (- current_frame_info.total_size)) 2411 offset = frame_size_rtx; 2412 else 2413 { 2414 regno = next_scratch_gr_reg (); 2415 offset = gen_rtx_REG (DImode, regno); 2416 emit_move_insn (offset, frame_size_rtx); 2417 } 2418 2419 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, 2420 stack_pointer_rtx, offset)); 2421 2422 if (! frame_pointer_needed) 2423 { 2424 RTX_FRAME_RELATED_P (insn) = 1; 2425 if (GET_CODE (offset) != CONST_INT) 2426 { 2427 REG_NOTES (insn) 2428 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2429 gen_rtx_SET (VOIDmode, 2430 stack_pointer_rtx, 2431 gen_rtx_PLUS (DImode, 2432 stack_pointer_rtx, 2433 frame_size_rtx)), 2434 REG_NOTES (insn)); 2435 } 2436 } 2437 2438 /* ??? At this point we must generate a magic insn that appears to 2439 modify the stack pointer, the frame pointer, and all spill 2440 iterators. This would allow the most scheduling freedom. For 2441 now, just hard stop. */ 2442 emit_insn (gen_blockage ()); 2443 } 2444 2445 /* Must copy out ar.unat before doing any integer spills. 
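The ordering is forced by the ISA: st8.spill copies the source's NaT bit into the ar.unat bit selected by bits 8:3 of the store address, so the value ar.unat held on entry has to be rescued into a GR before the first such store. Roughly (scratch register invented):

      mov r19 = ar.unat      // caller's UNAT out of harm's way
      st8.spill [r2] = r4    // now free to rewrite UNAT bits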
*/ 2446 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2447 { 2448 if (current_frame_info.reg_save_ar_unat) 2449 ar_unat_save_reg 2450 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2451 else 2452 { 2453 alt_regno = next_scratch_gr_reg (); 2454 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2455 current_frame_info.gr_used_mask |= 1 << alt_regno; 2456 } 2457 2458 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2459 insn = emit_move_insn (ar_unat_save_reg, reg); 2460 RTX_FRAME_RELATED_P (insn) = (current_frame_info.reg_save_ar_unat != 0); 2461 2462 /* Even if we're not going to generate an epilogue, we still 2463 need to save the register so that EH works. */ 2464 if (! epilogue_p && current_frame_info.reg_save_ar_unat) 2465 emit_insn (gen_prologue_use (ar_unat_save_reg)); 2466 } 2467 else 2468 ar_unat_save_reg = NULL_RTX; 2469 2470 /* Spill all varargs registers. Do this before spilling any GR registers, 2471 since we want the UNAT bits for the GR registers to override the UNAT 2472 bits from varargs, which we don't care about. */ 2473 2474 cfa_off = -16; 2475 for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno) 2476 { 2477 reg = gen_rtx_REG (DImode, regno); 2478 do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX); 2479 } 2480 2481 /* Locate the bottom of the register save area. */ 2482 cfa_off = (current_frame_info.spill_cfa_off 2483 + current_frame_info.spill_size 2484 + current_frame_info.extra_spill_size); 2485 2486 /* Save the predicate register block either in a register or in memory. */ 2487 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2488 { 2489 reg = gen_rtx_REG (DImode, PR_REG (0)); 2490 if (current_frame_info.reg_save_pr != 0) 2491 { 2492 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2493 insn = emit_move_insn (alt_reg, reg); 2494 2495 /* ??? Denote pr spill/fill by a DImode move that modifies all 2496 64 hard registers. */ 2497 RTX_FRAME_RELATED_P (insn) = 1; 2498 REG_NOTES (insn) 2499 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2500 gen_rtx_SET (VOIDmode, alt_reg, reg), 2501 REG_NOTES (insn)); 2502 2503 /* Even if we're not going to generate an epilogue, we still 2504 need to save the register so that EH works. */ 2505 if (! epilogue_p) 2506 emit_insn (gen_prologue_use (alt_reg)); 2507 } 2508 else 2509 { 2510 alt_regno = next_scratch_gr_reg (); 2511 alt_reg = gen_rtx_REG (DImode, alt_regno); 2512 insn = emit_move_insn (alt_reg, reg); 2513 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2514 cfa_off -= 8; 2515 } 2516 } 2517 2518 /* Handle AR regs in numerical order. All of them get special handling. */ 2519 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM) 2520 && current_frame_info.reg_save_ar_unat == 0) 2521 { 2522 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2523 do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg); 2524 cfa_off -= 8; 2525 } 2526 2527 /* The alloc insn already copied ar.pfs into a general register. The 2528 only thing we have to do now is copy that register to a stack slot 2529 if we'd not allocated a local register for the job. 
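For reference, the alloc emitted above has the shape alloc rN = ar.pfs, in, loc, out, rot, with rN being whichever save or scratch GR was chosen; ar.pfs is therefore already live in a GR, and when no long-term register was reserved for it, the st8 issued below just pushes that copy into its stack slot.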
*/ 2530 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM) 2531 && current_frame_info.reg_save_ar_pfs == 0) 2532 { 2533 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2534 do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg); 2535 cfa_off -= 8; 2536 } 2537 2538 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2539 { 2540 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2541 if (current_frame_info.reg_save_ar_lc != 0) 2542 { 2543 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2544 insn = emit_move_insn (alt_reg, reg); 2545 RTX_FRAME_RELATED_P (insn) = 1; 2546 2547 /* Even if we're not going to generate an epilogue, we still 2548 need to save the register so that EH works. */ 2549 if (! epilogue_p) 2550 emit_insn (gen_prologue_use (alt_reg)); 2551 } 2552 else 2553 { 2554 alt_regno = next_scratch_gr_reg (); 2555 alt_reg = gen_rtx_REG (DImode, alt_regno); 2556 emit_move_insn (alt_reg, reg); 2557 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2558 cfa_off -= 8; 2559 } 2560 } 2561 2562 if (current_frame_info.reg_save_gp) 2563 { 2564 insn = emit_move_insn (gen_rtx_REG (DImode, 2565 current_frame_info.reg_save_gp), 2566 pic_offset_table_rtx); 2567 /* We don't know for sure yet if this is actually needed, since 2568 we've not split the PIC call patterns. If all of the calls 2569 are indirect, and not followed by any uses of the gp, then 2570 this save is dead. Allow it to go away. */ 2571 REG_NOTES (insn) 2572 = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn)); 2573 } 2574 2575 /* We should now be at the base of the gr/br/fr spill area. */ 2576 if (cfa_off != (current_frame_info.spill_cfa_off 2577 + current_frame_info.spill_size)) 2578 abort (); 2579 2580 /* Spill all general registers. */ 2581 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 2582 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2583 { 2584 reg = gen_rtx_REG (DImode, regno); 2585 do_spill (gen_gr_spill, reg, cfa_off, reg); 2586 cfa_off -= 8; 2587 } 2588 2589 /* Handle BR0 specially -- it may be getting stored permanently in 2590 some GR register. */ 2591 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2592 { 2593 reg = gen_rtx_REG (DImode, BR_REG (0)); 2594 if (current_frame_info.reg_save_b0 != 0) 2595 { 2596 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2597 insn = emit_move_insn (alt_reg, reg); 2598 RTX_FRAME_RELATED_P (insn) = 1; 2599 2600 /* Even if we're not going to generate an epilogue, we still 2601 need to save the register so that EH works. */ 2602 if (! epilogue_p) 2603 emit_insn (gen_prologue_use (alt_reg)); 2604 } 2605 else 2606 { 2607 alt_regno = next_scratch_gr_reg (); 2608 alt_reg = gen_rtx_REG (DImode, alt_regno); 2609 emit_move_insn (alt_reg, reg); 2610 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2611 cfa_off -= 8; 2612 } 2613 } 2614 2615 /* Spill the rest of the BR registers. */ 2616 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2617 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2618 { 2619 alt_regno = next_scratch_gr_reg (); 2620 alt_reg = gen_rtx_REG (DImode, alt_regno); 2621 reg = gen_rtx_REG (DImode, regno); 2622 emit_move_insn (alt_reg, reg); 2623 do_spill (gen_movdi_x, alt_reg, cfa_off, reg); 2624 cfa_off -= 8; 2625 } 2626 2627 /* Align the frame and spill all FR registers. 
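Each FR slot is 16 bytes because stf.spill writes the full register image and requires a 16-byte-aligned address; all other slots are 8 bytes. The alignment works out from the earlier rounding: with no pretend arguments, spill_cfa_off is -16, so a run of n FR slots begins at cfa_off = -16 + 16n, a multiple of 16 regardless of how many 8-byte slots sit above it, which is exactly what the cfa_off & 15 check below insists on.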
*/ 2628 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2629 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2630 { 2631 if (cfa_off & 15) 2632 abort (); 2633 reg = gen_rtx_REG (TFmode, regno); 2634 do_spill (gen_fr_spill_x, reg, cfa_off, reg); 2635 cfa_off -= 16; 2636 } 2637 2638 if (cfa_off != current_frame_info.spill_cfa_off) 2639 abort (); 2640 2641 finish_spill_pointers (); 2642} 2643 2644/* Called after register allocation to add any instructions needed for the 2645 epilogue. Using an epilogue insn is favored compared to putting all of the 2646 instructions in output_function_epilogue(), since it allows the scheduler 2647 to intermix instructions with the saves of the caller saved registers. In 2648 some cases, it might be necessary to emit a barrier instruction as the last 2649 insn to prevent such scheduling. */ 2650 2651void 2652ia64_expand_epilogue (sibcall_p) 2653 int sibcall_p; 2654{ 2655 rtx insn, reg, alt_reg, ar_unat_save_reg; 2656 int regno, alt_regno, cfa_off; 2657 2658 ia64_compute_frame_size (get_frame_size ()); 2659 2660 /* If there is a frame pointer, then we use it instead of the stack 2661 pointer, so that the stack pointer does not need to be valid when 2662 the epilogue starts. See EXIT_IGNORE_STACK. */ 2663 if (frame_pointer_needed) 2664 setup_spill_pointers (current_frame_info.n_spilled, 2665 hard_frame_pointer_rtx, 0); 2666 else 2667 setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx, 2668 current_frame_info.total_size); 2669 2670 if (current_frame_info.total_size != 0) 2671 { 2672 /* ??? At this point we must generate a magic insn that appears to 2673 modify the spill iterators and the frame pointer. This would 2674 allow the most scheduling freedom. For now, just hard stop. */ 2675 emit_insn (gen_blockage ()); 2676 } 2677 2678 /* Locate the bottom of the register save area. */ 2679 cfa_off = (current_frame_info.spill_cfa_off 2680 + current_frame_info.spill_size 2681 + current_frame_info.extra_spill_size); 2682 2683 /* Restore the predicate registers. */ 2684 if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0))) 2685 { 2686 if (current_frame_info.reg_save_pr != 0) 2687 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_pr); 2688 else 2689 { 2690 alt_regno = next_scratch_gr_reg (); 2691 alt_reg = gen_rtx_REG (DImode, alt_regno); 2692 do_restore (gen_movdi_x, alt_reg, cfa_off); 2693 cfa_off -= 8; 2694 } 2695 reg = gen_rtx_REG (DImode, PR_REG (0)); 2696 emit_move_insn (reg, alt_reg); 2697 } 2698 2699 /* Restore the application registers. */ 2700 2701 /* Load the saved unat from the stack, but do not restore it until 2702 after the GRs have been restored.
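The ordering mirrors the prologue: ld8.fill sets the target GR's NaT bit from the current ar.unat bit addressed by the load, and those bits are the ones this frame's st8.spill's left behind. The caller's value, fetched from its save slot here, must therefore sit in a scratch GR until every fill is done. In outline (scratch register invented):

      ld8 r19 = [save_slot]   // caller's ar.unat into a scratch
      ld8.fill r4 = [r2]      // consumes the current UNAT bits
      mov ar.unat = r19       // only after the last fill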
*/ 2703 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2704 { 2705 if (current_frame_info.reg_save_ar_unat != 0) 2706 ar_unat_save_reg 2707 = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_unat); 2708 else 2709 { 2710 alt_regno = next_scratch_gr_reg (); 2711 ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno); 2712 current_frame_info.gr_used_mask |= 1 << alt_regno; 2713 do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off); 2714 cfa_off -= 8; 2715 } 2716 } 2717 else 2718 ar_unat_save_reg = NULL_RTX; 2719 2720 if (current_frame_info.reg_save_ar_pfs != 0) 2721 { 2722 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_pfs); 2723 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2724 emit_move_insn (reg, alt_reg); 2725 } 2726 else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)) 2727 { 2728 alt_regno = next_scratch_gr_reg (); 2729 alt_reg = gen_rtx_REG (DImode, alt_regno); 2730 do_restore (gen_movdi_x, alt_reg, cfa_off); 2731 cfa_off -= 8; 2732 reg = gen_rtx_REG (DImode, AR_PFS_REGNUM); 2733 emit_move_insn (reg, alt_reg); 2734 } 2735 2736 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM)) 2737 { 2738 if (current_frame_info.reg_save_ar_lc != 0) 2739 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_ar_lc); 2740 else 2741 { 2742 alt_regno = next_scratch_gr_reg (); 2743 alt_reg = gen_rtx_REG (DImode, alt_regno); 2744 do_restore (gen_movdi_x, alt_reg, cfa_off); 2745 cfa_off -= 8; 2746 } 2747 reg = gen_rtx_REG (DImode, AR_LC_REGNUM); 2748 emit_move_insn (reg, alt_reg); 2749 } 2750 2751 /* We should now be at the base of the gr/br/fr spill area. */ 2752 if (cfa_off != (current_frame_info.spill_cfa_off 2753 + current_frame_info.spill_size)) 2754 abort (); 2755 2756 /* The GP may be stored on the stack in the prologue, but it's 2757 never restored in the epilogue. Skip the stack slot. */ 2758 if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1))) 2759 cfa_off -= 8; 2760 2761 /* Restore all general registers. */ 2762 for (regno = GR_REG (2); regno <= GR_REG (31); ++regno) 2763 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2764 { 2765 reg = gen_rtx_REG (DImode, regno); 2766 do_restore (gen_gr_restore, reg, cfa_off); 2767 cfa_off -= 8; 2768 } 2769 2770 /* Restore the branch registers. Handle B0 specially, as it may 2771 have gotten stored in some GR register. */ 2772 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2773 { 2774 if (current_frame_info.reg_save_b0 != 0) 2775 alt_reg = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2776 else 2777 { 2778 alt_regno = next_scratch_gr_reg (); 2779 alt_reg = gen_rtx_REG (DImode, alt_regno); 2780 do_restore (gen_movdi_x, alt_reg, cfa_off); 2781 cfa_off -= 8; 2782 } 2783 reg = gen_rtx_REG (DImode, BR_REG (0)); 2784 emit_move_insn (reg, alt_reg); 2785 } 2786 2787 for (regno = BR_REG (1); regno <= BR_REG (7); ++regno) 2788 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2789 { 2790 alt_regno = next_scratch_gr_reg (); 2791 alt_reg = gen_rtx_REG (DImode, alt_regno); 2792 do_restore (gen_movdi_x, alt_reg, cfa_off); 2793 cfa_off -= 8; 2794 reg = gen_rtx_REG (DImode, regno); 2795 emit_move_insn (reg, alt_reg); 2796 } 2797 2798 /* Restore floating point registers. 
*/ 2799 for (regno = FR_REG (2); regno <= FR_REG (127); ++regno) 2800 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2801 { 2802 if (cfa_off & 15) 2803 abort (); 2804 reg = gen_rtx_REG (TFmode, regno); 2805 do_restore (gen_fr_restore_x, reg, cfa_off); 2806 cfa_off -= 16; 2807 } 2808 2809 /* Restore ar.unat for real. */ 2810 if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)) 2811 { 2812 reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM); 2813 emit_move_insn (reg, ar_unat_save_reg); 2814 } 2815 2816 if (cfa_off != current_frame_info.spill_cfa_off) 2817 abort (); 2818 2819 finish_spill_pointers (); 2820 2821 if (current_frame_info.total_size || cfun->machine->ia64_eh_epilogue_sp) 2822 { 2823 /* ??? At this point we must generate a magic insn that appears to 2824 modify the spill iterators, the stack pointer, and the frame 2825 pointer. This would allow the most scheduling freedom. For now, 2826 just hard stop. */ 2827 emit_insn (gen_blockage ()); 2828 } 2829 2830 if (cfun->machine->ia64_eh_epilogue_sp) 2831 emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp); 2832 else if (frame_pointer_needed) 2833 { 2834 insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); 2835 RTX_FRAME_RELATED_P (insn) = 1; 2836 } 2837 else if (current_frame_info.total_size) 2838 { 2839 rtx offset, frame_size_rtx; 2840 2841 frame_size_rtx = GEN_INT (current_frame_info.total_size); 2842 if (CONST_OK_FOR_I (current_frame_info.total_size)) 2843 offset = frame_size_rtx; 2844 else 2845 { 2846 regno = next_scratch_gr_reg (); 2847 offset = gen_rtx_REG (DImode, regno); 2848 emit_move_insn (offset, frame_size_rtx); 2849 } 2850 2851 insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, 2852 offset)); 2853 2854 RTX_FRAME_RELATED_P (insn) = 1; 2855 if (GET_CODE (offset) != CONST_INT) 2856 { 2857 REG_NOTES (insn) 2858 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 2859 gen_rtx_SET (VOIDmode, 2860 stack_pointer_rtx, 2861 gen_rtx_PLUS (DImode, 2862 stack_pointer_rtx, 2863 frame_size_rtx)), 2864 REG_NOTES (insn)); 2865 } 2866 } 2867 2868 if (cfun->machine->ia64_eh_epilogue_bsp) 2869 emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp)); 2870 2871 if (! sibcall_p) 2872 emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); 2873 else 2874 { 2875 int fp = GR_REG (2); 2876 /* We need a throw-away register here; r0 and r1 are reserved, so r2 is the 2877 first available call clobbered register. If there was a frame_pointer 2878 register, we may have swapped the names of r2 and HARD_FRAME_POINTER_REGNUM, 2879 so we have to make sure we're using the string "r2" when emitting 2880 the register name for the assembler. */ 2881 if (current_frame_info.reg_fp && current_frame_info.reg_fp == GR_REG (2)) 2882 fp = HARD_FRAME_POINTER_REGNUM; 2883 2884 /* We must emit an alloc to force the input registers to become output 2885 registers. Otherwise, if the callee tries to pass its parameters 2886 through to another call without an intervening alloc, then these 2887 values get lost. */ 2888 /* ??? We don't need to preserve all input registers. We only need to 2889 preserve those input registers used as arguments to the sibling call. 2890 It is unclear how to compute that number here.
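Concretely, for a function with two inputs the insn emitted below amounts to alloc r2 = ar.pfs, 0, 0, 2, 0 (or the renamed HARD_FRAME_POINTER_REGNUM per the r2 dance above): declaring zero inputs and locals re-describes the two former in registers as out0/out1, so the register stack engine hands them to the sibling callee as its inputs instead of discarding them.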
*/ 2891 if (current_frame_info.n_input_regs != 0) 2892 emit_insn (gen_alloc (gen_rtx_REG (DImode, fp), 2893 GEN_INT (0), GEN_INT (0), 2894 GEN_INT (current_frame_info.n_input_regs), 2895 GEN_INT (0))); 2896 } 2897} 2898 2899/* Return 1 if br.ret can do all the work required to return from a 2900 function. */ 2901 2902int 2903ia64_direct_return () 2904{ 2905 if (reload_completed && ! frame_pointer_needed) 2906 { 2907 ia64_compute_frame_size (get_frame_size ()); 2908 2909 return (current_frame_info.total_size == 0 2910 && current_frame_info.n_spilled == 0 2911 && current_frame_info.reg_save_b0 == 0 2912 && current_frame_info.reg_save_pr == 0 2913 && current_frame_info.reg_save_ar_pfs == 0 2914 && current_frame_info.reg_save_ar_unat == 0 2915 && current_frame_info.reg_save_ar_lc == 0); 2916 } 2917 return 0; 2918} 2919 2920/* Return the magic cookie that we use to hold the return address 2921 during early compilation. */ 2922 2923rtx 2924ia64_return_addr_rtx (count, frame) 2925 HOST_WIDE_INT count; 2926 rtx frame ATTRIBUTE_UNUSED; 2927{ 2928 if (count != 0) 2929 return NULL; 2930 return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR); 2931} 2932 2933/* Split this value after reload, now that we know where the return 2934 address is saved. */ 2935 2936void 2937ia64_split_return_addr_rtx (dest) 2938 rtx dest; 2939{ 2940 rtx src; 2941 2942 if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0))) 2943 { 2944 if (current_frame_info.reg_save_b0 != 0) 2945 src = gen_rtx_REG (DImode, current_frame_info.reg_save_b0); 2946 else 2947 { 2948 HOST_WIDE_INT off; 2949 unsigned int regno; 2950 2951 /* Compute offset from CFA for BR0. */ 2952 /* ??? Must be kept in sync with ia64_expand_prologue. */ 2953 off = (current_frame_info.spill_cfa_off 2954 + current_frame_info.spill_size); 2955 for (regno = GR_REG (1); regno <= GR_REG (31); ++regno) 2956 if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) 2957 off -= 8; 2958 2959 /* Convert CFA offset to a register based offset. */ 2960 if (frame_pointer_needed) 2961 src = hard_frame_pointer_rtx; 2962 else 2963 { 2964 src = stack_pointer_rtx; 2965 off += current_frame_info.total_size; 2966 } 2967 2968 /* Load address into scratch register. */ 2969 if (CONST_OK_FOR_I (off)) 2970 emit_insn (gen_adddi3 (dest, src, GEN_INT (off))); 2971 else 2972 { 2973 emit_move_insn (dest, GEN_INT (off)); 2974 emit_insn (gen_adddi3 (dest, src, dest)); 2975 } 2976 2977 src = gen_rtx_MEM (Pmode, dest); 2978 } 2979 } 2980 else 2981 src = gen_rtx_REG (DImode, BR_REG (0)); 2982 2983 emit_move_insn (dest, src); 2984} 2985 2986int 2987ia64_hard_regno_rename_ok (from, to) 2988 int from; 2989 int to; 2990{ 2991 /* Don't clobber any of the registers we reserved for the prologue. */ 2992 if (to == current_frame_info.reg_fp 2993 || to == current_frame_info.reg_save_b0 2994 || to == current_frame_info.reg_save_pr 2995 || to == current_frame_info.reg_save_ar_pfs 2996 || to == current_frame_info.reg_save_ar_unat 2997 || to == current_frame_info.reg_save_ar_lc) 2998 return 0; 2999 3000 if (from == current_frame_info.reg_fp 3001 || from == current_frame_info.reg_save_b0 3002 || from == current_frame_info.reg_save_pr 3003 || from == current_frame_info.reg_save_ar_pfs 3004 || from == current_frame_info.reg_save_ar_unat 3005 || from == current_frame_info.reg_save_ar_lc) 3006 return 0; 3007 3008 /* Don't use output registers outside the register frame. 
*/ 3009 if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs)) 3010 return 0; 3011 3012 /* Retain even/oddness on predicate register pairs. */ 3013 if (PR_REGNO_P (from) && PR_REGNO_P (to)) 3014 return (from & 1) == (to & 1); 3015 3016 return 1; 3017} 3018 3019/* Target hook for assembling integer objects. Handle word-sized 3020 aligned objects and detect the cases when @fptr is needed. */ 3021 3022static bool 3023ia64_assemble_integer (x, size, aligned_p) 3024 rtx x; 3025 unsigned int size; 3026 int aligned_p; 3027{ 3028 if (size == (TARGET_ILP32 ? 4 : 8) 3029 && aligned_p 3030 && !(TARGET_NO_PIC || TARGET_AUTO_PIC) 3031 && GET_CODE (x) == SYMBOL_REF 3032 && SYMBOL_REF_FLAG (x)) 3033 { 3034 if (TARGET_ILP32) 3035 fputs ("\tdata4\t@fptr(", asm_out_file); 3036 else 3037 fputs ("\tdata8\t@fptr(", asm_out_file); 3038 output_addr_const (asm_out_file, x); 3039 fputs (")\n", asm_out_file); 3040 return true; 3041 } 3042 return default_assemble_integer (x, size, aligned_p); 3043} 3044 3045/* Emit the function prologue. */ 3046 3047static void 3048ia64_output_function_prologue (file, size) 3049 FILE *file; 3050 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3051{ 3052 int mask, grsave, grsave_prev; 3053 3054 if (current_frame_info.need_regstk) 3055 fprintf (file, "\t.regstk %d, %d, %d, %d\n", 3056 current_frame_info.n_input_regs, 3057 current_frame_info.n_local_regs, 3058 current_frame_info.n_output_regs, 3059 current_frame_info.n_rotate_regs); 3060 3061 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3062 return; 3063 3064 /* Emit the .prologue directive. */ 3065 3066 mask = 0; 3067 grsave = grsave_prev = 0; 3068 if (current_frame_info.reg_save_b0 != 0) 3069 { 3070 mask |= 8; 3071 grsave = grsave_prev = current_frame_info.reg_save_b0; 3072 } 3073 if (current_frame_info.reg_save_ar_pfs != 0 3074 && (grsave_prev == 0 3075 || current_frame_info.reg_save_ar_pfs == grsave_prev + 1)) 3076 { 3077 mask |= 4; 3078 if (grsave_prev == 0) 3079 grsave = current_frame_info.reg_save_ar_pfs; 3080 grsave_prev = current_frame_info.reg_save_ar_pfs; 3081 } 3082 if (current_frame_info.reg_fp != 0 3083 && (grsave_prev == 0 3084 || current_frame_info.reg_fp == grsave_prev + 1)) 3085 { 3086 mask |= 2; 3087 if (grsave_prev == 0) 3088 grsave = HARD_FRAME_POINTER_REGNUM; 3089 grsave_prev = current_frame_info.reg_fp; 3090 } 3091 if (current_frame_info.reg_save_pr != 0 3092 && (grsave_prev == 0 3093 || current_frame_info.reg_save_pr == grsave_prev + 1)) 3094 { 3095 mask |= 1; 3096 if (grsave_prev == 0) 3097 grsave = current_frame_info.reg_save_pr; 3098 } 3099 3100 if (mask) 3101 fprintf (file, "\t.prologue %d, %d\n", mask, 3102 ia64_dbx_register_number (grsave)); 3103 else 3104 fputs ("\t.prologue\n", file); 3105 3106 /* Emit a .spill directive, if necessary, to relocate the base of 3107 the register spill area. */ 3108 if (current_frame_info.spill_cfa_off != -16) 3109 fprintf (file, "\t.spill %ld\n", 3110 (long) (current_frame_info.spill_cfa_off 3111 + current_frame_info.spill_size)); 3112} 3113 3114/* Emit the .body directive at the scheduled end of the prologue. */ 3115 3116static void 3117ia64_output_function_end_prologue (file) 3118 FILE *file; 3119{ 3120 if (!flag_unwind_tables && (!flag_exceptions || USING_SJLJ_EXCEPTIONS)) 3121 return; 3122 3123 fputs ("\t.body\n", file); 3124} 3125 3126/* Emit the function epilogue. 
*/ 3127 3128static void 3129ia64_output_function_epilogue (file, size) 3130 FILE *file ATTRIBUTE_UNUSED; 3131 HOST_WIDE_INT size ATTRIBUTE_UNUSED; 3132{ 3133 int i; 3134 3135 if (current_frame_info.reg_fp) 3136 { 3137 const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM]; 3138 reg_names[HARD_FRAME_POINTER_REGNUM] 3139 = reg_names[current_frame_info.reg_fp]; 3140 reg_names[current_frame_info.reg_fp] = tmp; 3141 } 3142 if (! TARGET_REG_NAMES) 3143 { 3144 for (i = 0; i < current_frame_info.n_input_regs; i++) 3145 reg_names[IN_REG (i)] = ia64_input_reg_names[i]; 3146 for (i = 0; i < current_frame_info.n_local_regs; i++) 3147 reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; 3148 for (i = 0; i < current_frame_info.n_output_regs; i++) 3149 reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; 3150 } 3151 3152 current_frame_info.initialized = 0; 3153} 3154 3155int 3156ia64_dbx_register_number (regno) 3157 int regno; 3158{ 3159 /* In ia64_expand_prologue we quite literally renamed the frame pointer 3160 from its home at loc79 to something inside the register frame. We 3161 must perform the same renumbering here for the debug info. */ 3162 if (current_frame_info.reg_fp) 3163 { 3164 if (regno == HARD_FRAME_POINTER_REGNUM) 3165 regno = current_frame_info.reg_fp; 3166 else if (regno == current_frame_info.reg_fp) 3167 regno = HARD_FRAME_POINTER_REGNUM; 3168 } 3169 3170 if (IN_REGNO_P (regno)) 3171 return 32 + regno - IN_REG (0); 3172 else if (LOC_REGNO_P (regno)) 3173 return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0); 3174 else if (OUT_REGNO_P (regno)) 3175 return (32 + current_frame_info.n_input_regs 3176 + current_frame_info.n_local_regs + regno - OUT_REG (0)); 3177 else 3178 return regno; 3179} 3180 3181void 3182ia64_initialize_trampoline (addr, fnaddr, static_chain) 3183 rtx addr, fnaddr, static_chain; 3184{ 3185 rtx addr_reg, eight = GEN_INT (8); 3186 3187 /* Load up our iterator. */ 3188 addr_reg = gen_reg_rtx (Pmode); 3189 emit_move_insn (addr_reg, addr); 3190 3191 /* The first two words are the fake descriptor: 3192 __ia64_trampoline, ADDR+16. */ 3193 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3194 gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline")); 3195 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3196 3197 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), 3198 copy_to_reg (plus_constant (addr, 16))); 3199 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3200 3201 /* The third word is the target descriptor. */ 3202 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), fnaddr); 3203 emit_insn (gen_adddi3 (addr_reg, addr_reg, eight)); 3204 3205 /* The fourth word is the static chain. */ 3206 emit_move_insn (gen_rtx_MEM (Pmode, addr_reg), static_chain); 3207} 3208 3209/* Do any needed setup for a variadic function. CUM has not been updated 3210 for the last named argument which has type TYPE and mode MODE. 3211 3212 We generate the actual spill instructions during prologue generation. */ 3213 3214void 3215ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) 3216 CUMULATIVE_ARGS cum; 3217 int int_mode; 3218 tree type; 3219 int * pretend_size; 3220 int second_time ATTRIBUTE_UNUSED; 3221{ 3222 /* Skip the current argument. */ 3223 ia64_function_arg_advance (&cum, int_mode, type, 1); 3224 3225 if (cum.words < MAX_ARGUMENT_SLOTS) 3226 { 3227 int n = MAX_ARGUMENT_SLOTS - cum.words; 3228 *pretend_size = n * UNITS_PER_WORD; 3229 cfun->machine->n_varargs = n; 3230 } 3231} 3232 3233/* Check whether TYPE is a homogeneous floating point aggregate. 
If 3234 it is, return the mode of the floating point type that appears 3235 in all leaves. If it is not, return VOIDmode. 3236 3237 An aggregate is a homogeneous floating point aggregate if all 3238 fields/elements in it have the same floating point type (e.g., 3239 SFmode). 128-bit quad-precision floats are excluded. */ 3240 3241static enum machine_mode 3242hfa_element_mode (type, nested) 3243 tree type; 3244 int nested; 3245{ 3246 enum machine_mode element_mode = VOIDmode; 3247 enum machine_mode mode; 3248 enum tree_code code = TREE_CODE (type); 3249 int know_element_mode = 0; 3250 tree t; 3251 3252 switch (code) 3253 { 3254 case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: 3255 case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: 3256 case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: 3257 case FILE_TYPE: case SET_TYPE: case LANG_TYPE: 3258 case FUNCTION_TYPE: 3259 return VOIDmode; 3260 3261 /* Fortran complex types are supposed to be HFAs, so we need to handle 3262 gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex 3263 types though. */ 3264 case COMPLEX_TYPE: 3265 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT 3266 && (TYPE_MODE (type) != TCmode || INTEL_EXTENDED_IEEE_FORMAT)) 3267 return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) 3268 * BITS_PER_UNIT, MODE_FLOAT, 0); 3269 else 3270 return VOIDmode; 3271 3272 case REAL_TYPE: 3273 /* We want to return VOIDmode for raw REAL_TYPEs, but the actual 3274 mode if this is contained within an aggregate. */ 3275 if (nested && (TYPE_MODE (type) != TFmode || INTEL_EXTENDED_IEEE_FORMAT)) 3276 return TYPE_MODE (type); 3277 else 3278 return VOIDmode; 3279 3280 case ARRAY_TYPE: 3281 return hfa_element_mode (TREE_TYPE (type), 1); 3282 3283 case RECORD_TYPE: 3284 case UNION_TYPE: 3285 case QUAL_UNION_TYPE: 3286 for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) 3287 { 3288 if (TREE_CODE (t) != FIELD_DECL) 3289 continue; 3290 3291 mode = hfa_element_mode (TREE_TYPE (t), 1); 3292 if (know_element_mode) 3293 { 3294 if (mode != element_mode) 3295 return VOIDmode; 3296 } 3297 else if (GET_MODE_CLASS (mode) != MODE_FLOAT) 3298 return VOIDmode; 3299 else 3300 { 3301 know_element_mode = 1; 3302 element_mode = mode; 3303 } 3304 } 3305 return element_mode; 3306 3307 default: 3308 /* If we reach here, we probably have some front-end specific type 3309 that the backend doesn't know about. This can happen via the 3310 aggregate_value_p call in init_function_start. All we can do is 3311 ignore unknown tree types. */ 3312 return VOIDmode; 3313 } 3314 3315 return VOIDmode; 3316} 3317 3318/* Return rtx for register where argument is passed, or zero if it is passed 3319 on the stack. */ 3320 3321/* ??? 128-bit quad-precision floats are always passed in general 3322 registers. */ 3323 3324rtx 3325ia64_function_arg (cum, mode, type, named, incoming) 3326 CUMULATIVE_ARGS *cum; 3327 enum machine_mode mode; 3328 tree type; 3329 int named; 3330 int incoming; 3331{ 3332 int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); 3333 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3334 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3335 / UNITS_PER_WORD); 3336 int offset = 0; 3337 enum machine_mode hfa_mode = VOIDmode; 3338 3339 /* Integer and float arguments larger than 8 bytes start at the next even 3340 boundary. Aggregates larger than 8 bytes start at the next even boundary 3341 if the aggregate has 16 byte alignment. Net effect is that types with 3342 alignment greater than 8 start at the next even boundary.
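Invented examples of the classification: struct { double a, b, c; } yields DFmode from hfa_element_mode and travels in FR registers, while struct { double a; long b; } mixes classes and yields VOIDmode; for the alignment rule, a 16-byte-aligned argument arriving when cum->words is odd gets offset = 1 below, so it starts on an even-numbered slot.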
*/ 3343 /* ??? The ABI does not specify how to handle aggregates with alignment from 3344 9 to 15 bytes, or greater than 16. We handle them all as if they had 3345 16 byte alignment. Such aggregates can occur only if gcc extensions are 3346 used. */ 3347 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3348 : (words > 1)) 3349 && (cum->words & 1)) 3350 offset = 1; 3351 3352 /* If all argument slots are used, then it must go on the stack. */ 3353 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3354 return 0; 3355 3356 /* Check for and handle homogeneous FP aggregates. */ 3357 if (type) 3358 hfa_mode = hfa_element_mode (type, 0); 3359 3360 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3361 and unprototyped hfas are passed specially. */ 3362 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 3363 { 3364 rtx loc[16]; 3365 int i = 0; 3366 int fp_regs = cum->fp_regs; 3367 int int_regs = cum->words + offset; 3368 int hfa_size = GET_MODE_SIZE (hfa_mode); 3369 int byte_size; 3370 int args_byte_size; 3371 3372 /* If prototyped, pass it in FR regs then GR regs. 3373 If not prototyped, pass it in both FR and GR regs. 3374 3375 If this is an SFmode aggregate, then it is possible to run out of 3376 FR regs while GR regs are still left. In that case, we pass the 3377 remaining part in the GR regs. */ 3378 3379 /* Fill the FP regs. We do this always. We stop if we reach the end 3380 of the argument, the last FP register, or the last argument slot. */ 3381 3382 byte_size = ((mode == BLKmode) 3383 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3384 args_byte_size = int_regs * UNITS_PER_WORD; 3385 offset = 0; 3386 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3387 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) 3388 { 3389 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3390 gen_rtx_REG (hfa_mode, (FR_ARG_FIRST 3391 + fp_regs)), 3392 GEN_INT (offset)); 3393 offset += hfa_size; 3394 args_byte_size += hfa_size; 3395 fp_regs++; 3396 } 3397 3398 /* If no prototype, then the whole thing must go in GR regs. */ 3399 if (! cum->prototype) 3400 offset = 0; 3401 /* If this is an SFmode aggregate, then we might have some left over 3402 that needs to go in GR regs. */ 3403 else if (byte_size != offset) 3404 int_regs += offset / UNITS_PER_WORD; 3405 3406 /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ 3407 3408 for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) 3409 { 3410 enum machine_mode gr_mode = DImode; 3411 3412 /* If we have an odd 4 byte hunk because we ran out of FR regs, 3413 then this goes in a GR reg left adjusted/little endian, right 3414 adjusted/big endian. */ 3415 /* ??? Currently this is handled wrong, because 4-byte hunks are 3416 always right adjusted/little endian. */ 3417 if (offset & 0x4) 3418 gr_mode = SImode; 3419 /* If we have an even 4 byte hunk because the aggregate is a 3420 multiple of 4 bytes in size, then this goes in a GR reg right 3421 adjusted/little endian. */ 3422 else if (byte_size - offset == 4) 3423 gr_mode = SImode; 3424 /* Complex floats need to have float mode. */ 3425 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 3426 gr_mode = hfa_mode; 3427 3428 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3429 gen_rtx_REG (gr_mode, (basereg 3430 + int_regs)), 3431 GEN_INT (offset)); 3432 offset += GET_MODE_SIZE (gr_mode); 3433 int_regs += GET_MODE_SIZE (gr_mode) <= UNITS_PER_WORD 3434 ? 
1 : GET_MODE_SIZE (gr_mode) / UNITS_PER_WORD; 3435 } 3436 3437 /* If we ended up using just one location, just return that one loc. */ 3438 if (i == 1) 3439 return XEXP (loc[0], 0); 3440 else 3441 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3442 } 3443 3444 /* Integral and aggregates go in general registers. If we have run out of 3445 FR registers, then FP values must also go in general registers. This can 3446 happen when we have a SFmode HFA. */ 3447 else if (((mode == TFmode) && ! INTEL_EXTENDED_IEEE_FORMAT) 3448 || (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)) 3449 { 3450 int byte_size = ((mode == BLKmode) 3451 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3452 if (BYTES_BIG_ENDIAN 3453 && (mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3454 && byte_size < UNITS_PER_WORD 3455 && byte_size > 0) 3456 { 3457 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3458 gen_rtx_REG (DImode, 3459 (basereg + cum->words 3460 + offset)), 3461 const0_rtx); 3462 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 3463 } 3464 else 3465 return gen_rtx_REG (mode, basereg + cum->words + offset); 3466 3467 } 3468 3469 /* If there is a prototype, then FP values go in an FR register when 3470 named, and in a GR register when unnamed. */ 3471 else if (cum->prototype) 3472 { 3473 if (! named) 3474 return gen_rtx_REG (mode, basereg + cum->words + offset); 3475 else 3476 return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); 3477 } 3478 /* If there is no prototype, then FP values go in both FR and GR 3479 registers. */ 3480 else 3481 { 3482 rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, 3483 gen_rtx_REG (mode, (FR_ARG_FIRST 3484 + cum->fp_regs)), 3485 const0_rtx); 3486 rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 3487 gen_rtx_REG (mode, 3488 (basereg + cum->words 3489 + offset)), 3490 const0_rtx); 3491 3492 return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); 3493 } 3494} 3495 3496/* Return number of words, at the beginning of the argument, that must be 3497 put in registers. 0 if the argument is entirely in registers or entirely 3498 in memory. */ 3499 3500int 3501ia64_function_arg_partial_nregs (cum, mode, type, named) 3502 CUMULATIVE_ARGS *cum; 3503 enum machine_mode mode; 3504 tree type; 3505 int named ATTRIBUTE_UNUSED; 3506{ 3507 int words = (((mode == BLKmode ? int_size_in_bytes (type) 3508 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3509 / UNITS_PER_WORD); 3510 int offset = 0; 3511 3512 /* Arguments with alignment larger than 8 bytes start at the next even 3513 boundary. */ 3514 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3515 : (words > 1)) 3516 && (cum->words & 1)) 3517 offset = 1; 3518 3519 /* If all argument slots are used, then it must go on the stack. */ 3520 if (cum->words + offset >= MAX_ARGUMENT_SLOTS) 3521 return 0; 3522 3523 /* It doesn't matter whether the argument goes in FR or GR regs. If 3524 it fits within the 8 argument slots, then it goes entirely in 3525 registers. If it extends past the last argument slot, then the rest 3526 goes on the stack. */ 3527 3528 if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) 3529 return 0; 3530 3531 return MAX_ARGUMENT_SLOTS - cum->words - offset; 3532} 3533 3534/* Update CUM to point after this argument. This is patterned after 3535 ia64_function_arg. */ 3536 3537void 3538ia64_function_arg_advance (cum, mode, type, named) 3539 CUMULATIVE_ARGS *cum; 3540 enum machine_mode mode; 3541 tree type; 3542 int named; 3543{ 3544 int words = (((mode == BLKmode ?
int_size_in_bytes (type) 3545 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) 3546 / UNITS_PER_WORD); 3547 int offset = 0; 3548 enum machine_mode hfa_mode = VOIDmode; 3549 3550 /* If all arg slots are already full, then there is nothing to do. */ 3551 if (cum->words >= MAX_ARGUMENT_SLOTS) 3552 return; 3553 3554 /* Arguments with alignment larger than 8 bytes start at the next even 3555 boundary. */ 3556 if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3557 : (words > 1)) 3558 && (cum->words & 1)) 3559 offset = 1; 3560 3561 cum->words += words + offset; 3562 3563 /* Check for and handle homogeneous FP aggregates. */ 3564 if (type) 3565 hfa_mode = hfa_element_mode (type, 0); 3566 3567 /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas 3568 and unprototyped hfas are passed specially. */ 3569 if (hfa_mode != VOIDmode && (! cum->prototype || named)) 3570 { 3571 int fp_regs = cum->fp_regs; 3572 /* This is the original value of cum->words + offset. */ 3573 int int_regs = cum->words - words; 3574 int hfa_size = GET_MODE_SIZE (hfa_mode); 3575 int byte_size; 3576 int args_byte_size; 3577 3578 /* If prototyped, pass it in FR regs then GR regs. 3579 If not prototyped, pass it in both FR and GR regs. 3580 3581 If this is an SFmode aggregate, then it is possible to run out of 3582 FR regs while GR regs are still left. In that case, we pass the 3583 remaining part in the GR regs. */ 3584 3585 /* Fill the FP regs. We do this always. We stop if we reach the end 3586 of the argument, the last FP register, or the last argument slot. */ 3587 3588 byte_size = ((mode == BLKmode) 3589 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3590 args_byte_size = int_regs * UNITS_PER_WORD; 3591 offset = 0; 3592 for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS 3593 && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) 3594 { 3595 offset += hfa_size; 3596 args_byte_size += hfa_size; 3597 fp_regs++; 3598 } 3599 3600 cum->fp_regs = fp_regs; 3601 } 3602 3603 /* Integral and aggregates go in general registers. If we have run out of 3604 FR registers, then FP values must also go in general registers. This can 3605 happen when we have a SFmode HFA. */ 3606 else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) 3607 cum->int_regs = cum->words; 3608 3609 /* If there is a prototype, then FP values go in an FR register when 3610 named, and in a GR register when unnamed. */ 3611 else if (cum->prototype) 3612 { 3613 if (! named) 3614 cum->int_regs = cum->words; 3615 else 3616 /* ??? Complex types should not reach here. */ 3617 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 3618 } 3619 /* If there is no prototype, then FP values go in both FR and GR 3620 registers. */ 3621 else 3622 { 3623 /* ??? Complex types should not reach here. */ 3624 cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); 3625 cum->int_regs = cum->words; 3626 } 3627} 3628 3629/* Variable sized types are passed by reference. */ 3630/* ??? At present this is a GCC extension to the IA-64 ABI. */ 3631 3632int 3633ia64_function_arg_pass_by_reference (cum, mode, type, named) 3634 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED; 3635 enum machine_mode mode ATTRIBUTE_UNUSED; 3636 tree type; 3637 int named ATTRIBUTE_UNUSED; 3638{ 3639 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 3640} 3641 3642 3643/* Implement va_arg. */ 3644 3645rtx 3646ia64_va_arg (valist, type) 3647 tree valist, type; 3648{ 3649 tree t; 3650 3651 /* Variable sized types are passed by reference.
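Because such an argument arrives as a pointer, the code below wraps the address produced by std_expand_builtin_va_arg on the pointer type in a MEM and loads through it; and the realignment a few lines further is the usual round-up, conceptually valist = (valist + 15) & -16 for the 16-byte case, spelled as a PLUS_EXPR with 2 * UNITS_PER_WORD - 1 followed by a BIT_AND_EXPR with -2 * UNITS_PER_WORD.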
*/ 3652 if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 3653 { 3654 rtx addr = std_expand_builtin_va_arg (valist, build_pointer_type (type)); 3655 return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr)); 3656 } 3657 3658 /* Arguments with alignment larger than 8 bytes start at the next even 3659 boundary. */ 3660 if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT) 3661 { 3662 t = build (PLUS_EXPR, TREE_TYPE (valist), valist, 3663 build_int_2 (2 * UNITS_PER_WORD - 1, 0)); 3664 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, 3665 build_int_2 (-2 * UNITS_PER_WORD, -1)); 3666 t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); 3667 TREE_SIDE_EFFECTS (t) = 1; 3668 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 3669 } 3670 3671 return std_expand_builtin_va_arg (valist, type); 3672} 3673 3674/* Return 1 if the function return value is returned in memory. Return 0 if it 3675 is in a register. */ 3676 3677int 3678ia64_return_in_memory (valtype) 3679 tree valtype; 3680{ 3681 enum machine_mode mode; 3682 enum machine_mode hfa_mode; 3683 HOST_WIDE_INT byte_size; 3684 3685 mode = TYPE_MODE (valtype); 3686 byte_size = GET_MODE_SIZE (mode); 3687 if (mode == BLKmode) 3688 { 3689 byte_size = int_size_in_bytes (valtype); 3690 if (byte_size < 0) 3691 return 1; 3692 } 3693 3694 /* HFAs with up to 8 elements are returned in the FP argument registers. */ 3695 3696 hfa_mode = hfa_element_mode (valtype, 0); 3697 if (hfa_mode != VOIDmode) 3698 { 3699 int hfa_size = GET_MODE_SIZE (hfa_mode); 3700 3701 if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) 3702 return 1; 3703 else 3704 return 0; 3705 } 3706 else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) 3707 return 1; 3708 else 3709 return 0; 3710} 3711 3712/* Return rtx for register that holds the function return value. */ 3713 3714rtx 3715ia64_function_value (valtype, func) 3716 tree valtype; 3717 tree func ATTRIBUTE_UNUSED; 3718{ 3719 enum machine_mode mode; 3720 enum machine_mode hfa_mode; 3721 3722 mode = TYPE_MODE (valtype); 3723 hfa_mode = hfa_element_mode (valtype, 0); 3724 3725 if (hfa_mode != VOIDmode) 3726 { 3727 rtx loc[8]; 3728 int i; 3729 int hfa_size; 3730 int byte_size; 3731 int offset; 3732 3733 hfa_size = GET_MODE_SIZE (hfa_mode); 3734 byte_size = ((mode == BLKmode) 3735 ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); 3736 offset = 0; 3737 for (i = 0; offset < byte_size; i++) 3738 { 3739 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3740 gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), 3741 GEN_INT (offset)); 3742 offset += hfa_size; 3743 } 3744 3745 if (i == 1) 3746 return XEXP (loc[0], 0); 3747 else 3748 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3749 } 3750 else if (FLOAT_TYPE_P (valtype) && 3751 ((mode != TFmode) || INTEL_EXTENDED_IEEE_FORMAT)) 3752 return gen_rtx_REG (mode, FR_ARG_FIRST); 3753 else 3754 { 3755 if (BYTES_BIG_ENDIAN 3756 && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype)))) 3757 { 3758 rtx loc[8]; 3759 int offset; 3760 int bytesize; 3761 int i; 3762 3763 offset = 0; 3764 bytesize = int_size_in_bytes (valtype); 3765 for (i = 0; offset < bytesize; i++) 3766 { 3767 loc[i] = gen_rtx_EXPR_LIST (VOIDmode, 3768 gen_rtx_REG (DImode, 3769 GR_RET_FIRST + i), 3770 GEN_INT (offset)); 3771 offset += UNITS_PER_WORD; 3772 } 3773 return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); 3774 } 3775 else 3776 return gen_rtx_REG (mode, GR_RET_FIRST); 3777 } 3778} 3779 3780/* Print a memory address as an operand to reference that memory location. */ 3781 3782/* ??? Do we need this? It gets used only for 'a' operands.
We could perhaps 3783 also call this from ia64_print_operand for memory addresses. */ 3784 3785void 3786ia64_print_operand_address (stream, address) 3787 FILE * stream ATTRIBUTE_UNUSED; 3788 rtx address ATTRIBUTE_UNUSED; 3789{ 3790} 3791 3792/* Print an operand to an assembler instruction. 3793 C Swap and print a comparison operator. 3794 D Print an FP comparison operator. 3795 E Print 32 - constant, for SImode shifts as extract. 3796 e Print 64 - constant, for DImode rotates. 3797 F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or 3798 a floating point register emitted normally. 3799 I Invert a predicate register by adding 1. 3800 J Select the proper predicate register for a condition. 3801 j Select the inverse predicate register for a condition. 3802 O Append .acq for volatile load. 3803 P Postincrement of a MEM. 3804 Q Append .rel for volatile store. 3805 S Shift amount for shladd instruction. 3806 T Print an 8-bit sign extended number (K) as a 32-bit unsigned number 3807 for Intel assembler. 3808 U Print an 8-bit sign extended number (K) as a 64-bit unsigned number 3809 for Intel assembler. 3810 r Print register name, or constant 0 as r0. HP compatibility for 3811 Linux kernel. */ 3812void 3813ia64_print_operand (file, x, code) 3814 FILE * file; 3815 rtx x; 3816 int code; 3817{ 3818 const char *str; 3819 3820 switch (code) 3821 { 3822 case 0: 3823 /* Handled below. */ 3824 break; 3825 3826 case 'C': 3827 { 3828 enum rtx_code c = swap_condition (GET_CODE (x)); 3829 fputs (GET_RTX_NAME (c), file); 3830 return; 3831 } 3832 3833 case 'D': 3834 switch (GET_CODE (x)) 3835 { 3836 case NE: 3837 str = "neq"; 3838 break; 3839 case UNORDERED: 3840 str = "unord"; 3841 break; 3842 case ORDERED: 3843 str = "ord"; 3844 break; 3845 default: 3846 str = GET_RTX_NAME (GET_CODE (x)); 3847 break; 3848 } 3849 fputs (str, file); 3850 return; 3851 3852 case 'E': 3853 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); 3854 return; 3855 3856 case 'e': 3857 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x)); 3858 return; 3859 3860 case 'F': 3861 if (x == CONST0_RTX (GET_MODE (x))) 3862 str = reg_names [FR_REG (0)]; 3863 else if (x == CONST1_RTX (GET_MODE (x))) 3864 str = reg_names [FR_REG (1)]; 3865 else if (GET_CODE (x) == REG) 3866 str = reg_names [REGNO (x)]; 3867 else 3868 abort (); 3869 fputs (str, file); 3870 return; 3871 3872 case 'I': 3873 fputs (reg_names [REGNO (x) + 1], file); 3874 return; 3875 3876 case 'J': 3877 case 'j': 3878 { 3879 unsigned int regno = REGNO (XEXP (x, 0)); 3880 if (GET_CODE (x) == EQ) 3881 regno += 1; 3882 if (code == 'j') 3883 regno ^= 1; 3884 fputs (reg_names [regno], file); 3885 } 3886 return; 3887 3888 case 'O': 3889 if (MEM_VOLATILE_P (x)) 3890 fputs(".acq", file); 3891 return; 3892 3893 case 'P': 3894 { 3895 HOST_WIDE_INT value; 3896 3897 switch (GET_CODE (XEXP (x, 0))) 3898 { 3899 default: 3900 return; 3901 3902 case POST_MODIFY: 3903 x = XEXP (XEXP (XEXP (x, 0), 1), 1); 3904 if (GET_CODE (x) == CONST_INT) 3905 value = INTVAL (x); 3906 else if (GET_CODE (x) == REG) 3907 { 3908 fprintf (file, ", %s", reg_names[REGNO (x)]); 3909 return; 3910 } 3911 else 3912 abort (); 3913 break; 3914 3915 case POST_INC: 3916 value = GET_MODE_SIZE (GET_MODE (x)); 3917 break; 3918 3919 case POST_DEC: 3920 value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); 3921 break; 3922 } 3923 3924 putc (',', file); 3925 putc (' ', file); 3926 fprintf (file, HOST_WIDE_INT_PRINT_DEC, value); 3927 return; 3928 } 3929 3930 case 'Q': 3931 if (MEM_VOLATILE_P (x)) 3932 
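 /* Like the 'O' code above, but for stores: the .rel completer gives a volatile store release semantics, pairing with the .acq emitted for volatile loads. */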
fputs(".rel", file); 3933 return; 3934 3935 case 'S': 3936 fprintf (file, "%d", exact_log2 (INTVAL (x))); 3937 return; 3938 3939 case 'T': 3940 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3941 { 3942 fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff); 3943 return; 3944 } 3945 break; 3946 3947 case 'U': 3948 if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) 3949 { 3950 const char *prefix = "0x"; 3951 if (INTVAL (x) & 0x80000000) 3952 { 3953 fprintf (file, "0xffffffff"); 3954 prefix = ""; 3955 } 3956 fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff); 3957 return; 3958 } 3959 break; 3960 3961 case 'r': 3962 /* If this operand is the constant zero, write it as register zero. 3963 Any register, zero, or CONST_INT value is OK here. */ 3964 if (GET_CODE (x) == REG) 3965 fputs (reg_names[REGNO (x)], file); 3966 else if (x == CONST0_RTX (GET_MODE (x))) 3967 fputs ("r0", file); 3968 else if (GET_CODE (x) == CONST_INT) 3969 output_addr_const (file, x); 3970 else 3971 output_operand_lossage ("invalid %%r value"); 3972 return; 3973 3974 case '+': 3975 { 3976 const char *which; 3977 3978 /* For conditional branches, returns or calls, substitute 3979 sptk, dptk, dpnt, or spnt for %s. */ 3980 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 3981 if (x) 3982 { 3983 int pred_val = INTVAL (XEXP (x, 0)); 3984 3985 /* Guess top and bottom 10% statically predicted. */ 3986 if (pred_val < REG_BR_PROB_BASE / 50) 3987 which = ".spnt"; 3988 else if (pred_val < REG_BR_PROB_BASE / 2) 3989 which = ".dpnt"; 3990 else if (pred_val < REG_BR_PROB_BASE / 100 * 98) 3991 which = ".dptk"; 3992 else 3993 which = ".sptk"; 3994 } 3995 else if (GET_CODE (current_output_insn) == CALL_INSN) 3996 which = ".sptk"; 3997 else 3998 which = ".dptk"; 3999 4000 fputs (which, file); 4001 return; 4002 } 4003 4004 case ',': 4005 x = current_insn_predicate; 4006 if (x) 4007 { 4008 unsigned int regno = REGNO (XEXP (x, 0)); 4009 if (GET_CODE (x) == EQ) 4010 regno += 1; 4011 fprintf (file, "(%s) ", reg_names [regno]); 4012 } 4013 return; 4014 4015 default: 4016 output_operand_lossage ("ia64_print_operand: unknown code"); 4017 return; 4018 } 4019 4020 switch (GET_CODE (x)) 4021 { 4022 /* This happens for the spill/restore instructions. */ 4023 case POST_INC: 4024 case POST_DEC: 4025 case POST_MODIFY: 4026 x = XEXP (x, 0); 4027 /* ... fall through ... */ 4028 4029 case REG: 4030 fputs (reg_names [REGNO (x)], file); 4031 break; 4032 4033 case MEM: 4034 { 4035 rtx addr = XEXP (x, 0); 4036 if (GET_RTX_CLASS (GET_CODE (addr)) == 'a') 4037 addr = XEXP (addr, 0); 4038 fprintf (file, "[%s]", reg_names [REGNO (addr)]); 4039 break; 4040 } 4041 4042 default: 4043 output_addr_const (file, x); 4044 break; 4045 } 4046 4047 return; 4048} 4049 4050/* Calculate the cost of moving data from a register in class FROM to 4051 one in class TO, using MODE. */ 4052 4053int 4054ia64_register_move_cost (mode, from, to) 4055 enum machine_mode mode; 4056 enum reg_class from, to; 4057{ 4058 /* ADDL_REGS is the same as GR_REGS for movement purposes. */ 4059 if (to == ADDL_REGS) 4060 to = GR_REGS; 4061 if (from == ADDL_REGS) 4062 from = GR_REGS; 4063 4064 /* All costs are symmetric, so reduce cases by putting the 4065 lower number class as the destination. */ 4066 if (from < to) 4067 { 4068 enum reg_class tmp = to; 4069 to = from, from = tmp; 4070 } 4071 4072 /* Moving from FR<->GR in TFmode must be more expensive than 2, 4073 so that we get secondary memory reloads.
Between FR_REGS, 4074 we have to make this at least as expensive as MEMORY_MOVE_COST 4075 to avoid spectacularly poor register class preferencing. */ 4076 if (mode == TFmode) 4077 { 4078 if (to != GR_REGS || from != GR_REGS) 4079 return MEMORY_MOVE_COST (mode, to, 0); 4080 else 4081 return 3; 4082 } 4083 4084 switch (to) 4085 { 4086 case PR_REGS: 4087 /* Moving between PR registers takes two insns. */ 4088 if (from == PR_REGS) 4089 return 3; 4090 /* Moving between PR and anything but GR is impossible. */ 4091 if (from != GR_REGS) 4092 return MEMORY_MOVE_COST (mode, to, 0); 4093 break; 4094 4095 case BR_REGS: 4096 /* Moving between BR and anything but GR is impossible. */ 4097 if (from != GR_REGS && from != GR_AND_BR_REGS) 4098 return MEMORY_MOVE_COST (mode, to, 0); 4099 break; 4100 4101 case AR_I_REGS: 4102 case AR_M_REGS: 4103 /* Moving between AR and anything but GR is impossible. */ 4104 if (from != GR_REGS) 4105 return MEMORY_MOVE_COST (mode, to, 0); 4106 break; 4107 4108 case GR_REGS: 4109 case FR_REGS: 4110 case GR_AND_FR_REGS: 4111 case GR_AND_BR_REGS: 4112 case ALL_REGS: 4113 break; 4114 4115 default: 4116 abort (); 4117 } 4118 4119 return 2; 4120} 4121 4122/* This function returns the register class required for a secondary 4123 register when copying between one of the registers in CLASS, and X, 4124 using MODE. A return value of NO_REGS means that no secondary register 4125 is required. */ 4126 4127enum reg_class 4128ia64_secondary_reload_class (class, mode, x) 4129 enum reg_class class; 4130 enum machine_mode mode ATTRIBUTE_UNUSED; 4131 rtx x; 4132{ 4133 int regno = -1; 4134 4135 if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 4136 regno = true_regnum (x); 4137 4138 switch (class) 4139 { 4140 case BR_REGS: 4141 case AR_M_REGS: 4142 case AR_I_REGS: 4143 /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global 4144 interaction. We end up with two pseudos with overlapping lifetimes 4145 both of which are equiv to the same constant, and both of which need 4146 to be in BR_REGS. This seems to be a cse bug. cse_basic_block_end 4147 changes depending on the path length, which means the qty_first_reg 4148 check in make_regs_eqv can give different answers at different times. 4149 At some point I'll probably need a reload_indi pattern to handle 4150 this. 4151 4152 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we 4153 wound up with a FP register from GR_AND_FR_REGS. Extend that to all 4154 non-general registers for good measure. */ 4155 if (regno >= 0 && ! GENERAL_REGNO_P (regno)) 4156 return GR_REGS; 4157 4158 /* This is needed if a pseudo used as a call_operand gets spilled to a 4159 stack slot. */ 4160 if (GET_CODE (x) == MEM) 4161 return GR_REGS; 4162 break; 4163 4164 case FR_REGS: 4165 /* Need to go through general registers to get to other class regs. */ 4166 if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno))) 4167 return GR_REGS; 4168 4169 /* This can happen when a paradoxical subreg is an operand to the 4170 muldi3 pattern. */ 4171 /* ??? This shouldn't be necessary after instruction scheduling is 4172 enabled, because paradoxical subregs are not accepted by 4173 register_operand when INSN_SCHEDULING is defined. Or alternatively, 4174 stop the paradoxical subreg stupidity in the *_operand functions 4175 in recog.c.
*/ 4176 if (GET_CODE (x) == MEM 4177 && (GET_MODE (x) == SImode || GET_MODE (x) == HImode 4178 || GET_MODE (x) == QImode)) 4179 return GR_REGS; 4180 4181 /* This can happen because of the ior/and/etc patterns that accept FP 4182 registers as operands. If the third operand is a constant, then it 4183 needs to be reloaded into a FP register. */ 4184 if (GET_CODE (x) == CONST_INT) 4185 return GR_REGS; 4186 4187 /* This can happen because of register elimination in a muldi3 insn. 4188 E.g. `26107 * (unsigned long)&u'. */ 4189 if (GET_CODE (x) == PLUS) 4190 return GR_REGS; 4191 break; 4192 4193 case PR_REGS: 4194 /* ??? This happens if we cse/gcse a BImode value across a call, 4195 and the function has a nonlocal goto. This is because global 4196 does not allocate call crossing pseudos to hard registers when 4197 current_function_has_nonlocal_goto is true. This is relatively 4198 common for C++ programs that use exceptions. To reproduce, 4199 return NO_REGS and compile libstdc++. */ 4200 if (GET_CODE (x) == MEM) 4201 return GR_REGS; 4202 4203 /* This can happen when we take a BImode subreg of a DImode value, 4204 and that DImode value winds up in some non-GR register. */ 4205 if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno)) 4206 return GR_REGS; 4207 break; 4208 4209 case GR_REGS: 4210 /* Since we have no offsettable memory addresses, we need a temporary 4211 to hold the address of the second word. */ 4212 if (mode == TImode) 4213 return GR_REGS; 4214 break; 4215 4216 default: 4217 break; 4218 } 4219 4220 return NO_REGS; 4221} 4222 4223/* Emit text to declare externally defined variables and functions, because 4224 the Intel assembler does not support undefined externals. */ 4225 4226void 4227ia64_asm_output_external (file, decl, name) 4228 FILE *file; 4229 tree decl; 4230 const char *name; 4231{ 4232 int save_referenced; 4233 4234 /* GNU as does not need anything here, but the HP linker does need 4235 something for external functions. */ 4236 4237 if (TARGET_GNU_AS 4238 && (!TARGET_HPUX_LD 4239 || TREE_CODE (decl) != FUNCTION_DECL 4240 || strstr(name, "__builtin_") == name)) 4241 return; 4242 4243 /* ??? The Intel assembler creates a reference that needs to be satisfied by 4244 the linker when we do this, so we need to be careful not to do this for 4245 builtin functions which have no library equivalent. Unfortunately, we 4246 can't tell here whether or not a function will actually be called by 4247 expand_expr, so we pull in library functions even if we may not need 4248 them later. */ 4249 if (! strcmp (name, "__builtin_next_arg") 4250 || ! strcmp (name, "alloca") 4251 || ! strcmp (name, "__builtin_constant_p") 4252 || ! strcmp (name, "__builtin_args_info")) 4253 return; 4254 4255 if (TARGET_HPUX_LD) 4256 ia64_hpux_add_extern_decl (name); 4257 else 4258 { 4259 /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and 4260 restore it. */ 4261 save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); 4262 if (TREE_CODE (decl) == FUNCTION_DECL) 4263 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 4264 (*targetm.asm_out.globalize_label) (file, name); 4265 TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; 4266 } 4267} 4268 4269/* Parse the -mfixed-range= option string. 
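   For example, -mfixed-range=f32-f127 keeps the compiler out of the upper
   FP registers (the kernel-mode case mentioned below); several ranges may
   be given, as in -mfixed-range=f2-f5,f32-f127.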
*/ 4270 4271 static void 4272 fix_range (const_str) 4273 const char *const_str; 4274 { 4275 int i, first, last; 4276 char *str, *dash, *comma; 4277 4278 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and 4279 REG2 are either register names or register numbers. The effect 4280 of this option is to mark the registers in the range from REG1 to 4281 REG2 as ``fixed'' so they won't be used by the compiler. This is 4282 used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ 4283 4284 i = strlen (const_str); 4285 str = (char *) alloca (i + 1); 4286 memcpy (str, const_str, i + 1); 4287 4288 while (1) 4289 { 4290 dash = strchr (str, '-'); 4291 if (!dash) 4292 { 4293 warning ("value of -mfixed-range must have form REG1-REG2"); 4294 return; 4295 } 4296 *dash = '\0'; 4297 4298 comma = strchr (dash + 1, ','); 4299 if (comma) 4300 *comma = '\0'; 4301 4302 first = decode_reg_name (str); 4303 if (first < 0) 4304 { 4305 warning ("unknown register name: %s", str); 4306 return; 4307 } 4308 4309 last = decode_reg_name (dash + 1); 4310 if (last < 0) 4311 { 4312 warning ("unknown register name: %s", dash + 1); 4313 return; 4314 } 4315 4316 *dash = '-'; 4317 4318 if (first > last) 4319 { 4320 warning ("%s-%s is an empty range", str, dash + 1); 4321 return; 4322 } 4323 4324 for (i = first; i <= last; ++i) 4325 fixed_regs[i] = call_used_regs[i] = 1; 4326 4327 if (!comma) 4328 break; 4329 4330 *comma = ','; 4331 str = comma + 1; 4332 } 4333} 4334 4335static struct machine_function * 4336ia64_init_machine_status () 4337{ 4338 return ggc_alloc_cleared (sizeof (struct machine_function)); 4339} 4340 4341/* Handle TARGET_OPTIONS switches. */ 4342 4343void 4344ia64_override_options () 4345{ 4346 if (TARGET_AUTO_PIC) 4347 target_flags |= MASK_CONST_GP; 4348 4349 if (TARGET_INLINE_FLOAT_DIV_LAT && TARGET_INLINE_FLOAT_DIV_THR) 4350 { 4351 warning ("cannot optimize floating point division for both latency and throughput"); 4352 target_flags &= ~MASK_INLINE_FLOAT_DIV_THR; 4353 } 4354 4355 if (TARGET_INLINE_INT_DIV_LAT && TARGET_INLINE_INT_DIV_THR) 4356 { 4357 warning ("cannot optimize integer division for both latency and throughput"); 4358 target_flags &= ~MASK_INLINE_INT_DIV_THR; 4359 } 4360 4361 if (ia64_fixed_range_string) 4362 fix_range (ia64_fixed_range_string); 4363 4364 if (ia64_tls_size_string) 4365 { 4366 char *end; 4367 unsigned long tmp = strtoul (ia64_tls_size_string, &end, 10); 4368 if (*end || (tmp != 14 && tmp != 22 && tmp != 64)) 4369 error ("bad value (%s) for -mtls-size= switch", ia64_tls_size_string); 4370 else 4371 ia64_tls_size = tmp; 4372 } 4373 4374 ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload; 4375 flag_schedule_insns_after_reload = 0; 4376 4377 ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; 4378 4379 init_machine_status = ia64_init_machine_status; 4380 4381 /* Tell the compiler which flavor of TFmode we're using.
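   (ieee_extended_intel_128_format is, as best we read it, the Intel 80-bit
   extended format padded out to a 128-bit container, as opposed to a true
   quad-precision layout.)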
*/ 4382 if (INTEL_EXTENDED_IEEE_FORMAT) 4383 real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format; 4384} 4385 4386static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS((rtx)); 4387static enum attr_itanium_class ia64_safe_itanium_class PARAMS((rtx)); 4388static enum attr_type ia64_safe_type PARAMS((rtx)); 4389 4390static enum attr_itanium_requires_unit0 4391ia64_safe_itanium_requires_unit0 (insn) 4392 rtx insn; 4393{ 4394 if (recog_memoized (insn) >= 0) 4395 return get_attr_itanium_requires_unit0 (insn); 4396 else 4397 return ITANIUM_REQUIRES_UNIT0_NO; 4398} 4399 4400static enum attr_itanium_class 4401ia64_safe_itanium_class (insn) 4402 rtx insn; 4403{ 4404 if (recog_memoized (insn) >= 0) 4405 return get_attr_itanium_class (insn); 4406 else 4407 return ITANIUM_CLASS_UNKNOWN; 4408} 4409 4410static enum attr_type 4411ia64_safe_type (insn) 4412 rtx insn; 4413{ 4414 if (recog_memoized (insn) >= 0) 4415 return get_attr_type (insn); 4416 else 4417 return TYPE_UNKNOWN; 4418} 4419 4420/* The following collection of routines emit instruction group stop bits as 4421 necessary to avoid dependencies. */ 4422 4423/* Need to track some additional registers as far as serialization is 4424 concerned so we can properly handle br.call and br.ret. We could 4425 make these registers visible to gcc, but since these registers are 4426 never explicitly used in gcc generated code, it seems wasteful to 4427 do so (plus it would make the call and return patterns needlessly 4428 complex). */ 4429#define REG_GP (GR_REG (1)) 4430#define REG_RP (BR_REG (0)) 4431#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) 4432/* This is used for volatile asms which may require a stop bit immediately 4433 before and after them. */ 4434#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) 4435#define AR_UNAT_BIT_0 (FIRST_PSEUDO_REGISTER + 3) 4436#define NUM_REGS (AR_UNAT_BIT_0 + 64) 4437 4438/* For each register, we keep track of how it has been written in the 4439 current instruction group. 4440 4441 If a register is written unconditionally (no qualifying predicate), 4442 WRITE_COUNT is set to 2 and FIRST_PRED is ignored. 4443 4444 If a register is written if its qualifying predicate P is true, we 4445 set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register 4446 may be written again by the complement of P (P^1) and when this happens, 4447 WRITE_COUNT gets set to 2. 4448 4449 The result of this is that whenever an insn attempts to write a register 4450 whose WRITE_COUNT is two, we need to issue an insn group barrier first. 4451 4452 If a predicate register is written by a floating-point insn, we set 4453 WRITTEN_BY_FP to true. 4454 4455 If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND 4456 to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true. */ 4457 4458struct reg_write_state 4459{ 4460 unsigned int write_count : 2; 4461 unsigned int first_pred : 16; 4462 unsigned int written_by_fp : 1; 4463 unsigned int written_by_and : 1; 4464 unsigned int written_by_or : 1; 4465}; 4466 4467/* Cumulative info for the current instruction group. */ 4468struct reg_write_state rws_sum[NUM_REGS]; 4469/* Info for the current instruction. This gets copied to rws_sum after a 4470 stop bit is emitted. */ 4471struct reg_write_state rws_insn[NUM_REGS]; 4472 4473/* Indicates whether this is the first instruction after a stop bit, 4474 in which case we don't need another stop bit. Without this, we hit 4475 the abort in ia64_variable_issue when scheduling an alloc. 
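   (An alloc must be the first insn of its group -- see the UNSPECV_ALLOC
   case in rtx_needs_barrier below -- so a stop bit always precedes it, and
   without this flag we would ask for a second one.)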
*/ 4476static int first_instruction; 4477 4478/* Misc flags needed to compute RAW/WAW dependencies while we are traversing 4479 RTL for one instruction. */ 4480struct reg_flags 4481{ 4482 unsigned int is_write : 1; /* Is register being written? */ 4483 unsigned int is_fp : 1; /* Is register used as part of an fp op? */ 4484 unsigned int is_branch : 1; /* Is register used as part of a branch? */ 4485 unsigned int is_and : 1; /* Is register used as part of and.orcm? */ 4486 unsigned int is_or : 1; /* Is register used as part of or.andcm? */ 4487 unsigned int is_sibcall : 1; /* Is this a sibling or normal call? */ 4488}; 4489 4490static void rws_update PARAMS ((struct reg_write_state *, int, 4491 struct reg_flags, int)); 4492static int rws_access_regno PARAMS ((int, struct reg_flags, int)); 4493static int rws_access_reg PARAMS ((rtx, struct reg_flags, int)); 4494static void update_set_flags PARAMS ((rtx, struct reg_flags *, int *, rtx *)); 4495static int set_src_needs_barrier PARAMS ((rtx, struct reg_flags, int, rtx)); 4496static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int)); 4497static void init_insn_group_barriers PARAMS ((void)); 4498static int group_barrier_needed_p PARAMS ((rtx)); 4499static int safe_group_barrier_needed_p PARAMS ((rtx)); 4500 4501/* Update *RWS for REGNO, which is being written by the current instruction, 4502 with predicate PRED, and associated register flags in FLAGS. */ 4503 4504static void 4505rws_update (rws, regno, flags, pred) 4506 struct reg_write_state *rws; 4507 int regno; 4508 struct reg_flags flags; 4509 int pred; 4510{ 4511 if (pred) 4512 rws[regno].write_count++; 4513 else 4514 rws[regno].write_count = 2; 4515 rws[regno].written_by_fp |= flags.is_fp; 4516 /* ??? Not tracking and/or across differing predicates. */ 4517 rws[regno].written_by_and = flags.is_and; 4518 rws[regno].written_by_or = flags.is_or; 4519 rws[regno].first_pred = pred; 4520} 4521 4522/* Handle an access to register REGNO of type FLAGS using predicate register 4523 PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates 4524 a dependency with an earlier instruction in the same group. */ 4525 4526static int 4527rws_access_regno (regno, flags, pred) 4528 int regno; 4529 struct reg_flags flags; 4530 int pred; 4531{ 4532 int need_barrier = 0; 4533 4534 if (regno >= NUM_REGS) 4535 abort (); 4536 4537 if (! PR_REGNO_P (regno)) 4538 flags.is_and = flags.is_or = 0; 4539 4540 if (flags.is_write) 4541 { 4542 int write_count; 4543 4544 /* One insn writes same reg multiple times? */ 4545 if (rws_insn[regno].write_count > 0) 4546 abort (); 4547 4548 /* Update info for current instruction. */ 4549 rws_update (rws_insn, regno, flags, pred); 4550 write_count = rws_sum[regno].write_count; 4551 4552 switch (write_count) 4553 { 4554 case 0: 4555 /* The register has not been written yet. */ 4556 rws_update (rws_sum, regno, flags, pred); 4557 break; 4558 4559 case 1: 4560 /* The register has been written via a predicate. If this is 4561 not a complementary predicate, then we need a barrier. */ 4562 /* ??? This assumes that P and P+1 are always complementary 4563 predicates for P even. */ 4564 if (flags.is_and && rws_sum[regno].written_by_and) 4565 ; 4566 else if (flags.is_or && rws_sum[regno].written_by_or) 4567 ; 4568 else if ((rws_sum[regno].first_pred ^ 1) != pred) 4569 need_barrier = 1; 4570 rws_update (rws_sum, regno, flags, pred); 4571 break; 4572 4573 case 2: 4574 /* The register has been unconditionally written already. We 4575 need a barrier. 
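	     For illustration: within one instruction group,

	        mov r8 = r9
	        mov r8 = r10

	     is a WAW violation and a stop bit (;;) must separate the two
	     writes; the only exception, checked just below, is multiple
	     writes to a predicate register by all-AND or all-OR type
	     parallel compares.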
*/ 4576 if (flags.is_and && rws_sum[regno].written_by_and) 4577 ; 4578 else if (flags.is_or && rws_sum[regno].written_by_or) 4579 ; 4580 else 4581 need_barrier = 1; 4582 rws_sum[regno].written_by_and = flags.is_and; 4583 rws_sum[regno].written_by_or = flags.is_or; 4584 break; 4585 4586 default: 4587 abort (); 4588 } 4589 } 4590 else 4591 { 4592 if (flags.is_branch) 4593 { 4594 /* Branches have several RAW exceptions that let us avoid 4595 barriers. */ 4596 4597 if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) 4598 /* RAW dependencies on branch regs are permissible as long 4599 as the writer is a non-branch instruction. Since we 4600 never generate code that uses a branch register written 4601 by a branch instruction, handling this case is 4602 easy. */ 4603 return 0; 4604 4605 if (REGNO_REG_CLASS (regno) == PR_REGS 4606 && ! rws_sum[regno].written_by_fp) 4607 /* The predicates of a branch are available within the 4608 same insn group as long as the predicate was written by 4609 something other than a floating-point instruction. */ 4610 return 0; 4611 } 4612 4613 if (flags.is_and && rws_sum[regno].written_by_and) 4614 return 0; 4615 if (flags.is_or && rws_sum[regno].written_by_or) 4616 return 0; 4617 4618 switch (rws_sum[regno].write_count) 4619 { 4620 case 0: 4621 /* The register has not been written yet. */ 4622 break; 4623 4624 case 1: 4625 /* The register has been written via a predicate. If this is 4626 not a complementary predicate, then we need a barrier. */ 4627 /* ??? This assumes that P and P+1 are always complementary 4628 predicates for P even. */ 4629 if ((rws_sum[regno].first_pred ^ 1) != pred) 4630 need_barrier = 1; 4631 break; 4632 4633 case 2: 4634 /* The register has been unconditionally written already. We 4635 need a barrier. */ 4636 need_barrier = 1; 4637 break; 4638 4639 default: 4640 abort (); 4641 } 4642 } 4643 4644 return need_barrier; 4645} 4646 4647static int 4648rws_access_reg (reg, flags, pred) 4649 rtx reg; 4650 struct reg_flags flags; 4651 int pred; 4652{ 4653 int regno = REGNO (reg); 4654 int n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)); 4655 4656 if (n == 1) 4657 return rws_access_regno (regno, flags, pred); 4658 else 4659 { 4660 int need_barrier = 0; 4661 while (--n >= 0) 4662 need_barrier |= rws_access_regno (regno + n, flags, pred); 4663 return need_barrier; 4664 } 4665} 4666 4667/* Examine X, which is a SET rtx, and update the flags, the predicate, and 4668 the condition, stored in *PFLAGS, *PPRED and *PCOND. */ 4669 4670static void 4671update_set_flags (x, pflags, ppred, pcond) 4672 rtx x; 4673 struct reg_flags *pflags; 4674 int *ppred; 4675 rtx *pcond; 4676{ 4677 rtx src = SET_SRC (x); 4678 4679 *pcond = 0; 4680 4681 switch (GET_CODE (src)) 4682 { 4683 case CALL: 4684 return; 4685 4686 case IF_THEN_ELSE: 4687 if (SET_DEST (x) == pc_rtx) 4688 /* X is a conditional branch. */ 4689 return; 4690 else 4691 { 4692 int is_complemented = 0; 4693 4694 /* X is a conditional move. */ 4695 rtx cond = XEXP (src, 0); 4696 if (GET_CODE (cond) == EQ) 4697 is_complemented = 1; 4698 cond = XEXP (cond, 0); 4699 if (GET_CODE (cond) != REG 4700 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4701 abort (); 4702 *pcond = cond; 4703 if (XEXP (src, 1) == SET_DEST (x) 4704 || XEXP (src, 2) == SET_DEST (x)) 4705 { 4706 /* X is a conditional move that conditionally writes the 4707 destination. */ 4708 4709 /* We need another complement in this case. */ 4710 if (XEXP (src, 1) == SET_DEST (x)) 4711 is_complemented = !
is_complemented; 4712 4713 *ppred = REGNO (cond); 4714 if (is_complemented) 4715 ++*ppred; 4716 } 4717 4718 /* ??? If this is a conditional write to the dest, then this 4719 instruction does not actually read one source. This probably 4720 doesn't matter, because that source is also the dest. */ 4721 /* ??? Multiple writes to predicate registers are allowed 4722 if they are all AND type compares, or if they are all OR 4723 type compares. We do not generate such instructions 4724 currently. */ 4725 } 4726 /* ... fall through ... */ 4727 4728 default: 4729 if (GET_RTX_CLASS (GET_CODE (src)) == '<' 4730 && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) 4731 /* Set pflags->is_fp to 1 so that we know we're dealing 4732 with a floating point comparison when processing the 4733 destination of the SET. */ 4734 pflags->is_fp = 1; 4735 4736 /* Discover if this is a parallel comparison. We only handle 4737 and.orcm and or.andcm at present, since we must retain a 4738 strict inverse on the predicate pair. */ 4739 else if (GET_CODE (src) == AND) 4740 pflags->is_and = 1; 4741 else if (GET_CODE (src) == IOR) 4742 pflags->is_or = 1; 4743 4744 break; 4745 } 4746} 4747 4748/* Subroutine of rtx_needs_barrier; this function determines whether the 4749 source of a given SET rtx found in X needs a barrier. FLAGS and PRED 4750 are as in rtx_needs_barrier. COND is an rtx that holds the condition 4751 for this insn. */ 4752 4753static int 4754set_src_needs_barrier (x, flags, pred, cond) 4755 rtx x; 4756 struct reg_flags flags; 4757 int pred; 4758 rtx cond; 4759{ 4760 int need_barrier = 0; 4761 rtx dst; 4762 rtx src = SET_SRC (x); 4763 4764 if (GET_CODE (src) == CALL) 4765 /* We don't need to worry about the result registers that 4766 get written by a subroutine call. */ 4767 return rtx_needs_barrier (src, flags, pred); 4768 else if (SET_DEST (x) == pc_rtx) 4769 { 4770 /* X is a conditional branch. */ 4771 /* ??? This seems redundant, as the caller sets this bit for 4772 all JUMP_INSNs. */ 4773 flags.is_branch = 1; 4774 return rtx_needs_barrier (src, flags, pred); 4775 } 4776 4777 need_barrier = rtx_needs_barrier (src, flags, pred); 4778 4779 /* This instruction unconditionally uses a predicate register. */ 4780 if (cond) 4781 need_barrier |= rws_access_reg (cond, flags, 0); 4782 4783 dst = SET_DEST (x); 4784 if (GET_CODE (dst) == ZERO_EXTRACT) 4785 { 4786 need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); 4787 need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); 4788 dst = XEXP (dst, 0); 4789 } 4790 return need_barrier; 4791} 4792 4793/* Handle an access to rtx X of type FLAGS using predicate register PRED. 4794 Return 1 if this access creates a dependency with an earlier instruction 4795 in the same group. */ 4796 4797static int 4798rtx_needs_barrier (x, flags, pred) 4799 rtx x; 4800 struct reg_flags flags; 4801 int pred; 4802{ 4803 int i, j; 4804 int is_complemented = 0; 4805 int need_barrier = 0; 4806 const char *format_ptr; 4807 struct reg_flags new_flags; 4808 rtx cond = 0; 4809 4810 if (!
x) 4811 return 0; 4812 4813 new_flags = flags; 4814 4815 switch (GET_CODE (x)) 4816 { 4817 case SET: 4818 update_set_flags (x, &new_flags, &pred, &cond); 4819 need_barrier = set_src_needs_barrier (x, new_flags, pred, cond); 4820 if (GET_CODE (SET_SRC (x)) != CALL) 4821 { 4822 new_flags.is_write = 1; 4823 need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred); 4824 } 4825 break; 4826 4827 case CALL: 4828 new_flags.is_write = 0; 4829 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 4830 4831 /* Avoid multiple register writes, in case this is a pattern with 4832 multiple CALL rtx. This avoids an abort in rws_access_reg. */ 4833 if (! flags.is_sibcall && ! rws_insn[REG_AR_CFM].write_count) 4834 { 4835 new_flags.is_write = 1; 4836 need_barrier |= rws_access_regno (REG_RP, new_flags, pred); 4837 need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred); 4838 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 4839 } 4840 break; 4841 4842 case COND_EXEC: 4843 /* X is a predicated instruction. */ 4844 4845 cond = COND_EXEC_TEST (x); 4846 if (pred) 4847 abort (); 4848 need_barrier = rtx_needs_barrier (cond, flags, 0); 4849 4850 if (GET_CODE (cond) == EQ) 4851 is_complemented = 1; 4852 cond = XEXP (cond, 0); 4853 if (GET_CODE (cond) != REG 4854 || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) 4855 abort (); 4856 pred = REGNO (cond); 4857 if (is_complemented) 4858 ++pred; 4859 4860 need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); 4861 return need_barrier; 4862 4863 case CLOBBER: 4864 case USE: 4865 /* Clobber & use are for earlier compiler-phases only. */ 4866 break; 4867 4868 case ASM_OPERANDS: 4869 case ASM_INPUT: 4870 /* We always emit stop bits for traditional asms. We emit stop bits 4871 for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ 4872 if (GET_CODE (x) != ASM_OPERANDS 4873 || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) 4874 { 4875 /* Avoid writing the register multiple times if we have multiple 4876 asm outputs. This avoids an abort in rws_access_reg. */ 4877 if (! rws_insn[REG_VOLATILE].write_count) 4878 { 4879 new_flags.is_write = 1; 4880 rws_access_regno (REG_VOLATILE, new_flags, pred); 4881 } 4882 return 1; 4883 } 4884 4885 /* For all ASM_OPERANDS, we must traverse the vector of input operands. 4886 We cannot simply fall through here, since the ASM_INPUT rtxs 4887 inside the ASM_OPERANDS do not indicate 4888 traditional asms, unlike their normal usage.
*/ 4889 4890 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) 4891 if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) 4892 need_barrier = 1; 4893 break; 4894 4895 case PARALLEL: 4896 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4897 { 4898 rtx pat = XVECEXP (x, 0, i); 4899 if (GET_CODE (pat) == SET) 4900 { 4901 update_set_flags (pat, &new_flags, &pred, &cond); 4902 need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond); 4903 } 4904 else if (GET_CODE (pat) == USE 4905 || GET_CODE (pat) == CALL 4906 || GET_CODE (pat) == ASM_OPERANDS) 4907 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4908 else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN) 4909 abort (); 4910 } 4911 for (i = XVECLEN (x, 0) - 1; i >= 0; --i) 4912 { 4913 rtx pat = XVECEXP (x, 0, i); 4914 if (GET_CODE (pat) == SET) 4915 { 4916 if (GET_CODE (SET_SRC (pat)) != CALL) 4917 { 4918 new_flags.is_write = 1; 4919 need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags, 4920 pred); 4921 } 4922 } 4923 else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN) 4924 need_barrier |= rtx_needs_barrier (pat, flags, pred); 4925 } 4926 break; 4927 4928 case SUBREG: 4929 x = SUBREG_REG (x); 4930 /* FALLTHRU */ 4931 case REG: 4932 if (REGNO (x) == AR_UNAT_REGNUM) 4933 { 4934 for (i = 0; i < 64; ++i) 4935 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred); 4936 } 4937 else 4938 need_barrier = rws_access_reg (x, flags, pred); 4939 break; 4940 4941 case MEM: 4942 /* Find the regs used in memory address computation. */ 4943 new_flags.is_write = 0; 4944 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4945 break; 4946 4947 case CONST_INT: case CONST_DOUBLE: 4948 case SYMBOL_REF: case LABEL_REF: case CONST: 4949 break; 4950 4951 /* Operators with side-effects. */ 4952 case POST_INC: case POST_DEC: 4953 if (GET_CODE (XEXP (x, 0)) != REG) 4954 abort (); 4955 4956 new_flags.is_write = 0; 4957 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4958 new_flags.is_write = 1; 4959 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4960 break; 4961 4962 case POST_MODIFY: 4963 if (GET_CODE (XEXP (x, 0)) != REG) 4964 abort (); 4965 4966 new_flags.is_write = 0; 4967 need_barrier = rws_access_reg (XEXP (x, 0), new_flags, pred); 4968 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4969 new_flags.is_write = 1; 4970 need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred); 4971 break; 4972 4973 /* Handle common unary and binary ops for efficiency. 
*/ 4974 case COMPARE: case PLUS: case MINUS: case MULT: case DIV: 4975 case MOD: case UDIV: case UMOD: case AND: case IOR: 4976 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: 4977 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: 4978 case NE: case EQ: case GE: case GT: case LE: 4979 case LT: case GEU: case GTU: case LEU: case LTU: 4980 need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); 4981 need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); 4982 break; 4983 4984 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: 4985 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: 4986 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: 4987 case SQRT: case FFS: 4988 need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); 4989 break; 4990 4991 case UNSPEC: 4992 switch (XINT (x, 1)) 4993 { 4994 case UNSPEC_LTOFF_DTPMOD: 4995 case UNSPEC_LTOFF_DTPREL: 4996 case UNSPEC_DTPREL: 4997 case UNSPEC_LTOFF_TPREL: 4998 case UNSPEC_TPREL: 4999 case UNSPEC_PRED_REL_MUTEX: 5000 case UNSPEC_PIC_CALL: 5001 case UNSPEC_MF: 5002 case UNSPEC_FETCHADD_ACQ: 5003 case UNSPEC_BSP_VALUE: 5004 case UNSPEC_FLUSHRS: 5005 case UNSPEC_BUNDLE_SELECTOR: 5006 break; 5007 5008 case UNSPEC_GR_SPILL: 5009 case UNSPEC_GR_RESTORE: 5010 { 5011 HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1)); 5012 HOST_WIDE_INT bit = (offset >> 3) & 63; 5013 5014 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 5015 new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL); 5016 need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit, 5017 new_flags, pred); 5018 break; 5019 } 5020 5021 case UNSPEC_FR_SPILL: 5022 case UNSPEC_FR_RESTORE: 5023 case UNSPEC_POPCNT: 5024 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 5025 break; 5026 5027 case UNSPEC_ADDP4: 5028 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 5029 break; 5030 5031 case UNSPEC_FR_RECIP_APPROX: 5032 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); 5033 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 5034 break; 5035 5036 case UNSPEC_CMPXCHG_ACQ: 5037 need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred); 5038 need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred); 5039 break; 5040 5041 default: 5042 abort (); 5043 } 5044 break; 5045 5046 case UNSPEC_VOLATILE: 5047 switch (XINT (x, 1)) 5048 { 5049 case UNSPECV_ALLOC: 5050 /* Alloc must always be the first instruction of a group. 5051 We force this by always returning true. */ 5052 /* ??? We might get better scheduling if we explicitly check for 5053 input/local/output register dependencies, and modify the 5054 scheduler so that alloc is always reordered to the start of 5055 the current group. We could then eliminate all of the 5056 first_instruction code.
*/ 5057 rws_access_regno (AR_PFS_REGNUM, flags, pred); 5058 5059 new_flags.is_write = 1; 5060 rws_access_regno (REG_AR_CFM, new_flags, pred); 5061 return 1; 5062 5063 case UNSPECV_SET_BSP: 5064 need_barrier = 1; 5065 break; 5066 5067 case UNSPECV_BLOCKAGE: 5068 case UNSPECV_INSN_GROUP_BARRIER: 5069 case UNSPECV_BREAK: 5070 case UNSPECV_PSAC_ALL: 5071 case UNSPECV_PSAC_NORMAL: 5072 return 0; 5073 5074 default: 5075 abort (); 5076 } 5077 break; 5078 5079 case RETURN: 5080 new_flags.is_write = 0; 5081 need_barrier = rws_access_regno (REG_RP, flags, pred); 5082 need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred); 5083 5084 new_flags.is_write = 1; 5085 need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred); 5086 need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred); 5087 break; 5088 5089 default: 5090 format_ptr = GET_RTX_FORMAT (GET_CODE (x)); 5091 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 5092 switch (format_ptr[i]) 5093 { 5094 case '0': /* unused field */ 5095 case 'i': /* integer */ 5096 case 'n': /* note */ 5097 case 'w': /* wide integer */ 5098 case 's': /* pointer to string */ 5099 case 'S': /* optional pointer to string */ 5100 break; 5101 5102 case 'e': 5103 if (rtx_needs_barrier (XEXP (x, i), flags, pred)) 5104 need_barrier = 1; 5105 break; 5106 5107 case 'E': 5108 for (j = XVECLEN (x, i) - 1; j >= 0; --j) 5109 if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) 5110 need_barrier = 1; 5111 break; 5112 5113 default: 5114 abort (); 5115 } 5116 break; 5117 } 5118 return need_barrier; 5119} 5120 5121/* Clear out the state for group_barrier_needed_p at the start of a 5122 sequence of insns. */ 5123 5124static void 5125init_insn_group_barriers () 5126{ 5127 memset (rws_sum, 0, sizeof (rws_sum)); 5128 first_instruction = 1; 5129} 5130 5131/* Given the current state, recorded by previous calls to this function, 5132 determine whether a group barrier (a stop bit) is necessary before INSN. 5133 Return nonzero if so. */ 5134 5135static int 5136group_barrier_needed_p (insn) 5137 rtx insn; 5138{ 5139 rtx pat; 5140 int need_barrier = 0; 5141 struct reg_flags flags; 5142 5143 memset (&flags, 0, sizeof (flags)); 5144 switch (GET_CODE (insn)) 5145 { 5146 case NOTE: 5147 break; 5148 5149 case BARRIER: 5150 /* A barrier doesn't imply an instruction group boundary. */ 5151 break; 5152 5153 case CODE_LABEL: 5154 memset (rws_insn, 0, sizeof (rws_insn)); 5155 return 1; 5156 5157 case CALL_INSN: 5158 flags.is_branch = 1; 5159 flags.is_sibcall = SIBLING_CALL_P (insn); 5160 memset (rws_insn, 0, sizeof (rws_insn)); 5161 5162 /* Don't bundle a call following another call. */ 5163 if ((pat = prev_active_insn (insn)) 5164 && GET_CODE (pat) == CALL_INSN) 5165 { 5166 need_barrier = 1; 5167 break; 5168 } 5169 5170 need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); 5171 break; 5172 5173 case JUMP_INSN: 5174 flags.is_branch = 1; 5175 5176 /* Don't bundle a jump following a call. */ 5177 if ((pat = prev_active_insn (insn)) 5178 && GET_CODE (pat) == CALL_INSN) 5179 { 5180 need_barrier = 1; 5181 break; 5182 } 5183 /* FALLTHRU */ 5184 5185 case INSN: 5186 if (GET_CODE (PATTERN (insn)) == USE 5187 || GET_CODE (PATTERN (insn)) == CLOBBER) 5188 /* Don't care about USE and CLOBBER "insns"---those are used to 5189 indicate to the optimizer that it shouldn't get rid of 5190 certain operations. */ 5191 break; 5192 5193 pat = PATTERN (insn); 5194 5195 /* Ug. Hack hacks hacked elsewhere. 
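      (The cases below either pick a representative piece out of a PARALLEL
      whose extra elements exist only to constrain scheduling, or skip
      patterns that emit no code at all.)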
*/ 5196 switch (recog_memoized (insn)) 5197 { 5198 /* We play dependency tricks with the epilogue in order 5199 to get proper schedules. Undo this for dv analysis. */ 5200 case CODE_FOR_epilogue_deallocate_stack: 5201 case CODE_FOR_prologue_allocate_stack: 5202 pat = XVECEXP (pat, 0, 0); 5203 break; 5204 5205 /* The pattern we use for br.cloop confuses the code above. 5206 The second element of the vector is representative. */ 5207 case CODE_FOR_doloop_end_internal: 5208 pat = XVECEXP (pat, 0, 1); 5209 break; 5210 5211 /* Doesn't generate code. */ 5212 case CODE_FOR_pred_rel_mutex: 5213 case CODE_FOR_prologue_use: 5214 return 0; 5215 5216 default: 5217 break; 5218 } 5219 5220 memset (rws_insn, 0, sizeof (rws_insn)); 5221 need_barrier = rtx_needs_barrier (pat, flags, 0); 5222 5223 /* Check to see if the previous instruction was a volatile 5224 asm. */ 5225 if (! need_barrier) 5226 need_barrier = rws_access_regno (REG_VOLATILE, flags, 0); 5227 break; 5228 5229 default: 5230 abort (); 5231 } 5232 5233 if (first_instruction) 5234 { 5235 need_barrier = 0; 5236 first_instruction = 0; 5237 } 5238 5239 return need_barrier; 5240} 5241 5242/* Like group_barrier_needed_p, but do not clobber the current state. */ 5243 5244static int 5245safe_group_barrier_needed_p (insn) 5246 rtx insn; 5247{ 5248 struct reg_write_state rws_saved[NUM_REGS]; 5249 int saved_first_instruction; 5250 int t; 5251 5252 memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved); 5253 saved_first_instruction = first_instruction; 5254 5255 t = group_barrier_needed_p (insn); 5256 5257 memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved); 5258 first_instruction = saved_first_instruction; 5259 5260 return t; 5261} 5262 5263/* INSNS is a chain of instructions. Scan the chain, and insert stop bits 5264 as necessary to eliminate dependencies. This function assumes that 5265 a final instruction scheduling pass has been run which has already 5266 inserted most of the necessary stop bits. This function only inserts 5267 new ones at basic block boundaries, since these are invisible to the 5268 scheduler. */ 5269 5270static void 5271emit_insn_group_barriers (dump, insns) 5272 FILE *dump; 5273 rtx insns; 5274{ 5275 rtx insn; 5276 rtx last_label = 0; 5277 int insns_since_last_label = 0; 5278 5279 init_insn_group_barriers (); 5280 5281 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5282 { 5283 if (GET_CODE (insn) == CODE_LABEL) 5284 { 5285 if (insns_since_last_label) 5286 last_label = insn; 5287 insns_since_last_label = 0; 5288 } 5289 else if (GET_CODE (insn) == NOTE 5290 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 5291 { 5292 if (insns_since_last_label) 5293 last_label = insn; 5294 insns_since_last_label = 0; 5295 } 5296 else if (GET_CODE (insn) == INSN 5297 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 5298 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 5299 { 5300 init_insn_group_barriers (); 5301 last_label = 0; 5302 } 5303 else if (INSN_P (insn)) 5304 { 5305 insns_since_last_label = 1; 5306 5307 if (group_barrier_needed_p (insn)) 5308 { 5309 if (last_label) 5310 { 5311 if (dump) 5312 fprintf (dump, "Emitting stop before label %d\n", 5313 INSN_UID (last_label)); 5314 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label); 5315 insn = last_label; 5316 5317 init_insn_group_barriers (); 5318 last_label = 0; 5319 } 5320 } 5321 } 5322 } 5323} 5324 5325/* Like emit_insn_group_barriers, but run if no final scheduling pass was run. 5326 This function has to emit all necessary group barriers.
*/ 5327 5328 static void 5329 emit_all_insn_group_barriers (dump, insns) 5330 FILE *dump ATTRIBUTE_UNUSED; 5331 rtx insns; 5332{ 5333 rtx insn; 5334 5335 init_insn_group_barriers (); 5336 5337 for (insn = insns; insn; insn = NEXT_INSN (insn)) 5338 { 5339 if (GET_CODE (insn) == BARRIER) 5340 { 5341 rtx last = prev_active_insn (insn); 5342 5343 if (! last) 5344 continue; 5345 if (GET_CODE (last) == JUMP_INSN 5346 && GET_CODE (PATTERN (last)) == ADDR_DIFF_VEC) 5347 last = prev_active_insn (last); 5348 if (recog_memoized (last) != CODE_FOR_insn_group_barrier) 5349 emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last); 5350 5351 init_insn_group_barriers (); 5352 } 5353 else if (INSN_P (insn)) 5354 { 5355 if (recog_memoized (insn) == CODE_FOR_insn_group_barrier) 5356 init_insn_group_barriers (); 5357 else if (group_barrier_needed_p (insn)) 5358 { 5359 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5360 init_insn_group_barriers (); 5361 group_barrier_needed_p (insn); 5362 } 5363 } 5364 } 5365} 5366 5367static int errata_find_address_regs PARAMS ((rtx *, void *)); 5368static void errata_emit_nops PARAMS ((rtx)); 5369static void fixup_errata PARAMS ((void)); 5370 5371/* This structure is used to track some details about the previous insn 5372 groups so we can determine if it may be necessary to insert NOPs to 5373 work around hardware errata. */ 5374static struct group 5375{ 5376 HARD_REG_SET p_reg_set; 5377 HARD_REG_SET gr_reg_conditionally_set; 5378} last_group[2]; 5379 5380/* Index into the last_group array. */ 5381static int group_idx; 5382 5383/* Called through for_each_rtx; determines if a hard register that was 5384 conditionally set in the previous group is used as an address register. 5385 It ensures that for_each_rtx returns 1 in that case. */ 5386static int 5387errata_find_address_regs (xp, data) 5388 rtx *xp; 5389 void *data ATTRIBUTE_UNUSED; 5390{ 5391 rtx x = *xp; 5392 if (GET_CODE (x) != MEM) 5393 return 0; 5394 x = XEXP (x, 0); 5395 if (GET_CODE (x) == POST_MODIFY) 5396 x = XEXP (x, 0); 5397 if (GET_CODE (x) == REG) 5398 { 5399 struct group *prev_group = last_group + (group_idx ^ 1); 5400 if (TEST_HARD_REG_BIT (prev_group->gr_reg_conditionally_set, 5401 REGNO (x))) 5402 return 1; 5403 return -1; 5404 } 5405 return 0; 5406} 5407 5408/* Called for each insn; this function keeps track of the state in 5409 last_group and emits additional NOPs if necessary to work around 5410 an Itanium A/B step erratum. */ 5411static void 5412errata_emit_nops (insn) 5413 rtx insn; 5414{ 5415 struct group *this_group = last_group + group_idx; 5416 struct group *prev_group = last_group + (group_idx ^ 1); 5417 rtx pat = PATTERN (insn); 5418 rtx cond = GET_CODE (pat) == COND_EXEC ? COND_EXEC_TEST (pat) : 0; 5419 rtx real_pat = cond ? COND_EXEC_CODE (pat) : pat; 5420 enum attr_type type; 5421 rtx set = real_pat; 5422 5423 if (GET_CODE (real_pat) == USE 5424 || GET_CODE (real_pat) == CLOBBER 5425 || GET_CODE (real_pat) == ASM_INPUT 5426 || GET_CODE (real_pat) == ADDR_VEC 5427 || GET_CODE (real_pat) == ADDR_DIFF_VEC 5428 || asm_noperands (PATTERN (insn)) >= 0) 5429 return; 5430 5431 /* single_set doesn't work for COND_EXEC insns, so we have to duplicate 5432 parts of it.
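      (Below: a PARALLEL is accepted only when everything past its first
      element is a USE or CLOBBER, and whatever survives must be a plain
      SET -- the same acceptance rule single_set applies.)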
*/ 5433 5434 if (GET_CODE (set) == PARALLEL) 5435 { 5436 int i; 5437 set = XVECEXP (real_pat, 0, 0); 5438 for (i = 1; i < XVECLEN (real_pat, 0); i++) 5439 if (GET_CODE (XVECEXP (real_pat, 0, i)) != USE 5440 && GET_CODE (XVECEXP (real_pat, 0, i)) != CLOBBER) 5441 { 5442 set = 0; 5443 break; 5444 } 5445 } 5446 5447 if (set && GET_CODE (set) != SET) 5448 set = 0; 5449 5450 type = get_attr_type (insn); 5451 5452 if (type == TYPE_F 5453 && set && REG_P (SET_DEST (set)) && PR_REGNO_P (REGNO (SET_DEST (set)))) 5454 SET_HARD_REG_BIT (this_group->p_reg_set, REGNO (SET_DEST (set))); 5455 5456 if ((type == TYPE_M || type == TYPE_A) && cond && set 5457 && REG_P (SET_DEST (set)) 5458 && GET_CODE (SET_SRC (set)) != PLUS 5459 && GET_CODE (SET_SRC (set)) != MINUS 5460 && (GET_CODE (SET_SRC (set)) != ASHIFT 5461 || !shladd_operand (XEXP (SET_SRC (set), 1), VOIDmode)) 5462 && (GET_CODE (SET_SRC (set)) != MEM 5463 || GET_CODE (XEXP (SET_SRC (set), 0)) != POST_MODIFY) 5464 && GENERAL_REGNO_P (REGNO (SET_DEST (set)))) 5465 { 5466 if (GET_RTX_CLASS (GET_CODE (cond)) != '<' 5467 || ! REG_P (XEXP (cond, 0))) 5468 abort (); 5469 5470 if (TEST_HARD_REG_BIT (prev_group->p_reg_set, REGNO (XEXP (cond, 0)))) 5471 SET_HARD_REG_BIT (this_group->gr_reg_conditionally_set, REGNO (SET_DEST (set))); 5472 } 5473 if (for_each_rtx (&real_pat, errata_find_address_regs, NULL)) 5474 { 5475 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5476 emit_insn_before (gen_nop (), insn); 5477 emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn); 5478 group_idx = 0; 5479 memset (last_group, 0, sizeof last_group); 5480 } 5481} 5482 5483/* Emit extra nops if they are required to work around hardware errata. */ 5484 5485static void 5486fixup_errata () 5487{ 5488 rtx insn; 5489 5490 if (! TARGET_B_STEP) 5491 return; 5492 5493 group_idx = 0; 5494 memset (last_group, 0, sizeof last_group); 5495 5496 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 5497 { 5498 if (!INSN_P (insn)) 5499 continue; 5500 5501 if (ia64_safe_type (insn) == TYPE_S) 5502 { 5503 group_idx ^= 1; 5504 memset (last_group + group_idx, 0, sizeof last_group[group_idx]); 5505 } 5506 else 5507 errata_emit_nops (insn); 5508 } 5509} 5510 5511/* Instruction scheduling support. */ 5512/* Describe one bundle. */ 5513 5514struct bundle 5515{ 5516 /* Zero if there's no possibility of a stop in this bundle other than 5517 at the end, otherwise the position of the optional stop bit. */ 5518 int possible_stop; 5519 /* The types of the three slots. */ 5520 enum attr_type t[3]; 5521 /* The pseudo op to be emitted into the assembler output. */ 5522 const char *name; 5523}; 5524 5525#define NR_BUNDLES 10 5526 5527/* A list of all available bundles. */ 5528 5529static const struct bundle bundle[NR_BUNDLES] = 5530{ 5531 { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" }, 5532 { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" }, 5533 { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" }, 5534 { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" }, 5535#if NR_BUNDLES == 10 5536 { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" }, 5537 { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" }, 5538#endif 5539 { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" }, 5540 { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" }, 5541 { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" }, 5542 /* .mfi needs to occur earlier than .mlx, so that we only generate it if 5543 it matches an L type insn. Otherwise we'll try to generate L type 5544 nops. */ 5545 { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" } 5546}; 5547 5548/* Describe a packet of instructions. 
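   (Two bundles of three slots each give the issue width of six that
   ia64_issue_rate reports below.)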
Packets consist of two bundles that 5549 are visible to the hardware in one scheduling window. */ 5550 5551struct ia64_packet 5552{ 5553 const struct bundle *t1, *t2; 5554 /* Precomputed value of the first split issue in this packet if a cycle 5555 starts at its beginning. */ 5556 int first_split; 5557 /* For convenience, the insn types are replicated here so we don't have 5558 to go through T1 and T2 all the time. */ 5559 enum attr_type t[6]; 5560}; 5561 5562/* An array containing all possible packets. */ 5563#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES) 5564static struct ia64_packet packets[NR_PACKETS]; 5565 5566/* Map attr_type to a string with the name. */ 5567 5568static const char *const type_names[] = 5569{ 5570 "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S" 5571}; 5572 5573/* Nonzero if we should insert stop bits into the schedule. */ 5574int ia64_final_schedule = 0; 5575 5576static int itanium_split_issue PARAMS ((const struct ia64_packet *, int)); 5577static rtx ia64_single_set PARAMS ((rtx)); 5578static int insn_matches_slot PARAMS ((const struct ia64_packet *, enum attr_type, int, rtx)); 5579static void ia64_emit_insn_before PARAMS ((rtx, rtx)); 5580static void maybe_rotate PARAMS ((FILE *)); 5581static void finish_last_head PARAMS ((FILE *, int)); 5582static void rotate_one_bundle PARAMS ((FILE *)); 5583static void rotate_two_bundles PARAMS ((FILE *)); 5584static void nop_cycles_until PARAMS ((int, FILE *)); 5585static void cycle_end_fill_slots PARAMS ((FILE *)); 5586static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *)); 5587static int get_split PARAMS ((const struct ia64_packet *, int)); 5588static int find_best_insn PARAMS ((rtx *, enum attr_type *, int, 5589 const struct ia64_packet *, int)); 5590static void find_best_packet PARAMS ((int *, const struct ia64_packet **, 5591 rtx *, enum attr_type *, int)); 5592static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int)); 5593static void dump_current_packet PARAMS ((FILE *)); 5594static void schedule_stop PARAMS ((FILE *)); 5595static rtx gen_nop_type PARAMS ((enum attr_type)); 5596static void ia64_emit_nops PARAMS ((void)); 5597 5598/* Map a bundle number to its pseudo-op. */ 5599 5600const char * 5601get_bundle_name (b) 5602 int b; 5603{ 5604 return bundle[b].name; 5605} 5606 5607/* Compute the slot which will cause a split issue in packet P if the 5608 current cycle begins at slot BEGIN. */ 5609 5610static int 5611itanium_split_issue (p, begin) 5612 const struct ia64_packet *p; 5613 int begin; 5614{ 5615 int type_count[TYPE_S]; 5616 int i; 5617 int split = 6; 5618 5619 if (begin < 3) 5620 { 5621 /* Always split before and after MMF. */ 5622 if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F) 5623 return 3; 5624 if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F) 5625 return 3; 5626 /* Always split after MBB and BBB. */ 5627 if (p->t[1] == TYPE_B) 5628 return 3; 5629 /* Split after first bundle in MIB BBB combination. */ 5630 if (p->t[2] == TYPE_B && p->t[3] == TYPE_B) 5631 return 3; 5632 } 5633 5634 memset (type_count, 0, sizeof type_count); 5635 for (i = begin; i < split; i++) 5636 { 5637 enum attr_type t0 = p->t[i]; 5638 /* An MLX bundle reserves the same units as an MFI bundle. */ 5639 enum attr_type t = (t0 == TYPE_L ? TYPE_F 5640 : t0 == TYPE_X ? TYPE_I 5641 : t0); 5642 5643 /* Itanium can execute up to 3 branches, 2 floating point, 2 memory, and 5644 2 integer per cycle. */ 5645 int max = (t == TYPE_B ? 
3 : 2); 5646 if (type_count[t] == max) 5647 return i; 5648 5649 type_count[t]++; 5650 } 5651 return split; 5652} 5653 5654/* Return the maximum number of instructions a cpu can issue. */ 5655 5656static int 5657ia64_issue_rate () 5658{ 5659 return 6; 5660} 5661 5662/* Helper function - like single_set, but look inside COND_EXEC. */ 5663 5664static rtx 5665ia64_single_set (insn) 5666 rtx insn; 5667{ 5668 rtx x = PATTERN (insn), ret; 5669 if (GET_CODE (x) == COND_EXEC) 5670 x = COND_EXEC_CODE (x); 5671 if (GET_CODE (x) == SET) 5672 return x; 5673 5674 /* Special case here prologue_allocate_stack and epilogue_deallocate_stack. 5675 Although they are not classical single set, the second set is there just 5676 to protect it from moving past FP-relative stack accesses. */ 5677 switch (recog_memoized (insn)) 5678 { 5679 case CODE_FOR_prologue_allocate_stack: 5680 case CODE_FOR_epilogue_deallocate_stack: 5681 ret = XVECEXP (x, 0, 0); 5682 break; 5683 5684 default: 5685 ret = single_set_2 (insn, x); 5686 break; 5687 } 5688 5689 return ret; 5690} 5691 5692/* Adjust the cost of a scheduling dependency. Return the new cost of 5693 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 5694 5695static int 5696ia64_adjust_cost (insn, link, dep_insn, cost) 5697 rtx insn, link, dep_insn; 5698 int cost; 5699{ 5700 enum attr_type dep_type; 5701 enum attr_itanium_class dep_class; 5702 enum attr_itanium_class insn_class; 5703 rtx dep_set, set, src, addr; 5704 5705 if (GET_CODE (PATTERN (insn)) == CLOBBER 5706 || GET_CODE (PATTERN (insn)) == USE 5707 || GET_CODE (PATTERN (dep_insn)) == CLOBBER 5708 || GET_CODE (PATTERN (dep_insn)) == USE 5709 /* @@@ Not accurate for indirect calls. */ 5710 || GET_CODE (insn) == CALL_INSN 5711 || ia64_safe_type (insn) == TYPE_S) 5712 return 0; 5713 5714 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT 5715 || REG_NOTE_KIND (link) == REG_DEP_ANTI) 5716 return 0; 5717 5718 dep_type = ia64_safe_type (dep_insn); 5719 dep_class = ia64_safe_itanium_class (dep_insn); 5720 insn_class = ia64_safe_itanium_class (insn); 5721 5722 /* Compares that feed a conditional branch can execute in the same 5723 cycle. */ 5724 dep_set = ia64_single_set (dep_insn); 5725 set = ia64_single_set (insn); 5726 5727 if (dep_type != TYPE_F 5728 && dep_set 5729 && GET_CODE (SET_DEST (dep_set)) == REG 5730 && PR_REG (REGNO (SET_DEST (dep_set))) 5731 && GET_CODE (insn) == JUMP_INSN) 5732 return 0; 5733 5734 if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM) 5735 { 5736 /* ??? Can't find any information in the documentation about whether 5737 a sequence 5738 st [rx] = ra 5739 ld rb = [ry] 5740 splits issue. Assume it doesn't. */ 5741 return 0; 5742 } 5743 5744 src = set ? SET_SRC (set) : 0; 5745 addr = 0; 5746 if (set) 5747 { 5748 if (GET_CODE (SET_DEST (set)) == MEM) 5749 addr = XEXP (SET_DEST (set), 0); 5750 else if (GET_CODE (SET_DEST (set)) == SUBREG 5751 && GET_CODE (SUBREG_REG (SET_DEST (set))) == MEM) 5752 addr = XEXP (SUBREG_REG (SET_DEST (set)), 0); 5753 else 5754 { 5755 addr = src; 5756 if (GET_CODE (addr) == UNSPEC && XVECLEN (addr, 0) > 0) 5757 addr = XVECEXP (addr, 0, 0); 5758 while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND) 5759 addr = XEXP (addr, 0); 5760 5761 /* Note that LO_SUM is used for GOT loads.
*/
	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
	    addr = XEXP (addr, 0);
	  else
	    addr = 0;
	}
    }

  if (addr && GET_CODE (addr) == POST_MODIFY)
    addr = XEXP (addr, 0);

  set = ia64_single_set (dep_insn);

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_LD
	  || insn_class == ITANIUM_CLASS_ST))
    {
      if (! addr || ! set)
	abort ();
      /* This isn't completely correct - an IALU that feeds an address has
	 a latency of 1 cycle if it's issued in an M slot, but 2 cycles
	 otherwise.  Unfortunately there's no good way to describe this.  */
      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
	return cost + 1;
    }

  if ((dep_class == ITANIUM_CLASS_IALU
       || dep_class == ITANIUM_CLASS_ILOG
       || dep_class == ITANIUM_CLASS_LD)
      && (insn_class == ITANIUM_CLASS_MMMUL
	  || insn_class == ITANIUM_CLASS_MMSHF
	  || insn_class == ITANIUM_CLASS_MMSHFI))
    return 3;

  if (dep_class == ITANIUM_CLASS_FMAC
      && (insn_class == ITANIUM_CLASS_FMISC
	  || insn_class == ITANIUM_CLASS_FCVTFX
	  || insn_class == ITANIUM_CLASS_XMPY))
    return 7;

  if ((dep_class == ITANIUM_CLASS_FMAC
       || dep_class == ITANIUM_CLASS_FMISC
       || dep_class == ITANIUM_CLASS_FCVTFX
       || dep_class == ITANIUM_CLASS_XMPY)
      && insn_class == ITANIUM_CLASS_STF)
    return 8;

  /* Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4,
     but HP engineers say any non-MM operation.  */
  if ((dep_class == ITANIUM_CLASS_MMMUL
       || dep_class == ITANIUM_CLASS_MMSHF
       || dep_class == ITANIUM_CLASS_MMSHFI)
      && insn_class != ITANIUM_CLASS_MMMUL
      && insn_class != ITANIUM_CLASS_MMSHF
      && insn_class != ITANIUM_CLASS_MMSHFI)
    return 4;

  return cost;
}

/* Describe the current state of the Itanium pipeline.  */
static struct
{
  /* The first slot that is used in the current cycle.  */
  int first_slot;
  /* The next slot to fill.  */
  int cur;
  /* The packet we have selected for the current issue window.  */
  const struct ia64_packet *packet;
  /* The position of the split issue that occurs due to issue width
     limitations (6 if there's no split issue).  */
  int split;
  /* Record data about the insns scheduled so far in the same issue
     window.  The elements up to but not including FIRST_SLOT belong
     to the previous cycle, the ones starting with FIRST_SLOT belong
     to the current cycle.  */
  enum attr_type types[6];
  rtx insns[6];
  int stopbit[6];
  /* Nonzero if we decided to schedule a stop bit.  */
  int last_was_stop;
} sched_data;

/* Temporary arrays; they have enough elements to hold all insns that
   can be ready at the same time while scheduling the current block.
   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
static rtx *sched_ready;
static enum attr_type *sched_types;

/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
   of packet P.
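   For example, an A-type insn can execute on either an M or an I unit,
   so it matches a slot whose template type is M or I; any other type
   must match the slot type exactly.  Calls and insns that require
   unit 0 are subject to the additional restrictions checked below.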
*/ 5854 5855static int 5856insn_matches_slot (p, itype, slot, insn) 5857 const struct ia64_packet *p; 5858 enum attr_type itype; 5859 int slot; 5860 rtx insn; 5861{ 5862 enum attr_itanium_requires_unit0 u0; 5863 enum attr_type stype = p->t[slot]; 5864 5865 if (insn) 5866 { 5867 u0 = ia64_safe_itanium_requires_unit0 (insn); 5868 if (u0 == ITANIUM_REQUIRES_UNIT0_YES) 5869 { 5870 int i; 5871 for (i = sched_data.first_slot; i < slot; i++) 5872 if (p->t[i] == stype 5873 || (stype == TYPE_F && p->t[i] == TYPE_L) 5874 || (stype == TYPE_I && p->t[i] == TYPE_X)) 5875 return 0; 5876 } 5877 if (GET_CODE (insn) == CALL_INSN) 5878 { 5879 /* Reject calls in multiway branch packets. We want to limit 5880 the number of multiway branches we generate (since the branch 5881 predictor is limited), and this seems to work fairly well. 5882 (If we didn't do this, we'd have to add another test here to 5883 force calls into the third slot of the bundle.) */ 5884 if (slot < 3) 5885 { 5886 if (p->t[1] == TYPE_B) 5887 return 0; 5888 } 5889 else 5890 { 5891 if (p->t[4] == TYPE_B) 5892 return 0; 5893 } 5894 } 5895 } 5896 5897 if (itype == stype) 5898 return 1; 5899 if (itype == TYPE_A) 5900 return stype == TYPE_M || stype == TYPE_I; 5901 return 0; 5902} 5903 5904/* Like emit_insn_before, but skip cycle_display notes. 5905 ??? When cycle display notes are implemented, update this. */ 5906 5907static void 5908ia64_emit_insn_before (insn, before) 5909 rtx insn, before; 5910{ 5911 emit_insn_before (insn, before); 5912} 5913 5914/* When rotating a bundle out of the issue window, insert a bundle selector 5915 insn in front of it. DUMP is the scheduling dump file or NULL. START 5916 is either 0 or 3, depending on whether we want to emit a bundle selector 5917 for the first bundle or the second bundle in the current issue window. 5918 5919 The selector insns are emitted this late because the selected packet can 5920 be changed until parts of it get rotated out. */ 5921 5922static void 5923finish_last_head (dump, start) 5924 FILE *dump; 5925 int start; 5926{ 5927 const struct ia64_packet *p = sched_data.packet; 5928 const struct bundle *b = start == 0 ? p->t1 : p->t2; 5929 int bundle_type = b - bundle; 5930 rtx insn; 5931 int i; 5932 5933 if (! ia64_final_schedule) 5934 return; 5935 5936 for (i = start; sched_data.insns[i] == 0; i++) 5937 if (i == start + 3) 5938 abort (); 5939 insn = sched_data.insns[i]; 5940 5941 if (dump) 5942 fprintf (dump, "// Emitting template before %d: %s\n", 5943 INSN_UID (insn), b->name); 5944 5945 ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn); 5946} 5947 5948/* We can't schedule more insns this cycle. Fix up the scheduling state 5949 and advance FIRST_SLOT and CUR. 5950 We have to distribute the insns that are currently found between 5951 FIRST_SLOT and CUR into the slots of the packet we have selected. So 5952 far, they are stored successively in the fields starting at FIRST_SLOT; 5953 now they must be moved to the correct slots. 5954 DUMP is the current scheduling dump file, or NULL. 
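   As an illustrative example (hypothetical, not a trace of a real run):
   if the selected packet starts with an MFI bundle and an M insn and an
   I insn were stored in slots 0 and 1, the I insn must be moved to
   slot 2; slot 1 is left holding the packet's F type with a null insn,
   and ia64_emit_nops materializes the missing nop after scheduling.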
*/ 5955 5956static void 5957cycle_end_fill_slots (dump) 5958 FILE *dump; 5959{ 5960 const struct ia64_packet *packet = sched_data.packet; 5961 int slot, i; 5962 enum attr_type tmp_types[6]; 5963 rtx tmp_insns[6]; 5964 5965 memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type)); 5966 memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx)); 5967 5968 for (i = slot = sched_data.first_slot; i < sched_data.cur; i++) 5969 { 5970 enum attr_type t = tmp_types[i]; 5971 if (t != ia64_safe_type (tmp_insns[i])) 5972 abort (); 5973 while (! insn_matches_slot (packet, t, slot, tmp_insns[i])) 5974 { 5975 if (slot > sched_data.split) 5976 abort (); 5977 if (dump) 5978 fprintf (dump, "// Packet needs %s, have %s\n", 5979 type_names[packet->t[slot]], type_names[t]); 5980 sched_data.types[slot] = packet->t[slot]; 5981 sched_data.insns[slot] = 0; 5982 sched_data.stopbit[slot] = 0; 5983 5984 /* ??? TYPE_L instructions always fill up two slots, but we don't 5985 support TYPE_L nops. */ 5986 if (packet->t[slot] == TYPE_L) 5987 abort (); 5988 5989 slot++; 5990 } 5991 5992 /* Do _not_ use T here. If T == TYPE_A, then we'd risk changing the 5993 actual slot type later. */ 5994 sched_data.types[slot] = packet->t[slot]; 5995 sched_data.insns[slot] = tmp_insns[i]; 5996 sched_data.stopbit[slot] = 0; 5997 slot++; 5998 5999 /* TYPE_L instructions always fill up two slots. */ 6000 if (t == TYPE_L) 6001 { 6002 sched_data.types[slot] = packet->t[slot]; 6003 sched_data.insns[slot] = 0; 6004 sched_data.stopbit[slot] = 0; 6005 slot++; 6006 } 6007 } 6008 6009 /* This isn't right - there's no need to pad out until the forced split; 6010 the CPU will automatically split if an insn isn't ready. */ 6011#if 0 6012 while (slot < sched_data.split) 6013 { 6014 sched_data.types[slot] = packet->t[slot]; 6015 sched_data.insns[slot] = 0; 6016 sched_data.stopbit[slot] = 0; 6017 slot++; 6018 } 6019#endif 6020 6021 sched_data.first_slot = sched_data.cur = slot; 6022} 6023 6024/* Bundle rotations, as described in the Itanium optimization manual. 6025 We can rotate either one or both bundles out of the issue window. 6026 DUMP is the current scheduling dump file, or NULL. */ 6027 6028static void 6029rotate_one_bundle (dump) 6030 FILE *dump; 6031{ 6032 if (dump) 6033 fprintf (dump, "// Rotating one bundle.\n"); 6034 6035 finish_last_head (dump, 0); 6036 if (sched_data.cur > 3) 6037 { 6038 sched_data.cur -= 3; 6039 sched_data.first_slot -= 3; 6040 memmove (sched_data.types, 6041 sched_data.types + 3, 6042 sched_data.cur * sizeof *sched_data.types); 6043 memmove (sched_data.stopbit, 6044 sched_data.stopbit + 3, 6045 sched_data.cur * sizeof *sched_data.stopbit); 6046 memmove (sched_data.insns, 6047 sched_data.insns + 3, 6048 sched_data.cur * sizeof *sched_data.insns); 6049 sched_data.packet 6050 = &packets[(sched_data.packet->t2 - bundle) * NR_BUNDLES]; 6051 } 6052 else 6053 { 6054 sched_data.cur = 0; 6055 sched_data.first_slot = 0; 6056 } 6057} 6058 6059static void 6060rotate_two_bundles (dump) 6061 FILE *dump; 6062{ 6063 if (dump) 6064 fprintf (dump, "// Rotating two bundles.\n"); 6065 6066 if (sched_data.cur == 0) 6067 return; 6068 6069 finish_last_head (dump, 0); 6070 if (sched_data.cur > 3) 6071 finish_last_head (dump, 3); 6072 sched_data.cur = 0; 6073 sched_data.first_slot = 0; 6074} 6075 6076/* We're beginning a new block. Initialize data structures as necessary. 
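   On the first call we also build the packet table: each ordered pair
   of bundle templates yields one of the NR_BUNDLES * NR_BUNDLES packets,
   with the per-slot types copied out of the two bundles and the packet's
   first split issue (for a cycle starting at slot 0) precomputed.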
*/ 6077 6078static void 6079ia64_sched_init (dump, sched_verbose, max_ready) 6080 FILE *dump ATTRIBUTE_UNUSED; 6081 int sched_verbose ATTRIBUTE_UNUSED; 6082 int max_ready; 6083{ 6084 static int initialized = 0; 6085 6086 if (! initialized) 6087 { 6088 int b1, b2, i; 6089 6090 initialized = 1; 6091 6092 for (i = b1 = 0; b1 < NR_BUNDLES; b1++) 6093 { 6094 const struct bundle *t1 = bundle + b1; 6095 for (b2 = 0; b2 < NR_BUNDLES; b2++, i++) 6096 { 6097 const struct bundle *t2 = bundle + b2; 6098 6099 packets[i].t1 = t1; 6100 packets[i].t2 = t2; 6101 } 6102 } 6103 for (i = 0; i < NR_PACKETS; i++) 6104 { 6105 int j; 6106 for (j = 0; j < 3; j++) 6107 packets[i].t[j] = packets[i].t1->t[j]; 6108 for (j = 0; j < 3; j++) 6109 packets[i].t[j + 3] = packets[i].t2->t[j]; 6110 packets[i].first_split = itanium_split_issue (packets + i, 0); 6111 } 6112 6113 } 6114 6115 init_insn_group_barriers (); 6116 6117 memset (&sched_data, 0, sizeof sched_data); 6118 sched_types = (enum attr_type *) xmalloc (max_ready 6119 * sizeof (enum attr_type)); 6120 sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx)); 6121} 6122 6123/* See if the packet P can match the insns we have already scheduled. Return 6124 nonzero if so. In *PSLOT, we store the first slot that is available for 6125 more instructions if we choose this packet. 6126 SPLIT holds the last slot we can use, there's a split issue after it so 6127 scheduling beyond it would cause us to use more than one cycle. */ 6128 6129static int 6130packet_matches_p (p, split, pslot) 6131 const struct ia64_packet *p; 6132 int split; 6133 int *pslot; 6134{ 6135 int filled = sched_data.cur; 6136 int first = sched_data.first_slot; 6137 int i, slot; 6138 6139 /* First, check if the first of the two bundles must be a specific one (due 6140 to stop bits). */ 6141 if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1) 6142 return 0; 6143 if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2) 6144 return 0; 6145 6146 for (i = 0; i < first; i++) 6147 if (! insn_matches_slot (p, sched_data.types[i], i, 6148 sched_data.insns[i])) 6149 return 0; 6150 for (i = slot = first; i < filled; i++) 6151 { 6152 while (slot < split) 6153 { 6154 if (insn_matches_slot (p, sched_data.types[i], slot, 6155 sched_data.insns[i])) 6156 break; 6157 slot++; 6158 } 6159 if (slot == split) 6160 return 0; 6161 slot++; 6162 } 6163 6164 if (pslot) 6165 *pslot = slot; 6166 return 1; 6167} 6168 6169/* A frontend for itanium_split_issue. For a packet P and a slot 6170 number FIRST that describes the start of the current clock cycle, 6171 return the slot number of the first split issue. This function 6172 uses the cached number found in P if possible. */ 6173 6174static int 6175get_split (p, first) 6176 const struct ia64_packet *p; 6177 int first; 6178{ 6179 if (first == 0) 6180 return p->first_split; 6181 return itanium_split_issue (p, first); 6182} 6183 6184/* Given N_READY insns in the array READY, whose types are found in the 6185 corresponding array TYPES, return the insn that is best suited to be 6186 scheduled in slot SLOT of packet P. */ 6187 6188static int 6189find_best_insn (ready, types, n_ready, p, slot) 6190 rtx *ready; 6191 enum attr_type *types; 6192 int n_ready; 6193 const struct ia64_packet *p; 6194 int slot; 6195{ 6196 int best = -1; 6197 int best_pri = 0; 6198 while (n_ready-- > 0) 6199 { 6200 rtx insn = ready[n_ready]; 6201 if (! 
insn) 6202 continue; 6203 if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri) 6204 break; 6205 /* If we have equally good insns, one of which has a stricter 6206 slot requirement, prefer the one with the stricter requirement. */ 6207 if (best >= 0 && types[n_ready] == TYPE_A) 6208 continue; 6209 if (insn_matches_slot (p, types[n_ready], slot, insn)) 6210 { 6211 best = n_ready; 6212 best_pri = INSN_PRIORITY (ready[best]); 6213 6214 /* If there's no way we could get a stricter requirement, stop 6215 looking now. */ 6216 if (types[n_ready] != TYPE_A 6217 && ia64_safe_itanium_requires_unit0 (ready[n_ready])) 6218 break; 6219 break; 6220 } 6221 } 6222 return best; 6223} 6224 6225/* Select the best packet to use given the current scheduler state and the 6226 current ready list. 6227 READY is an array holding N_READY ready insns; TYPES is a corresponding 6228 array that holds their types. Store the best packet in *PPACKET and the 6229 number of insns that can be scheduled in the current cycle in *PBEST. */ 6230 6231static void 6232find_best_packet (pbest, ppacket, ready, types, n_ready) 6233 int *pbest; 6234 const struct ia64_packet **ppacket; 6235 rtx *ready; 6236 enum attr_type *types; 6237 int n_ready; 6238{ 6239 int first = sched_data.first_slot; 6240 int best = 0; 6241 int lowest_end = 6; 6242 const struct ia64_packet *best_packet = NULL; 6243 int i; 6244 6245 for (i = 0; i < NR_PACKETS; i++) 6246 { 6247 const struct ia64_packet *p = packets + i; 6248 int slot; 6249 int split = get_split (p, first); 6250 int win = 0; 6251 int first_slot, last_slot; 6252 int b_nops = 0; 6253 6254 if (! packet_matches_p (p, split, &first_slot)) 6255 continue; 6256 6257 memcpy (sched_ready, ready, n_ready * sizeof (rtx)); 6258 6259 win = 0; 6260 last_slot = 6; 6261 for (slot = first_slot; slot < split; slot++) 6262 { 6263 int insn_nr; 6264 6265 /* Disallow a degenerate case where the first bundle doesn't 6266 contain anything but NOPs! */ 6267 if (first_slot == 0 && win == 0 && slot == 3) 6268 { 6269 win = -1; 6270 break; 6271 } 6272 6273 insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot); 6274 if (insn_nr >= 0) 6275 { 6276 sched_ready[insn_nr] = 0; 6277 last_slot = slot; 6278 win++; 6279 } 6280 else if (p->t[slot] == TYPE_B) 6281 b_nops++; 6282 } 6283 /* We must disallow MBB/BBB packets if any of their B slots would be 6284 filled with nops. */ 6285 if (last_slot < 3) 6286 { 6287 if (p->t[1] == TYPE_B && (b_nops || last_slot < 2)) 6288 win = -1; 6289 } 6290 else 6291 { 6292 if (p->t[4] == TYPE_B && (b_nops || last_slot < 5)) 6293 win = -1; 6294 } 6295 6296 if (win > best 6297 || (win == best && last_slot < lowest_end)) 6298 { 6299 best = win; 6300 lowest_end = last_slot; 6301 best_packet = p; 6302 } 6303 } 6304 *pbest = best; 6305 *ppacket = best_packet; 6306} 6307 6308/* Reorder the ready list so that the insns that can be issued in this cycle 6309 are found in the correct order at the end of the list. 6310 DUMP is the scheduling dump file, or NULL. READY points to the start, 6311 E_READY to the end of the ready list. MAY_FAIL determines what should be 6312 done if no insns can be scheduled in this cycle: if it is zero, we abort, 6313 otherwise we return 0. 6314 Return 1 if any insns can be scheduled in this cycle. 
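   Note that the scheduler consumes insns from the end of the ready
   list, so the insns chosen for this cycle are moved to the end, in
   the order in which they should issue.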
*/ 6315 6316static int 6317itanium_reorder (dump, ready, e_ready, may_fail) 6318 FILE *dump; 6319 rtx *ready; 6320 rtx *e_ready; 6321 int may_fail; 6322{ 6323 const struct ia64_packet *best_packet; 6324 int n_ready = e_ready - ready; 6325 int first = sched_data.first_slot; 6326 int i, best, best_split, filled; 6327 6328 for (i = 0; i < n_ready; i++) 6329 sched_types[i] = ia64_safe_type (ready[i]); 6330 6331 find_best_packet (&best, &best_packet, ready, sched_types, n_ready); 6332 6333 if (best == 0) 6334 { 6335 if (may_fail) 6336 return 0; 6337 abort (); 6338 } 6339 6340 if (dump) 6341 { 6342 fprintf (dump, "// Selected bundles: %s %s (%d insns)\n", 6343 best_packet->t1->name, 6344 best_packet->t2 ? best_packet->t2->name : NULL, best); 6345 } 6346 6347 best_split = itanium_split_issue (best_packet, first); 6348 packet_matches_p (best_packet, best_split, &filled); 6349 6350 for (i = filled; i < best_split; i++) 6351 { 6352 int insn_nr; 6353 6354 insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i); 6355 if (insn_nr >= 0) 6356 { 6357 rtx insn = ready[insn_nr]; 6358 memmove (ready + insn_nr, ready + insn_nr + 1, 6359 (n_ready - insn_nr - 1) * sizeof (rtx)); 6360 memmove (sched_types + insn_nr, sched_types + insn_nr + 1, 6361 (n_ready - insn_nr - 1) * sizeof (enum attr_type)); 6362 ready[--n_ready] = insn; 6363 } 6364 } 6365 6366 sched_data.packet = best_packet; 6367 sched_data.split = best_split; 6368 return 1; 6369} 6370 6371/* Dump information about the current scheduling state to file DUMP. */ 6372 6373static void 6374dump_current_packet (dump) 6375 FILE *dump; 6376{ 6377 int i; 6378 fprintf (dump, "// %d slots filled:", sched_data.cur); 6379 for (i = 0; i < sched_data.first_slot; i++) 6380 { 6381 rtx insn = sched_data.insns[i]; 6382 fprintf (dump, " %s", type_names[sched_data.types[i]]); 6383 if (insn) 6384 fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]); 6385 if (sched_data.stopbit[i]) 6386 fprintf (dump, " ;;"); 6387 } 6388 fprintf (dump, " :::"); 6389 for (i = sched_data.first_slot; i < sched_data.cur; i++) 6390 { 6391 rtx insn = sched_data.insns[i]; 6392 enum attr_type t = ia64_safe_type (insn); 6393 fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]); 6394 } 6395 fprintf (dump, "\n"); 6396} 6397 6398/* Schedule a stop bit. DUMP is the current scheduling dump file, or 6399 NULL. */ 6400 6401static void 6402schedule_stop (dump) 6403 FILE *dump; 6404{ 6405 const struct ia64_packet *best = sched_data.packet; 6406 int i; 6407 int best_stop = 6; 6408 6409 if (dump) 6410 fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur); 6411 6412 if (sched_data.cur == 0) 6413 { 6414 if (dump) 6415 fprintf (dump, "// At start of bundle, so nothing to do.\n"); 6416 6417 rotate_two_bundles (NULL); 6418 return; 6419 } 6420 6421 for (i = -1; i < NR_PACKETS; i++) 6422 { 6423 /* This is a slight hack to give the current packet the first chance. 6424 This is done to avoid e.g. switching from MIB to MBB bundles. */ 6425 const struct ia64_packet *p = (i >= 0 ? packets + i : sched_data.packet); 6426 int split = get_split (p, sched_data.first_slot); 6427 const struct bundle *compare; 6428 int next, stoppos; 6429 6430 if (! packet_matches_p (p, split, &next)) 6431 continue; 6432 6433 compare = next > 3 ? 
p->t2 : p->t1; 6434 6435 stoppos = 3; 6436 if (compare->possible_stop) 6437 stoppos = compare->possible_stop; 6438 if (next > 3) 6439 stoppos += 3; 6440 6441 if (stoppos < next || stoppos >= best_stop) 6442 { 6443 if (compare->possible_stop == 0) 6444 continue; 6445 stoppos = (next > 3 ? 6 : 3); 6446 } 6447 if (stoppos < next || stoppos >= best_stop) 6448 continue; 6449 6450 if (dump) 6451 fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n", 6452 best->t1->name, best->t2->name, p->t1->name, p->t2->name, 6453 stoppos); 6454 6455 best_stop = stoppos; 6456 best = p; 6457 } 6458 6459 sched_data.packet = best; 6460 cycle_end_fill_slots (dump); 6461 while (sched_data.cur < best_stop) 6462 { 6463 sched_data.types[sched_data.cur] = best->t[sched_data.cur]; 6464 sched_data.insns[sched_data.cur] = 0; 6465 sched_data.stopbit[sched_data.cur] = 0; 6466 sched_data.cur++; 6467 } 6468 sched_data.stopbit[sched_data.cur - 1] = 1; 6469 sched_data.first_slot = best_stop; 6470 6471 if (dump) 6472 dump_current_packet (dump); 6473} 6474 6475/* If necessary, perform one or two rotations on the scheduling state. 6476 This should only be called if we are starting a new cycle. */ 6477 6478static void 6479maybe_rotate (dump) 6480 FILE *dump; 6481{ 6482 cycle_end_fill_slots (dump); 6483 if (sched_data.cur == 6) 6484 rotate_two_bundles (dump); 6485 else if (sched_data.cur >= 3) 6486 rotate_one_bundle (dump); 6487 sched_data.first_slot = sched_data.cur; 6488} 6489 6490/* The clock cycle when ia64_sched_reorder was last called. */ 6491static int prev_cycle; 6492 6493/* The first insn scheduled in the previous cycle. This is the saved 6494 value of sched_data.first_slot. */ 6495static int prev_first; 6496 6497/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR. Used to 6498 pad out the delay between MM (shifts, etc.) and integer operations. */ 6499 6500static void 6501nop_cycles_until (clock_var, dump) 6502 int clock_var; 6503 FILE *dump; 6504{ 6505 int prev_clock = prev_cycle; 6506 int cycles_left = clock_var - prev_clock; 6507 bool did_stop = false; 6508 6509 /* Finish the previous cycle; pad it out with NOPs. */ 6510 if (sched_data.cur == 3) 6511 { 6512 sched_emit_insn (gen_insn_group_barrier (GEN_INT (3))); 6513 did_stop = true; 6514 maybe_rotate (dump); 6515 } 6516 else if (sched_data.cur > 0) 6517 { 6518 int need_stop = 0; 6519 int split = itanium_split_issue (sched_data.packet, prev_first); 6520 6521 if (sched_data.cur < 3 && split > 3) 6522 { 6523 split = 3; 6524 need_stop = 1; 6525 } 6526 6527 if (split > sched_data.cur) 6528 { 6529 int i; 6530 for (i = sched_data.cur; i < split; i++) 6531 { 6532 rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i])); 6533 sched_data.types[i] = sched_data.packet->t[i]; 6534 sched_data.insns[i] = t; 6535 sched_data.stopbit[i] = 0; 6536 } 6537 sched_data.cur = split; 6538 } 6539 6540 if (! 
need_stop && sched_data.cur > 0 && sched_data.cur < 6
	  && cycles_left > 1)
	{
	  int i;
	  for (i = sched_data.cur; i < 6; i++)
	    {
	      rtx t = sched_emit_insn (gen_nop_type (sched_data.packet->t[i]));
	      sched_data.types[i] = sched_data.packet->t[i];
	      sched_data.insns[i] = t;
	      sched_data.stopbit[i] = 0;
	    }
	  sched_data.cur = 6;
	  cycles_left--;
	  need_stop = 1;
	}

      if (need_stop || sched_data.cur == 6)
	{
	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	  did_stop = true;
	}
      maybe_rotate (dump);
    }

  cycles_left--;
  while (cycles_left > 0)
    {
      sched_emit_insn (gen_bundle_selector (GEN_INT (0)));
      sched_emit_insn (gen_nop_type (TYPE_M));
      sched_emit_insn (gen_nop_type (TYPE_I));
      if (cycles_left > 1)
	{
	  sched_emit_insn (gen_insn_group_barrier (GEN_INT (2)));
	  cycles_left--;
	}
      sched_emit_insn (gen_nop_type (TYPE_I));
      sched_emit_insn (gen_insn_group_barrier (GEN_INT (3)));
      did_stop = true;
      cycles_left--;
    }

  if (did_stop)
    init_insn_group_barriers ();
}

/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */

static int
ia64_internal_sched_reorder (dump, sched_verbose, ready, pn_ready,
			     reorder_type, clock_var)
     FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     rtx *ready;
     int *pn_ready;
     int reorder_type, clock_var;
{
  int n_asms;
  int n_ready = *pn_ready;
  rtx *e_ready = ready + n_ready;
  rtx *insnp;

  if (sched_verbose)
    {
      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
      dump_current_packet (dump);
    }

  /* Work around the pipeline flush that will occur if the results of
     an MM instruction are accessed before the result is ready.  Intel
     documentation says this only happens with IALU, ISHF, ILOG, LD,
     and ST consumers, but experimental evidence shows that *any* non-MM
     type instruction will incur the flush.  */
  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
    {
      for (insnp = ready; insnp < e_ready; insnp++)
	{
	  rtx insn = *insnp, link;
	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);

	  if (t == ITANIUM_CLASS_MMMUL
	      || t == ITANIUM_CLASS_MMSHF
	      || t == ITANIUM_CLASS_MMSHFI)
	    continue;

	  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
	    if (REG_NOTE_KIND (link) == 0)
	      {
		rtx other = XEXP (link, 0);
		enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
		if (t0 == ITANIUM_CLASS_MMSHF || t0 == ITANIUM_CLASS_MMMUL)
		  {
		    nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
		    goto out;
		  }
	      }
	}
    }
 out:

  prev_first = sched_data.first_slot;
  prev_cycle = clock_var;

  if (reorder_type == 0)
    maybe_rotate (sched_verbose ? dump : NULL);

  /* First, move all USEs, CLOBBERs and other crud out of the way.
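     Asm-like insns sort to the front of the ready list so that they
     issue last; any other TYPE_UNKNOWN insn is moved to the very end
     and issued immediately, preceded by a stop bit if it needs one.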
*/ 6647 n_asms = 0; 6648 for (insnp = ready; insnp < e_ready; insnp++) 6649 if (insnp < e_ready) 6650 { 6651 rtx insn = *insnp; 6652 enum attr_type t = ia64_safe_type (insn); 6653 if (t == TYPE_UNKNOWN) 6654 { 6655 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6656 || asm_noperands (PATTERN (insn)) >= 0) 6657 { 6658 rtx lowest = ready[n_asms]; 6659 ready[n_asms] = insn; 6660 *insnp = lowest; 6661 n_asms++; 6662 } 6663 else 6664 { 6665 rtx highest = ready[n_ready - 1]; 6666 ready[n_ready - 1] = insn; 6667 *insnp = highest; 6668 if (ia64_final_schedule && group_barrier_needed_p (insn)) 6669 { 6670 schedule_stop (sched_verbose ? dump : NULL); 6671 sched_data.last_was_stop = 1; 6672 maybe_rotate (sched_verbose ? dump : NULL); 6673 } 6674 6675 return 1; 6676 } 6677 } 6678 } 6679 if (n_asms < n_ready) 6680 { 6681 /* Some normal insns to process. Skip the asms. */ 6682 ready += n_asms; 6683 n_ready -= n_asms; 6684 } 6685 else if (n_ready > 0) 6686 { 6687 /* Only asm insns left. */ 6688 if (ia64_final_schedule && group_barrier_needed_p (ready[n_ready - 1])) 6689 { 6690 schedule_stop (sched_verbose ? dump : NULL); 6691 sched_data.last_was_stop = 1; 6692 maybe_rotate (sched_verbose ? dump : NULL); 6693 } 6694 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6695 return 1; 6696 } 6697 6698 if (ia64_final_schedule) 6699 { 6700 int nr_need_stop = 0; 6701 6702 for (insnp = ready; insnp < e_ready; insnp++) 6703 if (safe_group_barrier_needed_p (*insnp)) 6704 nr_need_stop++; 6705 6706 /* Schedule a stop bit if 6707 - all insns require a stop bit, or 6708 - we are starting a new cycle and _any_ insns require a stop bit. 6709 The reason for the latter is that if our schedule is accurate, then 6710 the additional stop won't decrease performance at this point (since 6711 there's a split issue at this point anyway), but it gives us more 6712 freedom when scheduling the currently ready insns. */ 6713 if ((reorder_type == 0 && nr_need_stop) 6714 || (reorder_type == 1 && n_ready == nr_need_stop)) 6715 { 6716 schedule_stop (sched_verbose ? dump : NULL); 6717 sched_data.last_was_stop = 1; 6718 maybe_rotate (sched_verbose ? dump : NULL); 6719 if (reorder_type == 1) 6720 return 0; 6721 } 6722 else 6723 { 6724 int deleted = 0; 6725 insnp = e_ready; 6726 /* Move down everything that needs a stop bit, preserving relative 6727 order. */ 6728 while (insnp-- > ready + deleted) 6729 while (insnp >= ready + deleted) 6730 { 6731 rtx insn = *insnp; 6732 if (! safe_group_barrier_needed_p (insn)) 6733 break; 6734 memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx)); 6735 *ready = insn; 6736 deleted++; 6737 } 6738 n_ready -= deleted; 6739 ready += deleted; 6740 if (deleted != nr_need_stop) 6741 abort (); 6742 } 6743 } 6744 6745 return itanium_reorder (sched_verbose ? dump : NULL, 6746 ready, e_ready, reorder_type == 1); 6747} 6748 6749static int 6750ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, clock_var) 6751 FILE *dump; 6752 int sched_verbose; 6753 rtx *ready; 6754 int *pn_ready; 6755 int clock_var; 6756{ 6757 return ia64_internal_sched_reorder (dump, sched_verbose, ready, 6758 pn_ready, 0, clock_var); 6759} 6760 6761/* Like ia64_sched_reorder, but called after issuing each insn. 6762 Override the default sort algorithm to better slot instructions. 
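   This pass also looks for one specific pattern (a stop bit after the
   first M slot, where the insns that follow would otherwise force M
   and I nops into the next bundle) and rewrites it to use an MFB
   bundle instead; the comment in the function body gives the exact
   shapes involved.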
*/ 6763 6764static int 6765ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var) 6766 FILE *dump ATTRIBUTE_UNUSED; 6767 int sched_verbose ATTRIBUTE_UNUSED; 6768 rtx *ready; 6769 int *pn_ready; 6770 int clock_var; 6771{ 6772 if (sched_data.last_was_stop) 6773 return 0; 6774 6775 /* Detect one special case and try to optimize it. 6776 If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs, 6777 then we can get better code by transforming this to 1.MFB;; 2.MIx. */ 6778 if (sched_data.first_slot == 1 6779 && sched_data.stopbit[0] 6780 && ((sched_data.cur == 4 6781 && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A) 6782 && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A) 6783 && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A)) 6784 || (sched_data.cur == 3 6785 && (sched_data.types[1] == TYPE_M 6786 || sched_data.types[1] == TYPE_A) 6787 && (sched_data.types[2] != TYPE_M 6788 && sched_data.types[2] != TYPE_I 6789 && sched_data.types[2] != TYPE_A)))) 6790 6791 { 6792 int i, best; 6793 rtx stop = sched_data.insns[1]; 6794 6795 /* Search backward for the stop bit that must be there. */ 6796 while (1) 6797 { 6798 int insn_code; 6799 6800 stop = PREV_INSN (stop); 6801 if (GET_CODE (stop) != INSN) 6802 abort (); 6803 insn_code = recog_memoized (stop); 6804 6805 /* Ignore .pred.rel.mutex. 6806 6807 ??? Update this to ignore cycle display notes too 6808 ??? once those are implemented */ 6809 if (insn_code == CODE_FOR_pred_rel_mutex 6810 || insn_code == CODE_FOR_prologue_use) 6811 continue; 6812 6813 if (insn_code == CODE_FOR_insn_group_barrier) 6814 break; 6815 abort (); 6816 } 6817 6818 /* Adjust the stop bit's slot selector. */ 6819 if (INTVAL (XVECEXP (PATTERN (stop), 0, 0)) != 1) 6820 abort (); 6821 XVECEXP (PATTERN (stop), 0, 0) = GEN_INT (3); 6822 6823 sched_data.stopbit[0] = 0; 6824 sched_data.stopbit[2] = 1; 6825 6826 sched_data.types[5] = sched_data.types[3]; 6827 sched_data.types[4] = sched_data.types[2]; 6828 sched_data.types[3] = sched_data.types[1]; 6829 sched_data.insns[5] = sched_data.insns[3]; 6830 sched_data.insns[4] = sched_data.insns[2]; 6831 sched_data.insns[3] = sched_data.insns[1]; 6832 sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0; 6833 sched_data.cur += 2; 6834 sched_data.first_slot = 3; 6835 for (i = 0; i < NR_PACKETS; i++) 6836 { 6837 const struct ia64_packet *p = packets + i; 6838 if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B) 6839 { 6840 sched_data.packet = p; 6841 break; 6842 } 6843 } 6844 rotate_one_bundle (sched_verbose ? dump : NULL); 6845 6846 best = 6; 6847 for (i = 0; i < NR_PACKETS; i++) 6848 { 6849 const struct ia64_packet *p = packets + i; 6850 int split = get_split (p, sched_data.first_slot); 6851 int next; 6852 6853 /* Disallow multiway branches here. */ 6854 if (p->t[1] == TYPE_B) 6855 continue; 6856 6857 if (packet_matches_p (p, split, &next) && next < best) 6858 { 6859 best = next; 6860 sched_data.packet = p; 6861 sched_data.split = split; 6862 } 6863 } 6864 if (best == 6) 6865 abort (); 6866 } 6867 6868 if (*pn_ready > 0) 6869 { 6870 int more = ia64_internal_sched_reorder (dump, sched_verbose, 6871 ready, pn_ready, 1, 6872 clock_var); 6873 if (more) 6874 return more; 6875 /* Did we schedule a stop? If so, finish this cycle. 
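     (After a stop the issue window is flushed, leaving CUR equal to
     FIRST_SLOT.)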
*/ 6876 if (sched_data.cur == sched_data.first_slot) 6877 return 0; 6878 } 6879 6880 if (sched_verbose) 6881 fprintf (dump, "// Can't issue more this cycle; updating type array.\n"); 6882 6883 cycle_end_fill_slots (sched_verbose ? dump : NULL); 6884 if (sched_verbose) 6885 dump_current_packet (dump); 6886 return 0; 6887} 6888 6889/* We are about to issue INSN. Return the number of insns left on the 6890 ready queue that can be issued this cycle. */ 6891 6892static int 6893ia64_variable_issue (dump, sched_verbose, insn, can_issue_more) 6894 FILE *dump; 6895 int sched_verbose; 6896 rtx insn; 6897 int can_issue_more ATTRIBUTE_UNUSED; 6898{ 6899 enum attr_type t = ia64_safe_type (insn); 6900 6901 if (sched_data.last_was_stop) 6902 { 6903 int t = sched_data.first_slot; 6904 if (t == 0) 6905 t = 3; 6906 ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn); 6907 init_insn_group_barriers (); 6908 sched_data.last_was_stop = 0; 6909 } 6910 6911 if (t == TYPE_UNKNOWN) 6912 { 6913 if (sched_verbose) 6914 fprintf (dump, "// Ignoring type %s\n", type_names[t]); 6915 if (GET_CODE (PATTERN (insn)) == ASM_INPUT 6916 || asm_noperands (PATTERN (insn)) >= 0) 6917 { 6918 /* This must be some kind of asm. Clear the scheduling state. */ 6919 rotate_two_bundles (sched_verbose ? dump : NULL); 6920 if (ia64_final_schedule) 6921 group_barrier_needed_p (insn); 6922 } 6923 return 1; 6924 } 6925 6926 /* This is _not_ just a sanity check. group_barrier_needed_p will update 6927 important state info. Don't delete this test. */ 6928 if (ia64_final_schedule 6929 && group_barrier_needed_p (insn)) 6930 abort (); 6931 6932 sched_data.stopbit[sched_data.cur] = 0; 6933 sched_data.insns[sched_data.cur] = insn; 6934 sched_data.types[sched_data.cur] = t; 6935 6936 sched_data.cur++; 6937 if (sched_verbose) 6938 fprintf (dump, "// Scheduling insn %d of type %s\n", 6939 INSN_UID (insn), type_names[t]); 6940 6941 if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule) 6942 { 6943 schedule_stop (sched_verbose ? dump : NULL); 6944 sched_data.last_was_stop = 1; 6945 } 6946 6947 return 1; 6948} 6949 6950/* Free data allocated by ia64_sched_init. */ 6951 6952static void 6953ia64_sched_finish (dump, sched_verbose) 6954 FILE *dump; 6955 int sched_verbose; 6956{ 6957 if (sched_verbose) 6958 fprintf (dump, "// Finishing schedule.\n"); 6959 rotate_two_bundles (NULL); 6960 free (sched_types); 6961 free (sched_ready); 6962} 6963 6964/* Emit pseudo-ops for the assembler to describe predicate relations. 6965 At present this assumes that we only consider predicate pairs to 6966 be mutex, and that the assembler can deduce proper values from 6967 straight-line code. */ 6968 6969static void 6970emit_predicate_relation_info () 6971{ 6972 basic_block bb; 6973 6974 FOR_EACH_BB_REVERSE (bb) 6975 { 6976 int r; 6977 rtx head = bb->head; 6978 6979 /* We only need such notes at code labels. */ 6980 if (GET_CODE (head) != CODE_LABEL) 6981 continue; 6982 if (GET_CODE (NEXT_INSN (head)) == NOTE 6983 && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK) 6984 head = NEXT_INSN (head); 6985 6986 for (r = PR_REG (0); r < PR_REG (64); r += 2) 6987 if (REGNO_REG_SET_P (bb->global_live_at_start, r)) 6988 { 6989 rtx p = gen_rtx_REG (BImode, r); 6990 rtx n = emit_insn_after (gen_pred_rel_mutex (p), head); 6991 if (head == bb->end) 6992 bb->end = n; 6993 head = n; 6994 } 6995 } 6996 6997 /* Look for conditional calls that do not return, and protect predicate 6998 relations around them. 
Otherwise the assembler will assume the call 6999 returns, and complain about uses of call-clobbered predicates after 7000 the call. */ 7001 FOR_EACH_BB_REVERSE (bb) 7002 { 7003 rtx insn = bb->head; 7004 7005 while (1) 7006 { 7007 if (GET_CODE (insn) == CALL_INSN 7008 && GET_CODE (PATTERN (insn)) == COND_EXEC 7009 && find_reg_note (insn, REG_NORETURN, NULL_RTX)) 7010 { 7011 rtx b = emit_insn_before (gen_safe_across_calls_all (), insn); 7012 rtx a = emit_insn_after (gen_safe_across_calls_normal (), insn); 7013 if (bb->head == insn) 7014 bb->head = b; 7015 if (bb->end == insn) 7016 bb->end = a; 7017 } 7018 7019 if (insn == bb->end) 7020 break; 7021 insn = NEXT_INSN (insn); 7022 } 7023 } 7024} 7025 7026/* Generate a NOP instruction of type T. We will never generate L type 7027 nops. */ 7028 7029static rtx 7030gen_nop_type (t) 7031 enum attr_type t; 7032{ 7033 switch (t) 7034 { 7035 case TYPE_M: 7036 return gen_nop_m (); 7037 case TYPE_I: 7038 return gen_nop_i (); 7039 case TYPE_B: 7040 return gen_nop_b (); 7041 case TYPE_F: 7042 return gen_nop_f (); 7043 case TYPE_X: 7044 return gen_nop_x (); 7045 default: 7046 abort (); 7047 } 7048} 7049 7050/* After the last scheduling pass, fill in NOPs. It's easier to do this 7051 here than while scheduling. */ 7052 7053static void 7054ia64_emit_nops () 7055{ 7056 rtx insn; 7057 const struct bundle *b = 0; 7058 int bundle_pos = 0; 7059 7060 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 7061 { 7062 rtx pat; 7063 enum attr_type t; 7064 pat = INSN_P (insn) ? PATTERN (insn) : const0_rtx; 7065 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) 7066 continue; 7067 if ((GET_CODE (pat) == UNSPEC && XINT (pat, 1) == UNSPEC_BUNDLE_SELECTOR) 7068 || GET_CODE (insn) == CODE_LABEL) 7069 { 7070 if (b) 7071 while (bundle_pos < 3) 7072 { 7073 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7074 bundle_pos++; 7075 } 7076 if (GET_CODE (insn) != CODE_LABEL) 7077 b = bundle + INTVAL (XVECEXP (pat, 0, 0)); 7078 else 7079 b = 0; 7080 bundle_pos = 0; 7081 continue; 7082 } 7083 else if (GET_CODE (pat) == UNSPEC_VOLATILE 7084 && XINT (pat, 1) == UNSPECV_INSN_GROUP_BARRIER) 7085 { 7086 int t = INTVAL (XVECEXP (pat, 0, 0)); 7087 if (b) 7088 while (bundle_pos < t) 7089 { 7090 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7091 bundle_pos++; 7092 } 7093 continue; 7094 } 7095 7096 if (bundle_pos == 3) 7097 b = 0; 7098 7099 if (b && INSN_P (insn)) 7100 { 7101 t = ia64_safe_type (insn); 7102 if (asm_noperands (PATTERN (insn)) >= 0 7103 || GET_CODE (PATTERN (insn)) == ASM_INPUT) 7104 { 7105 while (bundle_pos < 3) 7106 { 7107 if (b->t[bundle_pos] != TYPE_L) 7108 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7109 bundle_pos++; 7110 } 7111 continue; 7112 } 7113 7114 if (t == TYPE_UNKNOWN) 7115 continue; 7116 while (bundle_pos < 3) 7117 { 7118 if (t == b->t[bundle_pos] 7119 || (t == TYPE_A && (b->t[bundle_pos] == TYPE_M 7120 || b->t[bundle_pos] == TYPE_I))) 7121 break; 7122 7123 emit_insn_before (gen_nop_type (b->t[bundle_pos]), insn); 7124 bundle_pos++; 7125 } 7126 if (bundle_pos < 3) 7127 bundle_pos++; 7128 } 7129 } 7130} 7131 7132/* Perform machine dependent operations on the rtl chain INSNS. */ 7133 7134void 7135ia64_reorg (insns) 7136 rtx insns; 7137{ 7138 /* We are freeing block_for_insn in the toplev to keep compatibility 7139 with old MDEP_REORGS that are not CFG based. Recompute it now. */ 7140 compute_bb_for_insn (); 7141 7142 /* If optimizing, we'll have split before scheduling. 
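     (The splitting pass has already run in that case, so the call below
     is only needed at -O0.)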
*/ 7143 if (optimize == 0) 7144 split_all_insns (0); 7145 7146 /* ??? update_life_info_in_dirty_blocks fails to terminate during 7147 non-optimizing bootstrap. */ 7148 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES); 7149 7150 if (ia64_flag_schedule_insns2) 7151 { 7152 timevar_push (TV_SCHED2); 7153 ia64_final_schedule = 1; 7154 schedule_ebbs (rtl_dump_file); 7155 ia64_final_schedule = 0; 7156 timevar_pop (TV_SCHED2); 7157 7158 /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same 7159 place as they were during scheduling. */ 7160 emit_insn_group_barriers (rtl_dump_file, insns); 7161 ia64_emit_nops (); 7162 } 7163 else 7164 emit_all_insn_group_barriers (rtl_dump_file, insns); 7165 7166 /* A call must not be the last instruction in a function, so that the 7167 return address is still within the function, so that unwinding works 7168 properly. Note that IA-64 differs from dwarf2 on this point. */ 7169 if (flag_unwind_tables || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7170 { 7171 rtx insn; 7172 int saw_stop = 0; 7173 7174 insn = get_last_insn (); 7175 if (! INSN_P (insn)) 7176 insn = prev_active_insn (insn); 7177 if (GET_CODE (insn) == INSN 7178 && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 7179 && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER) 7180 { 7181 saw_stop = 1; 7182 insn = prev_active_insn (insn); 7183 } 7184 if (GET_CODE (insn) == CALL_INSN) 7185 { 7186 if (! saw_stop) 7187 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7188 emit_insn (gen_break_f ()); 7189 emit_insn (gen_insn_group_barrier (GEN_INT (3))); 7190 } 7191 } 7192 7193 fixup_errata (); 7194 emit_predicate_relation_info (); 7195} 7196 7197/* Return true if REGNO is used by the epilogue. */ 7198 7199int 7200ia64_epilogue_uses (regno) 7201 int regno; 7202{ 7203 switch (regno) 7204 { 7205 case R_GR (1): 7206 /* With a call to a function in another module, we will write a new 7207 value to "gp". After returning from such a call, we need to make 7208 sure the function restores the original gp-value, even if the 7209 function itself does not use the gp anymore. */ 7210 return !(TARGET_AUTO_PIC || TARGET_NO_PIC); 7211 7212 case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3): 7213 case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7): 7214 /* For functions defined with the syscall_linkage attribute, all 7215 input registers are marked as live at all function exits. This 7216 prevents the register allocator from using the input registers, 7217 which in turn makes it possible to restart a system call after 7218 an interrupt without having to save/restore the input registers. 7219 This also prevents kernel data from leaking to application code. */ 7220 return lookup_attribute ("syscall_linkage", 7221 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL; 7222 7223 case R_BR (0): 7224 /* Conditional return patterns can't represent the use of `b0' as 7225 the return address, so we force the value live this way. */ 7226 return 1; 7227 7228 case AR_PFS_REGNUM: 7229 /* Likewise for ar.pfs, which is used by br.ret. */ 7230 return 1; 7231 7232 default: 7233 return 0; 7234 } 7235} 7236 7237/* Return true if REGNO is used by the frame unwinder. */ 7238 7239int 7240ia64_eh_uses (regno) 7241 int regno; 7242{ 7243 if (! 
reload_completed)
    return 0;

  if (current_frame_info.reg_save_b0
      && regno == current_frame_info.reg_save_b0)
    return 1;
  if (current_frame_info.reg_save_pr
      && regno == current_frame_info.reg_save_pr)
    return 1;
  if (current_frame_info.reg_save_ar_pfs
      && regno == current_frame_info.reg_save_ar_pfs)
    return 1;
  if (current_frame_info.reg_save_ar_unat
      && regno == current_frame_info.reg_save_ar_unat)
    return 1;
  if (current_frame_info.reg_save_ar_lc
      && regno == current_frame_info.reg_save_ar_lc)
    return 1;

  return 0;
}

/* For ia64, SYMBOL_REF_FLAG set means that it is a function.

   We add @ to the name if this goes in small data/bss.  We can only put
   a variable in small data/bss if it is defined in this module or a module
   that we are statically linked with.  We can't check the second condition,
   but TREE_STATIC gives us the first one.  */

/* ??? If we had IPA, we could check the second condition.  We could support
   programmer added section attributes if the variable is not defined in this
   module.  */

/* ??? See the v850 port for a cleaner way to do this.  */

/* ??? We could also support our own long data here.  Generating movl/add/ld8
   instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   code faster because there is one less load.  This also includes incomplete
   types which can't go in sdata/sbss.  */

static bool
ia64_in_small_data_p (exp)
     tree exp;
{
  if (TARGET_NO_SDATA)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".sdata") == 0
	  || strcmp (section, ".sbss") == 0)
	return true;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in sdata because it might be too big when completed.  */
      if (size > 0 && size <= ia64_section_threshold)
	return true;
    }

  return false;
}

static void
ia64_encode_section_info (decl, first)
     tree decl;
     int first ATTRIBUTE_UNUSED;
{
  const char *symbol_str;
  bool is_local;
  rtx symbol;
  char encoding = 0;

  if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
      return;
    }

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) != VAR_DECL
      || GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  symbol = XEXP (DECL_RTL (decl), 0);
  symbol_str = XSTR (symbol, 0);

  is_local = (*targetm.binds_local_p) (decl);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
    encoding = " GLil"[decl_tls_model (decl)];
  /* Determine if DECL will wind up in .sdata/.sbss.  */
  else if (is_local && ia64_in_small_data_p (decl))
    encoding = 's';

  /* Finally, encode this into the symbol string.  */
  if (encoding)
    {
      char *newstr;
      size_t len;

      if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
	{
	  if (encoding == symbol_str[1])
	    return;
	  /* ??? Sdata became thread, or thread became not thread.  Lose.
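	     (The first two characters of the symbol string hold the
	     encoding: ENCODE_SECTION_INFO_CHAR followed by 's' for small
	     data, or one of "GLil" for the TLS models chosen above, so a
	     mismatch here means the symbol was already encoded
	     differently.)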
*/
	  abort ();
	}

      len = strlen (symbol_str);
      newstr = alloca (len + 3);
      newstr[0] = ENCODE_SECTION_INFO_CHAR;
      newstr[1] = encoding;
      memcpy (newstr + 2, symbol_str, len + 1);

      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2);
    }

  /* This decl is marked as being in small data/bss but it shouldn't be;
     one likely explanation for this is that the decl has been moved into
     a different section from the one it was in when encode_section_info
     was first called.  Remove the encoding.  */
  else if (symbol_str[0] == ENCODE_SECTION_INFO_CHAR)
    XSTR (symbol, 0) = ggc_strdup (symbol_str + 2);
}

static const char *
ia64_strip_name_encoding (str)
     const char *str;
{
  if (str[0] == ENCODE_SECTION_INFO_CHAR)
    str += 2;
  if (str[0] == '*')
    str++;
  return str;
}

/* True if it is OK to do sibling call optimization for the specified
   call expression EXP.  DECL will be the called function, or NULL if
   this is an indirect call.  */
bool
ia64_function_ok_for_sibcall (decl)
     tree decl;
{
  /* We must always return with our current GP.  This means we can
     only sibcall to functions defined in the current module.  */
  return decl && (*targetm.binds_local_p) (decl);
}

/* Output assembly directives for prologue regions.  */

/* True if the current basic block is the last one in the function.  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

/* The function emits unwind directives for the start of an epilogue.  */

static void
process_epilogue ()
{
  /* If this isn't the last block of the function, then we need to label the
     current state, and copy it back in at the start of the next block.  */

  if (!last_block)
    {
      fprintf (asm_out_file, "\t.label_state 1\n");
      need_copy_state = true;
    }

  fprintf (asm_out_file, "\t.restore sp\n");
}

/* This function processes a SET pattern looking for specific patterns
   which result in emitting an assembly directive required for unwinding.  */

static int
process_set (asm_out_file, pat)
     FILE *asm_out_file;
     rtx pat;
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  int src_regno, dest_regno;

  /* Look for the ALLOC insn.  */
  if (GET_CODE (src) == UNSPEC_VOLATILE
      && XINT (src, 1) == UNSPECV_ALLOC
      && GET_CODE (dest) == REG)
    {
      dest_regno = REGNO (dest);

      /* If this isn't the final destination for ar.pfs, the alloc
	 shouldn't have been marked frame related.  */
      if (dest_regno != current_frame_info.reg_save_ar_pfs)
	abort ();

      fprintf (asm_out_file, "\t.save ar.pfs, r%d\n",
	       ia64_dbx_register_number (dest_regno));
      return 1;
    }

  /* Look for SP = ....
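     A decrement of SP by a constant is the prologue's frame allocation
     and becomes a .fframe directive; copying the hard frame pointer
     into SP, or adding a positive constant back, marks an epilogue.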
*/ 7454 if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM) 7455 { 7456 if (GET_CODE (src) == PLUS) 7457 { 7458 rtx op0 = XEXP (src, 0); 7459 rtx op1 = XEXP (src, 1); 7460 if (op0 == dest && GET_CODE (op1) == CONST_INT) 7461 { 7462 if (INTVAL (op1) < 0) 7463 { 7464 fputs ("\t.fframe ", asm_out_file); 7465 fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, 7466 -INTVAL (op1)); 7467 fputc ('\n', asm_out_file); 7468 } 7469 else 7470 process_epilogue (); 7471 } 7472 else 7473 abort (); 7474 } 7475 else if (GET_CODE (src) == REG 7476 && REGNO (src) == HARD_FRAME_POINTER_REGNUM) 7477 process_epilogue (); 7478 else 7479 abort (); 7480 7481 return 1; 7482 } 7483 7484 /* Register move we need to look at. */ 7485 if (GET_CODE (dest) == REG && GET_CODE (src) == REG) 7486 { 7487 src_regno = REGNO (src); 7488 dest_regno = REGNO (dest); 7489 7490 switch (src_regno) 7491 { 7492 case BR_REG (0): 7493 /* Saving return address pointer. */ 7494 if (dest_regno != current_frame_info.reg_save_b0) 7495 abort (); 7496 fprintf (asm_out_file, "\t.save rp, r%d\n", 7497 ia64_dbx_register_number (dest_regno)); 7498 return 1; 7499 7500 case PR_REG (0): 7501 if (dest_regno != current_frame_info.reg_save_pr) 7502 abort (); 7503 fprintf (asm_out_file, "\t.save pr, r%d\n", 7504 ia64_dbx_register_number (dest_regno)); 7505 return 1; 7506 7507 case AR_UNAT_REGNUM: 7508 if (dest_regno != current_frame_info.reg_save_ar_unat) 7509 abort (); 7510 fprintf (asm_out_file, "\t.save ar.unat, r%d\n", 7511 ia64_dbx_register_number (dest_regno)); 7512 return 1; 7513 7514 case AR_LC_REGNUM: 7515 if (dest_regno != current_frame_info.reg_save_ar_lc) 7516 abort (); 7517 fprintf (asm_out_file, "\t.save ar.lc, r%d\n", 7518 ia64_dbx_register_number (dest_regno)); 7519 return 1; 7520 7521 case STACK_POINTER_REGNUM: 7522 if (dest_regno != HARD_FRAME_POINTER_REGNUM 7523 || ! frame_pointer_needed) 7524 abort (); 7525 fprintf (asm_out_file, "\t.vframe r%d\n", 7526 ia64_dbx_register_number (dest_regno)); 7527 return 1; 7528 7529 default: 7530 /* Everything else should indicate being stored to memory. */ 7531 abort (); 7532 } 7533 } 7534 7535 /* Memory store we need to look at. 
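     For example (with an illustrative offset), spilling b0 to sp+16
     comes out as ".savesp rp, 16"; stores relative to the hard frame
     pointer use .savepsp with the offset negated.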
*/ 7536 if (GET_CODE (dest) == MEM && GET_CODE (src) == REG) 7537 { 7538 long off; 7539 rtx base; 7540 const char *saveop; 7541 7542 if (GET_CODE (XEXP (dest, 0)) == REG) 7543 { 7544 base = XEXP (dest, 0); 7545 off = 0; 7546 } 7547 else if (GET_CODE (XEXP (dest, 0)) == PLUS 7548 && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT) 7549 { 7550 base = XEXP (XEXP (dest, 0), 0); 7551 off = INTVAL (XEXP (XEXP (dest, 0), 1)); 7552 } 7553 else 7554 abort (); 7555 7556 if (base == hard_frame_pointer_rtx) 7557 { 7558 saveop = ".savepsp"; 7559 off = - off; 7560 } 7561 else if (base == stack_pointer_rtx) 7562 saveop = ".savesp"; 7563 else 7564 abort (); 7565 7566 src_regno = REGNO (src); 7567 switch (src_regno) 7568 { 7569 case BR_REG (0): 7570 if (current_frame_info.reg_save_b0 != 0) 7571 abort (); 7572 fprintf (asm_out_file, "\t%s rp, %ld\n", saveop, off); 7573 return 1; 7574 7575 case PR_REG (0): 7576 if (current_frame_info.reg_save_pr != 0) 7577 abort (); 7578 fprintf (asm_out_file, "\t%s pr, %ld\n", saveop, off); 7579 return 1; 7580 7581 case AR_LC_REGNUM: 7582 if (current_frame_info.reg_save_ar_lc != 0) 7583 abort (); 7584 fprintf (asm_out_file, "\t%s ar.lc, %ld\n", saveop, off); 7585 return 1; 7586 7587 case AR_PFS_REGNUM: 7588 if (current_frame_info.reg_save_ar_pfs != 0) 7589 abort (); 7590 fprintf (asm_out_file, "\t%s ar.pfs, %ld\n", saveop, off); 7591 return 1; 7592 7593 case AR_UNAT_REGNUM: 7594 if (current_frame_info.reg_save_ar_unat != 0) 7595 abort (); 7596 fprintf (asm_out_file, "\t%s ar.unat, %ld\n", saveop, off); 7597 return 1; 7598 7599 case GR_REG (4): 7600 case GR_REG (5): 7601 case GR_REG (6): 7602 case GR_REG (7): 7603 fprintf (asm_out_file, "\t.save.g 0x%x\n", 7604 1 << (src_regno - GR_REG (4))); 7605 return 1; 7606 7607 case BR_REG (1): 7608 case BR_REG (2): 7609 case BR_REG (3): 7610 case BR_REG (4): 7611 case BR_REG (5): 7612 fprintf (asm_out_file, "\t.save.b 0x%x\n", 7613 1 << (src_regno - BR_REG (1))); 7614 return 1; 7615 7616 case FR_REG (2): 7617 case FR_REG (3): 7618 case FR_REG (4): 7619 case FR_REG (5): 7620 fprintf (asm_out_file, "\t.save.f 0x%x\n", 7621 1 << (src_regno - FR_REG (2))); 7622 return 1; 7623 7624 case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19): 7625 case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23): 7626 case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27): 7627 case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31): 7628 fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 7629 1 << (src_regno - FR_REG (12))); 7630 return 1; 7631 7632 default: 7633 return 0; 7634 } 7635 } 7636 7637 return 0; 7638} 7639 7640 7641/* This function looks at a single insn and emits any directives 7642 required to unwind this insn. */ 7643void 7644process_for_unwind_directive (asm_out_file, insn) 7645 FILE *asm_out_file; 7646 rtx insn; 7647{ 7648 if (flag_unwind_tables 7649 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)) 7650 { 7651 rtx pat; 7652 7653 if (GET_CODE (insn) == NOTE 7654 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK) 7655 { 7656 last_block = NOTE_BASIC_BLOCK (insn)->next_bb == EXIT_BLOCK_PTR; 7657 7658 /* Restore unwind state from immediately before the epilogue. */ 7659 if (need_copy_state) 7660 { 7661 fprintf (asm_out_file, "\t.body\n"); 7662 fprintf (asm_out_file, "\t.copy_state 1\n"); 7663 need_copy_state = false; 7664 } 7665 } 7666 7667 if (GET_CODE (insn) == NOTE || ! 
RTX_FRAME_RELATED_P (insn)) 7668 return; 7669 7670 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX); 7671 if (pat) 7672 pat = XEXP (pat, 0); 7673 else 7674 pat = PATTERN (insn); 7675 7676 switch (GET_CODE (pat)) 7677 { 7678 case SET: 7679 process_set (asm_out_file, pat); 7680 break; 7681 7682 case PARALLEL: 7683 { 7684 int par_index; 7685 int limit = XVECLEN (pat, 0); 7686 for (par_index = 0; par_index < limit; par_index++) 7687 { 7688 rtx x = XVECEXP (pat, 0, par_index); 7689 if (GET_CODE (x) == SET) 7690 process_set (asm_out_file, x); 7691 } 7692 break; 7693 } 7694 7695 default: 7696 abort (); 7697 } 7698 } 7699} 7700 7701 7702void 7703ia64_init_builtins () 7704{ 7705 tree psi_type_node = build_pointer_type (integer_type_node); 7706 tree pdi_type_node = build_pointer_type (long_integer_type_node); 7707 7708 /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ 7709 tree si_ftype_psi_si_si 7710 = build_function_type_list (integer_type_node, 7711 psi_type_node, integer_type_node, 7712 integer_type_node, NULL_TREE); 7713 7714 /* __sync_val_compare_and_swap_di */ 7715 tree di_ftype_pdi_di_di 7716 = build_function_type_list (long_integer_type_node, 7717 pdi_type_node, long_integer_type_node, 7718 long_integer_type_node, NULL_TREE); 7719 /* __sync_bool_compare_and_swap_di */ 7720 tree si_ftype_pdi_di_di 7721 = build_function_type_list (integer_type_node, 7722 pdi_type_node, long_integer_type_node, 7723 long_integer_type_node, NULL_TREE); 7724 /* __sync_synchronize */ 7725 tree void_ftype_void 7726 = build_function_type (void_type_node, void_list_node); 7727 7728 /* __sync_lock_test_and_set_si */ 7729 tree si_ftype_psi_si 7730 = build_function_type_list (integer_type_node, 7731 psi_type_node, integer_type_node, NULL_TREE); 7732 7733 /* __sync_lock_test_and_set_di */ 7734 tree di_ftype_pdi_di 7735 = build_function_type_list (long_integer_type_node, 7736 pdi_type_node, long_integer_type_node, 7737 NULL_TREE); 7738 7739 /* __sync_lock_release_si */ 7740 tree void_ftype_psi 7741 = build_function_type_list (void_type_node, psi_type_node, NULL_TREE); 7742 7743 /* __sync_lock_release_di */ 7744 tree void_ftype_pdi 7745 = build_function_type_list (void_type_node, pdi_type_node, NULL_TREE); 7746 7747#define def_builtin(name, type, code) \ 7748 builtin_function ((name), (type), (code), BUILT_IN_MD, NULL, NULL_TREE) 7749 7750 def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, 7751 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); 7752 def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, 7753 IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); 7754 def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, 7755 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); 7756 def_builtin ("__sync_bool_compare_and_swap_di", si_ftype_pdi_di_di, 7757 IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); 7758 7759 def_builtin ("__sync_synchronize", void_ftype_void, 7760 IA64_BUILTIN_SYNCHRONIZE); 7761 7762 def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, 7763 IA64_BUILTIN_LOCK_TEST_AND_SET_SI); 7764 def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, 7765 IA64_BUILTIN_LOCK_TEST_AND_SET_DI); 7766 def_builtin ("__sync_lock_release_si", void_ftype_psi, 7767 IA64_BUILTIN_LOCK_RELEASE_SI); 7768 def_builtin ("__sync_lock_release_di", void_ftype_pdi, 7769 IA64_BUILTIN_LOCK_RELEASE_DI); 7770 7771 def_builtin ("__builtin_ia64_bsp", 7772 build_function_type (ptr_type_node, void_list_node), 7773 IA64_BUILTIN_BSP); 7774 7775 def_builtin ("__builtin_ia64_flushrs", 7776 build_function_type 

/* Expand fetch_and_op intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       ret = tmp;
       ar.ccv = tmp;
       tmp <op>= value;
       cmpxchgsz.acq tmp = [ptr], tmp
     } while (tmp != ret)
*/

static rtx
ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx ret, label, tmp, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif
  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  emit_insn (gen_mf ());

  /* Special case for fetchadd instructions.  */
  if (binoptab == add_optab && fetchadd_operand (value, VOIDmode))
    {
      if (mode == SImode)
        insn = gen_fetchadd_acq_si (ret, mem, value);
      else
        insn = gen_fetchadd_acq_di (ret, mem, value);
      emit_insn (insn);
      return ret;
    }

  tmp = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (ret, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  tmp = expand_binop (mode, binoptab, tmp, value, tmp, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, tmp, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, tmp, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, ret, NE, 0, mode, 1, label);

  return ret;
}
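
/* Note the fast path above: when the addend satisfies
   fetchadd_operand (the immediates an ia64 fetchadd insn accepts are
   -16, -8, -4, -1, 1, 4, 8, 16), no compare-and-swap loop is needed,
   and e.g. __sync_fetch_and_add_si (&x, 1) becomes roughly

        mf
        fetchadd4.acq r8 = [r32], 1

   (register names illustrative).  */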
/* Expand op_and_fetch intrinsics.  The basic code sequence is:

     mf
     tmp = [ptr];
     do {
       old = tmp;
       ar.ccv = tmp;
       ret = tmp <op> value;
       cmpxchgsz.acq tmp = [ptr], ret
     } while (tmp != old)
*/

static rtx
ia64_expand_op_and_fetch (binoptab, mode, arglist, target)
     optab binoptab;
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  rtx old, label, tmp, ret, ccv, insn, mem, value;
  tree arg0, arg1;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, Pmode, 0);
#ifdef POINTERS_EXTEND_UNSIGNED
  if (GET_MODE (mem) != Pmode)
    mem = convert_memory_address (Pmode, mem);
#endif

  value = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (Pmode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (target && ! register_operand (target, mode))
    target = NULL_RTX;

  emit_insn (gen_mf ());
  tmp = gen_reg_rtx (mode);
  old = gen_reg_rtx (mode);
  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);

  emit_move_insn (tmp, mem);

  label = gen_label_rtx ();
  emit_label (label);
  emit_move_insn (old, tmp);
  emit_move_insn (ccv, tmp);

  /* Perform the specific operation.  Special case NAND by noticing
     one_cmpl_optab instead.  */
  if (binoptab == one_cmpl_optab)
    {
      tmp = expand_unop (mode, binoptab, tmp, NULL, OPTAB_WIDEN);
      binoptab = and_optab;
    }
  ret = expand_binop (mode, binoptab, tmp, value, target, 1, OPTAB_WIDEN);

  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, ret, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, ret, ccv);
  emit_insn (insn);

  emit_cmp_and_jump_insns (tmp, old, NE, 0, mode, 1, label);

  return ret;
}
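
/* The only difference from ia64_expand_fetch_and_op is which value is
   returned.  Illustrative example (not part of this file):

       int x = 40;
       __sync_fetch_and_add_si (&x, 2);   returns 40, x becomes 42
       __sync_add_and_fetch_si (&x, 2);   returns 44, x becomes 44
*/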
/* Expand val_ and bool_compare_and_swap.  For val_ we want:

     ar.ccv = oldval
     mf
     cmpxchgsz.acq ret = [ptr], newval, ar.ccv
     return ret

   For bool_ it's the same except return ret == oldval.
*/

static rtx
ia64_expand_compare_and_swap (rmode, mode, boolp, arglist, target)
     enum machine_mode rmode;
     enum machine_mode mode;
     int boolp;
     tree arglist;
     rtx target;
{
  tree arg0, arg1, arg2;
  rtx mem, old, new, ccv, tmp, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  old = expand_expr (arg1, NULL_RTX, mode, 0);
  new = expand_expr (arg2, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  if (! register_operand (old, mode))
    old = copy_to_mode_reg (mode, old);
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (! boolp && target && register_operand (target, mode))
    tmp = target;
  else
    tmp = gen_reg_rtx (mode);

  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  if (mode == DImode)
    emit_move_insn (ccv, old);
  else
    {
      rtx ccvtmp = gen_reg_rtx (DImode);
      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
      emit_move_insn (ccv, ccvtmp);
    }
  emit_insn (gen_mf ());
  if (mode == SImode)
    insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
  else
    insn = gen_cmpxchg_acq_di (tmp, mem, new, ccv);
  emit_insn (insn);

  if (boolp)
    {
      if (! target)
        target = gen_reg_rtx (rmode);
      return emit_store_flag_force (target, EQ, tmp, old, mode, 1, 1);
    }
  else
    return tmp;
}
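
/* Usage sketch for the two flavors (illustrative only):

       long prev = __sync_val_compare_and_swap_di (&v, oldval, newval);
       int  done = __sync_bool_compare_and_swap_di (&v, oldval, newval);

   val_ returns whatever was in memory before the cmpxchg; bool_
   returns nonzero iff that value equalled OLDVAL, i.e. iff the store
   actually happened.  */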
/* Expand lock_test_and_set.  I.e. `xchgsz ret = [ptr], new'.  */

static rtx
ia64_expand_lock_test_and_set (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target;
{
  tree arg0, arg1;
  rtx mem, new, ret, insn;

  arg0 = TREE_VALUE (arglist);
  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);
  new = expand_expr (arg1, NULL_RTX, mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;
  if (! register_operand (new, mode))
    new = copy_to_mode_reg (mode, new);

  if (target && register_operand (target, mode))
    ret = target;
  else
    ret = gen_reg_rtx (mode);

  if (mode == SImode)
    insn = gen_xchgsi (ret, mem, new);
  else
    insn = gen_xchgdi (ret, mem, new);
  emit_insn (insn);

  return ret;
}

/* Expand lock_release.  I.e. `stsz.rel [ptr] = r0'.  */

static rtx
ia64_expand_lock_release (mode, arglist, target)
     enum machine_mode mode;
     tree arglist;
     rtx target ATTRIBUTE_UNUSED;
{
  tree arg0;
  rtx mem;

  arg0 = TREE_VALUE (arglist);
  mem = expand_expr (arg0, NULL_RTX, ptr_mode, 0);

  mem = gen_rtx_MEM (mode, force_reg (ptr_mode, mem));
  MEM_VOLATILE_P (mem) = 1;

  emit_move_insn (mem, const0_rtx);

  return const0_rtx;
}
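
/* Together these two give a simple spinlock.  A minimal sketch,
   assuming a lock word that is 0 when free (illustrative only):

       static int lock;

       while (__sync_lock_test_and_set_si (&lock, 1))
         ;                                   <- xchg4, acquire semantics
       ... critical section ...
       __sync_lock_release_si (&lock);       <- volatile store, st4.rel

   The xchg on entry gives acquire ordering and the release store on
   exit gives release ordering, which is what lock/unlock require.  */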
rtx
ia64_expand_builtin (exp, target, subtarget, mode, ignore)
     tree exp;
     rtx target;
     rtx subtarget ATTRIBUTE_UNUSED;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     int ignore ATTRIBUTE_UNUSED;
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  tree arglist = TREE_OPERAND (exp, 1);
  enum machine_mode rmode = VOIDmode;

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
      mode = SImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_SI:
      mode = SImode;
      break;

    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = SImode;
      break;

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      mode = DImode;
      rmode = DImode;
      break;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      mode = DImode;
      break;

    default:
      break;
    }

  switch (fcode)
    {
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 1, arglist,
                                           target);

    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI:
    case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI:
      return ia64_expand_compare_and_swap (rmode, mode, 0, arglist,
                                           target);

    case IA64_BUILTIN_SYNCHRONIZE:
      emit_insn (gen_mf ());
      return const0_rtx;

    case IA64_BUILTIN_LOCK_TEST_AND_SET_SI:
    case IA64_BUILTIN_LOCK_TEST_AND_SET_DI:
      return ia64_expand_lock_test_and_set (mode, arglist, target);

    case IA64_BUILTIN_LOCK_RELEASE_SI:
    case IA64_BUILTIN_LOCK_RELEASE_DI:
      return ia64_expand_lock_release (mode, arglist, target);

    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
        target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_FETCH_AND_ADD_SI:
    case IA64_BUILTIN_FETCH_AND_ADD_DI:
      return ia64_expand_fetch_and_op (add_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_SUB_SI:
    case IA64_BUILTIN_FETCH_AND_SUB_DI:
      return ia64_expand_fetch_and_op (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_OR_SI:
    case IA64_BUILTIN_FETCH_AND_OR_DI:
      return ia64_expand_fetch_and_op (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_AND_SI:
    case IA64_BUILTIN_FETCH_AND_AND_DI:
      return ia64_expand_fetch_and_op (and_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_XOR_SI:
    case IA64_BUILTIN_FETCH_AND_XOR_DI:
      return ia64_expand_fetch_and_op (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_FETCH_AND_NAND_SI:
    case IA64_BUILTIN_FETCH_AND_NAND_DI:
      return ia64_expand_fetch_and_op (one_cmpl_optab, mode, arglist, target);

    case IA64_BUILTIN_ADD_AND_FETCH_SI:
    case IA64_BUILTIN_ADD_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (add_optab, mode, arglist, target);

    case IA64_BUILTIN_SUB_AND_FETCH_SI:
    case IA64_BUILTIN_SUB_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (sub_optab, mode, arglist, target);

    case IA64_BUILTIN_OR_AND_FETCH_SI:
    case IA64_BUILTIN_OR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (ior_optab, mode, arglist, target);

    case IA64_BUILTIN_AND_AND_FETCH_SI:
    case IA64_BUILTIN_AND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (and_optab, mode, arglist, target);

    case IA64_BUILTIN_XOR_AND_FETCH_SI:
    case IA64_BUILTIN_XOR_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (xor_optab, mode, arglist, target);

    case IA64_BUILTIN_NAND_AND_FETCH_SI:
    case IA64_BUILTIN_NAND_AND_FETCH_DI:
      return ia64_expand_op_and_fetch (one_cmpl_optab, mode, arglist, target);

    default:
      break;
    }

  return NULL_RTX;
}
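
/* Usage note (illustrative): __builtin_ia64_bsp reads the current
   register backing store pointer, and __builtin_ia64_flushrs forces
   the stacked registers out to the backing store.  A conservative
   stack scanner might do, roughly,

       __builtin_ia64_flushrs ();
       void *bsp = __builtin_ia64_bsp ();
       scan_region (backing_store_base, bsp);

   where scan_region and backing_store_base are hypothetical names,
   not part of this file.  */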
/* On HP-UX IA64, aggregate parameters are passed in the most
   significant bits of the stack slot.  */

enum direction
ia64_hpux_function_arg_padding (mode, type)
     enum machine_mode mode;
     tree type;
{
  /* Exception to the normal case for structures/unions/etc.  */

  if (type && AGGREGATE_TYPE_P (type)
      && int_size_in_bytes (type) < UNITS_PER_WORD)
    return upward;

  /* This is the standard FUNCTION_ARG_PADDING with !BYTES_BIG_ENDIAN
     hardwired to be true.  */

  return ((mode == BLKmode
           ? (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
              && int_size_in_bytes (type) < (PARM_BOUNDARY / BITS_PER_UNIT))
           : GET_MODE_BITSIZE (mode) < PARM_BOUNDARY)
          ? downward : upward);
}

/* Linked list of all external functions that are to be emitted by GCC.
   We output the name if and only if TREE_SYMBOL_REFERENCED is set in
   order to avoid putting out names that are never really used.  */

struct extern_func_list
{
  struct extern_func_list *next; /* next external */
  char *name;                    /* name of the external */
} *extern_func_head = 0;

static void
ia64_hpux_add_extern_decl (name)
     const char *name;
{
  struct extern_func_list *p;

  p = (struct extern_func_list *) xmalloc (sizeof (struct extern_func_list));
  p->name = xmalloc (strlen (name) + 1);
  strcpy (p->name, name);
  p->next = extern_func_head;
  extern_func_head = p;
}

/* Print out the list of used global functions.  */

void
ia64_hpux_asm_file_end (file)
     FILE *file;
{
  while (extern_func_head)
    {
      const char *real_name;
      tree decl;

      real_name = (* targetm.strip_name_encoding) (extern_func_head->name);
      decl = maybe_get_identifier (real_name);

      if (!decl
          || (! TREE_ASM_WRITTEN (decl) && TREE_SYMBOL_REFERENCED (decl)))
        {
          if (decl)
            TREE_ASM_WRITTEN (decl) = 1;
          (*targetm.asm_out.globalize_label) (file, extern_func_head->name);
          fprintf (file, "%s", TYPE_ASM_OP);
          assemble_name (file, extern_func_head->name);
          putc (',', file);
          fprintf (file, TYPE_OPERAND_FMT, "function");
          putc ('\n', file);
        }
      extern_func_head = extern_func_head->next;
    }
}


/* Switch to the section to which we should output X.  The only thing
   special we do here is to honor small data.  */

static void
ia64_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  if (GET_MODE_SIZE (mode) > 0
      && GET_MODE_SIZE (mode) <= ia64_section_threshold)
    sdata_section ();
  else
    default_elf_select_rtx_section (mode, x, align);
}

/* It is illegal to have relocations in shared segments on AIX and HPUX.
   Pretend flag_pic is always set.  */

static void
ia64_rwreloc_select_section (exp, reloc, align)
     tree exp;
     int reloc;
     unsigned HOST_WIDE_INT align;
{
  default_elf_select_section_1 (exp, reloc, align, true);
}

static void
ia64_rwreloc_unique_section (decl, reloc)
     tree decl;
     int reloc;
{
  default_unique_section_1 (decl, reloc, true);
}

static void
ia64_rwreloc_select_rtx_section (mode, x, align)
     enum machine_mode mode;
     rtx x;
     unsigned HOST_WIDE_INT align;
{
  int save_pic = flag_pic;
  flag_pic = 1;
  ia64_select_rtx_section (mode, x, align);
  flag_pic = save_pic;
}

static unsigned int
ia64_rwreloc_section_type_flags (decl, name, reloc)
     tree decl;
     const char *name;
     int reloc;
{
  return default_section_type_flags_1 (decl, name, reloc, true);
}


/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is non-zero, the word at
   *(*this + vcall_offset) should be added to THIS.  */
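
/* For illustration: with DELTA == 8 and VCALL_OFFSET == 0, the thunk
   emitted below is essentially

        adds in0 = 8, in0
        br.sptk.many <function>

   i.e. adjust the incoming `this' pointer and tail-call the real
   method (instruction spelling approximate).  */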

static void
ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
     FILE *file;
     tree thunk ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
     HOST_WIDE_INT vcall_offset;
     tree function;
{
  rtx this, insn, funexp;

  reload_completed = 1;
  no_new_pseudos = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  if (!TARGET_REG_NAMES)
    reg_names[IN_REG (0)] = ia64_reg_numbers[0];

  /* Mark the end of the (empty) prologue.  */
  emit_note (NULL, NOTE_INSN_PROLOGUE_END);

  this = gen_rtx_REG (Pmode, IN_REG (0));
  if (TARGET_ILP32)
    emit_insn (gen_ptr_extend (this,
                               gen_rtx_REG (ptr_mode, IN_REG (0))));

  /* Apply the constant offset, if required.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);

      if (!CONST_OK_FOR_I (delta))
        {
          rtx tmp = gen_rtx_REG (Pmode, 2);
          emit_move_insn (tmp, delta_rtx);
          delta_rtx = tmp;
        }
      emit_insn (gen_adddi3 (this, this, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      if (TARGET_ILP32)
        {
          rtx t = gen_rtx_REG (ptr_mode, 2);
          emit_move_insn (t, gen_rtx_MEM (ptr_mode, this));
          emit_insn (gen_ptr_extend (tmp, t));
        }
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, this));

      if (!CONST_OK_FOR_J (vcall_offset))
        {
          rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
          emit_move_insn (tmp2, vcall_offset_rtx);
          vcall_offset_rtx = tmp2;
        }
      emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));

      if (TARGET_ILP32)
        emit_move_insn (gen_rtx_REG (ptr_mode, 2),
                        gen_rtx_MEM (ptr_mode, tmp));
      else
        emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this, this, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worthwhile.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn = get_insns ();
  emit_all_insn_group_barriers (NULL, insn);
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1, 0);
  final_end_function ();

  reload_completed = 0;
  no_new_pseudos = 0;
}

#include "gt-ia64.h"