/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "hashtab.h"
#include "function.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */
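
/* For illustration, typical assembly forms for each class (not emitted
   here; the patterns in aarch64.md produce them):

     ADDRESS_REG_IMM    ldr  x0, [x1, #16]
     ADDRESS_REG_WB     ldr  x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     add  x0, x0, :lo12:sym   (after  adrp x0, sym)
     ADDRESS_SYMBOLIC   ldr  x0, .LC0            (pc-relative literal)  */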

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  {
    0, /* hi  */
    0, /* si  */
    0, /* di  */
    0, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  {
    1, /* hi  */
    0, /* si  */
    0, /* di  */
    1, /* ti  */
  },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0, /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

#define AARCH64_FUSE_NOTHING    (0)
#define AARCH64_FUSE_MOV_MOVK   (1 << 0)
#define AARCH64_FUSE_ADRP_ADD   (1 << 1)
#define AARCH64_FUSE_MOVK_MOVK  (1 << 2)
#define AARCH64_FUSE_ADRP_LDR   (1 << 3)
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
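
/* As a rough guide, the instruction pairs these flags allow the
   scheduler to keep adjacent for macro-fusion are:

     AARCH64_FUSE_MOV_MOVK    mov  x0, #0x1234         + movk x0, #0x5678, lsl #16
     AARCH64_FUSE_ADRP_ADD    adrp x0, sym             + add  x0, x0, :lo12:sym
     AARCH64_FUSE_MOVK_MOVK   movk x0, #..., lsl #32   + movk x0, #..., lsl #48
     AARCH64_FUSE_ADRP_LDR    adrp x0, sym             + ldr  x1, [x0, :lo12:sym]
     AARCH64_FUSE_CMP_BRANCH  cmp  x0, #0              + b.eq label  */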

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
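
/* For reference: a "bitmask immediate" is an element of 2, 4, 8, 16, 32
   or 64 bits containing a single contiguous run of ones (rotated
   arbitrarily), replicated across the register.  So 0x00ff00ff00ff00ff
   (16-bit element 0x00ff) and 0x7fffffffffffffff (one 63-bit run) are
   valid AND/ORR/EOR immediates, while 0x1234567812345678 is not; 5334
   is the number of distinct 64-bit values of this form.  */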

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return 2;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params->vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params->int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params->fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
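
/* A worked example of the above, assuming UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16: a 16-byte TImode value needs two general
   registers (e.g. x0/x1) but only a single 128-bit vector register
   (e.g. q0), hence the per-class rounding divisor.  */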

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
                                     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
        return mode;
      else
        return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
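
/* A worked example of the checks above: for MODE == DImode,
   MULT_IMM == 4 and EXTRACT_IMM == 34 pass, because 34 & ~7 == 32 (a
   power of two, so the source is a 32-bit value), 34 & 7 == 2 (the
   shift amount), and 4 == 1 << 2.  Extracting the low 34 bits of
   (reg * 4) is then equivalent to (extend:DI reg32) << 2, which maps
   onto an extended-register operand such as "add x0, x1, w2, uxtw #2".  */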

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                                Absolute
   tmp = hi (symbol_ref);             adrp x1, foo
   dest = lo_sum (tmp, symbol_ref);   add  dest, x1, :lo_12:foo
                                      nop

   PIC                                TLS
   adrp x1, :got:foo                  adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]           add  dest, tmp, :tlsgd_lo12:foo
                                      bl   __tls_get_addr
                                      nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));
        return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
        rtx tmp_reg = dest;
        machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
            else
              emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
          }

        return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
        rtx_insn *insns;
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        start_sequence ();
        aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();
        end_sequence ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);
        return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
        machine_mode mode = GET_MODE (dest);
        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
        rtx tp;

        gcc_assert (mode == Pmode || mode == ptr_mode);

        /* In ILP32, the got entry is always of SImode size.  Unlike
           small GOT, the dest is fixed at reg 0.  */
        if (TARGET_ILP32)
          emit_insn (gen_tlsdesc_small_si (imm));
        else
          emit_insn (gen_tlsdesc_small_di (imm));
        tp = aarch64_load_tp (NULL);

        if (mode != Pmode)
          tp = gen_lowpart (mode, tp);

        emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different tlsie_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);
        rtx tmp_reg = gen_reg_rtx (mode);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
          {
            if (mode == DImode)
              emit_insn (gen_tlsie_small_di (tmp_reg, imm));
            else
              {
                emit_insn (gen_tlsie_small_si (tmp_reg, imm));
                tp = gen_lowpart (mode, tp);
              }
          }
        else
          {
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
          }

        emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_SMALL_TPREL:
      {
        rtx tp = aarch64_load_tp (NULL);

        if (GET_MODE (dest) != Pmode)
          tp = gen_lowpart (GET_MODE (dest), tp);

        emit_insn (gen_tlsle_small (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
        return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
        {
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
            }
          else
            {
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
            }
          return;
        }
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
        {
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

          if (mode == TImode)
            {
              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
            }
          else
            {
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
            }
          return;
        }
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
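
/* To see why the overlap check matters, consider (a representative
   example) a TImode copy from the pair x0/x1 into x1/x2: dst_lo (x1)
   overlaps src_hi (x1), so the high halves must move first
   ("mov x2, x1" then "mov x1, x0"); emitting the low halves first
   would clobber x1 before it is read.  */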

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
        {
        case V8QImode:
          gen = gen_aarch64_simd_combinev8qi;
          break;
        case V4HImode:
          gen = gen_aarch64_simd_combinev4hi;
          break;
        case V2SImode:
          gen = gen_aarch64_simd_combinev2si;
          break;
        case V2SFmode:
          gen = gen_aarch64_simd_combinev2sf;
          break;
        case DImode:
          gen = gen_aarch64_simd_combinedi;
          break;
        case DFmode:
          gen = gen_aarch64_simd_combinedf;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
        {
        case V16QImode:
          gen = gen_aarch64_split_simd_movv16qi;
          break;
        case V8HImode:
          gen = gen_aarch64_split_simd_movv8hi;
          break;
        case V4SImode:
          gen = gen_aarch64_split_simd_movv4si;
          break;
        case V2DImode:
          gen = gen_aarch64_split_simd_movv2di;
          break;
        case V4SFmode:
          gen = gen_aarch64_split_simd_movv4sf;
          break;
        case V2DFmode:
          gen = gen_aarch64_split_simd_movv2df;
          break;
        default:
          gcc_unreachable ();
        }

      emit_insn (gen (dst, src));
      return;
    }
}

bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
                              machine_mode ymode, rtx y)
{
  rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
  gcc_assert (r != NULL);
  return rtx_equal_p (x, r);
}

static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}

static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
                                machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      num_insns++;
      return num_insns;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
      if (generate)
        {
          emit_insn (gen_rtx_SET (VOIDmode, dest,
                                  GEN_INT (INTVAL (imm) & 0xffff)));
          emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }
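
  /* For instance (a sketch, not emitted here): the SImode constant
     0x12345678 becomes "mov w0, #0x5678" followed by
     "movk w0, #0x1234, lsl #16" via the two insns above.  */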

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
        one_match++;
      else
        {
          if (first_not_ffff_match < 0)
            first_not_ffff_match = i;
          if ((val & mask) == 0)
            zero_match++;
        }
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
        {
          emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
          emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
                                     GEN_INT ((val >> first_not_ffff_match)
                                              & 0xffff)));
        }
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                      GEN_INT (val & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                      GEN_INT ((val + comp) & mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val + comp) & mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                      GEN_INT ((val - comp) | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - ((val - comp) | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                      GEN_INT (val | ~mask)));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - (val | ~mask))));
            }
          num_insns += 2;
          return num_insns;
        }
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
          || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
        {
          if (generate)
            {
              subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
              emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                      GEN_INT (aarch64_bitmasks[i])));
              emit_insn (gen_adddi3 (dest, subtarget,
                                     GEN_INT (val - aarch64_bitmasks[i])));
            }
          num_insns += 2;
          return num_insns;
        }

      for (j = 0; j < 64; j += 16, mask <<= 16)
        {
          if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
            {
              if (generate)
                {
                  emit_insn (gen_rtx_SET (VOIDmode, dest,
                                          GEN_INT (aarch64_bitmasks[i])));
                  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
                                             GEN_INT ((val >> j) & 0xffff)));
                }
              num_insns += 2;
              return num_insns;
            }
        }
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                            GEN_INT (aarch64_bitmasks[i])));
                    emit_insn (gen_iordi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[j])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
      else if ((val & aarch64_bitmasks[i]) == val)
        {
          int j;

          for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
            if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
              {
                if (generate)
                  {
                    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
                    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
                                            GEN_INT (aarch64_bitmasks[j])));
                    emit_insn (gen_anddi3 (dest, subtarget,
                                           GEN_INT (aarch64_bitmasks[i])));
                  }
                num_insns += 2;
                return num_insns;
              }
        }
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
        emit_insn (gen_rtx_SET (VOIDmode, dest,
                                GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
           i < 64; i += 16, mask <<= 16)
        {
          if ((val & mask) != mask)
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns++;
            }
        }
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
        {
          if (first)
            {
              if (generate)
                emit_insn (gen_rtx_SET (VOIDmode, dest,
                                        GEN_INT (val & mask)));
              num_insns++;
              first = false;
            }
          else
            {
              if (generate)
                emit_insn (gen_insv_immdi (dest, GEN_INT (i),
                                           GEN_INT ((val >> i) & 0xffff)));
              num_insns++;
            }
        }
    }

  return num_insns;
}
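
/* A sketch of the fallback ("simple_sequence") path above: the DImode
   constant 0x1234567890abcdef has no all-zero or all-one 16-bit
   quarters, so it is built piecewise as

     mov  x0, #0xcdef
     movk x0, #0x90ab, lsl #16
     movk x0, #0x5678, lsl #32
     movk x0, #0x1234, lsl #48

   i.e. one insn per non-zero quarter, four in the worst case.  */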

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
      switch (sty)
        {
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          mem = force_const_mem (ptr_mode, imm);
          gcc_assert (mem);
          if (mode != ptr_mode)
            mem = gen_rtx_ZERO_EXTEND (mode, mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
          return;

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT:
        case SYMBOL_TINY_GOT:
          if (offset != const0_rtx)
            {
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);
              return;
            }
          /* FALLTHRU */

        case SYMBOL_SMALL_TPREL:
        case SYMBOL_SMALL_ABSOLUTE:
        case SYMBOL_TINY_ABSOLUTE:
          aarch64_load_symref_appropriately (dest, imm, sty);
          return;

        default:
          gcc_unreachable ();
        }
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
        {
          rtx mem = force_const_mem (mode, imm);
          gcc_assert (mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
        }

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}

static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           machine_mode mode,
                           const_tree type,
                           bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    {
      size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &dummymode, &nregs,
                                               NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
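
/* Illustrative cases under the AAPCS64 rules implemented above (assuming
   UNITS_PER_WORD == 8): a 16-byte struct of two longs is passed by value
   in a register pair; a 24-byte struct exceeds 2 * UNITS_PER_WORD and is
   passed by reference; and a struct of four doubles, although 32 bytes,
   is a homogeneous floating-point aggregate and so remains a candidate
   for the fp/simd registers.  */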

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
        {
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);
        }
      else
        {
          int i;
          rtx par;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
            {
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;
            }
          return par;
        }
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
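
/* For example (a sketch of the PARALLEL built above): returning
   struct { double a, b; } yields two DFmode pieces in v0 and v1,
   roughly

     (parallel [(expr_list (reg:DF v0) (const_int 0))
                (expr_list (reg:DF v1) (const_int 8))])

   where the constants are the byte offsets of each piece within the
   returned value.  */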

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the
   values of a called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
                                               type,
                                               &ag_mode,
                                               &count,
                                               NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
                               const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
                                                  type,
                                                  &pcum->aapcs_vfp_rmode,
                                                  nregs,
                                                  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
        {
          if (TYPE_MODE (type) == mode)
            alignment = TYPE_ALIGN (type);
          else
            alignment = GET_MODE_ALIGNMENT (mode);
        }
      else
        alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
                    const_tree type,
                    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
                        UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
                                                 mode,
                                                 type,
                                                 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
        {
          pcum->aapcs_nextnvrn = nvrn + nregs;
          if (!aarch64_composite_type_p (type, mode))
            {
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
            }
          else
            {
              rtx par;
              int i;
              par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
              for (i = 0; i < nregs; i++)
                {
                  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
                                         V0_REGNUM + nvrn + i);
                  tmp = gen_rtx_EXPR_LIST
                    (VOIDmode, tmp,
                     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
                  XVECEXP (par, 0, i) = tmp;
                }
              pcum->aapcs_reg = par;
            }
          return;
        }
      else
        {
          /* C.3 NSRN is set to 8.  */
          pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
          goto on_stack;
        }
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
         rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
        {
          ++ncrn;
          gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
        }
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
         A reg is still generated for it, but the caller should be smart
         enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
        {
          pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
        }
      else
        {
          rtx par;
          int i;

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
          for (i = 0; i < nregs; i++)
            {
              rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * UNITS_PER_WORD));
              XVECEXP (par, 0, i) = tmp;
            }
          pcum->aapcs_reg = par;
        }

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on the stack; record the needed number of words
     for this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
                                               16 / UNITS_PER_WORD);
  return;
}
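
/* A worked example of the allocation above: for

     void f (int a, double b, __int128 c);

   'a' is an integral candidate and takes w0 (NGRN 0 -> 1); 'b' is a
   floating-point candidate and takes d0 (NSRN 0 -> 1); 'c' needs two
   general registers and has 16-byte alignment, so under rule C.8 the
   NGRN is rounded up from 1 to 2 and it is passed in x2/x3.  */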

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
                              const_tree fntype ATTRIBUTE_UNUSED,
                              rtx libname ATTRIBUTE_UNUSED,
                              const_tree fndecl ATTRIBUTE_UNUSED,
                              unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
                              machine_mode mode,
                              const_tree type,
                              bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
                  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
          || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
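
/* Concretely, with PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128 on this
   target, the result is clamped to the range [64, 128]: a char argument
   still gets a full 8-byte slot, while an over-aligned type (say, aligned
   to 32 bytes) is limited to 16-byte stack alignment.  */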

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
         || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
                        bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
                            : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
        return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
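
/* For instance (big-endian only, per the check above): a 6-byte struct
   moved through x0 is padded upward, i.e. the padding follows the data,
   so on big-endian the struct's bytes occupy the most significant part
   of the register and the low 16 bits are padding, mirroring its layout
   starting at the lowest address of a stack slot.  */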
*/ 2074 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) 2075 if (df_regs_ever_live_p (regno) 2076 && (regno == R30_REGNUM 2077 || !call_used_regs[regno])) 2078 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; 2079 2080 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) 2081 if (df_regs_ever_live_p (regno) 2082 && !call_used_regs[regno]) 2083 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; 2084 2085 if (frame_pointer_needed) 2086 { 2087 /* FP and LR are placed in the linkage record. */ 2088 cfun->machine->frame.reg_offset[R29_REGNUM] = 0; 2089 cfun->machine->frame.wb_candidate1 = R29_REGNUM; 2090 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; 2091 cfun->machine->frame.wb_candidate2 = R30_REGNUM; 2092 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; 2093 offset += 2 * UNITS_PER_WORD; 2094 } 2095 2096 /* Now assign stack slots for them. */ 2097 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) 2098 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) 2099 { 2100 cfun->machine->frame.reg_offset[regno] = offset; 2101 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) 2102 cfun->machine->frame.wb_candidate1 = regno; 2103 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) 2104 cfun->machine->frame.wb_candidate2 = regno; 2105 offset += UNITS_PER_WORD; 2106 } 2107 2108 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) 2109 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) 2110 { 2111 cfun->machine->frame.reg_offset[regno] = offset; 2112 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) 2113 cfun->machine->frame.wb_candidate1 = regno; 2114 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER 2115 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) 2116 cfun->machine->frame.wb_candidate2 = regno; 2117 offset += UNITS_PER_WORD; 2118 } 2119 2120 cfun->machine->frame.padding0 = 2121 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); 2122 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); 2123 2124 cfun->machine->frame.saved_regs_size = offset; 2125 2126 cfun->machine->frame.hard_fp_offset 2127 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size 2128 + get_frame_size () 2129 + cfun->machine->frame.saved_regs_size, 2130 STACK_BOUNDARY / BITS_PER_UNIT); 2131 2132 cfun->machine->frame.frame_size 2133 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset 2134 + crtl->outgoing_args_size, 2135 STACK_BOUNDARY / BITS_PER_UNIT); 2136 2137 cfun->machine->frame.laid_out = true; 2138} 2139 2140static bool 2141aarch64_register_saved_on_entry (int regno) 2142{ 2143 return cfun->machine->frame.reg_offset[regno] >= 0; 2144} 2145 2146static unsigned 2147aarch64_next_callee_save (unsigned regno, unsigned limit) 2148{ 2149 while (regno <= limit && !aarch64_register_saved_on_entry (regno)) 2150 regno ++; 2151 return regno; 2152} 2153 2154static void 2155aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, 2156 HOST_WIDE_INT adjustment) 2157 { 2158 rtx base_rtx = stack_pointer_rtx; 2159 rtx insn, reg, mem; 2160 2161 reg = gen_rtx_REG (mode, regno); 2162 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, 2163 plus_constant (Pmode, base_rtx, -adjustment)); 2164 mem = gen_rtx_MEM (mode, mem); 2165 2166 insn = emit_move_insn (mem, reg); 2167 RTX_FRAME_RELATED_P (insn) = 1; 2168} 2169 2170static rtx 2171aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, 2172 HOST_WIDE_INT adjustment) 2173{ 2174 switch (mode) 2175 { 2176 case 
DImode: 2177 return gen_storewb_pairdi_di (base, base, reg, reg2, 2178 GEN_INT (-adjustment), 2179 GEN_INT (UNITS_PER_WORD - adjustment)); 2180 case DFmode: 2181 return gen_storewb_pairdf_di (base, base, reg, reg2, 2182 GEN_INT (-adjustment), 2183 GEN_INT (UNITS_PER_WORD - adjustment)); 2184 default: 2185 gcc_unreachable (); 2186 } 2187} 2188 2189static void 2190aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1, 2191 unsigned regno2, HOST_WIDE_INT adjustment) 2192{ 2193 rtx_insn *insn; 2194 rtx reg1 = gen_rtx_REG (mode, regno1); 2195 rtx reg2 = gen_rtx_REG (mode, regno2); 2196 2197 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1, 2198 reg2, adjustment)); 2199 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; 2200 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; 2201 RTX_FRAME_RELATED_P (insn) = 1; 2202} 2203 2204static rtx 2205aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, 2206 HOST_WIDE_INT adjustment) 2207{ 2208 switch (mode) 2209 { 2210 case DImode: 2211 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), 2212 GEN_INT (UNITS_PER_WORD)); 2213 case DFmode: 2214 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), 2215 GEN_INT (UNITS_PER_WORD)); 2216 default: 2217 gcc_unreachable (); 2218 } 2219} 2220 2221static rtx 2222aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2, 2223 rtx reg2) 2224{ 2225 switch (mode) 2226 { 2227 case DImode: 2228 return gen_store_pairdi (mem1, reg1, mem2, reg2); 2229 2230 case DFmode: 2231 return gen_store_pairdf (mem1, reg1, mem2, reg2); 2232 2233 default: 2234 gcc_unreachable (); 2235 } 2236} 2237 2238static rtx 2239aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2, 2240 rtx mem2) 2241{ 2242 switch (mode) 2243 { 2244 case DImode: 2245 return gen_load_pairdi (reg1, mem1, reg2, mem2); 2246 2247 case DFmode: 2248 return gen_load_pairdf (reg1, mem1, reg2, mem2); 2249 2250 default: 2251 gcc_unreachable (); 2252 } 2253} 2254 2255 2256static void 2257aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, 2258 unsigned start, unsigned limit, bool skip_wb) 2259{ 2260 rtx_insn *insn; 2261 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed 2262 ? 
					    gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
					      offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
						   offset));
	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
						    reg2));

	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  regno = regno2;
	}
      else
	insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

static void
aarch64_restore_callee_saves (machine_mode mode,
			      HOST_WIDE_INT start_offset, unsigned start,
			      unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
					    ?
gen_frame_mem : gen_rtx_MEM); 2321 unsigned regno; 2322 unsigned regno2; 2323 HOST_WIDE_INT offset; 2324 2325 for (regno = aarch64_next_callee_save (start, limit); 2326 regno <= limit; 2327 regno = aarch64_next_callee_save (regno + 1, limit)) 2328 { 2329 rtx reg, mem; 2330 2331 if (skip_wb 2332 && (regno == cfun->machine->frame.wb_candidate1 2333 || regno == cfun->machine->frame.wb_candidate2)) 2334 continue; 2335 2336 reg = gen_rtx_REG (mode, regno); 2337 offset = start_offset + cfun->machine->frame.reg_offset[regno]; 2338 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); 2339 2340 regno2 = aarch64_next_callee_save (regno + 1, limit); 2341 2342 if (regno2 <= limit 2343 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) 2344 == cfun->machine->frame.reg_offset[regno2])) 2345 { 2346 rtx reg2 = gen_rtx_REG (mode, regno2); 2347 rtx mem2; 2348 2349 offset = start_offset + cfun->machine->frame.reg_offset[regno2]; 2350 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); 2351 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); 2352 2353 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); 2354 regno = regno2; 2355 } 2356 else 2357 emit_move_insn (reg, mem); 2358 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); 2359 } 2360} 2361 2362/* AArch64 stack frames generated by this compiler look like: 2363 2364 +-------------------------------+ 2365 | | 2366 | incoming stack arguments | 2367 | | 2368 +-------------------------------+ 2369 | | <-- incoming stack pointer (aligned) 2370 | callee-allocated save area | 2371 | for register varargs | 2372 | | 2373 +-------------------------------+ 2374 | local variables | <-- frame_pointer_rtx 2375 | | 2376 +-------------------------------+ 2377 | padding0 | \ 2378 +-------------------------------+ | 2379 | callee-saved registers | | frame.saved_regs_size 2380 +-------------------------------+ | 2381 | LR' | | 2382 +-------------------------------+ | 2383 | FP' | / <- hard_frame_pointer_rtx (aligned) 2384 +-------------------------------+ 2385 | dynamic allocation | 2386 +-------------------------------+ 2387 | padding | 2388 +-------------------------------+ 2389 | outgoing stack arguments | <-- arg_pointer 2390 | | 2391 +-------------------------------+ 2392 | | <-- stack_pointer_rtx (aligned) 2393 2394 Dynamic stack allocations via alloca() decrease stack_pointer_rtx 2395 but leave frame_pointer_rtx and hard_frame_pointer_rtx 2396 unchanged. */ 2397 2398/* Generate the prologue instructions for entry into a function. 2399 Establish the stack frame by decreasing the stack pointer with a 2400 properly calculated size and, if necessary, create a frame record 2401 filled with the values of LR and previous frame pointer. The 2402 current FP is also set up if it is in use. */ 2403 2404void 2405aarch64_expand_prologue (void) 2406{ 2407 /* sub sp, sp, #<frame_size> 2408 stp {fp, lr}, [sp, #<frame_size> - 16] 2409 add fp, sp, #<frame_size> - hardfp_offset 2410 stp {cs_reg}, [fp, #-16] etc. 2411 2412 sub sp, sp, <final_adjustment_if_any> 2413 */ 2414 HOST_WIDE_INT frame_size, offset; 2415 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. 
 */
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  /* Store pairs and load pairs have a range of only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame is large, the stack pointer is first decreased to
	 skip over the callee-allocated save area for register varargs, the
	 local variable area and/or the callee-saved register area.  This
	 allows the pre-index write-back store pair instructions to be used
	 to set up the stack frame efficiently.  */
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -frame_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else if (frame_size > 0)
	{
	  int hi_ofs = frame_size & 0xfff000;
	  int lo_ofs = frame_size & 0x000fff;

	  if (hi_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-hi_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if (lo_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-lo_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      bool skip_wb = false;

      if (frame_pointer_needed)
	{
	  skip_wb = true;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;

	      aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
					 R30_REGNUM, false);
	    }
	  else
	    aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
	}
      else
	{
	  unsigned reg1 = cfun->machine->frame.wb_candidate1;
	  unsigned reg2 = cfun->machine->frame.wb_candidate2;

	  if (fp_offset
	      || reg1 == FIRST_PSEUDO_REGISTER
	      || (reg2 == FIRST_PSEUDO_REGISTER
		  && offset >= 256))
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else
	    {
	      machine_mode mode1 = (reg1 <= R30_REGNUM) ?
				   DImode : DFmode;

	      skip_wb = true;

	      if (reg2 == FIRST_PSEUDO_REGISTER)
		aarch64_pushwb_single_reg (mode1, reg1, offset);
	      else
		aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
	    }
	}

      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
				 skip_wb);
      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
				 skip_wb);
    }

  /* When offset >= 512,
     sub sp, sp, #<outgoing_args_size>  */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}

/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee-saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue pass will
   use this to check whether shrink-wrapping is feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}

/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;
  /* We need to add a memory barrier to prevent reads from the deallocated
     stack.  */
  bool need_barrier_p = (get_frame_size () != 0
			 || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  /* Store pairs and load pairs have a range of only -512 to 504.  */
  if (offset >= 512)
    {
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.
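     Concretely, the restore below is the single instruction
     "add sp, x29, #0", whose preferred alias is "mov sp, x29".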
*/ 2615 if (frame_pointer_needed 2616 && (crtl->outgoing_args_size || cfun->calls_alloca)) 2617 { 2618 if (cfun->calls_alloca) 2619 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); 2620 2621 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, 2622 hard_frame_pointer_rtx, 2623 GEN_INT (0))); 2624 offset = offset - fp_offset; 2625 } 2626 2627 if (offset > 0) 2628 { 2629 unsigned reg1 = cfun->machine->frame.wb_candidate1; 2630 unsigned reg2 = cfun->machine->frame.wb_candidate2; 2631 bool skip_wb = true; 2632 rtx cfi_ops = NULL; 2633 2634 if (frame_pointer_needed) 2635 fp_offset = 0; 2636 else if (fp_offset 2637 || reg1 == FIRST_PSEUDO_REGISTER 2638 || (reg2 == FIRST_PSEUDO_REGISTER 2639 && offset >= 256)) 2640 skip_wb = false; 2641 2642 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, 2643 skip_wb, &cfi_ops); 2644 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, 2645 skip_wb, &cfi_ops); 2646 2647 if (need_barrier_p) 2648 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); 2649 2650 if (skip_wb) 2651 { 2652 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode; 2653 rtx rreg1 = gen_rtx_REG (mode1, reg1); 2654 2655 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops); 2656 if (reg2 == FIRST_PSEUDO_REGISTER) 2657 { 2658 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset); 2659 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); 2660 mem = gen_rtx_MEM (mode1, mem); 2661 insn = emit_move_insn (rreg1, mem); 2662 } 2663 else 2664 { 2665 rtx rreg2 = gen_rtx_REG (mode1, reg2); 2666 2667 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops); 2668 insn = emit_insn (aarch64_gen_loadwb_pair 2669 (mode1, stack_pointer_rtx, rreg1, 2670 rreg2, offset)); 2671 } 2672 } 2673 else 2674 { 2675 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, 2676 GEN_INT (offset))); 2677 } 2678 2679 /* Reset the CFA to be SP + FRAME_SIZE. */ 2680 rtx new_cfa = stack_pointer_rtx; 2681 if (frame_size > 0) 2682 new_cfa = plus_constant (Pmode, new_cfa, frame_size); 2683 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); 2684 REG_NOTES (insn) = cfi_ops; 2685 RTX_FRAME_RELATED_P (insn) = 1; 2686 } 2687 2688 if (frame_size > 0) 2689 { 2690 if (need_barrier_p) 2691 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); 2692 2693 if (frame_size >= 0x1000000) 2694 { 2695 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); 2696 emit_move_insn (op0, GEN_INT (frame_size)); 2697 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); 2698 } 2699 else 2700 { 2701 int hi_ofs = frame_size & 0xfff000; 2702 int lo_ofs = frame_size & 0x000fff; 2703 2704 if (hi_ofs && lo_ofs) 2705 { 2706 insn = emit_insn (gen_add2_insn 2707 (stack_pointer_rtx, GEN_INT (hi_ofs))); 2708 RTX_FRAME_RELATED_P (insn) = 1; 2709 frame_size = lo_ofs; 2710 } 2711 insn = emit_insn (gen_add2_insn 2712 (stack_pointer_rtx, GEN_INT (frame_size))); 2713 } 2714 2715 /* Reset the CFA to be SP + 0. */ 2716 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); 2717 RTX_FRAME_RELATED_P (insn) = 1; 2718 } 2719 2720 /* Stack adjustment for exception handler. */ 2721 if (crtl->calls_eh_return) 2722 { 2723 /* We need to unwind the stack by the offset computed by 2724 EH_RETURN_STACKADJ_RTX. We have already reset the CFA 2725 to be SP; letting the CFA move during this adjustment 2726 is just as correct as retaining the CFA from the body 2727 of the function. Therefore, do nothing special. 
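     The adjustment below assembles to a single "add sp, sp, x<n>",
     where x<n> is whichever register EH_RETURN_STACKADJ_RTX designates.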
 */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}

/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
	      - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return an SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return an FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode, hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode, stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}

/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 0;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
      insns = 1;
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 instructions, preferring MOVZ instructions when they are both the
	 same.
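	 As an illustration (values, not literal compiler output): every
	 halfword of 0xffffaaaaffffffff is non-zero, but only one differs
	 from 0xffff, so the MOVN sequence

	     movn	x0, #0x0		// x0 = 0xffffffffffffffff
	     movk	x0, #0xaaaa, lsl #32	// x0 = 0xffffaaaaffffffff

	 needs two instructions where MOVZ plus three MOVKs would need four.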
 */
      if (ncount < zcount)
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
	  tval = 0xffff;
	  insns++;
	}
      else
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val & 0xffff));
	  tval = 0;
	  insns++;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
					   GEN_INT (i),
					   GEN_INT (val & 0xffff)));
	      insns++;
	    }
	  val >>= 16;
	}
    }
  return insns;
}

static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
				  GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx,
						scratch_rtx)));
	}
    }
}

/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm, where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.
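     As a sketch, for delta == 8 and vcall_offset == 0 the whole thunk is
     just (target name hypothetical):

	 add	x0, x0, #8		// step this to the secondary base
	 b	<real_method>		// tail call the target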
*/ 2912 int this_regno = R0_REGNUM; 2913 rtx this_rtx, temp0, temp1, addr, funexp; 2914 rtx_insn *insn; 2915 2916 reload_completed = 1; 2917 emit_note (NOTE_INSN_PROLOGUE_END); 2918 2919 if (vcall_offset == 0) 2920 aarch64_add_constant (this_regno, IP1_REGNUM, delta); 2921 else 2922 { 2923 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); 2924 2925 this_rtx = gen_rtx_REG (Pmode, this_regno); 2926 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); 2927 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); 2928 2929 addr = this_rtx; 2930 if (delta != 0) 2931 { 2932 if (delta >= -256 && delta < 256) 2933 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, 2934 plus_constant (Pmode, this_rtx, delta)); 2935 else 2936 aarch64_add_constant (this_regno, IP1_REGNUM, delta); 2937 } 2938 2939 if (Pmode == ptr_mode) 2940 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr)); 2941 else 2942 aarch64_emit_move (temp0, 2943 gen_rtx_ZERO_EXTEND (Pmode, 2944 gen_rtx_MEM (ptr_mode, addr))); 2945 2946 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES) 2947 addr = plus_constant (Pmode, temp0, vcall_offset); 2948 else 2949 { 2950 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); 2951 addr = gen_rtx_PLUS (Pmode, temp0, temp1); 2952 } 2953 2954 if (Pmode == ptr_mode) 2955 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr)); 2956 else 2957 aarch64_emit_move (temp1, 2958 gen_rtx_SIGN_EXTEND (Pmode, 2959 gen_rtx_MEM (ptr_mode, addr))); 2960 2961 emit_insn (gen_add2_insn (this_rtx, temp1)); 2962 } 2963 2964 /* Generate a tail call to the target function. */ 2965 if (!TREE_USED (function)) 2966 { 2967 assemble_external (function); 2968 TREE_USED (function) = 1; 2969 } 2970 funexp = XEXP (DECL_RTL (function), 0); 2971 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 2972 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); 2973 SIBLING_CALL_P (insn) = 1; 2974 2975 insn = get_insns (); 2976 shorten_branches (insn); 2977 final_start_function (insn, file, 1); 2978 final (insn, file, 1); 2979 final_end_function (); 2980 2981 /* Stop pretending to be a post-reload pass. */ 2982 reload_completed = 0; 2983} 2984 2985static bool 2986aarch64_tls_referenced_p (rtx x) 2987{ 2988 if (!TARGET_HAVE_TLS) 2989 return false; 2990 subrtx_iterator::array_type array; 2991 FOR_EACH_SUBRTX (iter, array, x, ALL) 2992 { 2993 const_rtx x = *iter; 2994 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) 2995 return true; 2996 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are 2997 TLS offsets, not real symbol references. 
 */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}


static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}


static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* Set S consecutive bits to 1 (S < 64).  */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* Rotate right by R.  */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* Replicate the constant depending on SIMD size.  */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);  /* Fall through.  */
		case 2: imm |= (imm <<  4);  /* Fall through.  */
		case 3: imm |= (imm <<  8);  /* Fall through.  */
		case 4: imm |= (imm << 16);  /* Fall through.  */
		case 5: imm |= (imm << 32);  /* Fall through.  */
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}


/* Return true if VAL can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}


/* Return true if VAL is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}


/* Return true if VAL is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate the bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}


/* Return true if VAL is an immediate that can be loaded into a
   register in a single instruction.
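   For example (values illustrative):
     0x0000000012340000	movz x0, #0x1234, lsl #16
     0xffffffffffffedcb	movn x0, #0x1234
     0x5555555555555555	orr  x0, xzr, #0x5555555555555555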
*/ 3115bool 3116aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) 3117{ 3118 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode)) 3119 return 1; 3120 return aarch64_bitmask_imm (val, mode); 3121} 3122 3123static bool 3124aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 3125{ 3126 rtx base, offset; 3127 3128 if (GET_CODE (x) == HIGH) 3129 return true; 3130 3131 split_const (x, &base, &offset); 3132 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) 3133 { 3134 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR) 3135 != SYMBOL_FORCE_TO_MEM) 3136 return true; 3137 else 3138 /* Avoid generating a 64-bit relocation in ILP32; leave 3139 to aarch64_expand_mov_immediate to handle it properly. */ 3140 return mode != ptr_mode; 3141 } 3142 3143 return aarch64_tls_referenced_p (x); 3144} 3145 3146/* Return true if register REGNO is a valid index register. 3147 STRICT_P is true if REG_OK_STRICT is in effect. */ 3148 3149bool 3150aarch64_regno_ok_for_index_p (int regno, bool strict_p) 3151{ 3152 if (!HARD_REGISTER_NUM_P (regno)) 3153 { 3154 if (!strict_p) 3155 return true; 3156 3157 if (!reg_renumber) 3158 return false; 3159 3160 regno = reg_renumber[regno]; 3161 } 3162 return GP_REGNUM_P (regno); 3163} 3164 3165/* Return true if register REGNO is a valid base register for mode MODE. 3166 STRICT_P is true if REG_OK_STRICT is in effect. */ 3167 3168bool 3169aarch64_regno_ok_for_base_p (int regno, bool strict_p) 3170{ 3171 if (!HARD_REGISTER_NUM_P (regno)) 3172 { 3173 if (!strict_p) 3174 return true; 3175 3176 if (!reg_renumber) 3177 return false; 3178 3179 regno = reg_renumber[regno]; 3180 } 3181 3182 /* The fake registers will be eliminated to either the stack or 3183 hard frame pointer, both of which are usually valid base registers. 3184 Reload deals with the cases where the eliminated form isn't valid. */ 3185 return (GP_REGNUM_P (regno) 3186 || regno == SP_REGNUM 3187 || regno == FRAME_POINTER_REGNUM 3188 || regno == ARG_POINTER_REGNUM); 3189} 3190 3191/* Return true if X is a valid base register for mode MODE. 3192 STRICT_P is true if REG_OK_STRICT is in effect. */ 3193 3194static bool 3195aarch64_base_register_rtx_p (rtx x, bool strict_p) 3196{ 3197 if (!strict_p && GET_CODE (x) == SUBREG) 3198 x = SUBREG_REG (x); 3199 3200 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p)); 3201} 3202 3203/* Return true if address offset is a valid index. If it is, fill in INFO 3204 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ 3205 3206static bool 3207aarch64_classify_index (struct aarch64_address_info *info, rtx x, 3208 machine_mode mode, bool strict_p) 3209{ 3210 enum aarch64_address_type type; 3211 rtx index; 3212 int shift; 3213 3214 /* (reg:P) */ 3215 if ((REG_P (x) || GET_CODE (x) == SUBREG) 3216 && GET_MODE (x) == Pmode) 3217 { 3218 type = ADDRESS_REG_REG; 3219 index = x; 3220 shift = 0; 3221 } 3222 /* (sign_extend:DI (reg:SI)) */ 3223 else if ((GET_CODE (x) == SIGN_EXTEND 3224 || GET_CODE (x) == ZERO_EXTEND) 3225 && GET_MODE (x) == DImode 3226 && GET_MODE (XEXP (x, 0)) == SImode) 3227 { 3228 type = (GET_CODE (x) == SIGN_EXTEND) 3229 ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 3230 index = XEXP (x, 0); 3231 shift = 0; 3232 } 3233 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */ 3234 else if (GET_CODE (x) == MULT 3235 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 3236 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) 3237 && GET_MODE (XEXP (x, 0)) == DImode 3238 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode 3239 && CONST_INT_P (XEXP (x, 1))) 3240 { 3241 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) 3242 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 3243 index = XEXP (XEXP (x, 0), 0); 3244 shift = exact_log2 (INTVAL (XEXP (x, 1))); 3245 } 3246 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */ 3247 else if (GET_CODE (x) == ASHIFT 3248 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 3249 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) 3250 && GET_MODE (XEXP (x, 0)) == DImode 3251 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode 3252 && CONST_INT_P (XEXP (x, 1))) 3253 { 3254 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) 3255 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 3256 index = XEXP (XEXP (x, 0), 0); 3257 shift = INTVAL (XEXP (x, 1)); 3258 } 3259 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */ 3260 else if ((GET_CODE (x) == SIGN_EXTRACT 3261 || GET_CODE (x) == ZERO_EXTRACT) 3262 && GET_MODE (x) == DImode 3263 && GET_CODE (XEXP (x, 0)) == MULT 3264 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 3265 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 3266 { 3267 type = (GET_CODE (x) == SIGN_EXTRACT) 3268 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 3269 index = XEXP (XEXP (x, 0), 0); 3270 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); 3271 if (INTVAL (XEXP (x, 1)) != 32 + shift 3272 || INTVAL (XEXP (x, 2)) != 0) 3273 shift = -1; 3274 } 3275 /* (and:DI (mult:DI (reg:DI) (const_int scale)) 3276 (const_int 0xffffffff<<shift)) */ 3277 else if (GET_CODE (x) == AND 3278 && GET_MODE (x) == DImode 3279 && GET_CODE (XEXP (x, 0)) == MULT 3280 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 3281 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3282 && CONST_INT_P (XEXP (x, 1))) 3283 { 3284 type = ADDRESS_REG_UXTW; 3285 index = XEXP (XEXP (x, 0), 0); 3286 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))); 3287 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) 3288 shift = -1; 3289 } 3290 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */ 3291 else if ((GET_CODE (x) == SIGN_EXTRACT 3292 || GET_CODE (x) == ZERO_EXTRACT) 3293 && GET_MODE (x) == DImode 3294 && GET_CODE (XEXP (x, 0)) == ASHIFT 3295 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 3296 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 3297 { 3298 type = (GET_CODE (x) == SIGN_EXTRACT) 3299 ? 
ADDRESS_REG_SXTW : ADDRESS_REG_UXTW; 3300 index = XEXP (XEXP (x, 0), 0); 3301 shift = INTVAL (XEXP (XEXP (x, 0), 1)); 3302 if (INTVAL (XEXP (x, 1)) != 32 + shift 3303 || INTVAL (XEXP (x, 2)) != 0) 3304 shift = -1; 3305 } 3306 /* (and:DI (ashift:DI (reg:DI) (const_int shift)) 3307 (const_int 0xffffffff<<shift)) */ 3308 else if (GET_CODE (x) == AND 3309 && GET_MODE (x) == DImode 3310 && GET_CODE (XEXP (x, 0)) == ASHIFT 3311 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode 3312 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 3313 && CONST_INT_P (XEXP (x, 1))) 3314 { 3315 type = ADDRESS_REG_UXTW; 3316 index = XEXP (XEXP (x, 0), 0); 3317 shift = INTVAL (XEXP (XEXP (x, 0), 1)); 3318 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift) 3319 shift = -1; 3320 } 3321 /* (mult:P (reg:P) (const_int scale)) */ 3322 else if (GET_CODE (x) == MULT 3323 && GET_MODE (x) == Pmode 3324 && GET_MODE (XEXP (x, 0)) == Pmode 3325 && CONST_INT_P (XEXP (x, 1))) 3326 { 3327 type = ADDRESS_REG_REG; 3328 index = XEXP (x, 0); 3329 shift = exact_log2 (INTVAL (XEXP (x, 1))); 3330 } 3331 /* (ashift:P (reg:P) (const_int shift)) */ 3332 else if (GET_CODE (x) == ASHIFT 3333 && GET_MODE (x) == Pmode 3334 && GET_MODE (XEXP (x, 0)) == Pmode 3335 && CONST_INT_P (XEXP (x, 1))) 3336 { 3337 type = ADDRESS_REG_REG; 3338 index = XEXP (x, 0); 3339 shift = INTVAL (XEXP (x, 1)); 3340 } 3341 else 3342 return false; 3343 3344 if (GET_CODE (index) == SUBREG) 3345 index = SUBREG_REG (index); 3346 3347 if ((shift == 0 || 3348 (shift > 0 && shift <= 3 3349 && (1 << shift) == GET_MODE_SIZE (mode))) 3350 && REG_P (index) 3351 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p)) 3352 { 3353 info->type = type; 3354 info->offset = index; 3355 info->shift = shift; 3356 return true; 3357 } 3358 3359 return false; 3360} 3361 3362bool 3363aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset) 3364{ 3365 return (offset >= -64 * GET_MODE_SIZE (mode) 3366 && offset < 64 * GET_MODE_SIZE (mode) 3367 && offset % GET_MODE_SIZE (mode) == 0); 3368} 3369 3370static inline bool 3371offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED, 3372 HOST_WIDE_INT offset) 3373{ 3374 return offset >= -256 && offset < 256; 3375} 3376 3377static inline bool 3378offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset) 3379{ 3380 return (offset >= 0 3381 && offset < 4096 * GET_MODE_SIZE (mode) 3382 && offset % GET_MODE_SIZE (mode) == 0); 3383} 3384 3385/* Return true if X is a valid address for machine mode MODE. If it is, 3386 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in 3387 effect. OUTER_CODE is PARALLEL for a load/store pair. */ 3388 3389static bool 3390aarch64_classify_address (struct aarch64_address_info *info, 3391 rtx x, machine_mode mode, 3392 RTX_CODE outer_code, bool strict_p) 3393{ 3394 enum rtx_code code = GET_CODE (x); 3395 rtx op0, op1; 3396 3397 /* On BE, we use load/store pair for all large int mode load/stores. */ 3398 bool load_store_pair_p = (outer_code == PARALLEL 3399 || (BYTES_BIG_ENDIAN 3400 && aarch64_vect_struct_mode_p (mode))); 3401 3402 bool allow_reg_index_p = 3403 !load_store_pair_p 3404 && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode)) 3405 && !aarch64_vect_struct_mode_p (mode); 3406 3407 /* On LE, for AdvSIMD, don't support anything other than POST_INC or 3408 REG addressing. 
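   Struct modes are moved via LD1/ST1-style multi-register instructions,
   which accept only a base register, optionally post-incremented, e.g.
   "ld1 {v0.16b, v1.16b, v2.16b}, [x0], #48" (operands illustrative).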
 */
  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (! strict_p
	  && REG_P (op0)
	  && (op0 == virtual_stack_vars_rtx
	      || op0 == frame_pointer_rtx
	      || op0 == arg_pointer_rtx)
	  && CONST_INT_P (op1))
	{
	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  return true;
	}

      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either
	     mode.  */
	  if (mode == TImode || mode == TFmode)
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  /* A 7-bit offset check because OImode will emit a ldp/stp
	     instruction (only big endian will get here).
	     For ldp/stp instructions, the offset is scaled for the size of a
	     single element of the pair.  */
	  if (mode == OImode)
	    return aarch64_offset_7bit_signed_scaled_p (TImode, offset);

	  /* Three 9/12-bit offset checks because CImode will emit three
	     ldr/str instructions (only big endian will get here).  */
	  if (mode == CImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
			|| offset_12bit_unsigned_scaled_p (V16QImode,
							   offset + 32)));

	  /* Two 7-bit offset checks because XImode will emit two ldp/stp
	     instructions (only big endian will get here).  */
	  if (mode == XImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && aarch64_offset_7bit_signed_scaled_p (TImode,
							    offset + 32));

	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.
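	     For example, a DImode access can use "ldr x0, [x1, x2, lsl #3]"
	     (index scaled by the access size) and an SImode access can use
	     "ldr w0, [x1, w2, sxtw #2]" (sign-extended and scaled 32-bit
	     index); the operands here are illustrative.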
*/ 3494 if (aarch64_base_register_rtx_p (op0, strict_p) 3495 && aarch64_classify_index (info, op1, mode, strict_p)) 3496 { 3497 info->base = op0; 3498 return true; 3499 } 3500 if (aarch64_base_register_rtx_p (op1, strict_p) 3501 && aarch64_classify_index (info, op0, mode, strict_p)) 3502 { 3503 info->base = op1; 3504 return true; 3505 } 3506 } 3507 3508 return false; 3509 3510 case POST_INC: 3511 case POST_DEC: 3512 case PRE_INC: 3513 case PRE_DEC: 3514 info->type = ADDRESS_REG_WB; 3515 info->base = XEXP (x, 0); 3516 info->offset = NULL_RTX; 3517 return aarch64_base_register_rtx_p (info->base, strict_p); 3518 3519 case POST_MODIFY: 3520 case PRE_MODIFY: 3521 info->type = ADDRESS_REG_WB; 3522 info->base = XEXP (x, 0); 3523 if (GET_CODE (XEXP (x, 1)) == PLUS 3524 && CONST_INT_P (XEXP (XEXP (x, 1), 1)) 3525 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base) 3526 && aarch64_base_register_rtx_p (info->base, strict_p)) 3527 { 3528 HOST_WIDE_INT offset; 3529 info->offset = XEXP (XEXP (x, 1), 1); 3530 offset = INTVAL (info->offset); 3531 3532 /* TImode and TFmode values are allowed in both pairs of X 3533 registers and individual Q registers. The available 3534 address modes are: 3535 X,X: 7-bit signed scaled offset 3536 Q: 9-bit signed offset 3537 We conservatively require an offset representable in either mode. 3538 */ 3539 if (mode == TImode || mode == TFmode) 3540 return (aarch64_offset_7bit_signed_scaled_p (mode, offset) 3541 && offset_9bit_signed_unscaled_p (mode, offset)); 3542 3543 if (load_store_pair_p) 3544 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) 3545 && aarch64_offset_7bit_signed_scaled_p (mode, offset)); 3546 else 3547 return offset_9bit_signed_unscaled_p (mode, offset); 3548 } 3549 return false; 3550 3551 case CONST: 3552 case SYMBOL_REF: 3553 case LABEL_REF: 3554 /* load literal: pc-relative constant pool entry. Only supported 3555 for SI mode or larger. */ 3556 info->type = ADDRESS_SYMBOLIC; 3557 3558 if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4) 3559 { 3560 rtx sym, addend; 3561 3562 split_const (x, &sym, &addend); 3563 return (GET_CODE (sym) == LABEL_REF 3564 || (GET_CODE (sym) == SYMBOL_REF 3565 && CONSTANT_POOL_ADDRESS_P (sym))); 3566 } 3567 return false; 3568 3569 case LO_SUM: 3570 info->type = ADDRESS_LO_SUM; 3571 info->base = XEXP (x, 0); 3572 info->offset = XEXP (x, 1); 3573 if (allow_reg_index_p 3574 && aarch64_base_register_rtx_p (info->base, strict_p)) 3575 { 3576 rtx sym, offs; 3577 split_const (info->offset, &sym, &offs); 3578 if (GET_CODE (sym) == SYMBOL_REF 3579 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM) 3580 == SYMBOL_SMALL_ABSOLUTE)) 3581 { 3582 /* The symbol and offset must be aligned to the access size. 
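	       This is because the low 12 bits of the address become the
	       scaled immediate of an LDR/STR via a :lo12: relocation,
	       e.g. (illustrative)

		   adrp	x0, sym
		   ldr	w1, [x0, #:lo12:sym]

	       which is only encodable when sym plus the offset is aligned
	       to the 4-byte access size.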
*/ 3583 unsigned int align; 3584 unsigned int ref_size; 3585 3586 if (CONSTANT_POOL_ADDRESS_P (sym)) 3587 align = GET_MODE_ALIGNMENT (get_pool_mode (sym)); 3588 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym)) 3589 { 3590 tree exp = SYMBOL_REF_DECL (sym); 3591 align = TYPE_ALIGN (TREE_TYPE (exp)); 3592 align = CONSTANT_ALIGNMENT (exp, align); 3593 } 3594 else if (SYMBOL_REF_DECL (sym)) 3595 align = DECL_ALIGN (SYMBOL_REF_DECL (sym)); 3596 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym) 3597 && SYMBOL_REF_BLOCK (sym) != NULL) 3598 align = SYMBOL_REF_BLOCK (sym)->alignment; 3599 else 3600 align = BITS_PER_UNIT; 3601 3602 ref_size = GET_MODE_SIZE (mode); 3603 if (ref_size == 0) 3604 ref_size = GET_MODE_SIZE (DImode); 3605 3606 return ((INTVAL (offs) & (ref_size - 1)) == 0 3607 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0); 3608 } 3609 } 3610 return false; 3611 3612 default: 3613 return false; 3614 } 3615} 3616 3617bool 3618aarch64_symbolic_address_p (rtx x) 3619{ 3620 rtx offset; 3621 3622 split_const (x, &x, &offset); 3623 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; 3624} 3625 3626/* Classify the base of symbolic expression X, given that X appears in 3627 context CONTEXT. */ 3628 3629enum aarch64_symbol_type 3630aarch64_classify_symbolic_expression (rtx x, 3631 enum aarch64_symbol_context context) 3632{ 3633 rtx offset; 3634 3635 split_const (x, &x, &offset); 3636 return aarch64_classify_symbol (x, offset, context); 3637} 3638 3639 3640/* Return TRUE if X is a legitimate address for accessing memory in 3641 mode MODE. */ 3642static bool 3643aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p) 3644{ 3645 struct aarch64_address_info addr; 3646 3647 return aarch64_classify_address (&addr, x, mode, MEM, strict_p); 3648} 3649 3650/* Return TRUE if X is a legitimate address for accessing memory in 3651 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store 3652 pair operation. */ 3653bool 3654aarch64_legitimate_address_p (machine_mode mode, rtx x, 3655 RTX_CODE outer_code, bool strict_p) 3656{ 3657 struct aarch64_address_info addr; 3658 3659 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p); 3660} 3661 3662/* Return TRUE if rtx X is immediate constant 0.0 */ 3663bool 3664aarch64_float_const_zero_rtx_p (rtx x) 3665{ 3666 REAL_VALUE_TYPE r; 3667 3668 if (GET_MODE (x) == VOIDmode) 3669 return false; 3670 3671 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 3672 if (REAL_VALUE_MINUS_ZERO (r)) 3673 return !HONOR_SIGNED_ZEROS (GET_MODE (x)); 3674 return REAL_VALUES_EQUAL (r, dconst0); 3675} 3676 3677/* Return the fixed registers used for condition codes. */ 3678 3679static bool 3680aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 3681{ 3682 *p1 = CC_REGNUM; 3683 *p2 = INVALID_REGNUM; 3684 return true; 3685} 3686 3687/* Emit call insn with PAT and do aarch64-specific handling. */ 3688 3689void 3690aarch64_emit_call_insn (rtx pat) 3691{ 3692 rtx insn = emit_call_insn (pat); 3693 3694 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); 3695 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM)); 3696 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM)); 3697} 3698 3699machine_mode 3700aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) 3701{ 3702 /* All floating point compares return CCFP if it is an equality 3703 comparison, and CCFPE otherwise. 
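   CCFP comparisons can use the non-signaling FCMP instruction, whereas
   CCFPE comparisons need the signaling FCMPE, which raises the Invalid
   Operation exception even for quiet NaNs; the ordered relations
   LT/LE/GT/GE require that behaviour, hence CCFPEmode.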
*/ 3704 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3705 { 3706 switch (code) 3707 { 3708 case EQ: 3709 case NE: 3710 case UNORDERED: 3711 case ORDERED: 3712 case UNLT: 3713 case UNLE: 3714 case UNGT: 3715 case UNGE: 3716 case UNEQ: 3717 case LTGT: 3718 return CCFPmode; 3719 3720 case LT: 3721 case LE: 3722 case GT: 3723 case GE: 3724 return CCFPEmode; 3725 3726 default: 3727 gcc_unreachable (); 3728 } 3729 } 3730 3731 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) 3732 && y == const0_rtx 3733 && (code == EQ || code == NE || code == LT || code == GE) 3734 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND 3735 || GET_CODE (x) == NEG)) 3736 return CC_NZmode; 3737 3738 /* A compare with a shifted operand. Because of canonicalization, 3739 the comparison will have to be swapped when we emit the assembly 3740 code. */ 3741 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) 3742 && (REG_P (y) || GET_CODE (y) == SUBREG) 3743 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT 3744 || GET_CODE (x) == LSHIFTRT 3745 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) 3746 return CC_SWPmode; 3747 3748 /* Similarly for a negated operand, but we can only do this for 3749 equalities. */ 3750 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) 3751 && (REG_P (y) || GET_CODE (y) == SUBREG) 3752 && (code == EQ || code == NE) 3753 && GET_CODE (x) == NEG) 3754 return CC_Zmode; 3755 3756 /* A compare of a mode narrower than SI mode against zero can be done 3757 by extending the value in the comparison. */ 3758 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode) 3759 && y == const0_rtx) 3760 /* Only use sign-extension if we really need it. */ 3761 return ((code == GT || code == GE || code == LE || code == LT) 3762 ? CC_SESWPmode : CC_ZESWPmode); 3763 3764 /* A test for unsigned overflow. */ 3765 if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) 3766 && code == NE 3767 && GET_CODE (x) == PLUS 3768 && GET_CODE (y) == ZERO_EXTEND) 3769 return CC_Cmode; 3770 3771 /* For everything else, return CCmode. 
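   That is the full four-flag result of an ordinary CMP (an alias of
   SUBS), so every integer condition, signed or unsigned, can be tested
   against it.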
*/ 3772 return CCmode; 3773} 3774 3775static int 3776aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code); 3777 3778int 3779aarch64_get_condition_code (rtx x) 3780{ 3781 machine_mode mode = GET_MODE (XEXP (x, 0)); 3782 enum rtx_code comp_code = GET_CODE (x); 3783 3784 if (GET_MODE_CLASS (mode) != MODE_CC) 3785 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1)); 3786 return aarch64_get_condition_code_1 (mode, comp_code); 3787} 3788 3789static int 3790aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) 3791{ 3792 int ne = -1, eq = -1; 3793 switch (mode) 3794 { 3795 case CCFPmode: 3796 case CCFPEmode: 3797 switch (comp_code) 3798 { 3799 case GE: return AARCH64_GE; 3800 case GT: return AARCH64_GT; 3801 case LE: return AARCH64_LS; 3802 case LT: return AARCH64_MI; 3803 case NE: return AARCH64_NE; 3804 case EQ: return AARCH64_EQ; 3805 case ORDERED: return AARCH64_VC; 3806 case UNORDERED: return AARCH64_VS; 3807 case UNLT: return AARCH64_LT; 3808 case UNLE: return AARCH64_LE; 3809 case UNGT: return AARCH64_HI; 3810 case UNGE: return AARCH64_PL; 3811 default: return -1; 3812 } 3813 break; 3814 3815 case CC_DNEmode: 3816 ne = AARCH64_NE; 3817 eq = AARCH64_EQ; 3818 break; 3819 3820 case CC_DEQmode: 3821 ne = AARCH64_EQ; 3822 eq = AARCH64_NE; 3823 break; 3824 3825 case CC_DGEmode: 3826 ne = AARCH64_GE; 3827 eq = AARCH64_LT; 3828 break; 3829 3830 case CC_DLTmode: 3831 ne = AARCH64_LT; 3832 eq = AARCH64_GE; 3833 break; 3834 3835 case CC_DGTmode: 3836 ne = AARCH64_GT; 3837 eq = AARCH64_LE; 3838 break; 3839 3840 case CC_DLEmode: 3841 ne = AARCH64_LE; 3842 eq = AARCH64_GT; 3843 break; 3844 3845 case CC_DGEUmode: 3846 ne = AARCH64_CS; 3847 eq = AARCH64_CC; 3848 break; 3849 3850 case CC_DLTUmode: 3851 ne = AARCH64_CC; 3852 eq = AARCH64_CS; 3853 break; 3854 3855 case CC_DGTUmode: 3856 ne = AARCH64_HI; 3857 eq = AARCH64_LS; 3858 break; 3859 3860 case CC_DLEUmode: 3861 ne = AARCH64_LS; 3862 eq = AARCH64_HI; 3863 break; 3864 3865 case CCmode: 3866 switch (comp_code) 3867 { 3868 case NE: return AARCH64_NE; 3869 case EQ: return AARCH64_EQ; 3870 case GE: return AARCH64_GE; 3871 case GT: return AARCH64_GT; 3872 case LE: return AARCH64_LE; 3873 case LT: return AARCH64_LT; 3874 case GEU: return AARCH64_CS; 3875 case GTU: return AARCH64_HI; 3876 case LEU: return AARCH64_LS; 3877 case LTU: return AARCH64_CC; 3878 default: return -1; 3879 } 3880 break; 3881 3882 case CC_SWPmode: 3883 case CC_ZESWPmode: 3884 case CC_SESWPmode: 3885 switch (comp_code) 3886 { 3887 case NE: return AARCH64_NE; 3888 case EQ: return AARCH64_EQ; 3889 case GE: return AARCH64_LE; 3890 case GT: return AARCH64_LT; 3891 case LE: return AARCH64_GE; 3892 case LT: return AARCH64_GT; 3893 case GEU: return AARCH64_LS; 3894 case GTU: return AARCH64_CC; 3895 case LEU: return AARCH64_CS; 3896 case LTU: return AARCH64_HI; 3897 default: return -1; 3898 } 3899 break; 3900 3901 case CC_NZmode: 3902 switch (comp_code) 3903 { 3904 case NE: return AARCH64_NE; 3905 case EQ: return AARCH64_EQ; 3906 case GE: return AARCH64_PL; 3907 case LT: return AARCH64_MI; 3908 default: return -1; 3909 } 3910 break; 3911 3912 case CC_Zmode: 3913 switch (comp_code) 3914 { 3915 case NE: return AARCH64_NE; 3916 case EQ: return AARCH64_EQ; 3917 default: return -1; 3918 } 3919 break; 3920 3921 case CC_Cmode: 3922 switch (comp_code) 3923 { 3924 case NE: return AARCH64_CS; 3925 case EQ: return AARCH64_CC; 3926 default: return -1; 3927 } 3928 break; 3929 3930 default: 3931 return -1; 3932 break; 3933 } 3934 3935 if (comp_code == NE) 3936 
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}

bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}

/* N Z C V.  */
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
  {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
  {AARCH64_CC_C, 0}, /* CS, C == 1.  */
  {0, AARCH64_CC_C}, /* CC, C == 0.  */
  {AARCH64_CC_N, 0}, /* MI, N == 1.  */
  {0, AARCH64_CC_N}, /* PL, N == 0.  */
  {AARCH64_CC_V, 0}, /* VS, V == 1.  */
  {0, AARCH64_CC_V}, /* VC, V == 0.  */
  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V}, /* GE, N == V.  */
  {AARCH64_CC_V, 0}, /* LT, N != V.  */
  {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
  {0, 0}, /* AL, Any.  */
  {0, 0}, /* NV, Any.  */
};

int
aarch64_ccmp_mode_to_code (enum machine_mode mode)
{
  switch (mode)
    {
    case CC_DNEmode:
      return NE;

    case CC_DEQmode:
      return EQ;

    case CC_DLEmode:
      return LE;

    case CC_DGTmode:
      return GT;

    case CC_DLTmode:
      return LT;

    case CC_DGEmode:
      return GE;

    case CC_DLEUmode:
      return LEU;

    case CC_DGTUmode:
      return GTU;

    case CC_DLTUmode:
      return LTU;

    case CC_DGEUmode:
      return GEU;

    default:
      gcc_unreachable ();
    }
}


void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (f, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (f, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.
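	 For example, a (const_int 16) operand prints as 'h', so a pattern
	 template along the lines of "uxt%e2 %w0, %w1" (operand numbers
	 illustrative) would assemble to uxth.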
      {
        int n;

        if (!CONST_INT_P (x)
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
        int cond_code;
        /* Print a condition (eq, ne, etc).  */

        /* CONST_TRUE_RTX means always -- that's the default.  */
        if (x == const_true_rtx)
          return;

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
        int cond_code;
        /* Print the inverse of a condition (eq <-> ne, etc).  */

        /* CONST_TRUE_RTX means never -- that's the default.  */
        if (x == const_true_rtx)
          {
            fputs ("nv", f);
            return;
          }

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
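      /* The register is printed in its 'q' form, numbered one above X
         (e.g. for the second register of a 128-bit pair).  */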
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through.  */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names [REGNO (x)]);
          break;

        case MEM:
          aarch64_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (
                aarch64_const_vec_all_same_in_range_p (x,
                                                       HOST_WIDE_INT_MIN,
                                                       HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            {
              fputc ('0', f);
            }
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
             be getting CONST_DOUBLEs holding integers.  */
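          /* Such a CONST_DOUBLE would have VOIDmode; the assert below
             catches that case.  */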
          gcc_assert (GET_MODE (x) != VOIDmode);
          if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              REAL_VALUE_TYPE r;
              REAL_VALUE_FROM_CONST_DOUBLE (r, x);
              real_to_decimal_for_mode (float_buf, &r,
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;
        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;
        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'K':
      {
        int cond_code;
        /* Print nzcv.  */

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'k':
      {
        int cond_code;
        /* Print nzcv.  */
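        /* 'K' above prints the AND column of aarch64_nzcv_codes for a
           conditional compare; 'k' prints the IOR column.  */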

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}

void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
                                MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}

bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return true;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return true;
    }

  return false;
}

/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}

static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
          || mode == TImode)
        base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
                       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
         -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
        base_offset = (offset + 0x100) & ~0x1ff;
      else
        base_offset = offset & ~0xfff;

      if (base_offset == 0)
        return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
                               NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}

/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
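  /* That is, the (plus (plus reg c1) c2) form created by the
     displacement-splitting code below.  */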
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
         BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Align down.  */
              low = low - offs;
              high = high + offs;
            }
          else
            {
              /* Align up.  */
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
                        gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}


static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
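  /* (Moving a 128-bit value through GENERAL_REGS would need a
     two-register LDP/STP, whose immediate offset range is narrower.)  */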
  if (TARGET_FLOAT && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}

static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;

      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
         LR in the function, then we'll want a frame pointer after all, so
         prevent this elimination to ensure a frame pointer is used.  */
      if (to == STACK_POINTER_REGNUM
          && flag_omit_leaf_frame_pointer
          && df_regs_ever_live_p (LR_REGNUM))
        return false;
    }

  return true;
}

HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.hard_fp_offset
                - cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.frame_size
                - cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}

/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}


static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}

static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
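  /* The trampoline layout is 16 bytes of code (two loads and a branch,
     plus padding) followed by the function address and then the static
     chain value.  */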
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}

static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
        aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16
                                     : (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}

static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject them
     outright now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}

void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

const char*
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}


/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
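/* For example, SHIFT == 2 with MASK == 0x3fc describes (X & 0xff) << 2,
   i.e. a UXTB scaled by 4, so the result is 8.  A shift/mask combination
   that does not correspond to UXTB/UXTH/UXTW yields 0.  */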
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}

static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}


/* Costs.  */

/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}

/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT, which may be part of a multiply-accumulate rtx.  Return
   the calculated cost of the expression, recursing manually in to
   operands where needed.  */
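/* OUTER is the code of the enclosing expression; a PLUS or MINUS parent
   means the multiply may fuse into a multiply-accumulate (see
   maybe_fma below).  */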
static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool maybe_fma = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (CONST_INT_P (op1)
          && exact_log2 (INTVAL (op1)) > 0)
        {
          if (speed)
            {
              if (maybe_fma)
                /* ADD (shifted register).  */
                cost += extra_cost->alu.arith_shift;
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }

          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

          return cost;
        }

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
           && GET_CODE (op1) == ZERO_EXTEND)
          || (GET_CODE (op0) == SIGN_EXTEND
              && GET_CODE (op1) == SIGN_EXTEND))
        {
          cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

          if (speed)
            {
              if (maybe_fma)
                /* MADD/SMADDL/UMADDL.  */
                cost += extra_cost->mult[0].extend_add;
              else
                /* MUL/SMULL/UMULL.  */
                cost += extra_cost->mult[0].extend;
            }

          return cost;
        }

      /* This is either an integer multiply or an FMA.  In both cases
         we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);

      if (speed)
        {
          if (maybe_fma)
            /* MADD.  */
            cost += extra_cost->mult[mode == DImode].add;
          else
            /* MUL.  */
            cost += extra_cost->mult[mode == DImode].simple;
        }

      return cost;
    }
  else
    {
      if (speed)
        {
          /* Floating-point FMA/FMUL can also support negations of the
             operands, unless the rounding mode is upward or downward in
             which case FNMUL is different from FMUL with operand negation.  */
          bool neg0 = GET_CODE (op0) == NEG;
          bool neg1 = GET_CODE (op1) == NEG;
          if (maybe_fma || !flag_rounding_math || (neg0 && neg1))
            {
              if (neg0)
                op0 = XEXP (op0, 0);
              if (neg1)
                op1 = XEXP (op1, 0);
            }

          if (maybe_fma)
            /* FMADD/FNMADD/FNMSUB/FMSUB.  */
            cost += extra_cost->fp[mode == DFmode].fma;
          else
            /* FMUL/FNMUL.  */
            cost += extra_cost->fp[mode == DFmode].mult;
        }

      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}

static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
        cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
        cost += addr_cost->post_modify;
      else
        gcc_unreachable ();

      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
        case 16:
          cost += addr_cost->addr_scale_costs.hi;
          break;

        case 32:
          cost += addr_cost->addr_scale_costs.si;
          break;

        case 64:
          cost += addr_cost->addr_scale_costs.di;
          break;

        /* We can't tell, or this is a 128-bit vector.  */
        default:
          cost += addr_cost->addr_scale_costs.ti;
          break;
        }
    }

  return cost;
}

/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        {
          return true;
        }
    }

  return false;
}

static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}

/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}

/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
        *cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            {
              rtx address = XEXP (op0, 0);
              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->ldst.store;
              else if (mode == SFmode)
                *cost += extra_cost->ldst.storef;
              else if (mode == DFmode)
                *cost += extra_cost->ldst.stored;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }

          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

          /* Fall through.  */
        case REG:
          /* const0_rtx is in general free, but we will use an
             instruction to set a register to 0.  */
          if (REG_P (op1) || op1 == const0_rtx)
            {
              /* The cost is 1 per register copied.  */
              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
                              / UNITS_PER_WORD;
              *cost = COSTS_N_INSNS (n_minus_1 + 1);
            }
          else
            /* Cost is just the cost of the RHS of the set.  */
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case ZERO_EXTRACT:
        case SIGN_EXTRACT:
          /* Bit-field insertion.  Strip any redundant widening of
             the RHS to meet the width of the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && CONST_INT_P (XEXP (op0, 1))
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);

          if (CONST_INT_P (op1))
            {
              /* MOV immediate is assumed to always be cheap.  */
              *cost = COSTS_N_INSNS (1);
            }
          else
            {
              /* BFM.  */
              if (speed)
                *cost += extra_cost->alu.bfi;
              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
            }

          return true;

        default:
          /* We can't make sense of this, assume default cost.  */
          *cost = COSTS_N_INSNS (1);
          return false;
        }
      return false;

    case CONST_INT:
      /* If an instruction can incorporate a constant within the
         instruction, the instruction's expression avoids calling
         rtx_cost() on the constant.  If rtx_cost() is called on a
         constant, then it is usually because the constant must be
         moved into a register by one or more instructions.

         The exception is constant 0, which can be expressed
         as XZR/WZR and is therefore free.  The exception to this is
         if we have (set (reg) (const0_rtx)) in which case we must cost
         the move.  However, we can catch that when we cost the SET, so
         we don't need to consider that here.  */
      if (x == const0_rtx)
        *cost = 0;
      else
        {
          /* To an approximation, building any other constant is
             proportionally expensive to the number of instructions
             required to build that constant.  This is true whether we
             are compiling for SPEED or otherwise.  */
          *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
                                 (NULL_RTX, x, false, mode));
        }
      return true;

    case CONST_DOUBLE:
      if (speed)
        {
          /* mov[df,sf]_aarch64.  */
          if (aarch64_float_const_representable_p (x))
            /* FMOV (scalar immediate).  */
            *cost += extra_cost->fp[mode == DFmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
              /* This will be a load from memory.  */
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            /* Otherwise this is +0.0.  We get this using MOVI d0, #0
               or MOV v0.s[0], wzr - neither of which are modeled by the
               cost tables.  Just use the default cost.  */
            {
            }
        }

      return true;

    case MEM:
      if (speed)
        {
          /* For loads we want the base cost of a load, plus an
             approximation for the additional cost of the addressing
             mode.  */
          rtx address = XEXP (x, 0);
          if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += extra_cost->ldst.load;
          else if (mode == SFmode)
            *cost += extra_cost->ldst.loadf;
          else if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;

          *cost +=
            COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                 0, speed));
        }

      return true;

    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
            {
              /* CSETM.  */
              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
              return true;
            }

          /* Cost this as SUB wzr, X.  */
          op0 = CONST0_RTX (GET_MODE (x));
          op1 = XEXP (x, 0);
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          /* Support (neg(fma...)) as a single instruction only if
             sign of zeros is unimportant.  This matches the decision
             making in aarch64.md.  */
          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
            {
              /* FNMADD.  */
              *cost = rtx_cost (op0, NEG, 0, speed);
              return true;
            }
          if (GET_CODE (op0) == MULT)
            {
              /* FNMUL.  */
              *cost = rtx_cost (op0, NEG, 0, speed);
              return true;
            }
          if (speed)
            /* FNEG.  */
            *cost += extra_cost->fp[mode == DFmode].neg;
          return false;
        }

      return false;

    case CLRSB:
    case CLZ:
      if (speed)
        *cost += extra_cost->alu.clz;

      return false;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        {
          /* TODO: A write to the CC flags possibly costs extra, this
             needs encoding in the cost tables.  */

          /* CC_ZESWPmode supports zero extend for free.  */
          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
            op0 = XEXP (op0, 0);

          /* ANDS.  */
          if (GET_CODE (op0) == AND)
            {
              x = op0;
              goto cost_logic;
            }

          if (GET_CODE (op0) == PLUS)
            {
              /* ADDS (and CMN alias).  */
              x = op0;
              goto cost_plus;
            }

          if (GET_CODE (op0) == MINUS)
            {
              /* SUBS.  */
              x = op0;
              goto cost_minus;
            }

          if (GET_CODE (op1) == NEG)
            {
              /* CMN.  */
              if (speed)
                *cost += extra_cost->alu.arith;

              *cost += rtx_cost (op0, COMPARE, 0, speed);
              *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
              return true;
            }

          /* CMP.

             Compare can freely swap the order of operands, and
             canonicalization puts the more complex operation first.
             But the integer MINUS logic expects the shift/extend
             operation in op1.  */
          if (! (REG_P (op0)
                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
            {
              op0 = XEXP (x, 1);
              op1 = XEXP (x, 0);
            }
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
        {
          /* FCMP.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].compare;

          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
            {
              /* FCMP supports constant 0.0 for no extra cost.  */
              return true;
            }
          return false;
        }

      return false;

    case MINUS:
      {
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_minus:
        /* Detect valid immediates.  */
        if ((GET_MODE_CLASS (mode) == MODE_INT
             || (GET_MODE_CLASS (mode) == MODE_CC
                 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, MINUS, 0, speed);

            if (speed)
              /* SUB(S) (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for SUB (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op1, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
                               (enum rtx_code) GET_CODE (op1),
                               0, speed);
            return true;
          }

        rtx new_op1 = aarch64_strip_extend (op1);

        /* Cost this as an FMA-alike operation.  */
        if ((GET_CODE (new_op1) == MULT
             || GET_CODE (new_op1) == ASHIFT)
            && code != COMPARE)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
                                            (enum rtx_code) code,
                                            speed);
            *cost += rtx_cost (op0, MINUS, 0, speed);
            return true;
          }

        *cost += rtx_cost (new_op1, MINUS, 1, speed);

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* SUB(S).  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FSUB.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case PLUS:
      {
        rtx new_op0;

        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_plus:
        if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
            || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
          {
            /* CSINC.  */
            *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        if (GET_MODE_CLASS (mode) == MODE_INT
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, PLUS, 0, speed);

            if (speed)
              /* ADD (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for ADD (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op0, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
                               (enum rtx_code) GET_CODE (op0),
                               0, speed);
            return true;
          }

        /* Strip any extend, leave shifts behind as we will
           cost them through mult_cost.  */
        new_op0 = aarch64_strip_extend (op0);

        if (GET_CODE (new_op0) == MULT
            || GET_CODE (new_op0) == ASHIFT)
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                  + rtx_cost (op1, PLUS, 1, speed));

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* ADD.  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FADD.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
        *cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);

          if (speed)
            *cost += extra_cost->alu.rev;

          return true;
        }
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
          && GET_CODE (op0) == MULT
          && CONST_INT_P (XEXP (op0, 1))
          && CONST_INT_P (op1)
          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
                               INTVAL (op1)) != 0)
        {
          /* This is a UBFM/SBFM.  */
          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          /* We possibly get the immediate for free, this is not
             modelled.  */
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

              if (speed)
                *cost += extra_cost->alu.logical;

              return true;
            }
          else
            {
              rtx new_op0 = op0;

              /* Handle ORN, EON, or BIC.  */
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);

              new_op0 = aarch64_strip_shift (op0);

              /* If we had a shift on op0 then this is a logical-shift-
                 by-register/immediate operation.  Otherwise, this is just
                 a logical operation.  */
              if (speed)
                {
                  if (new_op0 != op0)
                    {
                      /* Shift by immediate.  */
                      if (CONST_INT_P (XEXP (op0, 1)))
                        *cost += extra_cost->alu.log_shift;
                      else
                        *cost += extra_cost->alu.log_shift_reg;
                    }
                  else
                    *cost += extra_cost->alu.logical;
                }

              /* In both cases we want to cost both operands.  */
              *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
                       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

              return true;
            }
        }
      return false;

    case NOT:
      /* MVN.  */
      if (speed)
        *cost += extra_cost->alu.logical;

      /* The logical instruction could have the shifted register form,
         but the cost is the same if the shift is processed as a separate
         instruction, so we don't bother with it here.  */
      return false;

    case ZERO_EXTEND:
      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
         mode, the operation will in general be free as a write to
         a 'w' register implicitly zeroes the upper bits of an 'x'
         register.  However, if this is

           (set (reg) (zero_extend (reg)))

         we must cost the explicit register move.  */
      if (mode == DImode
          && GET_MODE (op0) == SImode
          && outer == SET)
        {
          int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

          if (!op_cost && speed)
            /* MOV.  */
            *cost += extra_cost->alu.extend;
          else
            /* Free, the cost is that of the SI mode operation.  */
            *cost = op_cost;

          return true;
        }
      else if (MEM_P (XEXP (x, 0)))
        {
          /* All loads can zero extend to any size for free.  */
          *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
          return true;
        }

      /* UXTB/UXTH.  */
      if (speed)
        *cost += extra_cost->alu.extend;

      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        {
          /* LDRSH.  */
          if (speed)
            {
              rtx address = XEXP (XEXP (x, 0), 0);
              *cost += extra_cost->ldst.load_sign_extend;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }
          return true;
        }

      if (speed)
        *cost += extra_cost->alu.extend;
      return false;

    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
             aliases.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          /* We can incorporate zero/sign extend for free.  */
          if (GET_CODE (op0) == ZERO_EXTEND
              || GET_CODE (op0) == SIGN_EXTEND)
            op0 = XEXP (op0, 0);

          *cost += rtx_cost (op0, ASHIFT, 0, speed);
          return true;
        }
      else
        {
          /* LSLV.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* ASR (immediate) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
          return true;
        }
      else
        {
          /* ASR (register) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case SYMBOL_REF:
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        {
          /* LDR.  */
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
               || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
        {
          /* ADRP, followed by ADD.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
               || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
        {
          /* ADR.  */
          if (speed)
            *cost += extra_cost->alu.arith;
        }

      if (flag_pic)
        {
          /* One extra load instruction, after accessing the GOT.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
        *cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
        *cost += extra_cost->alu.bfx;

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
         operands.  */
*/ 6230 return true; 6231 6232 case MOD: 6233 case UMOD: 6234 if (speed) 6235 { 6236 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 6237 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add 6238 + extra_cost->mult[GET_MODE (x) == DImode].idiv); 6239 else if (GET_MODE (x) == DFmode) 6240 *cost += (extra_cost->fp[1].mult 6241 + extra_cost->fp[1].div); 6242 else if (GET_MODE (x) == SFmode) 6243 *cost += (extra_cost->fp[0].mult 6244 + extra_cost->fp[0].div); 6245 } 6246 return false; /* All arguments need to be in registers. */ 6247 6248 case DIV: 6249 case UDIV: 6250 case SQRT: 6251 if (speed) 6252 { 6253 if (GET_MODE_CLASS (mode) == MODE_INT) 6254 /* There is no integer SQRT, so only DIV and UDIV can get 6255 here. */ 6256 *cost += extra_cost->mult[mode == DImode].idiv; 6257 else 6258 *cost += extra_cost->fp[mode == DFmode].div; 6259 } 6260 return false; /* All arguments need to be in registers. */ 6261 6262 case IF_THEN_ELSE: 6263 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), 6264 XEXP (x, 2), cost, speed); 6265 6266 case EQ: 6267 case NE: 6268 case GT: 6269 case GTU: 6270 case LT: 6271 case LTU: 6272 case GE: 6273 case GEU: 6274 case LE: 6275 case LEU: 6276 6277 return false; /* All arguments must be in registers. */ 6278 6279 case FMA: 6280 op0 = XEXP (x, 0); 6281 op1 = XEXP (x, 1); 6282 op2 = XEXP (x, 2); 6283 6284 if (speed) 6285 *cost += extra_cost->fp[mode == DFmode].fma; 6286 6287 /* FMSUB, FNMADD, and FNMSUB are free. */ 6288 if (GET_CODE (op0) == NEG) 6289 op0 = XEXP (op0, 0); 6290 6291 if (GET_CODE (op2) == NEG) 6292 op2 = XEXP (op2, 0); 6293 6294 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, 6295 and the by-element operand as operand 0. */ 6296 if (GET_CODE (op1) == NEG) 6297 op1 = XEXP (op1, 0); 6298 6299 /* Catch vector-by-element operations. The by-element operand can 6300 either be (vec_duplicate (vec_select (x))) or just 6301 (vec_select (x)), depending on whether we are multiplying by 6302 a vector or a scalar. 6303 6304 Canonicalization is not very good in these cases, FMA4 will put the 6305 by-element operand as operand 0, FNMA4 will have it as operand 1. */ 6306 if (GET_CODE (op0) == VEC_DUPLICATE) 6307 op0 = XEXP (op0, 0); 6308 else if (GET_CODE (op1) == VEC_DUPLICATE) 6309 op1 = XEXP (op1, 0); 6310 6311 if (GET_CODE (op0) == VEC_SELECT) 6312 op0 = XEXP (op0, 0); 6313 else if (GET_CODE (op1) == VEC_SELECT) 6314 op1 = XEXP (op1, 0); 6315 6316 /* If the remaining parameters are not registers, 6317 get the cost to put them into registers. */ 6318 *cost += rtx_cost (op0, FMA, 0, speed); 6319 *cost += rtx_cost (op1, FMA, 1, speed); 6320 *cost += rtx_cost (op2, FMA, 2, speed); 6321 return true; 6322 6323 case FLOAT_EXTEND: 6324 if (speed) 6325 *cost += extra_cost->fp[mode == DFmode].widen; 6326 return false; 6327 6328 case FLOAT_TRUNCATE: 6329 if (speed) 6330 *cost += extra_cost->fp[mode == DFmode].narrow; 6331 return false; 6332 6333 case FIX: 6334 case UNSIGNED_FIX: 6335 x = XEXP (x, 0); 6336 /* Strip the rounding part. They will all be implemented 6337 by the fcvt* family of instructions anyway. 
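*/

/* An illustration of why the rounding UNSPEC is stripped below (a
   sketch, not from the original comments): a rounded conversion such
   as

     int f (float x) { return (int) roundf (x); }

   is normally emitted as the single combined instruction

     fcvtas  w0, s0   // convert, rounding to nearest, ties away

   rather than an frinta followed by fcvtzs, so only one toint cost
   should be charged for the whole expression.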
*/ 6338 if (GET_CODE (x) == UNSPEC) 6339 { 6340 unsigned int uns_code = XINT (x, 1); 6341 6342 if (uns_code == UNSPEC_FRINTA 6343 || uns_code == UNSPEC_FRINTM 6344 || uns_code == UNSPEC_FRINTN 6345 || uns_code == UNSPEC_FRINTP 6346 || uns_code == UNSPEC_FRINTZ) 6347 x = XVECEXP (x, 0, 0); 6348 } 6349 6350 if (speed) 6351 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint; 6352 6353 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed); 6354 return true; 6355 6356 case ABS: 6357 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 6358 { 6359 /* FABS and FNEG are analogous. */ 6360 if (speed) 6361 *cost += extra_cost->fp[mode == DFmode].neg; 6362 } 6363 else 6364 { 6365 /* Integer ABS will either be split to 6366 two arithmetic instructions, or will be an ABS 6367 (scalar), which we don't model. */ 6368 *cost = COSTS_N_INSNS (2); 6369 if (speed) 6370 *cost += 2 * extra_cost->alu.arith; 6371 } 6372 return false; 6373 6374 case SMAX: 6375 case SMIN: 6376 if (speed) 6377 { 6378 /* FMAXNM/FMINNM/FMAX/FMIN. 6379 TODO: This may not be accurate for all implementations, but 6380 we do not model this in the cost tables. */ 6381 *cost += extra_cost->fp[mode == DFmode].addsub; 6382 } 6383 return false; 6384 6385 case UNSPEC: 6386 /* The floating point round to integer frint* instructions. */ 6387 if (aarch64_frint_unspec_p (XINT (x, 1))) 6388 { 6389 if (speed) 6390 *cost += extra_cost->fp[mode == DFmode].roundint; 6391 6392 return false; 6393 } 6394 6395 if (XINT (x, 1) == UNSPEC_RBIT) 6396 { 6397 if (speed) 6398 *cost += extra_cost->alu.rev; 6399 6400 return false; 6401 } 6402 break; 6403 6404 case TRUNCATE: 6405 6406 /* Decompose <su>muldi3_highpart. */ 6407 if (/* (truncate:DI */ 6408 mode == DImode 6409 /* (lshiftrt:TI */ 6410 && GET_MODE (XEXP (x, 0)) == TImode 6411 && GET_CODE (XEXP (x, 0)) == LSHIFTRT 6412 /* (mult:TI */ 6413 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 6414 /* (ANY_EXTEND:TI (reg:DI)) 6415 (ANY_EXTEND:TI (reg:DI))) */ 6416 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND 6417 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) 6418 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND 6419 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) 6420 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode 6421 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode 6422 /* (const_int 64) */ 6423 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 6424 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) 6425 { 6426 /* UMULH/SMULH. */ 6427 if (speed) 6428 *cost += extra_cost->mult[mode == DImode].extend; 6429 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), 6430 MULT, 0, speed); 6431 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), 6432 MULT, 1, speed); 6433 return true; 6434 } 6435 6436 /* Fall through. */ 6437 default: 6438 break; 6439 } 6440 6441 if (dump_file && (dump_flags & TDF_DETAILS)) 6442 fprintf (dump_file, 6443 "\nFailed to cost RTX. Assuming default cost.\n"); 6444 6445 return true; 6446} 6447 6448/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost 6449 calculated for X. This cost is stored in *COST. Returns true 6450 if the total cost of X was calculated. 
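*/

/* For example, in an RTL dump taken with the -details modifier
   (e.g. -fdump-rtl-combine-details), the wrapper below prints the
   insn followed by a line such as

     Hot cost: 8 (final)

   where "Hot"/"Cold" reflects SPEED and "final"/"partial" reflects
   the return value of aarch64_rtx_costs.  (Illustrative output only.)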
*/ 6451static bool 6452aarch64_rtx_costs_wrapper (rtx x, int code, int outer, 6453 int param, int *cost, bool speed) 6454{ 6455 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); 6456 6457 if (dump_file && (dump_flags & TDF_DETAILS)) 6458 { 6459 print_rtl_single (dump_file, x); 6460 fprintf (dump_file, "\n%s cost: %d (%s)\n", 6461 speed ? "Hot" : "Cold", 6462 *cost, result ? "final" : "partial"); 6463 } 6464 6465 return result; 6466} 6467 6468static int 6469aarch64_register_move_cost (machine_mode mode, 6470 reg_class_t from_i, reg_class_t to_i) 6471{ 6472 enum reg_class from = (enum reg_class) from_i; 6473 enum reg_class to = (enum reg_class) to_i; 6474 const struct cpu_regmove_cost *regmove_cost 6475 = aarch64_tune_params->regmove_cost; 6476 6477 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ 6478 if (to == CALLER_SAVE_REGS || to == POINTER_REGS) 6479 to = GENERAL_REGS; 6480 6481 if (from == CALLER_SAVE_REGS || from == POINTER_REGS) 6482 from = GENERAL_REGS; 6483 6484 /* Moving between GPR and stack cost is the same as GP2GP. */ 6485 if ((from == GENERAL_REGS && to == STACK_REG) 6486 || (to == GENERAL_REGS && from == STACK_REG)) 6487 return regmove_cost->GP2GP; 6488 6489 /* To/From the stack register, we move via the gprs. */ 6490 if (to == STACK_REG || from == STACK_REG) 6491 return aarch64_register_move_cost (mode, from, GENERAL_REGS) 6492 + aarch64_register_move_cost (mode, GENERAL_REGS, to); 6493 6494 if (GET_MODE_SIZE (mode) == 16) 6495 { 6496 /* 128-bit operations on general registers require 2 instructions. */ 6497 if (from == GENERAL_REGS && to == GENERAL_REGS) 6498 return regmove_cost->GP2GP * 2; 6499 else if (from == GENERAL_REGS) 6500 return regmove_cost->GP2FP * 2; 6501 else if (to == GENERAL_REGS) 6502 return regmove_cost->FP2GP * 2; 6503 6504 /* When AdvSIMD instructions are disabled it is not possible to move 6505 a 128-bit value directly between Q registers. This is handled in 6506 secondary reload. A general register is used as a scratch to move 6507 the upper DI value and the lower DI value is moved directly, 6508 hence the cost is the sum of three moves. */ 6509 if (! TARGET_SIMD) 6510 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; 6511 6512 return regmove_cost->FP2FP; 6513 } 6514 6515 if (from == GENERAL_REGS && to == GENERAL_REGS) 6516 return regmove_cost->GP2GP; 6517 else if (from == GENERAL_REGS) 6518 return regmove_cost->GP2FP; 6519 else if (to == GENERAL_REGS) 6520 return regmove_cost->FP2GP; 6521 6522 return regmove_cost->FP2FP; 6523} 6524 6525static int 6526aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 6527 reg_class_t rclass ATTRIBUTE_UNUSED, 6528 bool in ATTRIBUTE_UNUSED) 6529{ 6530 return aarch64_tune_params->memmov_cost; 6531} 6532 6533/* Return the number of instructions that can be issued per cycle. */ 6534static int 6535aarch64_sched_issue_rate (void) 6536{ 6537 return aarch64_tune_params->issue_rate; 6538} 6539 6540static int 6541aarch64_sched_first_cycle_multipass_dfa_lookahead (void) 6542{ 6543 int issue_rate = aarch64_sched_issue_rate (); 6544 6545 return issue_rate > 1 && !sched_fusion ? issue_rate : 0; 6546} 6547 6548/* Vectorizer cost model target hooks. */ 6549 6550/* Implement targetm.vectorize.builtin_vectorization_cost. 
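*/

/* A worked example for the vec_construct case below (illustrative
   only): building a V4SF vector element-by-element is costed at
   TYPE_VECTOR_SUBPARTS / 2 + 1 = 4 / 2 + 1 = 3 units, i.e. roughly
   half the element count, rather than a full statement cost per
   element.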
*/ 6551static int 6552aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 6553 tree vectype, 6554 int misalign ATTRIBUTE_UNUSED) 6555{ 6556 unsigned elements; 6557 6558 switch (type_of_cost) 6559 { 6560 case scalar_stmt: 6561 return aarch64_tune_params->vec_costs->scalar_stmt_cost; 6562 6563 case scalar_load: 6564 return aarch64_tune_params->vec_costs->scalar_load_cost; 6565 6566 case scalar_store: 6567 return aarch64_tune_params->vec_costs->scalar_store_cost; 6568 6569 case vector_stmt: 6570 return aarch64_tune_params->vec_costs->vec_stmt_cost; 6571 6572 case vector_load: 6573 return aarch64_tune_params->vec_costs->vec_align_load_cost; 6574 6575 case vector_store: 6576 return aarch64_tune_params->vec_costs->vec_store_cost; 6577 6578 case vec_to_scalar: 6579 return aarch64_tune_params->vec_costs->vec_to_scalar_cost; 6580 6581 case scalar_to_vec: 6582 return aarch64_tune_params->vec_costs->scalar_to_vec_cost; 6583 6584 case unaligned_load: 6585 return aarch64_tune_params->vec_costs->vec_unalign_load_cost; 6586 6587 case unaligned_store: 6588 return aarch64_tune_params->vec_costs->vec_unalign_store_cost; 6589 6590 case cond_branch_taken: 6591 return aarch64_tune_params->vec_costs->cond_taken_branch_cost; 6592 6593 case cond_branch_not_taken: 6594 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost; 6595 6596 case vec_perm: 6597 case vec_promote_demote: 6598 return aarch64_tune_params->vec_costs->vec_stmt_cost; 6599 6600 case vec_construct: 6601 elements = TYPE_VECTOR_SUBPARTS (vectype); 6602 return elements / 2 + 1; 6603 6604 default: 6605 gcc_unreachable (); 6606 } 6607} 6608 6609/* Implement targetm.vectorize.add_stmt_cost. */ 6610static unsigned 6611aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, 6612 struct _stmt_vec_info *stmt_info, int misalign, 6613 enum vect_cost_model_location where) 6614{ 6615 unsigned *cost = (unsigned *) data; 6616 unsigned retval = 0; 6617 6618 if (flag_vect_cost_model) 6619 { 6620 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; 6621 int stmt_cost = 6622 aarch64_builtin_vectorization_cost (kind, vectype, misalign); 6623 6624 /* Statements in an inner loop relative to the loop being 6625 vectorized are weighted more heavily. The value here is 6626 a function (linear for now) of the loop nest level. */ 6627 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) 6628 { 6629 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); 6630 struct loop *loop = LOOP_VINFO_LOOP (loop_info); 6631 unsigned nest_level = loop_depth (loop); 6632 6633 count *= nest_level; 6634 } 6635 6636 retval = (unsigned) (count * stmt_cost); 6637 cost[where] += retval; 6638 } 6639 6640 return retval; 6641} 6642 6643static void initialize_aarch64_code_model (void); 6644 6645/* Parse the architecture extension string. */ 6646 6647static void 6648aarch64_parse_extension (char *str) 6649{ 6650 /* The extension string is parsed left to right. */ 6651 const struct aarch64_option_extension *opt = NULL; 6652 6653 /* Flag to say whether we are adding or removing an extension. 
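*/

/* A worked example of the loop below (illustrative only): for the
   hypothetical option -march=armv8-a+crc+nofp the caller hands this
   function the string "+crc+nofp".  The first iteration starts past
   the '+', matches "crc" and ORs in its flags_on bits; the second
   sees the "no" prefix, sets ADDING_EXT to 0, and clears the
   flags_off bits of "fp".  An unrecognised name ends the walk with
   an "unknown feature modifier" error.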
*/ 6654 int adding_ext = -1; 6655 6656 while (str != NULL && *str != 0) 6657 { 6658 char *ext; 6659 size_t len; 6660 6661 str++; 6662 ext = strchr (str, '+'); 6663 6664 if (ext != NULL) 6665 len = ext - str; 6666 else 6667 len = strlen (str); 6668 6669 if (len >= 2 && strncmp (str, "no", 2) == 0) 6670 { 6671 adding_ext = 0; 6672 len -= 2; 6673 str += 2; 6674 } 6675 else if (len > 0) 6676 adding_ext = 1; 6677 6678 if (len == 0) 6679 { 6680 error ("missing feature modifier after %qs", adding_ext ? "+" 6681 : "+no"); 6682 return; 6683 } 6684 6685 /* Scan over the extensions table trying to find an exact match. */ 6686 for (opt = all_extensions; opt->name != NULL; opt++) 6687 { 6688 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0) 6689 { 6690 /* Add or remove the extension. */ 6691 if (adding_ext) 6692 aarch64_isa_flags |= opt->flags_on; 6693 else 6694 aarch64_isa_flags &= ~(opt->flags_off); 6695 break; 6696 } 6697 } 6698 6699 if (opt->name == NULL) 6700 { 6701 /* Extension not found in list. */ 6702 error ("unknown feature modifier %qs", str); 6703 return; 6704 } 6705 6706 str = ext; 6707 }; 6708 6709 return; 6710} 6711 6712/* Parse the ARCH string. */ 6713 6714static void 6715aarch64_parse_arch (void) 6716{ 6717 char *ext; 6718 const struct processor *arch; 6719 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1); 6720 size_t len; 6721 6722 strcpy (str, aarch64_arch_string); 6723 6724 ext = strchr (str, '+'); 6725 6726 if (ext != NULL) 6727 len = ext - str; 6728 else 6729 len = strlen (str); 6730 6731 if (len == 0) 6732 { 6733 error ("missing arch name in -march=%qs", str); 6734 return; 6735 } 6736 6737 /* Loop through the list of supported ARCHs to find a match. */ 6738 for (arch = all_architectures; arch->name != NULL; arch++) 6739 { 6740 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0) 6741 { 6742 selected_arch = arch; 6743 aarch64_isa_flags = selected_arch->flags; 6744 6745 if (!selected_cpu) 6746 selected_cpu = &all_cores[selected_arch->core]; 6747 6748 if (ext != NULL) 6749 { 6750 /* ARCH string contains at least one extension. */ 6751 aarch64_parse_extension (ext); 6752 } 6753 6754 if (strcmp (selected_arch->arch, selected_cpu->arch)) 6755 { 6756 warning (0, "switch -mcpu=%s conflicts with -march=%s switch", 6757 selected_cpu->name, selected_arch->name); 6758 } 6759 6760 return; 6761 } 6762 } 6763 6764 /* ARCH name not found in list. */ 6765 error ("unknown value %qs for -march", str); 6766 return; 6767} 6768 6769/* Parse the CPU string. */ 6770 6771static void 6772aarch64_parse_cpu (void) 6773{ 6774 char *ext; 6775 const struct processor *cpu; 6776 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1); 6777 size_t len; 6778 6779 strcpy (str, aarch64_cpu_string); 6780 6781 ext = strchr (str, '+'); 6782 6783 if (ext != NULL) 6784 len = ext - str; 6785 else 6786 len = strlen (str); 6787 6788 if (len == 0) 6789 { 6790 error ("missing cpu name in -mcpu=%qs", str); 6791 return; 6792 } 6793 6794 /* Loop through the list of supported CPUs to find a match. */ 6795 for (cpu = all_cores; cpu->name != NULL; cpu++) 6796 { 6797 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0) 6798 { 6799 selected_cpu = cpu; 6800 aarch64_isa_flags = selected_cpu->flags; 6801 6802 if (ext != NULL) 6803 { 6804 /* CPU string contains at least one extension. */ 6805 aarch64_parse_extension (ext); 6806 } 6807 6808 return; 6809 } 6810 } 6811 6812 /* CPU name not found in list. 
*/ 6813 error ("unknown value %qs for -mcpu", str); 6814 return; 6815} 6816 6817/* Parse the TUNE string. */ 6818 6819static void 6820aarch64_parse_tune (void) 6821{ 6822 const struct processor *cpu; 6823 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1); 6824 strcpy (str, aarch64_tune_string); 6825 6826 /* Loop through the list of supported CPUs to find a match. */ 6827 for (cpu = all_cores; cpu->name != NULL; cpu++) 6828 { 6829 if (strcmp (cpu->name, str) == 0) 6830 { 6831 selected_tune = cpu; 6832 return; 6833 } 6834 } 6835 6836 /* CPU name not found in list. */ 6837 error ("unknown value %qs for -mtune", str); 6838 return; 6839} 6840 6841 6842/* Implement TARGET_OPTION_OVERRIDE. */ 6843 6844static void 6845aarch64_override_options (void) 6846{ 6847 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU. 6848 If either of -march or -mtune is given, they override their 6849 respective component of -mcpu. 6850 6851 So, first parse AARCH64_CPU_STRING, then the others, be careful 6852 with -march as, if -mcpu is not present on the command line, march 6853 must set a sensible default CPU. */ 6854 if (aarch64_cpu_string) 6855 { 6856 aarch64_parse_cpu (); 6857 } 6858 6859 if (aarch64_arch_string) 6860 { 6861 aarch64_parse_arch (); 6862 } 6863 6864 if (aarch64_tune_string) 6865 { 6866 aarch64_parse_tune (); 6867 } 6868 6869#ifndef HAVE_AS_MABI_OPTION 6870 /* The compiler may have been configured with 2.23.* binutils, which does 6871 not have support for ILP32. */ 6872 if (TARGET_ILP32) 6873 error ("Assembler does not support -mabi=ilp32"); 6874#endif 6875 6876 initialize_aarch64_code_model (); 6877 6878 aarch64_build_bitmask_table (); 6879 6880 /* This target defaults to strict volatile bitfields. */ 6881 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) 6882 flag_strict_volatile_bitfields = 1; 6883 6884 /* If the user did not specify a processor, choose the default 6885 one for them. This will be the CPU set during configuration using 6886 --with-cpu, otherwise it is "generic". */ 6887 if (!selected_cpu) 6888 { 6889 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f]; 6890 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6; 6891 } 6892 6893 gcc_assert (selected_cpu); 6894 6895 if (!selected_tune) 6896 selected_tune = selected_cpu; 6897 6898 aarch64_tune_flags = selected_tune->flags; 6899 aarch64_tune = selected_tune->core; 6900 aarch64_tune_params = selected_tune->tune; 6901 aarch64_architecture_version = selected_cpu->architecture_version; 6902 6903 if (aarch64_fix_a53_err835769 == 2) 6904 { 6905#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT 6906 aarch64_fix_a53_err835769 = 1; 6907#else 6908 aarch64_fix_a53_err835769 = 0; 6909#endif 6910 } 6911 6912 aarch64_override_options_after_change (); 6913} 6914 6915/* Implement targetm.override_options_after_change. */ 6916 6917static void 6918aarch64_override_options_after_change (void) 6919{ 6920 /* The logic here is that if we are disabling all frame pointer generation 6921 then we do not need to disable leaf frame pointer generation as a 6922 separate operation. But if we are *only* disabling leaf frame pointer 6923 generation then we set flag_omit_frame_pointer to true, but in 6924 aarch64_frame_pointer_required we return false only for leaf functions. 6925 6926 PR 70044: We have to be careful about being called multiple times for the 6927 same function. 
Once we have decided to set flag_omit_frame_pointer just 6928 so that we can omit leaf frame pointers, we must then not interpret a 6929 second call as meaning that all frame pointer generation should be 6930 omitted. We do this by setting flag_omit_frame_pointer to a special, 6931 non-zero value. */ 6932 6933 if (flag_omit_frame_pointer == 2) 6934 flag_omit_frame_pointer = 0; 6935 6936 if (flag_omit_frame_pointer) 6937 flag_omit_leaf_frame_pointer = false; 6938 else if (flag_omit_leaf_frame_pointer) 6939 flag_omit_frame_pointer = 2; 6940 6941 /* If not optimizing for size, set the default 6942 alignment to what the target wants */ 6943 if (!optimize_size) 6944 { 6945 if (align_loops <= 0) 6946 align_loops = aarch64_tune_params->loop_align; 6947 if (align_jumps <= 0) 6948 align_jumps = aarch64_tune_params->jump_align; 6949 if (align_functions <= 0) 6950 align_functions = aarch64_tune_params->function_align; 6951 } 6952} 6953 6954static struct machine_function * 6955aarch64_init_machine_status (void) 6956{ 6957 struct machine_function *machine; 6958 machine = ggc_cleared_alloc<machine_function> (); 6959 return machine; 6960} 6961 6962void 6963aarch64_init_expanders (void) 6964{ 6965 init_machine_status = aarch64_init_machine_status; 6966} 6967 6968/* A checking mechanism for the implementation of the various code models. */ 6969static void 6970initialize_aarch64_code_model (void) 6971{ 6972 if (flag_pic) 6973 { 6974 switch (aarch64_cmodel_var) 6975 { 6976 case AARCH64_CMODEL_TINY: 6977 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC; 6978 break; 6979 case AARCH64_CMODEL_SMALL: 6980 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; 6981 break; 6982 case AARCH64_CMODEL_LARGE: 6983 sorry ("code model %qs with -f%s", "large", 6984 flag_pic > 1 ? "PIC" : "pic"); 6985 default: 6986 gcc_unreachable (); 6987 } 6988 } 6989 else 6990 aarch64_cmodel = aarch64_cmodel_var; 6991} 6992 6993/* Return true if SYMBOL_REF X binds locally. */ 6994 6995static bool 6996aarch64_symbol_binds_local_p (const_rtx x) 6997{ 6998 return (SYMBOL_REF_DECL (x) 6999 ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) 7000 : SYMBOL_REF_LOCAL_P (x)); 7001} 7002 7003/* Return true if SYMBOL_REF X is thread local */ 7004static bool 7005aarch64_tls_symbol_p (rtx x) 7006{ 7007 if (! TARGET_HAVE_TLS) 7008 return false; 7009 7010 if (GET_CODE (x) != SYMBOL_REF) 7011 return false; 7012 7013 return SYMBOL_REF_TLS_MODEL (x) != 0; 7014} 7015 7016/* Classify a TLS symbol into one of the TLS kinds. */ 7017enum aarch64_symbol_type 7018aarch64_classify_tls_symbol (rtx x) 7019{ 7020 enum tls_model tls_kind = tls_symbolic_operand_type (x); 7021 7022 switch (tls_kind) 7023 { 7024 case TLS_MODEL_GLOBAL_DYNAMIC: 7025 case TLS_MODEL_LOCAL_DYNAMIC: 7026 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD; 7027 7028 case TLS_MODEL_INITIAL_EXEC: 7029 return SYMBOL_SMALL_GOTTPREL; 7030 7031 case TLS_MODEL_LOCAL_EXEC: 7032 return SYMBOL_SMALL_TPREL; 7033 7034 case TLS_MODEL_EMULATED: 7035 case TLS_MODEL_NONE: 7036 return SYMBOL_FORCE_TO_MEM; 7037 7038 default: 7039 gcc_unreachable (); 7040 } 7041} 7042 7043/* Return the method that should be used to access SYMBOL_REF or 7044 LABEL_REF X in context CONTEXT. 
*/

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  /* When we retrieve a symbol + offset address, we have to make
	     sure the offset does not cause overflow of the final address.
	     We have no way of knowing the address of the symbol at compile
	     time, so we cannot accurately say whether the distance between
	     the PC and symbol + offset is outside the addressable range
	     of +/-1MB in the TINY code model.  So we rely on images not
	     being greater than 1MB and cap the offset at 1MB; anything
	     beyond that has to be loaded using an alternative
	     mechanism.  */
	  if (SYMBOL_REF_WEAK (x)
	      || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  /* Same reasoning as the tiny code model, but the offset cap
	     here is 4GB.  */
	  if (SYMBOL_REF_WEAK (x)
	      || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
			    HOST_WIDE_INT_C (4294967264)))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}

bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amenable to splitting moves properly and to
     gating correctly on TARGET_SIMD.  For now, reject all
     constants which are not destined for SFmode or DFmode registers.
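*/

/* An illustrative note (not from the original sources): the
   "quarter-precision" constants accepted below are the values the
   FMOV (immediate) encoding can express, i.e.
   (-1)^s * (1 + n/16) * 2^e with 0 <= n <= 15 and -3 <= e <= 4, e.g.

     double a (void) { return 2.5; }   // fmov  d0, 2.5e+0
     double b (void) { return 0.1; }   // not encodable: pool load

   plus +0.0, which has its own zero idiom.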
*/ 7153 if (!(mode == SFmode || mode == DFmode)) 7154 return false; 7155 7156 if (aarch64_float_const_zero_rtx_p (x)) 7157 return true; 7158 return aarch64_float_const_representable_p (x); 7159} 7160 7161static bool 7162aarch64_legitimate_constant_p (machine_mode mode, rtx x) 7163{ 7164 /* Do not allow vector struct mode constants. We could support 7165 0 and -1 easily, but they need support in aarch64-simd.md. */ 7166 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode)) 7167 return false; 7168 7169 /* This could probably go away because 7170 we now decompose CONST_INTs according to expand_mov_immediate. */ 7171 if ((GET_CODE (x) == CONST_VECTOR 7172 && aarch64_simd_valid_immediate (x, mode, false, NULL)) 7173 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x)) 7174 return !targetm.cannot_force_const_mem (mode, x); 7175 7176 if (GET_CODE (x) == HIGH 7177 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) 7178 return true; 7179 7180 return aarch64_constant_address_p (x); 7181} 7182 7183rtx 7184aarch64_load_tp (rtx target) 7185{ 7186 if (!target 7187 || GET_MODE (target) != Pmode 7188 || !register_operand (target, Pmode)) 7189 target = gen_reg_rtx (Pmode); 7190 7191 /* Can return in any reg. */ 7192 emit_insn (gen_aarch64_load_tp_hard (target)); 7193 return target; 7194} 7195 7196/* On AAPCS systems, this is the "struct __va_list". */ 7197static GTY(()) tree va_list_type; 7198 7199/* Implement TARGET_BUILD_BUILTIN_VA_LIST. 7200 Return the type to use as __builtin_va_list. 7201 7202 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as: 7203 7204 struct __va_list 7205 { 7206 void *__stack; 7207 void *__gr_top; 7208 void *__vr_top; 7209 int __gr_offs; 7210 int __vr_offs; 7211 }; */ 7212 7213static tree 7214aarch64_build_builtin_va_list (void) 7215{ 7216 tree va_list_name; 7217 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 7218 7219 /* Create the type. */ 7220 va_list_type = lang_hooks.types.make_type (RECORD_TYPE); 7221 /* Give it the required name. */ 7222 va_list_name = build_decl (BUILTINS_LOCATION, 7223 TYPE_DECL, 7224 get_identifier ("__va_list"), 7225 va_list_type); 7226 DECL_ARTIFICIAL (va_list_name) = 1; 7227 TYPE_NAME (va_list_type) = va_list_name; 7228 TYPE_STUB_DECL (va_list_type) = va_list_name; 7229 7230 /* Create the fields. 
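*/

/* For reference (illustrative, following the AAPCS64 layout): on LP64
   the five fields created below give __va_list a 32-byte layout:

     offset  0: void *__stack
     offset  8: void *__gr_top
     offset 16: void *__vr_top
     offset 24: int   __gr_offs
     offset 28: int   __vr_offs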
*/ 7231 f_stack = build_decl (BUILTINS_LOCATION, 7232 FIELD_DECL, get_identifier ("__stack"), 7233 ptr_type_node); 7234 f_grtop = build_decl (BUILTINS_LOCATION, 7235 FIELD_DECL, get_identifier ("__gr_top"), 7236 ptr_type_node); 7237 f_vrtop = build_decl (BUILTINS_LOCATION, 7238 FIELD_DECL, get_identifier ("__vr_top"), 7239 ptr_type_node); 7240 f_groff = build_decl (BUILTINS_LOCATION, 7241 FIELD_DECL, get_identifier ("__gr_offs"), 7242 integer_type_node); 7243 f_vroff = build_decl (BUILTINS_LOCATION, 7244 FIELD_DECL, get_identifier ("__vr_offs"), 7245 integer_type_node); 7246 7247 DECL_ARTIFICIAL (f_stack) = 1; 7248 DECL_ARTIFICIAL (f_grtop) = 1; 7249 DECL_ARTIFICIAL (f_vrtop) = 1; 7250 DECL_ARTIFICIAL (f_groff) = 1; 7251 DECL_ARTIFICIAL (f_vroff) = 1; 7252 7253 DECL_FIELD_CONTEXT (f_stack) = va_list_type; 7254 DECL_FIELD_CONTEXT (f_grtop) = va_list_type; 7255 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type; 7256 DECL_FIELD_CONTEXT (f_groff) = va_list_type; 7257 DECL_FIELD_CONTEXT (f_vroff) = va_list_type; 7258 7259 TYPE_FIELDS (va_list_type) = f_stack; 7260 DECL_CHAIN (f_stack) = f_grtop; 7261 DECL_CHAIN (f_grtop) = f_vrtop; 7262 DECL_CHAIN (f_vrtop) = f_groff; 7263 DECL_CHAIN (f_groff) = f_vroff; 7264 7265 /* Compute its layout. */ 7266 layout_type (va_list_type); 7267 7268 return va_list_type; 7269} 7270 7271/* Implement TARGET_EXPAND_BUILTIN_VA_START. */ 7272static void 7273aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) 7274{ 7275 const CUMULATIVE_ARGS *cum; 7276 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 7277 tree stack, grtop, vrtop, groff, vroff; 7278 tree t; 7279 int gr_save_area_size; 7280 int vr_save_area_size; 7281 int vr_offset; 7282 7283 cum = &crtl->args.info; 7284 gr_save_area_size 7285 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; 7286 vr_save_area_size 7287 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; 7288 7289 if (!TARGET_FLOAT) 7290 { 7291 if (cum->aapcs_nvrn > 0) 7292 sorry ("%qs and floating point or vector arguments", 7293 "-mgeneral-regs-only"); 7294 vr_save_area_size = 0; 7295 } 7296 7297 f_stack = TYPE_FIELDS (va_list_type_node); 7298 f_grtop = DECL_CHAIN (f_stack); 7299 f_vrtop = DECL_CHAIN (f_grtop); 7300 f_groff = DECL_CHAIN (f_vrtop); 7301 f_vroff = DECL_CHAIN (f_groff); 7302 7303 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack, 7304 NULL_TREE); 7305 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop, 7306 NULL_TREE); 7307 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop, 7308 NULL_TREE); 7309 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff, 7310 NULL_TREE); 7311 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff, 7312 NULL_TREE); 7313 7314 /* Emit code to initialize STACK, which points to the next varargs stack 7315 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used 7316 by named arguments. STACK is 8-byte aligned. */ 7317 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx); 7318 if (cum->aapcs_stack_size > 0) 7319 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD); 7320 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t); 7321 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7322 7323 /* Emit code to initialize GRTOP, the top of the GR save area. 7324 virtual_incoming_args_rtx should have been 16 byte aligned. 
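*/

/* A worked example of the values set up in this function
   (illustrative only): for

     void f (int a, ...);

   one GP register is consumed by the named argument, so
   gr_save_area_size = (8 - 1) * 8 = 56 and vr_save_area_size
   = 8 * 16 = 128, giving __gr_offs = -56 and __vr_offs = -128,
   with __stack pointing at the first stack-passed vararg.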
*/ 7325 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx); 7326 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t); 7327 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7328 7329 /* Emit code to initialize VRTOP, the top of the VR save area. 7330 This address is gr_save_area_bytes below GRTOP, rounded 7331 down to the next 16-byte boundary. */ 7332 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx); 7333 vr_offset = AARCH64_ROUND_UP (gr_save_area_size, 7334 STACK_BOUNDARY / BITS_PER_UNIT); 7335 7336 if (vr_offset) 7337 t = fold_build_pointer_plus_hwi (t, -vr_offset); 7338 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t); 7339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7340 7341 /* Emit code to initialize GROFF, the offset from GRTOP of the 7342 next GPR argument. */ 7343 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff, 7344 build_int_cst (TREE_TYPE (groff), -gr_save_area_size)); 7345 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7346 7347 /* Likewise emit code to initialize VROFF, the offset from FTOP 7348 of the next VR argument. */ 7349 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff, 7350 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size)); 7351 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 7352} 7353 7354/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ 7355 7356static tree 7357aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, 7358 gimple_seq *post_p ATTRIBUTE_UNUSED) 7359{ 7360 tree addr; 7361 bool indirect_p; 7362 bool is_ha; /* is HFA or HVA. */ 7363 bool dw_align; /* double-word align. */ 7364 machine_mode ag_mode = VOIDmode; 7365 int nregs; 7366 machine_mode mode; 7367 7368 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; 7369 tree stack, f_top, f_off, off, arg, roundup, on_stack; 7370 HOST_WIDE_INT size, rsize, adjust, align; 7371 tree t, u, cond1, cond2; 7372 7373 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 7374 if (indirect_p) 7375 type = build_pointer_type (type); 7376 7377 mode = TYPE_MODE (type); 7378 7379 f_stack = TYPE_FIELDS (va_list_type_node); 7380 f_grtop = DECL_CHAIN (f_stack); 7381 f_vrtop = DECL_CHAIN (f_grtop); 7382 f_groff = DECL_CHAIN (f_vrtop); 7383 f_vroff = DECL_CHAIN (f_groff); 7384 7385 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist), 7386 f_stack, NULL_TREE); 7387 size = int_size_in_bytes (type); 7388 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT; 7389 7390 dw_align = false; 7391 adjust = 0; 7392 if (aarch64_vfp_is_call_or_return_candidate (mode, 7393 type, 7394 &ag_mode, 7395 &nregs, 7396 &is_ha)) 7397 { 7398 /* TYPE passed in fp/simd registers. */ 7399 if (!TARGET_FLOAT) 7400 sorry ("%qs and floating point or vector arguments", 7401 "-mgeneral-regs-only"); 7402 7403 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), 7404 unshare_expr (valist), f_vrtop, NULL_TREE); 7405 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), 7406 unshare_expr (valist), f_vroff, NULL_TREE); 7407 7408 rsize = nregs * UNITS_PER_VREG; 7409 7410 if (is_ha) 7411 { 7412 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG) 7413 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode); 7414 } 7415 else if (BLOCK_REG_PADDING (mode, type, 1) == downward 7416 && size < UNITS_PER_VREG) 7417 { 7418 adjust = UNITS_PER_VREG - size; 7419 } 7420 } 7421 else 7422 { 7423 /* TYPE passed in general registers. 
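*/

/* In outline, the trees built in the remainder of this function
   correspond to roughly the following C (an illustrative sketch; OFF,
   RSIZE and the __gr_/__vr_ fields stand for whichever variants were
   chosen above):

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;                  // cond1: already spilled
     ap.__gr_offs = off + rsize;
     if (ap.__gr_offs > 0)
       goto on_stack;                  // cond2: this arg straddles
     addr = ap.__gr_top + off;         // register save area
     ...
   on_stack:
     addr = ap.__stack;
     ap.__stack = (ap.__stack + size + 7) & -8;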
*/ 7424 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), 7425 unshare_expr (valist), f_grtop, NULL_TREE); 7426 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff), 7427 unshare_expr (valist), f_groff, NULL_TREE); 7428 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 7429 nregs = rsize / UNITS_PER_WORD; 7430 7431 if (align > 8) 7432 dw_align = true; 7433 7434 if (BLOCK_REG_PADDING (mode, type, 1) == downward 7435 && size < UNITS_PER_WORD) 7436 { 7437 adjust = UNITS_PER_WORD - size; 7438 } 7439 } 7440 7441 /* Get a local temporary for the field value. */ 7442 off = get_initialized_tmp_var (f_off, pre_p, NULL); 7443 7444 /* Emit code to branch if off >= 0. */ 7445 t = build2 (GE_EXPR, boolean_type_node, off, 7446 build_int_cst (TREE_TYPE (off), 0)); 7447 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE); 7448 7449 if (dw_align) 7450 { 7451 /* Emit: offs = (offs + 15) & -16. */ 7452 t = build2 (PLUS_EXPR, TREE_TYPE (off), off, 7453 build_int_cst (TREE_TYPE (off), 15)); 7454 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t, 7455 build_int_cst (TREE_TYPE (off), -16)); 7456 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t); 7457 } 7458 else 7459 roundup = NULL; 7460 7461 /* Update ap.__[g|v]r_offs */ 7462 t = build2 (PLUS_EXPR, TREE_TYPE (off), off, 7463 build_int_cst (TREE_TYPE (off), rsize)); 7464 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t); 7465 7466 /* String up. */ 7467 if (roundup) 7468 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); 7469 7470 /* [cond2] if (ap.__[g|v]r_offs > 0) */ 7471 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off), 7472 build_int_cst (TREE_TYPE (f_off), 0)); 7473 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE); 7474 7475 /* String up: make sure the assignment happens before the use. */ 7476 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2); 7477 COND_EXPR_ELSE (cond1) = t; 7478 7479 /* Prepare the trees handling the argument that is passed on the stack; 7480 the top level node will store in ON_STACK. */ 7481 arg = get_initialized_tmp_var (stack, pre_p, NULL); 7482 if (align > 8) 7483 { 7484 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */ 7485 t = fold_convert (intDI_type_node, arg); 7486 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 7487 build_int_cst (TREE_TYPE (t), 15)); 7488 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 7489 build_int_cst (TREE_TYPE (t), -16)); 7490 t = fold_convert (TREE_TYPE (arg), t); 7491 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t); 7492 } 7493 else 7494 roundup = NULL; 7495 /* Advance ap.__stack */ 7496 t = fold_convert (intDI_type_node, arg); 7497 t = build2 (PLUS_EXPR, TREE_TYPE (t), t, 7498 build_int_cst (TREE_TYPE (t), size + 7)); 7499 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 7500 build_int_cst (TREE_TYPE (t), -8)); 7501 t = fold_convert (TREE_TYPE (arg), t); 7502 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t); 7503 /* String up roundup and advance. */ 7504 if (roundup) 7505 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t); 7506 /* String up with arg */ 7507 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg); 7508 /* Big-endianness related address adjustment. 
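*/

/* A quick arithmetic example for the advance just built (illustrative
   only): stack slots are 8-byte granular, so a 13-byte argument
   advances __stack by (13 + 7) & -8 = 16 bytes, and on big-endian a
   small argument is then found at the high end of its slot.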
*/ 7509 if (BLOCK_REG_PADDING (mode, type, 1) == downward 7510 && size < UNITS_PER_WORD) 7511 { 7512 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg, 7513 size_int (UNITS_PER_WORD - size)); 7514 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t); 7515 } 7516 7517 COND_EXPR_THEN (cond1) = unshare_expr (on_stack); 7518 COND_EXPR_THEN (cond2) = unshare_expr (on_stack); 7519 7520 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */ 7521 t = off; 7522 if (adjust) 7523 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off, 7524 build_int_cst (TREE_TYPE (off), adjust)); 7525 7526 t = fold_convert (sizetype, t); 7527 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t); 7528 7529 if (is_ha) 7530 { 7531 /* type ha; // treat as "struct {ftype field[n];}" 7532 ... [computing offs] 7533 for (i = 0; i <nregs; ++i, offs += 16) 7534 ha.field[i] = *((ftype *)(ap.__vr_top + offs)); 7535 return ha; */ 7536 int i; 7537 tree tmp_ha, field_t, field_ptr_t; 7538 7539 /* Declare a local variable. */ 7540 tmp_ha = create_tmp_var_raw (type, "ha"); 7541 gimple_add_tmp_var (tmp_ha); 7542 7543 /* Establish the base type. */ 7544 switch (ag_mode) 7545 { 7546 case SFmode: 7547 field_t = float_type_node; 7548 field_ptr_t = float_ptr_type_node; 7549 break; 7550 case DFmode: 7551 field_t = double_type_node; 7552 field_ptr_t = double_ptr_type_node; 7553 break; 7554 case TFmode: 7555 field_t = long_double_type_node; 7556 field_ptr_t = long_double_ptr_type_node; 7557 break; 7558/* The half precision and quad precision are not fully supported yet. Enable 7559 the following code after the support is complete. Need to find the correct 7560 type node for __fp16 *. */ 7561#if 0 7562 case HFmode: 7563 field_t = float_type_node; 7564 field_ptr_t = float_ptr_type_node; 7565 break; 7566#endif 7567 case V2SImode: 7568 case V4SImode: 7569 { 7570 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode)); 7571 field_t = build_vector_type_for_mode (innertype, ag_mode); 7572 field_ptr_t = build_pointer_type (field_t); 7573 } 7574 break; 7575 default: 7576 gcc_assert (0); 7577 } 7578 7579 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */ 7580 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha); 7581 addr = t; 7582 t = fold_convert (field_ptr_t, addr); 7583 t = build2 (MODIFY_EXPR, field_t, 7584 build1 (INDIRECT_REF, field_t, tmp_ha), 7585 build1 (INDIRECT_REF, field_t, t)); 7586 7587 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */ 7588 for (i = 1; i < nregs; ++i) 7589 { 7590 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG); 7591 u = fold_convert (field_ptr_t, addr); 7592 u = build2 (MODIFY_EXPR, field_t, 7593 build2 (MEM_REF, field_t, tmp_ha, 7594 build_int_cst (field_ptr_t, 7595 (i * 7596 int_size_in_bytes (field_t)))), 7597 build1 (INDIRECT_REF, field_t, u)); 7598 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u); 7599 } 7600 7601 u = fold_convert (TREE_TYPE (f_top), tmp_ha); 7602 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u); 7603 } 7604 7605 COND_EXPR_ELSE (cond2) = t; 7606 addr = fold_convert (build_pointer_type (type), cond1); 7607 addr = build_va_arg_indirect_ref (addr); 7608 7609 if (indirect_p) 7610 addr = build_va_arg_indirect_ref (addr); 7611 7612 return addr; 7613} 7614 7615/* Implement TARGET_SETUP_INCOMING_VARARGS. 
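*/

/* A worked example for the function below (illustrative only):
   continuing the f (int a, ...) case, seven GP registers (x1-x7) and
   all eight vector registers are dumped, so the frame reserves
   ROUND_UP (7 * 8, 16) + 8 * 16 = 64 + 128 = 192 bytes of varargs
   save area.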
*/ 7616 7617static void 7618aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, 7619 tree type, int *pretend_size ATTRIBUTE_UNUSED, 7620 int no_rtl) 7621{ 7622 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7623 CUMULATIVE_ARGS local_cum; 7624 int gr_saved, vr_saved; 7625 7626 /* The caller has advanced CUM up to, but not beyond, the last named 7627 argument. Advance a local copy of CUM past the last "real" named 7628 argument, to find out how many registers are left over. */ 7629 local_cum = *cum; 7630 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); 7631 7632 /* Found out how many registers we need to save. */ 7633 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn; 7634 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn; 7635 7636 if (!TARGET_FLOAT) 7637 { 7638 if (local_cum.aapcs_nvrn > 0) 7639 sorry ("%qs and floating point or vector arguments", 7640 "-mgeneral-regs-only"); 7641 vr_saved = 0; 7642 } 7643 7644 if (!no_rtl) 7645 { 7646 if (gr_saved > 0) 7647 { 7648 rtx ptr, mem; 7649 7650 /* virtual_incoming_args_rtx should have been 16-byte aligned. */ 7651 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, 7652 - gr_saved * UNITS_PER_WORD); 7653 mem = gen_frame_mem (BLKmode, ptr); 7654 set_mem_alias_set (mem, get_varargs_alias_set ()); 7655 7656 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, 7657 mem, gr_saved); 7658 } 7659 if (vr_saved > 0) 7660 { 7661 /* We can't use move_block_from_reg, because it will use 7662 the wrong mode, storing D regs only. */ 7663 machine_mode mode = TImode; 7664 int off, i; 7665 7666 /* Set OFF to the offset from virtual_incoming_args_rtx of 7667 the first vector register. The VR save area lies below 7668 the GR one, and is aligned to 16 bytes. */ 7669 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, 7670 STACK_BOUNDARY / BITS_PER_UNIT); 7671 off -= vr_saved * UNITS_PER_VREG; 7672 7673 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) 7674 { 7675 rtx ptr, mem; 7676 7677 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); 7678 mem = gen_frame_mem (mode, ptr); 7679 set_mem_alias_set (mem, get_varargs_alias_set ()); 7680 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); 7681 off += UNITS_PER_VREG; 7682 } 7683 } 7684 } 7685 7686 /* We don't save the size into *PRETEND_SIZE because we want to avoid 7687 any complication of having crtl->args.pretend_args_size changed. */ 7688 cfun->machine->frame.saved_varargs_size 7689 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD, 7690 STACK_BOUNDARY / BITS_PER_UNIT) 7691 + vr_saved * UNITS_PER_VREG); 7692} 7693 7694static void 7695aarch64_conditional_register_usage (void) 7696{ 7697 int i; 7698 if (!TARGET_FLOAT) 7699 { 7700 for (i = V0_REGNUM; i <= V31_REGNUM; i++) 7701 { 7702 fixed_regs[i] = 1; 7703 call_used_regs[i] = 1; 7704 } 7705 } 7706} 7707 7708/* Walk down the type tree of TYPE counting consecutive base elements. 7709 If *MODEP is VOIDmode, then set it to the first valid floating point 7710 type. If a non-floating point type is found, or if a floating point 7711 type that doesn't match a non-VOIDmode *MODEP is found, then return -1, 7712 otherwise return the count in the sub-tree. 
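*/

/* Worked examples for the walk below (illustrative only):

     struct hfa { float x, y; };              // two SFmode leaves: 2
     struct nested { struct hfa a, b; };      // still all SFmode:  4
     struct mixed { double d; float f; };     // mode mismatch:    -1
     struct padded { float f; }
       __attribute__ ((aligned (16)));        // trailing padding: -1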
*/ 7713static int 7714aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) 7715{ 7716 machine_mode mode; 7717 HOST_WIDE_INT size; 7718 7719 switch (TREE_CODE (type)) 7720 { 7721 case REAL_TYPE: 7722 mode = TYPE_MODE (type); 7723 if (mode != DFmode && mode != SFmode && mode != TFmode) 7724 return -1; 7725 7726 if (*modep == VOIDmode) 7727 *modep = mode; 7728 7729 if (*modep == mode) 7730 return 1; 7731 7732 break; 7733 7734 case COMPLEX_TYPE: 7735 mode = TYPE_MODE (TREE_TYPE (type)); 7736 if (mode != DFmode && mode != SFmode && mode != TFmode) 7737 return -1; 7738 7739 if (*modep == VOIDmode) 7740 *modep = mode; 7741 7742 if (*modep == mode) 7743 return 2; 7744 7745 break; 7746 7747 case VECTOR_TYPE: 7748 /* Use V2SImode and V4SImode as representatives of all 64-bit 7749 and 128-bit vector types. */ 7750 size = int_size_in_bytes (type); 7751 switch (size) 7752 { 7753 case 8: 7754 mode = V2SImode; 7755 break; 7756 case 16: 7757 mode = V4SImode; 7758 break; 7759 default: 7760 return -1; 7761 } 7762 7763 if (*modep == VOIDmode) 7764 *modep = mode; 7765 7766 /* Vector modes are considered to be opaque: two vectors are 7767 equivalent for the purposes of being homogeneous aggregates 7768 if they are the same size. */ 7769 if (*modep == mode) 7770 return 1; 7771 7772 break; 7773 7774 case ARRAY_TYPE: 7775 { 7776 int count; 7777 tree index = TYPE_DOMAIN (type); 7778 7779 /* Can't handle incomplete types nor sizes that are not 7780 fixed. */ 7781 if (!COMPLETE_TYPE_P (type) 7782 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 7783 return -1; 7784 7785 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); 7786 if (count == -1 7787 || !index 7788 || !TYPE_MAX_VALUE (index) 7789 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) 7790 || !TYPE_MIN_VALUE (index) 7791 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) 7792 || count < 0) 7793 return -1; 7794 7795 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) 7796 - tree_to_uhwi (TYPE_MIN_VALUE (index))); 7797 7798 /* There must be no padding. */ 7799 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) 7800 return -1; 7801 7802 return count; 7803 } 7804 7805 case RECORD_TYPE: 7806 { 7807 int count = 0; 7808 int sub_count; 7809 tree field; 7810 7811 /* Can't handle incomplete types nor sizes that are not 7812 fixed. */ 7813 if (!COMPLETE_TYPE_P (type) 7814 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 7815 return -1; 7816 7817 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 7818 { 7819 if (TREE_CODE (field) != FIELD_DECL) 7820 continue; 7821 7822 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 7823 if (sub_count < 0) 7824 return -1; 7825 count += sub_count; 7826 } 7827 7828 /* There must be no padding. */ 7829 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) 7830 return -1; 7831 7832 return count; 7833 } 7834 7835 case UNION_TYPE: 7836 case QUAL_UNION_TYPE: 7837 { 7838 /* These aren't very interesting except in a degenerate case. */ 7839 int count = 0; 7840 int sub_count; 7841 tree field; 7842 7843 /* Can't handle incomplete types nor sizes that are not 7844 fixed. */ 7845 if (!COMPLETE_TYPE_P (type) 7846 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) 7847 return -1; 7848 7849 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 7850 { 7851 if (TREE_CODE (field) != FIELD_DECL) 7852 continue; 7853 7854 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 7855 if (sub_count < 0) 7856 return -1; 7857 count = count > sub_count ? 
count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot
   be relied on solely.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}

/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a
   homogeneous floating-point aggregate or a homogeneous short-vector
   aggregate.
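*/

/* Worked examples for the predicate below (illustrative only):

     double            -> *count = 1, *base_mode = DFmode
     _Complex float    -> *count = 2, *base_mode = SFmode, *is_ha = true
     struct { float x, y, z, w; }
                       -> *count = 4, *base_mode = SFmode, *is_ha = true
     struct { float a[5]; }
                       -> false: five fields exceed HA_MAX_NUM_FLDS (4)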
*/ 7935 7936static bool 7937aarch64_vfp_is_call_or_return_candidate (machine_mode mode, 7938 const_tree type, 7939 machine_mode *base_mode, 7940 int *count, 7941 bool *is_ha) 7942{ 7943 machine_mode new_mode = VOIDmode; 7944 bool composite_p = aarch64_composite_type_p (type, mode); 7945 7946 if (is_ha != NULL) *is_ha = false; 7947 7948 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) 7949 || aarch64_short_vector_p (type, mode)) 7950 { 7951 *count = 1; 7952 new_mode = mode; 7953 } 7954 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 7955 { 7956 if (is_ha != NULL) *is_ha = true; 7957 *count = 2; 7958 new_mode = GET_MODE_INNER (mode); 7959 } 7960 else if (type && composite_p) 7961 { 7962 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); 7963 7964 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS) 7965 { 7966 if (is_ha != NULL) *is_ha = true; 7967 *count = ag_count; 7968 } 7969 else 7970 return false; 7971 } 7972 else 7973 return false; 7974 7975 *base_mode = new_mode; 7976 return true; 7977} 7978 7979/* Implement TARGET_STRUCT_VALUE_RTX. */ 7980 7981static rtx 7982aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED, 7983 int incoming ATTRIBUTE_UNUSED) 7984{ 7985 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM); 7986} 7987 7988/* Implements target hook vector_mode_supported_p. */ 7989static bool 7990aarch64_vector_mode_supported_p (machine_mode mode) 7991{ 7992 if (TARGET_SIMD 7993 && (mode == V4SImode || mode == V8HImode 7994 || mode == V16QImode || mode == V2DImode 7995 || mode == V2SImode || mode == V4HImode 7996 || mode == V8QImode || mode == V2SFmode 7997 || mode == V4SFmode || mode == V2DFmode 7998 || mode == V1DFmode)) 7999 return true; 8000 8001 return false; 8002} 8003 8004/* Return appropriate SIMD container 8005 for MODE within a vector of WIDTH bits. */ 8006static machine_mode 8007aarch64_simd_container_mode (machine_mode mode, unsigned width) 8008{ 8009 gcc_assert (width == 64 || width == 128); 8010 if (TARGET_SIMD) 8011 { 8012 if (width == 128) 8013 switch (mode) 8014 { 8015 case DFmode: 8016 return V2DFmode; 8017 case SFmode: 8018 return V4SFmode; 8019 case SImode: 8020 return V4SImode; 8021 case HImode: 8022 return V8HImode; 8023 case QImode: 8024 return V16QImode; 8025 case DImode: 8026 return V2DImode; 8027 default: 8028 break; 8029 } 8030 else 8031 switch (mode) 8032 { 8033 case SFmode: 8034 return V2SFmode; 8035 case SImode: 8036 return V2SImode; 8037 case HImode: 8038 return V4HImode; 8039 case QImode: 8040 return V8QImode; 8041 default: 8042 break; 8043 } 8044 } 8045 return word_mode; 8046} 8047 8048/* Return 128-bit container as the preferred SIMD mode for MODE. */ 8049static machine_mode 8050aarch64_preferred_simd_mode (machine_mode mode) 8051{ 8052 return aarch64_simd_container_mode (mode, 128); 8053} 8054 8055/* Return the bitmask of possible vector sizes for the vectorizer 8056 to iterate over. */ 8057static unsigned int 8058aarch64_autovectorize_vector_sizes (void) 8059{ 8060 return (16 | 8); 8061} 8062 8063/* Implement TARGET_MANGLE_TYPE. */ 8064 8065static const char * 8066aarch64_mangle_type (const_tree type) 8067{ 8068 /* The AArch64 ABI documents say that "__va_list" has to be 8069 managled as if it is in the "std" namespace. */ 8070 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) 8071 return "St9__va_list"; 8072 8073 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for 8074 builtin types. 
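*/

/* For example (illustrative only), given

     void f (__builtin_va_list ap);

   the rule above mangles f as _Z1fSt9__va_list, exactly as if
   __va_list were declared in namespace std, which keeps C++ mangling
   compatible across AArch64 implementations.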
*/ 8075 if (TYPE_NAME (type) != NULL) 8076 return aarch64_mangle_builtin_type (type); 8077 8078 /* Use the default mangling. */ 8079 return NULL; 8080} 8081 8082 8083/* Return true if the rtx_insn contains a MEM RTX somewhere 8084 in it. */ 8085 8086static bool 8087has_memory_op (rtx_insn *mem_insn) 8088{ 8089 subrtx_iterator::array_type array; 8090 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL) 8091 if (MEM_P (*iter)) 8092 return true; 8093 8094 return false; 8095} 8096 8097/* Find the first rtx_insn before insn that will generate an assembly 8098 instruction. */ 8099 8100static rtx_insn * 8101aarch64_prev_real_insn (rtx_insn *insn) 8102{ 8103 if (!insn) 8104 return NULL; 8105 8106 do 8107 { 8108 insn = prev_real_insn (insn); 8109 } 8110 while (insn && recog_memoized (insn) < 0); 8111 8112 return insn; 8113} 8114 8115static bool 8116is_madd_op (enum attr_type t1) 8117{ 8118 unsigned int i; 8119 /* A number of these may be AArch32 only. */ 8120 enum attr_type mlatypes[] = { 8121 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD, 8122 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY, 8123 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD 8124 }; 8125 8126 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++) 8127 { 8128 if (t1 == mlatypes[i]) 8129 return true; 8130 } 8131 8132 return false; 8133} 8134 8135/* Check if there is a register dependency between a load and the insn 8136 for which we hold recog_data. */ 8137 8138static bool 8139dep_between_memop_and_curr (rtx memop) 8140{ 8141 rtx load_reg; 8142 int opno; 8143 8144 gcc_assert (GET_CODE (memop) == SET); 8145 8146 if (!REG_P (SET_DEST (memop))) 8147 return false; 8148 8149 load_reg = SET_DEST (memop); 8150 for (opno = 1; opno < recog_data.n_operands; opno++) 8151 { 8152 rtx operand = recog_data.operand[opno]; 8153 if (REG_P (operand) 8154 && reg_overlap_mentioned_p (load_reg, operand)) 8155 return true; 8156 8157 } 8158 return false; 8159} 8160 8161 8162/* When working around the Cortex-A53 erratum 835769, 8163 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate 8164 instruction and has a preceding memory instruction such that a NOP 8165 should be inserted between them. */ 8166 8167bool 8168aarch64_madd_needs_nop (rtx_insn* insn) 8169{ 8170 enum attr_type attr_type; 8171 rtx_insn *prev; 8172 rtx body; 8173 8174 if (!aarch64_fix_a53_err835769) 8175 return false; 8176 8177 if (!INSN_P (insn) || recog_memoized (insn) < 0) 8178 return false; 8179 8180 attr_type = get_attr_type (insn); 8181 if (!is_madd_op (attr_type)) 8182 return false; 8183 8184 prev = aarch64_prev_real_insn (insn); 8185 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN. 8186 Restore recog state to INSN to avoid state corruption. */ 8187 extract_constrain_insn_cached (insn); 8188 8189 if (!prev || !has_memory_op (prev)) 8190 return false; 8191 8192 body = single_set (prev); 8193 8194 /* If the previous insn is a memory op and there is no dependency between 8195 it and the DImode madd, emit a NOP between them. If body is NULL then we 8196 have a complex memory operation, probably a load/store pair. 8197 Be conservative for now and emit a NOP. */ 8198 if (GET_MODE (recog_data.operand[0]) == DImode 8199 && (!body || !dep_between_memop_and_curr (body))) 8200 return true; 8201 8202 return false; 8203 8204} 8205 8206 8207/* Implement FINAL_PRESCAN_INSN. 
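*/

/* An illustrative example of the workaround in action (not from the
   original sources): with -mfix-cortex-a53-835769, a sequence such as

     ldr   x5, [x4]
     madd  x0, x1, x2, x3

   is emitted by the hook below as

     ldr   x5, [x4]
     nop   // between mem op and mult-accumulate
     madd  x0, x1, x2, x3

   breaking the erratum-triggering adjacency of a memory operation and
   a 64-bit multiply-accumulate.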
*/ 8208 8209void 8210aarch64_final_prescan_insn (rtx_insn *insn) 8211{ 8212 if (aarch64_madd_needs_nop (insn)) 8213 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n"); 8214} 8215 8216 8217/* Return the equivalent letter for size. */ 8218static char 8219sizetochar (int size) 8220{ 8221 switch (size) 8222 { 8223 case 64: return 'd'; 8224 case 32: return 's'; 8225 case 16: return 'h'; 8226 case 8 : return 'b'; 8227 default: gcc_unreachable (); 8228 } 8229} 8230 8231/* Return true iff x is a uniform vector of floating-point 8232 constants, and the constant can be represented in 8233 quarter-precision form. Note, as aarch64_float_const_representable 8234 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */ 8235static bool 8236aarch64_vect_float_const_representable_p (rtx x) 8237{ 8238 int i = 0; 8239 REAL_VALUE_TYPE r0, ri; 8240 rtx x0, xi; 8241 8242 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT) 8243 return false; 8244 8245 x0 = CONST_VECTOR_ELT (x, 0); 8246 if (!CONST_DOUBLE_P (x0)) 8247 return false; 8248 8249 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0); 8250 8251 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++) 8252 { 8253 xi = CONST_VECTOR_ELT (x, i); 8254 if (!CONST_DOUBLE_P (xi)) 8255 return false; 8256 8257 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi); 8258 if (!REAL_VALUES_EQUAL (r0, ri)) 8259 return false; 8260 } 8261 8262 return aarch64_float_const_representable_p (x0); 8263} 8264 8265/* Return true for valid and false for invalid. */ 8266bool 8267aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse, 8268 struct simd_immediate_info *info) 8269{ 8270#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ 8271 matches = 1; \ 8272 for (i = 0; i < idx; i += (STRIDE)) \ 8273 if (!(TEST)) \ 8274 matches = 0; \ 8275 if (matches) \ 8276 { \ 8277 immtype = (CLASS); \ 8278 elsize = (ELSIZE); \ 8279 eshift = (SHIFT); \ 8280 emvn = (NEG); \ 8281 break; \ 8282 } 8283 8284 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op); 8285 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); 8286 unsigned char bytes[16]; 8287 int immtype = -1, matches; 8288 unsigned int invmask = inverse ? 0xff : 0; 8289 int eshift, emvn; 8290 8291 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) 8292 { 8293 if (! (aarch64_simd_imm_zero_p (op, mode) 8294 || aarch64_vect_float_const_representable_p (op))) 8295 return false; 8296 8297 if (info) 8298 { 8299 info->value = CONST_VECTOR_ELT (op, 0); 8300 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value)); 8301 info->mvn = false; 8302 info->shift = 0; 8303 } 8304 8305 return true; 8306 } 8307 8308 /* Splat vector constant out into a byte vector. */ 8309 for (i = 0; i < n_elts; i++) 8310 { 8311 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be, 8312 it must be laid out in the vector register in reverse order. */ 8313 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? 
(n_elts - 1 - i) : i); 8314 unsigned HOST_WIDE_INT elpart; 8315 unsigned int part, parts; 8316 8317 if (CONST_INT_P (el)) 8318 { 8319 elpart = INTVAL (el); 8320 parts = 1; 8321 } 8322 else if (GET_CODE (el) == CONST_DOUBLE) 8323 { 8324 elpart = CONST_DOUBLE_LOW (el); 8325 parts = 2; 8326 } 8327 else 8328 gcc_unreachable (); 8329 8330 for (part = 0; part < parts; part++) 8331 { 8332 unsigned int byte; 8333 for (byte = 0; byte < innersize; byte++) 8334 { 8335 bytes[idx++] = (elpart & 0xff) ^ invmask; 8336 elpart >>= BITS_PER_UNIT; 8337 } 8338 if (GET_CODE (el) == CONST_DOUBLE) 8339 elpart = CONST_DOUBLE_HIGH (el); 8340 } 8341 } 8342 8343 /* Sanity check. */ 8344 gcc_assert (idx == GET_MODE_SIZE (mode)); 8345 8346 do 8347 { 8348 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 8349 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); 8350 8351 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] 8352 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); 8353 8354 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 8355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); 8356 8357 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 8358 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); 8359 8360 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); 8361 8362 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); 8363 8364 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff 8365 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); 8366 8367 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 8368 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); 8369 8370 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff 8371 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); 8372 8373 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff 8374 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); 8375 8376 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); 8377 8378 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); 8379 8380 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] 8381 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); 8382 8383 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] 8384 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); 8385 8386 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff 8387 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); 8388 8389 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 8390 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); 8391 8392 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); 8393 8394 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) 8395 && bytes[i] == bytes[(i + 8) % idx], 0, 0); 8396 } 8397 while (0); 8398 8399 if (immtype == -1) 8400 return false; 8401 8402 if (info) 8403 { 8404 info->element_width = elsize; 8405 info->mvn = emvn != 0; 8406 info->shift = eshift; 8407 8408 unsigned HOST_WIDE_INT imm = 0; 8409 8410 if (immtype >= 12 && immtype <= 15) 8411 info->msl = true; 8412 8413 /* Un-invert bytes of recognized vector, if necessary. */ 8414 if (invmask != 0) 8415 for (i = 0; i < idx; i++) 8416 bytes[i] ^= invmask; 8417 8418 if (immtype == 17) 8419 { 8420 /* FIXME: Broken on 32-bit H_W_I hosts. */ 8421 gcc_assert (sizeof (HOST_WIDE_INT) == 8); 8422 8423 for (i = 0; i < 8; i++) 8424 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 
0xff : 0) 8425 << (i * BITS_PER_UNIT); 8426 8427 8428 info->value = GEN_INT (imm); 8429 } 8430 else 8431 { 8432 for (i = 0; i < elsize / BITS_PER_UNIT; i++) 8433 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); 8434 8435 /* Construct 'abcdefgh' because the assembler cannot handle 8436 generic constants. */ 8437 if (info->mvn) 8438 imm = ~imm; 8439 imm = (imm >> info->shift) & 0xff; 8440 info->value = GEN_INT (imm); 8441 } 8442 } 8443 8444 return true; 8445#undef CHECK 8446} 8447 8448/* Check that immediate shift constants are within range. */ 8449bool 8450aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left) 8451{ 8452 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; 8453 if (left) 8454 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); 8455 else 8456 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); 8457} 8458 8459/* Return true if X is a uniform vector where all elements 8460 are either the floating-point constant 0.0 or the 8461 integer constant 0. */ 8462bool 8463aarch64_simd_imm_zero_p (rtx x, machine_mode mode) 8464{ 8465 return x == CONST0_RTX (mode); 8466} 8467 8468bool 8469aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED) 8470{ 8471 HOST_WIDE_INT imm = INTVAL (x); 8472 int i; 8473 8474 for (i = 0; i < 8; i++) 8475 { 8476 unsigned int byte = imm & 0xff; 8477 if (byte != 0xff && byte != 0) 8478 return false; 8479 imm >>= 8; 8480 } 8481 8482 return true; 8483} 8484 8485bool 8486aarch64_mov_operand_p (rtx x, 8487 enum aarch64_symbol_context context, 8488 machine_mode mode) 8489{ 8490 if (GET_CODE (x) == HIGH 8491 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) 8492 return true; 8493 8494 if (CONST_INT_P (x)) 8495 return true; 8496 8497 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) 8498 return true; 8499 8500 return aarch64_classify_symbolic_expression (x, context) 8501 == SYMBOL_TINY_ABSOLUTE; 8502} 8503 8504/* Return a const_int vector of VAL. */ 8505rtx 8506aarch64_simd_gen_const_vector_dup (machine_mode mode, int val) 8507{ 8508 int nunits = GET_MODE_NUNITS (mode); 8509 rtvec v = rtvec_alloc (nunits); 8510 int i; 8511 8512 for (i = 0; i < nunits; i++) 8513 RTVEC_ELT (v, i) = GEN_INT (val); 8514 8515 return gen_rtx_CONST_VECTOR (mode, v); 8516} 8517 8518/* Check OP is a legal scalar immediate for the MOVI instruction. */ 8519 8520bool 8521aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode) 8522{ 8523 machine_mode vmode; 8524 8525 gcc_assert (!VECTOR_MODE_P (mode)); 8526 vmode = aarch64_preferred_simd_mode (mode); 8527 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op)); 8528 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL); 8529} 8530 8531/* Construct and return a PARALLEL RTX vector with elements numbering the 8532 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of 8533 the vector - from the perspective of the architecture. This does not 8534 line up with GCC's perspective on lane numbers, so we end up with 8535 different masks depending on our target endian-ness. The diagram 8536 below may help. We must draw the distinction when building masks 8537 which select one half of the vector. An instruction selecting 8538 architectural low-lanes for a big-endian target must be described using 8539 a mask selecting GCC high-lanes.
8540 8541 Big-Endian Little-Endian 8542 8543GCC 0 1 2 3 3 2 1 0 8544 | x | x | x | x | | x | x | x | x | 8545Architecture 3 2 1 0 3 2 1 0 8546 8547Low Mask: { 2, 3 } { 0, 1 } 8548High Mask: { 0, 1 } { 2, 3 } 8549*/ 8550 8551rtx 8552aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high) 8553{ 8554 int nunits = GET_MODE_NUNITS (mode); 8555 rtvec v = rtvec_alloc (nunits / 2); 8556 int high_base = nunits / 2; 8557 int low_base = 0; 8558 int base; 8559 rtx t1; 8560 int i; 8561 8562 if (BYTES_BIG_ENDIAN) 8563 base = high ? low_base : high_base; 8564 else 8565 base = high ? high_base : low_base; 8566 8567 for (i = 0; i < nunits / 2; i++) 8568 RTVEC_ELT (v, i) = GEN_INT (base + i); 8569 8570 t1 = gen_rtx_PARALLEL (mode, v); 8571 return t1; 8572} 8573 8574/* Check OP for validity as a PARALLEL RTX vector with elements 8575 numbering the lanes of either the high (HIGH == TRUE) or low 8576 (HIGH == FALSE) half of the vector, from the perspective of the 8577 architecture. See the diagram above aarch64_simd_vect_par_cnst_half 8578 for more details. */ 8579bool 8580aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, 8581 bool high) 8582{ 8583 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high); 8584 HOST_WIDE_INT count_op = XVECLEN (op, 0); 8585 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); 8586 int i = 0; 8587 8588 if (!VECTOR_MODE_P (mode)) 8589 return false; 8590 8591 if (count_op != count_ideal) 8592 return false; 8593 8594 for (i = 0; i < count_ideal; i++) 8595 { 8596 rtx elt_op = XVECEXP (op, 0, i); 8597 rtx elt_ideal = XVECEXP (ideal, 0, i); 8598 8599 if (!CONST_INT_P (elt_op) 8600 || INTVAL (elt_ideal) != INTVAL (elt_op)) 8601 return false; 8602 } 8603 return true; 8604} 8605 8606/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and 8607 HIGH (exclusive). */ 8608void 8609aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, 8610 const_tree exp) 8611{ 8612 HOST_WIDE_INT lane; 8613 gcc_assert (CONST_INT_P (operand)); 8614 lane = INTVAL (operand); 8615 8616 if (lane < low || lane >= high) 8617 { 8618 if (exp) 8619 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1); 8620 else 8621 error ("lane %ld out of range %ld - %ld", lane, low, high - 1); 8622 } 8623} 8624 8625/* Emit code to place an AdvSIMD pair result in memory locations (with equal 8626 registers). */ 8627void 8628aarch64_simd_emit_pair_result_insn (machine_mode mode, 8629 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr, 8630 rtx op1) 8631{ 8632 rtx mem = gen_rtx_MEM (mode, destaddr); 8633 rtx tmp1 = gen_reg_rtx (mode); 8634 rtx tmp2 = gen_reg_rtx (mode); 8635 8636 emit_insn (intfn (tmp1, op1, tmp2)); 8637 8638 emit_move_insn (mem, tmp1); 8639 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); 8640 emit_move_insn (mem, tmp2); 8641} 8642 8643/* Return TRUE if OP is a valid vector addressing mode. */ 8644bool 8645aarch64_simd_mem_operand_p (rtx op) 8646{ 8647 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC 8648 || REG_P (XEXP (op, 0))); 8649} 8650 8651/* Emit a register copy from operand to operand, taking care not to 8652 early-clobber source registers in the process. 8653 8654 COUNT is the number of components into which the copy needs to be 8655 decomposed.
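As an illustrative example (register numbers invented for exposition): copying a four-register structure from V0-V3 to V2-V5 must move V3 to V5 first and V0 to V2 last, which is why the second loop below walks the registers in reverse whenever the ranges overlap and the destination does not start below the source.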
*/ 8656void 8657aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, 8658 unsigned int count) 8659{ 8660 unsigned int i; 8661 int rdest = REGNO (operands[0]); 8662 int rsrc = REGNO (operands[1]); 8663 8664 if (!reg_overlap_mentioned_p (operands[0], operands[1]) 8665 || rdest < rsrc) 8666 for (i = 0; i < count; i++) 8667 emit_move_insn (gen_rtx_REG (mode, rdest + i), 8668 gen_rtx_REG (mode, rsrc + i)); 8669 else 8670 for (i = 0; i < count; i++) 8671 emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1), 8672 gen_rtx_REG (mode, rsrc + count - i - 1)); 8673} 8674 8675/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is 8676 one of VSTRUCT modes: OI, CI or XI. */ 8677int 8678aarch64_simd_attr_length_move (rtx_insn *insn) 8679{ 8680 machine_mode mode; 8681 8682 extract_insn_cached (insn); 8683 8684 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) 8685 { 8686 mode = GET_MODE (recog_data.operand[0]); 8687 switch (mode) 8688 { 8689 case OImode: 8690 return 8; 8691 case CImode: 8692 return 12; 8693 case XImode: 8694 return 16; 8695 default: 8696 gcc_unreachable (); 8697 } 8698 } 8699 return 4; 8700} 8701 8702/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is 8703 one of VSTRUCT modes: OI, CI, EI, or XI. */ 8704int 8705aarch64_simd_attr_length_rglist (enum machine_mode mode) 8706{ 8707 return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4; 8708} 8709 8710/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum 8711 alignment of a vector to 128 bits. */ 8712static HOST_WIDE_INT 8713aarch64_simd_vector_alignment (const_tree type) 8714{ 8715 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type)); 8716 return MIN (align, 128); 8717} 8718 8719/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */ 8720static bool 8721aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) 8722{ 8723 if (is_packed) 8724 return false; 8725 8726 /* We guarantee alignment for vectors up to 128-bits. */ 8727 if (tree_int_cst_compare (TYPE_SIZE (type), 8728 bitsize_int (BIGGEST_ALIGNMENT)) > 0) 8729 return false; 8730 8731 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ 8732 return true; 8733} 8734 8735/* If VALS is a vector constant that can be loaded into a register 8736 using DUP, generate instructions to do so and return an RTX to 8737 assign to the register. Otherwise return NULL_RTX. */ 8738static rtx 8739aarch64_simd_dup_constant (rtx vals) 8740{ 8741 machine_mode mode = GET_MODE (vals); 8742 machine_mode inner_mode = GET_MODE_INNER (mode); 8743 int n_elts = GET_MODE_NUNITS (mode); 8744 bool all_same = true; 8745 rtx x; 8746 int i; 8747 8748 if (GET_CODE (vals) != CONST_VECTOR) 8749 return NULL_RTX; 8750 8751 for (i = 1; i < n_elts; ++i) 8752 { 8753 x = CONST_VECTOR_ELT (vals, i); 8754 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0))) 8755 all_same = false; 8756 } 8757 8758 if (!all_same) 8759 return NULL_RTX; 8760 8761 /* We can load this constant by using DUP and a constant in a 8762 single ARM register. This will be cheaper than a vector 8763 load. */ 8764 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0)); 8765 return gen_rtx_VEC_DUPLICATE (mode, x); 8766} 8767 8768 8769/* Generate code to load VALS, which is a PARALLEL containing only 8770 constants (for vec_init) or CONST_VECTOR, efficiently into a 8771 register. Returns an RTX to copy into the register, or NULL_RTX 8772 for a PARALLEL that can not be converted into a CONST_VECTOR. 
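Three strategies are tried, in decreasing order of preference: a MOVI/MVNI immediate, a DUP of a single repeated scalar, and finally a load from the constant pool.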
*/ 8773static rtx 8774aarch64_simd_make_constant (rtx vals) 8775{ 8776 machine_mode mode = GET_MODE (vals); 8777 rtx const_dup; 8778 rtx const_vec = NULL_RTX; 8779 int n_elts = GET_MODE_NUNITS (mode); 8780 int n_const = 0; 8781 int i; 8782 8783 if (GET_CODE (vals) == CONST_VECTOR) 8784 const_vec = vals; 8785 else if (GET_CODE (vals) == PARALLEL) 8786 { 8787 /* A CONST_VECTOR must contain only CONST_INTs and 8788 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). 8789 Only store valid constants in a CONST_VECTOR. */ 8790 for (i = 0; i < n_elts; ++i) 8791 { 8792 rtx x = XVECEXP (vals, 0, i); 8793 if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) 8794 n_const++; 8795 } 8796 if (n_const == n_elts) 8797 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); 8798 } 8799 else 8800 gcc_unreachable (); 8801 8802 if (const_vec != NULL_RTX 8803 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL)) 8804 /* Load using MOVI/MVNI. */ 8805 return const_vec; 8806 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX) 8807 /* Loaded using DUP. */ 8808 return const_dup; 8809 else if (const_vec != NULL_RTX) 8810 /* Load from constant pool. We can not take advantage of single-cycle 8811 LD1 because we need a PC-relative addressing mode. */ 8812 return const_vec; 8813 else 8814 /* A PARALLEL containing something not valid inside CONST_VECTOR. 8815 We can not construct an initializer. */ 8816 return NULL_RTX; 8817} 8818 8819void 8820aarch64_expand_vector_init (rtx target, rtx vals) 8821{ 8822 machine_mode mode = GET_MODE (target); 8823 machine_mode inner_mode = GET_MODE_INNER (mode); 8824 int n_elts = GET_MODE_NUNITS (mode); 8825 int n_var = 0, one_var = -1; 8826 bool all_same = true; 8827 rtx x, mem; 8828 int i; 8829 8830 x = XVECEXP (vals, 0, 0); 8831 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) 8832 n_var = 1, one_var = 0; 8833 8834 for (i = 1; i < n_elts; ++i) 8835 { 8836 x = XVECEXP (vals, 0, i); 8837 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) 8838 ++n_var, one_var = i; 8839 8840 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) 8841 all_same = false; 8842 } 8843 8844 if (n_var == 0) 8845 { 8846 rtx constant = aarch64_simd_make_constant (vals); 8847 if (constant != NULL_RTX) 8848 { 8849 emit_move_insn (target, constant); 8850 return; 8851 } 8852 } 8853 8854 /* Splat a single non-constant element if we can. */ 8855 if (all_same) 8856 { 8857 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 8858 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); 8859 return; 8860 } 8861 8862 /* One field is non-constant. Load constant then overwrite varying 8863 field. This is more efficient than using the stack. */ 8864 if (n_var == 1) 8865 { 8866 rtx copy = copy_rtx (vals); 8867 rtx index = GEN_INT (one_var); 8868 enum insn_code icode; 8869 8870 /* Load constant part of vector, substitute neighboring value for 8871 varying element. */ 8872 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); 8873 aarch64_expand_vector_init (target, copy); 8874 8875 /* Insert variable. */ 8876 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); 8877 icode = optab_handler (vec_set_optab, mode); 8878 gcc_assert (icode != CODE_FOR_nothing); 8879 emit_insn (GEN_FCN (icode) (target, x, index)); 8880 return; 8881 } 8882 8883 /* Construct the vector in memory one field at a time 8884 and load the whole vector. 
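This is the general fallback, reached when more than one element is variable and the elements are not all identical.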
*/ 8885 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 8886 for (i = 0; i < n_elts; i++) 8887 emit_move_insn (adjust_address_nv (mem, inner_mode, 8888 i * GET_MODE_SIZE (inner_mode)), 8889 XVECEXP (vals, 0, i)); 8890 emit_move_insn (target, mem); 8891 8892} 8893 8894static unsigned HOST_WIDE_INT 8895aarch64_shift_truncation_mask (machine_mode mode) 8896{ 8897 return 8898 (aarch64_vector_mode_supported_p (mode) 8899 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1); 8900} 8901 8902#ifndef TLS_SECTION_ASM_FLAG 8903#define TLS_SECTION_ASM_FLAG 'T' 8904#endif 8905 8906void 8907aarch64_elf_asm_named_section (const char *name, unsigned int flags, 8908 tree decl ATTRIBUTE_UNUSED) 8909{ 8910 char flagchars[10], *f = flagchars; 8911 8912 /* If we have already declared this section, we can use an 8913 abbreviated form to switch back to it -- unless this section is 8914 part of a COMDAT group, in which case GAS requires the full 8915 declaration every time. */ 8916 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 8917 && (flags & SECTION_DECLARED)) 8918 { 8919 fprintf (asm_out_file, "\t.section\t%s\n", name); 8920 return; 8921 } 8922 8923 if (!(flags & SECTION_DEBUG)) 8924 *f++ = 'a'; 8925 if (flags & SECTION_WRITE) 8926 *f++ = 'w'; 8927 if (flags & SECTION_CODE) 8928 *f++ = 'x'; 8929 if (flags & SECTION_SMALL) 8930 *f++ = 's'; 8931 if (flags & SECTION_MERGE) 8932 *f++ = 'M'; 8933 if (flags & SECTION_STRINGS) 8934 *f++ = 'S'; 8935 if (flags & SECTION_TLS) 8936 *f++ = TLS_SECTION_ASM_FLAG; 8937 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 8938 *f++ = 'G'; 8939 *f = '\0'; 8940 8941 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars); 8942 8943 if (!(flags & SECTION_NOTYPE)) 8944 { 8945 const char *type; 8946 const char *format; 8947 8948 if (flags & SECTION_BSS) 8949 type = "nobits"; 8950 else 8951 type = "progbits"; 8952 8953#ifdef TYPE_OPERAND_FMT 8954 format = "," TYPE_OPERAND_FMT; 8955#else 8956 format = ",@%s"; 8957#endif 8958 8959 fprintf (asm_out_file, format, type); 8960 8961 if (flags & SECTION_ENTSIZE) 8962 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE); 8963 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE)) 8964 { 8965 if (TREE_CODE (decl) == IDENTIFIER_NODE) 8966 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl)); 8967 else 8968 fprintf (asm_out_file, ",%s,comdat", 8969 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl))); 8970 } 8971 } 8972 8973 putc ('\n', asm_out_file); 8974} 8975 8976/* Select a format to encode pointers in exception handling data. */ 8977int 8978aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) 8979{ 8980 int type; 8981 switch (aarch64_cmodel) 8982 { 8983 case AARCH64_CMODEL_TINY: 8984 case AARCH64_CMODEL_TINY_PIC: 8985 case AARCH64_CMODEL_SMALL: 8986 case AARCH64_CMODEL_SMALL_PIC: 8987 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient 8988 for everything. */ 8989 type = DW_EH_PE_sdata4; 8990 break; 8991 default: 8992 /* No assumptions here. 8-byte relocs required. */ 8993 type = DW_EH_PE_sdata8; 8994 break; 8995 } 8996 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; 8997} 8998 8999/* Emit load exclusive.
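This emits the load half of a load-exclusive/store-exclusive loop; depending on MODEL_RTX the underlying pattern should expand to an LDXR-family instruction, or to the LDAXR family when acquire semantics are required.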
*/ 9000 9001static void 9002aarch64_emit_load_exclusive (machine_mode mode, rtx rval, 9003 rtx mem, rtx model_rtx) 9004{ 9005 rtx (*gen) (rtx, rtx, rtx); 9006 9007 switch (mode) 9008 { 9009 case QImode: gen = gen_aarch64_load_exclusiveqi; break; 9010 case HImode: gen = gen_aarch64_load_exclusivehi; break; 9011 case SImode: gen = gen_aarch64_load_exclusivesi; break; 9012 case DImode: gen = gen_aarch64_load_exclusivedi; break; 9013 default: 9014 gcc_unreachable (); 9015 } 9016 9017 emit_insn (gen (rval, mem, model_rtx)); 9018} 9019 9020/* Emit store exclusive. */ 9021 9022static void 9023aarch64_emit_store_exclusive (machine_mode mode, rtx bval, 9024 rtx rval, rtx mem, rtx model_rtx) 9025{ 9026 rtx (*gen) (rtx, rtx, rtx, rtx); 9027 9028 switch (mode) 9029 { 9030 case QImode: gen = gen_aarch64_store_exclusiveqi; break; 9031 case HImode: gen = gen_aarch64_store_exclusivehi; break; 9032 case SImode: gen = gen_aarch64_store_exclusivesi; break; 9033 case DImode: gen = gen_aarch64_store_exclusivedi; break; 9034 default: 9035 gcc_unreachable (); 9036 } 9037 9038 emit_insn (gen (bval, rval, mem, model_rtx)); 9039} 9040 9041/* Mark the previous jump instruction as unlikely. */ 9042 9043static void 9044aarch64_emit_unlikely_jump (rtx insn) 9045{ 9046 int very_unlikely = REG_BR_PROB_BASE / 100 - 1; 9047 9048 insn = emit_jump_insn (insn); 9049 add_int_reg_note (insn, REG_BR_PROB, very_unlikely); 9050} 9051 9052/* Expand a compare and swap pattern. */ 9053 9054void 9055aarch64_expand_compare_and_swap (rtx operands[]) 9056{ 9057 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; 9058 machine_mode mode, cmp_mode; 9059 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); 9060 9061 bval = operands[0]; 9062 rval = operands[1]; 9063 mem = operands[2]; 9064 oldval = operands[3]; 9065 newval = operands[4]; 9066 is_weak = operands[5]; 9067 mod_s = operands[6]; 9068 mod_f = operands[7]; 9069 mode = GET_MODE (mem); 9070 cmp_mode = mode; 9071 9072 /* Normally the succ memory model must be stronger than fail, but in the 9073 unlikely event of fail being ACQUIRE and succ being RELEASE we need to 9074 promote succ to ACQ_REL so that we don't lose the acquire semantics. */ 9075 9076 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) 9077 && is_mm_release (memmodel_from_int (INTVAL (mod_s)))) 9078 mod_s = GEN_INT (MEMMODEL_ACQ_REL); 9079 9080 switch (mode) 9081 { 9082 case QImode: 9083 case HImode: 9084 /* For short modes, we're going to perform the comparison in SImode, 9085 so do the zero-extension now. */ 9086 cmp_mode = SImode; 9087 rval = gen_reg_rtx (SImode); 9088 oldval = convert_modes (SImode, mode, oldval, true); 9089 /* Fall through. */ 9090 9091 case SImode: 9092 case DImode: 9093 /* Force the value into a register if needed. 
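The loop compares with SUBS, so OLDVAL may stay as an immediate only when it is a valid arithmetic operand.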
*/ 9094 if (!aarch64_plus_operand (oldval, mode)) 9095 oldval = force_reg (cmp_mode, oldval); 9096 break; 9097 9098 default: 9099 gcc_unreachable (); 9100 } 9101 9102 switch (mode) 9103 { 9104 case QImode: gen = gen_atomic_compare_and_swapqi_1; break; 9105 case HImode: gen = gen_atomic_compare_and_swaphi_1; break; 9106 case SImode: gen = gen_atomic_compare_and_swapsi_1; break; 9107 case DImode: gen = gen_atomic_compare_and_swapdi_1; break; 9108 default: 9109 gcc_unreachable (); 9110 } 9111 9112 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); 9113 9114 if (mode == QImode || mode == HImode) 9115 emit_move_insn (operands[1], gen_lowpart (mode, rval)); 9116 9117 x = gen_rtx_REG (CCmode, CC_REGNUM); 9118 x = gen_rtx_EQ (SImode, x, const0_rtx); 9119 emit_insn (gen_rtx_SET (VOIDmode, bval, x)); 9120} 9121 9122/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a 9123 sequence implementing an atomic operation. */ 9124 9125static void 9126aarch64_emit_post_barrier (enum memmodel model) 9127{ 9128 const enum memmodel base_model = memmodel_base (model); 9129 9130 if (is_mm_sync (model) 9131 && (base_model == MEMMODEL_ACQUIRE 9132 || base_model == MEMMODEL_ACQ_REL 9133 || base_model == MEMMODEL_SEQ_CST)) 9134 { 9135 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST))); 9136 } 9137} 9138 9139/* Split a compare and swap pattern. */ 9140 9141void 9142aarch64_split_compare_and_swap (rtx operands[]) 9143{ 9144 rtx rval, mem, oldval, newval, scratch; 9145 machine_mode mode; 9146 bool is_weak; 9147 rtx_code_label *label1, *label2; 9148 rtx x, cond; 9149 enum memmodel model; 9150 rtx model_rtx; 9151 9152 rval = operands[0]; 9153 mem = operands[1]; 9154 oldval = operands[2]; 9155 newval = operands[3]; 9156 is_weak = (operands[4] != const0_rtx); 9157 model_rtx = operands[5]; 9158 scratch = operands[7]; 9159 mode = GET_MODE (mem); 9160 model = memmodel_from_int (INTVAL (model_rtx)); 9161 9162 label1 = NULL; 9163 if (!is_weak) 9164 { 9165 label1 = gen_label_rtx (); 9166 emit_label (label1); 9167 } 9168 label2 = gen_label_rtx (); 9169 9170 /* The initial load can be relaxed for a __sync operation since a final 9171 barrier will be emitted to stop code hoisting. */ 9172 if (is_mm_sync (model)) 9173 aarch64_emit_load_exclusive (mode, rval, mem, 9174 GEN_INT (MEMMODEL_RELAXED)); 9175 else 9176 aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); 9177 9178 cond = aarch64_gen_compare_reg (NE, rval, oldval); 9179 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 9180 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 9181 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); 9182 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 9183 9184 aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); 9185 9186 if (!is_weak) 9187 { 9188 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx); 9189 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 9190 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); 9191 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 9192 } 9193 else 9194 { 9195 cond = gen_rtx_REG (CCmode, CC_REGNUM); 9196 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); 9197 emit_insn (gen_rtx_SET (VOIDmode, cond, x)); 9198 } 9199 9200 emit_label (label2); 9201 9202 /* Emit any final barrier needed for a __sync operation. */ 9203 if (is_mm_sync (model)) 9204 aarch64_emit_post_barrier (model); 9205} 9206 9207/* Split an atomic operation. 
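The expansion is the usual load-exclusive / compute / store-exclusive sequence, branching back until the store-exclusive succeeds.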
*/ 9208 9209void 9210aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, 9211 rtx value, rtx model_rtx, rtx cond) 9212{ 9213 machine_mode mode = GET_MODE (mem); 9214 machine_mode wmode = (mode == DImode ? DImode : SImode); 9215 const enum memmodel model = memmodel_from_int (INTVAL (model_rtx)); 9216 const bool is_sync = is_mm_sync (model); 9217 rtx_code_label *label; 9218 rtx x; 9219 9220 label = gen_label_rtx (); 9221 emit_label (label); 9222 9223 if (new_out) 9224 new_out = gen_lowpart (wmode, new_out); 9225 if (old_out) 9226 old_out = gen_lowpart (wmode, old_out); 9227 else 9228 old_out = new_out; 9229 value = simplify_gen_subreg (wmode, value, mode, 0); 9230 9231 /* The initial load can be relaxed for a __sync operation since a final 9232 barrier will be emitted to stop code hoisting. */ 9233 if (is_sync) 9234 aarch64_emit_load_exclusive (mode, old_out, mem, 9235 GEN_INT (MEMMODEL_RELAXED)); 9236 else 9237 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx); 9238 9239 switch (code) 9240 { 9241 case SET: 9242 new_out = value; 9243 break; 9244 9245 case NOT: 9246 x = gen_rtx_AND (wmode, old_out, value); 9247 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 9248 x = gen_rtx_NOT (wmode, new_out); 9249 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 9250 break; 9251 9252 case MINUS: 9253 if (CONST_INT_P (value)) 9254 { 9255 value = GEN_INT (-INTVAL (value)); 9256 code = PLUS; 9257 } 9258 /* Fall through. */ 9259 9260 default: 9261 x = gen_rtx_fmt_ee (code, wmode, old_out, value); 9262 emit_insn (gen_rtx_SET (VOIDmode, new_out, x)); 9263 break; 9264 } 9265 9266 aarch64_emit_store_exclusive (mode, cond, mem, 9267 gen_lowpart (mode, new_out), model_rtx); 9268 9269 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 9270 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, 9271 gen_rtx_LABEL_REF (Pmode, label), pc_rtx); 9272 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x)); 9273 9274 /* Emit any final barrier needed for a __sync operation. */ 9275 if (is_sync) 9276 aarch64_emit_post_barrier (model); 9277} 9278 9279static void 9280aarch64_print_extension (void) 9281{ 9282 const struct aarch64_option_extension *opt = NULL; 9283 9284 for (opt = all_extensions; opt->name != NULL; opt++) 9285 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on) 9286 asm_fprintf (asm_out_file, "+%s", opt->name); 9287 9288 asm_fprintf (asm_out_file, "\n"); 9289} 9290 9291static void 9292aarch64_start_file (void) 9293{ 9294 if (selected_arch) 9295 { 9296 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name); 9297 aarch64_print_extension (); 9298 } 9299 else if (selected_cpu) 9300 { 9301 const char *truncated_name 9302 = aarch64_rewrite_selected_cpu (selected_cpu->name); 9303 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name); 9304 aarch64_print_extension (); 9305 } 9306 default_file_start (); 9307} 9308 9309/* Target hook for c_mode_for_suffix. */ 9310static machine_mode 9311aarch64_c_mode_for_suffix (char suffix) 9312{ 9313 if (suffix == 'q') 9314 return TFmode; 9315 9316 return VOIDmode; 9317} 9318 9319/* We can only represent floating point constants which will fit in 9320 "quarter-precision" values. These values are characterised by 9321 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given 9322 by: 9323 9324 (-1)^s * (n/16) * 2^r 9325 9326 Where: 9327 's' is the sign bit. 9328 'n' is an integer in the range 16 <= n <= 31. 9329 'r' is an integer in the range -3 <= r <= 4.
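As a worked example, 0.25 is representable with n = 16, r = -2, and 31.0 with n = 31, r = 4; by contrast 0.1 has no exact (n, r) pair, and 0.0 is excluded because n is at least 16.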
*/ 9330 9331/* Return true iff X can be represented by a quarter-precision 9332 floating point immediate operand. Note, we cannot represent 0.0. */ 9333bool 9334aarch64_float_const_representable_p (rtx x) 9335{ 9336 /* This represents our current view of how many bits 9337 make up the mantissa. */ 9338 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; 9339 int exponent; 9340 unsigned HOST_WIDE_INT mantissa, mask; 9341 REAL_VALUE_TYPE r, m; 9342 bool fail; 9343 9344 if (!CONST_DOUBLE_P (x)) 9345 return false; 9346 9347 if (GET_MODE (x) == VOIDmode) 9348 return false; 9349 9350 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 9351 9352 /* We cannot represent infinities, NaNs or +/-zero. We won't 9353 know if we have +zero until we analyse the mantissa, but we 9354 can reject the other invalid values. */ 9355 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) 9356 || REAL_VALUE_MINUS_ZERO (r)) 9357 return false; 9358 9359 /* Extract exponent. */ 9360 r = real_value_abs (&r); 9361 exponent = REAL_EXP (&r); 9362 9363 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the 9364 highest (sign) bit, with a fixed binary point at bit point_pos. 9365 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part. 9366 WARNING: If we ever have a representation using more than 2 * H_W_I - 1 9367 bits for the mantissa, this can fail (low bits will be lost). */ 9368 real_ldexp (&m, &r, point_pos - exponent); 9369 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); 9370 9371 /* If the low part of the mantissa has bits set we cannot represent 9372 the value. */ 9373 if (w.elt (0) != 0) 9374 return false; 9375 /* We have rejected the lower HOST_WIDE_INT, so update our 9376 understanding of how many bits lie in the mantissa and 9377 look only at the high HOST_WIDE_INT. */ 9378 mantissa = w.elt (1); 9379 point_pos -= HOST_BITS_PER_WIDE_INT; 9380 9381 /* We can only represent values with a mantissa of the form 1.xxxx. */ 9382 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; 9383 if ((mantissa & mask) != 0) 9384 return false; 9385 9386 /* Having filtered unrepresentable values, we may now remove all 9387 but the highest 5 bits. */ 9388 mantissa >>= point_pos - 5; 9389 9390 /* We cannot represent the value 0.0, so reject it. This is handled 9391 elsewhere. */ 9392 if (mantissa == 0) 9393 return false; 9394 9395 /* Then, as bit 4 is always set, we can mask it off, leaving 9396 the mantissa in the range [0, 15]. */ 9397 mantissa &= ~(1 << 4); 9398 gcc_assert (mantissa <= 15); 9399 9400 /* GCC internally does not use IEEE754-like encoding (where normalized 9401 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c). 9402 Our mantissa values are shifted 4 places to the left relative to 9403 normalized IEEE754 so we must modify the exponent returned by REAL_EXP 9404 by 5 places to correct for GCC's representation. */ 9405 exponent = 5 - exponent; 9406 9407 return (exponent >= 0 && exponent <= 7); 9408} 9409 9410char * 9411aarch64_output_simd_mov_immediate (rtx const_vector, 9412 machine_mode mode, 9413 unsigned width) 9414{ 9415 bool is_valid; 9416 static char templ[40]; 9417 const char *mnemonic; 9418 const char *shift_op; 9419 unsigned int lane_count = 0; 9420 char element_char; 9421 9422 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; 9423 9424 /* This will return true to show const_vector is legal for use as either 9425 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will 9426 also update INFO to show how the immediate should be generated.
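For instance, a V4SI constant with every element equal to 0x100 would produce the template "movi\t%0.4s, 0x1, lsl 8" (an illustrative value, not taken from the sources).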
*/ 9427 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info); 9428 gcc_assert (is_valid); 9429 9430 element_char = sizetochar (info.element_width); 9431 lane_count = width / info.element_width; 9432 9433 mode = GET_MODE_INNER (mode); 9434 if (mode == SFmode || mode == DFmode) 9435 { 9436 gcc_assert (info.shift == 0 && ! info.mvn); 9437 if (aarch64_float_const_zero_rtx_p (info.value)) 9438 info.value = GEN_INT (0); 9439 else 9440 { 9441#define buf_size 20 9442 REAL_VALUE_TYPE r; 9443 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value); 9444 char float_buf[buf_size] = {'\0'}; 9445 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode); 9446#undef buf_size 9447 9448 if (lane_count == 1) 9449 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); 9450 else 9451 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s", 9452 lane_count, element_char, float_buf); 9453 return templ; 9454 } 9455 } 9456 9457 mnemonic = info.mvn ? "mvni" : "movi"; 9458 shift_op = info.msl ? "msl" : "lsl"; 9459 9460 if (lane_count == 1) 9461 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, 9462 mnemonic, UINTVAL (info.value)); 9463 else if (info.shift) 9464 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX 9465 ", %s %d", mnemonic, lane_count, element_char, 9466 UINTVAL (info.value), shift_op, info.shift); 9467 else 9468 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX, 9469 mnemonic, lane_count, element_char, UINTVAL (info.value)); 9470 return templ; 9471} 9472 9473char* 9474aarch64_output_scalar_simd_mov_immediate (rtx immediate, 9475 machine_mode mode) 9476{ 9477 machine_mode vmode; 9478 9479 gcc_assert (!VECTOR_MODE_P (mode)); 9480 vmode = aarch64_simd_container_mode (mode, 64); 9481 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate)); 9482 return aarch64_output_simd_mov_immediate (v_op, vmode, 64); 9483} 9484 9485/* Split operands into moves from op[1] + op[2] into op[0]. */ 9486 9487void 9488aarch64_split_combinev16qi (rtx operands[3]) 9489{ 9490 unsigned int dest = REGNO (operands[0]); 9491 unsigned int src1 = REGNO (operands[1]); 9492 unsigned int src2 = REGNO (operands[2]); 9493 machine_mode halfmode = GET_MODE (operands[1]); 9494 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode); 9495 rtx destlo, desthi; 9496 9497 gcc_assert (halfmode == V16QImode); 9498 9499 if (src1 == dest && src2 == dest + halfregs) 9500 { 9501 /* No-op move. Can't split to nothing; emit something. */ 9502 emit_note (NOTE_INSN_DELETED); 9503 return; 9504 } 9505 9506 /* Preserve register attributes for variable tracking. */ 9507 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0); 9508 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs, 9509 GET_MODE_SIZE (halfmode)); 9510 9511 /* Special case of reversed high/low parts. */ 9512 if (reg_overlap_mentioned_p (operands[2], destlo) 9513 && reg_overlap_mentioned_p (operands[1], desthi)) 9514 { 9515 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); 9516 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2])); 9517 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2])); 9518 } 9519 else if (!reg_overlap_mentioned_p (operands[2], destlo)) 9520 { 9521 /* Try to avoid unnecessary moves if part of the result 9522 is in the right place already. 
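The two moves below are ordered so that the first never clobbers a register the second still needs as a source.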
*/ 9523 if (src1 != dest) 9524 emit_move_insn (destlo, operands[1]); 9525 if (src2 != dest + halfregs) 9526 emit_move_insn (desthi, operands[2]); 9527 } 9528 else 9529 { 9530 if (src2 != dest + halfregs) 9531 emit_move_insn (desthi, operands[2]); 9532 if (src1 != dest) 9533 emit_move_insn (destlo, operands[1]); 9534 } 9535} 9536 9537/* vec_perm support. */ 9538 9539#define MAX_VECT_LEN 16 9540 9541struct expand_vec_perm_d 9542{ 9543 rtx target, op0, op1; 9544 unsigned char perm[MAX_VECT_LEN]; 9545 machine_mode vmode; 9546 unsigned char nelt; 9547 bool one_vector_p; 9548 bool testing_p; 9549}; 9550 9551/* Generate a variable permutation. */ 9552 9553static void 9554aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel) 9555{ 9556 machine_mode vmode = GET_MODE (target); 9557 bool one_vector_p = rtx_equal_p (op0, op1); 9558 9559 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode); 9560 gcc_checking_assert (GET_MODE (op0) == vmode); 9561 gcc_checking_assert (GET_MODE (op1) == vmode); 9562 gcc_checking_assert (GET_MODE (sel) == vmode); 9563 gcc_checking_assert (TARGET_SIMD); 9564 9565 if (one_vector_p) 9566 { 9567 if (vmode == V8QImode) 9568 { 9569 /* Expand the argument to a V16QI mode by duplicating it. */ 9570 rtx pair = gen_reg_rtx (V16QImode); 9571 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0)); 9572 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); 9573 } 9574 else 9575 { 9576 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel)); 9577 } 9578 } 9579 else 9580 { 9581 rtx pair; 9582 9583 if (vmode == V8QImode) 9584 { 9585 pair = gen_reg_rtx (V16QImode); 9586 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1)); 9587 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel)); 9588 } 9589 else 9590 { 9591 pair = gen_reg_rtx (OImode); 9592 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1)); 9593 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel)); 9594 } 9595 } 9596} 9597 9598void 9599aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) 9600{ 9601 machine_mode vmode = GET_MODE (target); 9602 unsigned int nelt = GET_MODE_NUNITS (vmode); 9603 bool one_vector_p = rtx_equal_p (op0, op1); 9604 rtx mask; 9605 9606 /* The TBL instruction does not use a modulo index, so we must take care 9607 of that ourselves. */ 9608 mask = aarch64_simd_gen_const_vector_dup (vmode, 9609 one_vector_p ? nelt - 1 : 2 * nelt - 1); 9610 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN); 9611 9612 /* For big-endian, we also need to reverse the index within the vector 9613 (but not which vector). */ 9614 if (BYTES_BIG_ENDIAN) 9615 { 9616 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */ 9617 if (!one_vector_p) 9618 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1); 9619 sel = expand_simple_binop (vmode, XOR, sel, mask, 9620 NULL, 0, OPTAB_LIB_WIDEN); 9621 } 9622 aarch64_expand_vec_perm_1 (target, op0, op1, sel); 9623} 9624 9625/* Recognize patterns suitable for the TRN instructions. */ 9626static bool 9627aarch64_evpc_trn (struct expand_vec_perm_d *d) 9628{ 9629 unsigned int i, odd, mask, nelt = d->nelt; 9630 rtx out, in0, in1, x; 9631 rtx (*gen) (rtx, rtx, rtx); 9632 machine_mode vmode = d->vmode; 9633 9634 if (GET_MODE_UNIT_SIZE (vmode) > 8) 9635 return false; 9636 9637 /* Note that these are little-endian tests. 9638 We correct for big-endian later. */ 9639 if (d->perm[0] == 0) 9640 odd = 0; 9641 else if (d->perm[0] == 1) 9642 odd = 1; 9643 else 9644 return false; 9645 mask = (d->one_vector_p ? 
nelt - 1 : 2 * nelt - 1); 9646 9647 for (i = 0; i < nelt; i += 2) 9648 { 9649 if (d->perm[i] != i + odd) 9650 return false; 9651 if (d->perm[i + 1] != ((i + nelt + odd) & mask)) 9652 return false; 9653 } 9654 9655 /* Success! */ 9656 if (d->testing_p) 9657 return true; 9658 9659 in0 = d->op0; 9660 in1 = d->op1; 9661 if (BYTES_BIG_ENDIAN) 9662 { 9663 x = in0, in0 = in1, in1 = x; 9664 odd = !odd; 9665 } 9666 out = d->target; 9667 9668 if (odd) 9669 { 9670 switch (vmode) 9671 { 9672 case V16QImode: gen = gen_aarch64_trn2v16qi; break; 9673 case V8QImode: gen = gen_aarch64_trn2v8qi; break; 9674 case V8HImode: gen = gen_aarch64_trn2v8hi; break; 9675 case V4HImode: gen = gen_aarch64_trn2v4hi; break; 9676 case V4SImode: gen = gen_aarch64_trn2v4si; break; 9677 case V2SImode: gen = gen_aarch64_trn2v2si; break; 9678 case V2DImode: gen = gen_aarch64_trn2v2di; break; 9679 case V4SFmode: gen = gen_aarch64_trn2v4sf; break; 9680 case V2SFmode: gen = gen_aarch64_trn2v2sf; break; 9681 case V2DFmode: gen = gen_aarch64_trn2v2df; break; 9682 default: 9683 return false; 9684 } 9685 } 9686 else 9687 { 9688 switch (vmode) 9689 { 9690 case V16QImode: gen = gen_aarch64_trn1v16qi; break; 9691 case V8QImode: gen = gen_aarch64_trn1v8qi; break; 9692 case V8HImode: gen = gen_aarch64_trn1v8hi; break; 9693 case V4HImode: gen = gen_aarch64_trn1v4hi; break; 9694 case V4SImode: gen = gen_aarch64_trn1v4si; break; 9695 case V2SImode: gen = gen_aarch64_trn1v2si; break; 9696 case V2DImode: gen = gen_aarch64_trn1v2di; break; 9697 case V4SFmode: gen = gen_aarch64_trn1v4sf; break; 9698 case V2SFmode: gen = gen_aarch64_trn1v2sf; break; 9699 case V2DFmode: gen = gen_aarch64_trn1v2df; break; 9700 default: 9701 return false; 9702 } 9703 } 9704 9705 emit_insn (gen (out, in0, in1)); 9706 return true; 9707} 9708 9709/* Recognize patterns suitable for the UZP instructions. */ 9710static bool 9711aarch64_evpc_uzp (struct expand_vec_perm_d *d) 9712{ 9713 unsigned int i, odd, mask, nelt = d->nelt; 9714 rtx out, in0, in1, x; 9715 rtx (*gen) (rtx, rtx, rtx); 9716 machine_mode vmode = d->vmode; 9717 9718 if (GET_MODE_UNIT_SIZE (vmode) > 8) 9719 return false; 9720 9721 /* Note that these are little-endian tests. 9722 We correct for big-endian later. */ 9723 if (d->perm[0] == 0) 9724 odd = 0; 9725 else if (d->perm[0] == 1) 9726 odd = 1; 9727 else 9728 return false; 9729 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); 9730 9731 for (i = 0; i < nelt; i++) 9732 { 9733 unsigned elt = (i * 2 + odd) & mask; 9734 if (d->perm[i] != elt) 9735 return false; 9736 } 9737 9738 /* Success! 
*/ 9739 if (d->testing_p) 9740 return true; 9741 9742 in0 = d->op0; 9743 in1 = d->op1; 9744 if (BYTES_BIG_ENDIAN) 9745 { 9746 x = in0, in0 = in1, in1 = x; 9747 odd = !odd; 9748 } 9749 out = d->target; 9750 9751 if (odd) 9752 { 9753 switch (vmode) 9754 { 9755 case V16QImode: gen = gen_aarch64_uzp2v16qi; break; 9756 case V8QImode: gen = gen_aarch64_uzp2v8qi; break; 9757 case V8HImode: gen = gen_aarch64_uzp2v8hi; break; 9758 case V4HImode: gen = gen_aarch64_uzp2v4hi; break; 9759 case V4SImode: gen = gen_aarch64_uzp2v4si; break; 9760 case V2SImode: gen = gen_aarch64_uzp2v2si; break; 9761 case V2DImode: gen = gen_aarch64_uzp2v2di; break; 9762 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; 9763 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; 9764 case V2DFmode: gen = gen_aarch64_uzp2v2df; break; 9765 default: 9766 return false; 9767 } 9768 } 9769 else 9770 { 9771 switch (vmode) 9772 { 9773 case V16QImode: gen = gen_aarch64_uzp1v16qi; break; 9774 case V8QImode: gen = gen_aarch64_uzp1v8qi; break; 9775 case V8HImode: gen = gen_aarch64_uzp1v8hi; break; 9776 case V4HImode: gen = gen_aarch64_uzp1v4hi; break; 9777 case V4SImode: gen = gen_aarch64_uzp1v4si; break; 9778 case V2SImode: gen = gen_aarch64_uzp1v2si; break; 9779 case V2DImode: gen = gen_aarch64_uzp1v2di; break; 9780 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; 9781 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; 9782 case V2DFmode: gen = gen_aarch64_uzp1v2df; break; 9783 default: 9784 return false; 9785 } 9786 } 9787 9788 emit_insn (gen (out, in0, in1)); 9789 return true; 9790} 9791 9792/* Recognize patterns suitable for the ZIP instructions. */ 9793static bool 9794aarch64_evpc_zip (struct expand_vec_perm_d *d) 9795{ 9796 unsigned int i, high, mask, nelt = d->nelt; 9797 rtx out, in0, in1, x; 9798 rtx (*gen) (rtx, rtx, rtx); 9799 machine_mode vmode = d->vmode; 9800 9801 if (GET_MODE_UNIT_SIZE (vmode) > 8) 9802 return false; 9803 9804 /* Note that these are little-endian tests. 9805 We correct for big-endian later. */ 9806 high = nelt / 2; 9807 if (d->perm[0] == high) 9808 /* Do Nothing. */ 9809 ; 9810 else if (d->perm[0] == 0) 9811 high = 0; 9812 else 9813 return false; 9814 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1); 9815 9816 for (i = 0; i < nelt / 2; i++) 9817 { 9818 unsigned elt = (i + high) & mask; 9819 if (d->perm[i * 2] != elt) 9820 return false; 9821 elt = (elt + nelt) & mask; 9822 if (d->perm[i * 2 + 1] != elt) 9823 return false; 9824 } 9825 9826 /* Success! 
*/ 9827 if (d->testing_p) 9828 return true; 9829 9830 in0 = d->op0; 9831 in1 = d->op1; 9832 if (BYTES_BIG_ENDIAN) 9833 { 9834 x = in0, in0 = in1, in1 = x; 9835 high = !high; 9836 } 9837 out = d->target; 9838 9839 if (high) 9840 { 9841 switch (vmode) 9842 { 9843 case V16QImode: gen = gen_aarch64_zip2v16qi; break; 9844 case V8QImode: gen = gen_aarch64_zip2v8qi; break; 9845 case V8HImode: gen = gen_aarch64_zip2v8hi; break; 9846 case V4HImode: gen = gen_aarch64_zip2v4hi; break; 9847 case V4SImode: gen = gen_aarch64_zip2v4si; break; 9848 case V2SImode: gen = gen_aarch64_zip2v2si; break; 9849 case V2DImode: gen = gen_aarch64_zip2v2di; break; 9850 case V4SFmode: gen = gen_aarch64_zip2v4sf; break; 9851 case V2SFmode: gen = gen_aarch64_zip2v2sf; break; 9852 case V2DFmode: gen = gen_aarch64_zip2v2df; break; 9853 default: 9854 return false; 9855 } 9856 } 9857 else 9858 { 9859 switch (vmode) 9860 { 9861 case V16QImode: gen = gen_aarch64_zip1v16qi; break; 9862 case V8QImode: gen = gen_aarch64_zip1v8qi; break; 9863 case V8HImode: gen = gen_aarch64_zip1v8hi; break; 9864 case V4HImode: gen = gen_aarch64_zip1v4hi; break; 9865 case V4SImode: gen = gen_aarch64_zip1v4si; break; 9866 case V2SImode: gen = gen_aarch64_zip1v2si; break; 9867 case V2DImode: gen = gen_aarch64_zip1v2di; break; 9868 case V4SFmode: gen = gen_aarch64_zip1v4sf; break; 9869 case V2SFmode: gen = gen_aarch64_zip1v2sf; break; 9870 case V2DFmode: gen = gen_aarch64_zip1v2df; break; 9871 default: 9872 return false; 9873 } 9874 } 9875 9876 emit_insn (gen (out, in0, in1)); 9877 return true; 9878} 9879 9880/* Recognize patterns for the EXT insn. */ 9881 9882static bool 9883aarch64_evpc_ext (struct expand_vec_perm_d *d) 9884{ 9885 unsigned int i, nelt = d->nelt; 9886 rtx (*gen) (rtx, rtx, rtx, rtx); 9887 rtx offset; 9888 9889 unsigned int location = d->perm[0]; /* Always < nelt. */ 9890 9891 /* Check if the extracted indices are increasing by one. */ 9892 for (i = 1; i < nelt; i++) 9893 { 9894 unsigned int required = location + i; 9895 if (d->one_vector_p) 9896 { 9897 /* We'll pass the same vector in twice, so allow indices to wrap. */ 9898 required &= (nelt - 1); 9899 } 9900 if (d->perm[i] != required) 9901 return false; 9902 } 9903 9904 switch (d->vmode) 9905 { 9906 case V16QImode: gen = gen_aarch64_extv16qi; break; 9907 case V8QImode: gen = gen_aarch64_extv8qi; break; 9908 case V4HImode: gen = gen_aarch64_extv4hi; break; 9909 case V8HImode: gen = gen_aarch64_extv8hi; break; 9910 case V2SImode: gen = gen_aarch64_extv2si; break; 9911 case V4SImode: gen = gen_aarch64_extv4si; break; 9912 case V2SFmode: gen = gen_aarch64_extv2sf; break; 9913 case V4SFmode: gen = gen_aarch64_extv4sf; break; 9914 case V2DImode: gen = gen_aarch64_extv2di; break; 9915 case V2DFmode: gen = gen_aarch64_extv2df; break; 9916 default: 9917 return false; 9918 } 9919 9920 /* Success! */ 9921 if (d->testing_p) 9922 return true; 9923 9924 /* The case where (location == 0) is a no-op for both big- and little-endian, 9925 and is removed by the mid-end at optimization levels -O1 and higher. */ 9926 9927 if (BYTES_BIG_ENDIAN && (location != 0)) 9928 { 9929 /* After setup, we want the high elements of the first vector (stored 9930 at the LSB end of the register), and the low elements of the second 9931 vector (stored at the MSB end of the register). So swap. */ 9932 std::swap (d->op0, d->op1); 9933 /* location != 0 (above), so safe to assume (nelt - location) < nelt. 
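The EXT immediate counts elements from the architecturally low end of the register, so the index must be recomputed after the swap.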
*/ 9934 location = nelt - location; 9935 } 9936 9937 offset = GEN_INT (location); 9938 emit_insn (gen (d->target, d->op0, d->op1, offset)); 9939 return true; 9940} 9941 9942/* Recognize patterns for the REV insns. */ 9943 9944static bool 9945aarch64_evpc_rev (struct expand_vec_perm_d *d) 9946{ 9947 unsigned int i, j, diff, nelt = d->nelt; 9948 rtx (*gen) (rtx, rtx); 9949 9950 if (!d->one_vector_p) 9951 return false; 9952 9953 diff = d->perm[0]; 9954 switch (diff) 9955 { 9956 case 7: 9957 switch (d->vmode) 9958 { 9959 case V16QImode: gen = gen_aarch64_rev64v16qi; break; 9960 case V8QImode: gen = gen_aarch64_rev64v8qi; break; 9961 default: 9962 return false; 9963 } 9964 break; 9965 case 3: 9966 switch (d->vmode) 9967 { 9968 case V16QImode: gen = gen_aarch64_rev32v16qi; break; 9969 case V8QImode: gen = gen_aarch64_rev32v8qi; break; 9970 case V8HImode: gen = gen_aarch64_rev64v8hi; break; 9971 case V4HImode: gen = gen_aarch64_rev64v4hi; break; 9972 default: 9973 return false; 9974 } 9975 break; 9976 case 1: 9977 switch (d->vmode) 9978 { 9979 case V16QImode: gen = gen_aarch64_rev16v16qi; break; 9980 case V8QImode: gen = gen_aarch64_rev16v8qi; break; 9981 case V8HImode: gen = gen_aarch64_rev32v8hi; break; 9982 case V4HImode: gen = gen_aarch64_rev32v4hi; break; 9983 case V4SImode: gen = gen_aarch64_rev64v4si; break; 9984 case V2SImode: gen = gen_aarch64_rev64v2si; break; 9985 case V4SFmode: gen = gen_aarch64_rev64v4sf; break; 9986 case V2SFmode: gen = gen_aarch64_rev64v2sf; break; 9987 default: 9988 return false; 9989 } 9990 break; 9991 default: 9992 return false; 9993 } 9994 9995 for (i = 0; i < nelt ; i += diff + 1) 9996 for (j = 0; j <= diff; j += 1) 9997 { 9998 /* This is guaranteed to be true as the value of diff 9999 is 7, 3, 1 and we should have enough elements in the 10000 queue to generate this. Getting a vector mask with a 10001 value of diff other than these values implies that 10002 something is wrong by the time we get here. */ 10003 gcc_assert (i + j < nelt); 10004 if (d->perm[i + j] != i + diff - j) 10005 return false; 10006 } 10007 10008 /* Success! */ 10009 if (d->testing_p) 10010 return true; 10011 10012 emit_insn (gen (d->target, d->op0)); 10013 return true; 10014} 10015 10016static bool 10017aarch64_evpc_dup (struct expand_vec_perm_d *d) 10018{ 10019 rtx (*gen) (rtx, rtx, rtx); 10020 rtx out = d->target; 10021 rtx in0; 10022 machine_mode vmode = d->vmode; 10023 unsigned int i, elt, nelt = d->nelt; 10024 rtx lane; 10025 10026 elt = d->perm[0]; 10027 for (i = 1; i < nelt; i++) 10028 { 10029 if (elt != d->perm[i]) 10030 return false; 10031 } 10032 10033 /* The generic preparation in aarch64_expand_vec_perm_const_1 10034 swaps the operand order and the permute indices if it finds 10035 d->perm[0] to be in the second operand. Thus, we can always 10036 use d->op0 and need not do any extra arithmetic to get the 10037 correct lane number. */ 10038 in0 = d->op0; 10039 lane = GEN_INT (elt); /* The pattern corrects for big-endian. 
 */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}

static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
         mode on NEON.  Reverse the index within each word but not the word
         itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
                                           : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
        d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
        return true;
      else if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

/* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
bool
aarch64_cannot_change_mode_class (machine_mode from,
                                  machine_mode to,
                                  enum reg_class rclass)
{
  /* We cannot allow word_mode subregs of full vector modes.
     Otherwise the middle-end will assume it's ok to store to
     (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
     of the 128-bit register.  However, after reload the subreg will
     be dropped leaving a plain DImode store.  See PR67609 for a more
     detailed discussion.  In all other cases, we want to be permissive
     and return false.  */
  return (reg_classes_intersect_p (FP_REGS, rclass)
          && GET_MODE_SIZE (to) == UNITS_PER_WORD
          && GET_MODE_SIZE (from) > UNITS_PER_WORD);
}

rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}

/* Implement MODES_TIEABLE_P.
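   Two modes are tieable when a value may be accessed in either mode
   without first being converted.  For illustration (this example is
   not from the original comment): under TARGET_SIMD both V4SImode and
   V2DFmode occupy the 128-bit Q registers, so a subreg between them
   needs no copy, and a vector element mode can likewise be tied to
   the "structure" mode (such as OImode) that contains it.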
 */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}

/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
                                    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
                                              machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}

/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.
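     For example, a 3-byte copy is emitted as one HImode (2-byte) move
     followed by one QImode (1-byte) move from the then-adjusted
     pointers.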
 */
  if (n < 4)
    {
      if (n >= 2)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
          n -= 2;
        }

      if (n == 1)
        aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
        {
          int move = n - 4;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
          n -= 16;
        }
      else
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
          n -= 8;
        }
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8 bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else
    {
      if (n == 3)
        {
          src = aarch64_move_pointer (src, -1);
          dst = aarch64_move_pointer (dst, -1);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      else
        {
          int move = n - 8;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
        }
    }

  return true;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}

static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.
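     (Illustrative reasoning, not from the original comment: building a
     64-bit chunk of the constant in a register typically needs a MOV
     plus up to three MOVKs before the STR, whereas move_by_pieces
     fetches the same chunk from the string's read-only image with a
     single LDR.)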
 */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE:
      return CC_DNEmode;

    case EQ:
      return CC_DEQmode;

    case LE:
      return CC_DLEmode;

    case LT:
      return CC_DLTmode;

    case GE:
      return CC_DGEmode;

    case GT:
      return CC_DGTmode;

    case LEU:
      return CC_DLEUmode;

    case LTU:
      return CC_DLTUmode;

    case GEU:
      return CC_DGEUmode;

    case GTU:
      return CC_DGTUmode;

    default:
      return CCmode;
    }
}

static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
                        int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}

static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
                       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn *) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
                                              : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
                                              : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn *) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next

/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if the target
   supports instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
}


/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns, i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
          && CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (XEXP (set_dest, 2))
          && INTVAL (XEXP (set_dest, 2)) == 16
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        {
          return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
    {

      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.
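         For illustration (this example is not from the original
         comment), the fused pair corresponds to assembly such as:

           adrp x1, sym
           add  x0, x1, :lo12:sym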
 */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
              && REG_P (XEXP (SET_SRC (curr_set), 0))
              && REGNO (XEXP (SET_SRC (curr_set), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
                              XEXP (SET_SRC (curr_set), 1)))
            return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
    {

      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
          && REG_P (XEXP (SET_DEST (prev_set), 0))
          && REG_P (XEXP (SET_DEST (curr_set), 0))
          && REGNO (XEXP (SET_DEST (prev_set), 0))
             == REGNO (XEXP (SET_DEST (curr_set), 0))
          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (SET_SRC (curr_set)))
        return true;

    }
  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
         or
         curr (ldr)  == (set (reg r1)
                             (zero_extend (mem
                                           (lo_sum (reg r0)
                                                   (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          rtx curr_src = SET_SRC (curr_set);

          if (GET_CODE (curr_src) == ZERO_EXTEND)
            curr_src = XEXP (curr_src, 0);

          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
              && REG_P (XEXP (XEXP (curr_src, 0), 0))
              && REGNO (XEXP (XEXP (curr_src, 0), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
                              XEXP (SET_SRC (prev_set), 0)))
            return true;
        }
    }

  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
         simple arithmetic; simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
          || prev_type == TYPE_ALUS_IMM
          || prev_type == TYPE_LOGICS_REG
          || prev_type == TYPE_LOGICS_IMM)
        return true;
    }

  return false;
}

/* If MEM is in the form of [base+offset], extract the two parts of
   the address and set them in BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.
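   For example, given (mem (plus (reg x0) (const_int 8))) this sets
   BASE to (reg x0) and OFFSET to (const_int 8); a bare (mem (reg x0))
   yields OFFSET (const_int 0).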
 */

bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};

/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them in BASE and OFFSET.  Return the
   scheduling fusion type of this INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.  For
   other instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.
   In the future, other kinds of instruction fusion can be supported by
   returning different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
                               int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.
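     Loads/stores sharing a fusion type and base register thus get the
     same FUSION_PRI; for example (illustrative), two SImode loads from
     [x1, 4] and [x1, 8] map to one key, and the offset-based PRI
     computed below then orders the smaller offset first.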
 */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
                                enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.
 */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
                                       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
                  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if the memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
          || offval_4 != (offval_2 + msize * 2)
          || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
          || reg_mentioned_p (reg_2, mem_2)
          || reg_mentioned_p (reg_3, mem_3))
        return false;

      /* In increasing order, the last load can clobber the address.
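         For instance (illustrative), a final load such as
         ldr x0, [x0, 24] at the end of an increasing sequence is still
         mergeable, since no later access needs the base register once
         it has been overwritten.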
 */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that the addresses of load/store instructions are in increasing
   order.  MODE is the mode of memory operands.  CODE is the rtl
   operator which should be applied to all memory operands; it's
   SIGN_EXTEND, ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
                             enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
                          plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.
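     Worked example (illustrative): for the SImode stores shown earlier,
     msize is 4, so stp_off_limit is 0x100; off_val 0x100 gives new_off 0
     and adj_off 0x100, i.e. the base is advanced by 0x100 and the two
     stp instructions use offsets 0 and 8.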
 */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[8],
                          plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
  t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
  t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  return true;
}

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.
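   For example (illustrative of AAPCS64 layout), a struct such as
   struct { char c; int : 4; } is aligned as if it contained an int,
   because the unnamed bitfield's declared type contributes its
   alignment requirement.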
 */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok


#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"