// SPDX-License-Identifier: GPL-3.0-or-later
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2022 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "ssa.h"
#include "tree-vectorizer.h"
#include "tree-ssa-propagate.h"
#include "intl.h"
#include "tm-constrs.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

extern tree rs6000_builtin_mask_for_load (void);
extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
extern tree rs6000_builtin_reciprocal (tree);

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) && !defined(POWERPC_NETBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Track use of r13 in 64bit AIX TLS.  */
static bool xcoff_tls_exec_model_detected = false;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,			/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};
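
/* Illustrative note (not part of the original source): the HWCAP names above
   are the strings that multi-versioned code tests at run time.  A minimal
   sketch of how this table is exercised, assuming a glibc new enough to
   provide the HWCAP/AT_PLATFORM words in the TCB:

     __attribute__ ((target_clones ("cpu=power9", "default")))
     int f (int x) { return x + 1; }

   or, testing a capability by hand:

     if (__builtin_cpu_supports ("arch_3_00"))
       use_power9_path ();

   The clone resolver prefers the highest ISA whose name from this table is
   reported as supported.  */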

/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.cc) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.cc, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or go if legitimate
   address.  We only need to worry about GPR, FPR, and Altivec registers here,
   along with an ANY field that is the OR of the 3 register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Register type masks based on the type, of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
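
/* Illustrative note (not part of the original source): the mask bits tested
   by the helpers above correspond to PowerPC addressing forms.  As a rough
   sketch, assuming an ISA 3.0 (power9) target:

     lwzu  r4,4(r3)      # update form        -> RELOAD_REG_PRE_INCDEC/PRE_MODIFY
     lfd   f1,8(r3)      # D-form reg+offset  -> RELOAD_REG_OFFSET
     lxvx  vs34,r3,r4    # X-form reg+reg     -> RELOAD_REG_INDEXED
     lxv   vs34,16(r3)   # DQ-form; the offset must be a multiple of 16
                         #                    -> RELOAD_REG_QUAD_OFFSET

   The actual contents of reg_addr[].addr_mask are filled in later, when the
   target options are processed.  */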

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
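
/* Illustrative note (an assumption, not taken from this file): predicates like
   the one above are referenced by name from the scheduling descriptions as the
   optional guard of a define_bypass, roughly

     (define_bypass 2 "some-producer-insn" "some-consumer-store"
		    "rs6000_store_data_bypass_p")

   where the reservation names here are placeholders; the real entries live in
   the power*.md files.  */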

/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER10 processors.  */
static const
struct processor_costs power10_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (2),	/* fp */
  COSTS_N_INSNS (2),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (13),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  16,			/* prefetch streams */
  COSTS_N_INSNS (2),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
bool easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;



/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
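
/* Illustrative note (not from the original file): these are the attribute
   spellings user code can apply directly, for example

     extern void far_func (void) __attribute__ ((longcall));
     struct s { char c; int i; } __attribute__ ((ms_struct));

   while the "altivec" attribute is the internal form behind the AltiVec
   vector/pixel/bool keywords expanded in rs6000-c.cc.  */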

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  rs6000_print_patchable_function_entry

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
#define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_CREATE_COSTS
#define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  rs6000_libgcc_floating_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1670 1671#undef TARGET_PREDICT_DOLOOP_P 1672#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p 1673 1674#undef TARGET_HAVE_COUNT_REG_DECR_P 1675#define TARGET_HAVE_COUNT_REG_DECR_P true 1676 1677/* 1000000000 is infinite cost in IVOPTs. */ 1678#undef TARGET_DOLOOP_COST_FOR_GENERIC 1679#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000 1680 1681#undef TARGET_DOLOOP_COST_FOR_ADDRESS 1682#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000 1683 1684#undef TARGET_PREFERRED_DOLOOP_MODE 1685#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode 1686 1687#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 1688#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv 1689 1690#undef TARGET_LIBGCC_CMP_RETURN_MODE 1691#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode 1692#undef TARGET_LIBGCC_SHIFT_COUNT_MODE 1693#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode 1694#undef TARGET_UNWIND_WORD_MODE 1695#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode 1696 1697#undef TARGET_OFFLOAD_OPTIONS 1698#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options 1699 1700#undef TARGET_C_MODE_FOR_SUFFIX 1701#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix 1702 1703#undef TARGET_INVALID_BINARY_OP 1704#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op 1705 1706#undef TARGET_OPTAB_SUPPORTED_P 1707#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p 1708 1709#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1710#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 1711 1712#undef TARGET_COMPARE_VERSION_PRIORITY 1713#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority 1714 1715#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY 1716#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ 1717 rs6000_generate_version_dispatcher_body 1718 1719#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER 1720#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ 1721 rs6000_get_function_versions_dispatcher 1722 1723#undef TARGET_OPTION_FUNCTION_VERSIONS 1724#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions 1725 1726#undef TARGET_HARD_REGNO_NREGS 1727#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook 1728#undef TARGET_HARD_REGNO_MODE_OK 1729#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok 1730 1731#undef TARGET_MODES_TIEABLE_P 1732#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p 1733 1734#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED 1735#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ 1736 rs6000_hard_regno_call_part_clobbered 1737 1738#undef TARGET_SLOW_UNALIGNED_ACCESS 1739#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access 1740 1741#undef TARGET_CAN_CHANGE_MODE_CLASS 1742#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class 1743 1744#undef TARGET_CONSTANT_ALIGNMENT 1745#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment 1746 1747#undef TARGET_STARTING_FRAME_OFFSET 1748#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset 1749 1750#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P 1751#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true 1752 1753#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME 1754#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name 1755 1756#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P 1757#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \ 1758 rs6000_cannot_substitute_mem_equiv_p 1759 1760#undef TARGET_INVALID_CONVERSION 1761#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion 1762 1763#undef TARGET_NEED_IPA_FN_TARGET_INFO 1764#define 
TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info

#undef TARGET_UPDATE_IPA_FN_TARGET_INFO
#define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info


/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "rs6000-cpus.def"
#undef RS6000_CPU
};

/* Look up a processor name for -mcpu=xxx and -mtune=xxx.  Return -1 if the
   name is invalid.  */

static int
rs6000_cpu_name_lookup (const char *name)
{
  size_t i;

  if (name != NULL)
    {
      for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
	if (! strcmp (name, processor_target_table[i].name))
	  return (int)i;
    }

  return -1;
}


/* Return the number of consecutive hard regs needed starting at reg REGNO
   to hold something of mode MODE.
   This is ordinarily the length in words of a value of mode MODE
   but can be less for certain modes in special long registers.

   POWER and PowerPC GPRs hold 32 bits worth;
   PowerPC64 GPRs and FPRs hold 64 bits worth.  */

static int
rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
{
  unsigned HOST_WIDE_INT reg_size;

  /* 128-bit floating point usually takes 2 registers, unless it is IEEE
     128-bit floating point that can go in vector registers, which has VSX
     memory addressing.  */
  if (FP_REGNO_P (regno))
    reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
		? UNITS_PER_VSX_WORD
		: UNITS_PER_FP_WORD);

  else if (ALTIVEC_REGNO_P (regno))
    reg_size = UNITS_PER_ALTIVEC_WORD;

  else
    reg_size = UNITS_PER_WORD;

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}

/* Value is 1 if hard register REGNO can hold a value of machine-mode
   MODE.  */
static int
rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
{
  int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;

  if (COMPLEX_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Vector pair modes need even/odd VSX register pairs.  Only allow vector
     registers.  */
  if (mode == OOmode)
    return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);

  /* MMA accumulator modes need FPR registers divisible by 4.  */
  if (mode == XOmode)
    return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);

  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
     register combinations, and we use PTImode where we need to deal with quad
     word memory operations.  Don't allow quad words in the argument or frame
     pointer registers, just registers 0..31.  */
  if (mode == PTImode)
    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
	    && ((regno & 1) == 0));

  /* The VSX registers that overlap the FPR registers are wider than the FPRs
     of non-VSX implementations.  Don't allow an item to be split between a FP
     register and an Altivec register.  Allow TImode in all VSX registers if
     the user asked for it.
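     For example, with -mvsx a V2DFmode or TImode value is accepted in either
     half of the VSX register file, but the FP_REGNO_P/ALTIVEC_REGNO_P checks
     on LAST_REGNO below still keep the value entirely within the FPRs or
     entirely within the Altivec registers, and scalars smaller than 16 bytes
     are only allowed into the Altivec half once
     reg_addr[mode].scalar_in_vmx_p says the ISA can put them there.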
  */
  if (TARGET_VSX && VSX_REGNO_P (regno)
      && (VECTOR_MEM_VSX_P (mode)
	  || VECTOR_ALIGNMENT_P (mode)
	  || reg_addr[mode].scalar_in_vmx_p
	  || mode == TImode
	  || (TARGET_VADDUQM && mode == V1TImode)))
    {
      if (FP_REGNO_P (regno))
	return FP_REGNO_P (last_regno);

      if (ALTIVEC_REGNO_P (regno))
	{
	  if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
	    return 0;

	  return ALTIVEC_REGNO_P (last_regno);
	}
    }

  /* The GPRs can hold any mode, but values bigger than one register
     cannot go past R31.  */
  if (INT_REGNO_P (regno))
    return INT_REGNO_P (last_regno);

  /* The float registers (except for VSX vector modes) can only hold floating
     modes and DImode.  */
  if (FP_REGNO_P (regno))
    {
      if (VECTOR_ALIGNMENT_P (mode))
	return false;

      if (SCALAR_FLOAT_MODE_P (mode)
	  && (mode != TDmode || (regno % 2) == 0)
	  && FP_REGNO_P (last_regno))
	return 1;

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
	    return 1;

	  if (TARGET_P8_VECTOR && (mode == SImode))
	    return 1;

	  if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
	    return 1;
	}

      return 0;
    }

  /* The CR registers can only hold CC modes.  */
  if (CR_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;

  if (CA_REGNO_P (regno))
    return mode == Pmode || mode == SImode;

  /* AltiVec vector modes can only go in AltiVec registers.  */
  if (ALTIVEC_REGNO_P (regno))
    return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
	    || mode == V1TImode);

  /* We cannot put non-VSX TImode or PTImode anywhere except the general
     registers, and the value must be able to fit within the register set.  */

  return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}

/* Implement TARGET_HARD_REGNO_NREGS.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}

/* Implement TARGET_MODES_TIEABLE_P.

   PTImode cannot tie with other modes because PTImode is restricted to even
   GPR registers, and TImode can go in any GPR as well as VSX registers (PR
   57744).

   Similarly, don't allow OOmode (vector pair, restricted to even VSX
   registers) or XOmode (vector quad, restricted to FPR registers divisible
   by 4) to tie with other modes.

   Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
   128-bit floating point on VSX systems ties with other vectors.
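   For example, with VSX enabled V4SImode and V2DFmode tie with each other
   (and, as noted above, so does IEEE 128-bit floating point), while TImode
   never ties with PTImode, and OOmode/XOmode tie only with themselves.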
*/ 1963 1964static bool 1965rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) 1966{ 1967 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode 1968 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) 1969 return mode1 == mode2; 1970 1971 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) 1972 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2); 1973 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2)) 1974 return false; 1975 1976 if (SCALAR_FLOAT_MODE_P (mode1)) 1977 return SCALAR_FLOAT_MODE_P (mode2); 1978 if (SCALAR_FLOAT_MODE_P (mode2)) 1979 return false; 1980 1981 if (GET_MODE_CLASS (mode1) == MODE_CC) 1982 return GET_MODE_CLASS (mode2) == MODE_CC; 1983 if (GET_MODE_CLASS (mode2) == MODE_CC) 1984 return false; 1985 1986 return true; 1987} 1988 1989/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */ 1990 1991static bool 1992rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno, 1993 machine_mode mode) 1994{ 1995 if (TARGET_32BIT 1996 && TARGET_POWERPC64 1997 && GET_MODE_SIZE (mode) > 4 1998 && INT_REGNO_P (regno)) 1999 return true; 2000 2001 if (TARGET_VSX 2002 && FP_REGNO_P (regno) 2003 && GET_MODE_SIZE (mode) > 8 2004 && !FLOAT128_2REG_P (mode)) 2005 return true; 2006 2007 return false; 2008} 2009 2010/* Print interesting facts about registers. */ 2011static void 2012rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) 2013{ 2014 int r, m; 2015 2016 for (r = first_regno; r <= last_regno; ++r) 2017 { 2018 const char *comma = ""; 2019 int len; 2020 2021 if (first_regno == last_regno) 2022 fprintf (stderr, "%s:\t", reg_name); 2023 else 2024 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno); 2025 2026 len = 8; 2027 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2028 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r]) 2029 { 2030 if (len > 70) 2031 { 2032 fprintf (stderr, ",\n\t"); 2033 len = 8; 2034 comma = ""; 2035 } 2036 2037 if (rs6000_hard_regno_nregs[m][r] > 1) 2038 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m), 2039 rs6000_hard_regno_nregs[m][r]); 2040 else 2041 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m)); 2042 2043 comma = ", "; 2044 } 2045 2046 if (call_used_or_fixed_reg_p (r)) 2047 { 2048 if (len > 70) 2049 { 2050 fprintf (stderr, ",\n\t"); 2051 len = 8; 2052 comma = ""; 2053 } 2054 2055 len += fprintf (stderr, "%s%s", comma, "call-used"); 2056 comma = ", "; 2057 } 2058 2059 if (fixed_regs[r]) 2060 { 2061 if (len > 70) 2062 { 2063 fprintf (stderr, ",\n\t"); 2064 len = 8; 2065 comma = ""; 2066 } 2067 2068 len += fprintf (stderr, "%s%s", comma, "fixed"); 2069 comma = ", "; 2070 } 2071 2072 if (len > 70) 2073 { 2074 fprintf (stderr, ",\n\t"); 2075 comma = ""; 2076 } 2077 2078 len += fprintf (stderr, "%sreg-class = %s", comma, 2079 reg_class_names[(int)rs6000_regno_regclass[r]]); 2080 comma = ", "; 2081 2082 if (len > 70) 2083 { 2084 fprintf (stderr, ",\n\t"); 2085 comma = ""; 2086 } 2087 2088 fprintf (stderr, "%sregno = %d\n", comma, r); 2089 } 2090} 2091 2092static const char * 2093rs6000_debug_vector_unit (enum rs6000_vector v) 2094{ 2095 const char *ret; 2096 2097 switch (v) 2098 { 2099 case VECTOR_NONE: ret = "none"; break; 2100 case VECTOR_ALTIVEC: ret = "altivec"; break; 2101 case VECTOR_VSX: ret = "vsx"; break; 2102 case VECTOR_P8_VECTOR: ret = "p8_vector"; break; 2103 default: ret = "unknown"; break; 2104 } 2105 2106 return ret; 2107} 2108 2109/* Inner function printing just the address mask for a particular reload 2110 register class. 
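   Each capability is shown as one character: 'v' the mask is valid for the
   register class, 'm' the mode needs multiple registers, 'i' indexed
   (REG+REG) addressing, 'O' quad-aligned REG+OFFSET addressing or 'o' plain
   REG+OFFSET addressing, two '+' columns for PRE_INC/PRE_DEC and PRE_MODIFY,
   and '&' for Altivec-style (REG & -16) addressing.  With KEEP_SPACES,
   missing capabilities are padded with blanks so the columns line up.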
*/ 2111DEBUG_FUNCTION char * 2112rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) 2113{ 2114 static char ret[8]; 2115 char *p = ret; 2116 2117 if ((mask & RELOAD_REG_VALID) != 0) 2118 *p++ = 'v'; 2119 else if (keep_spaces) 2120 *p++ = ' '; 2121 2122 if ((mask & RELOAD_REG_MULTIPLE) != 0) 2123 *p++ = 'm'; 2124 else if (keep_spaces) 2125 *p++ = ' '; 2126 2127 if ((mask & RELOAD_REG_INDEXED) != 0) 2128 *p++ = 'i'; 2129 else if (keep_spaces) 2130 *p++ = ' '; 2131 2132 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) 2133 *p++ = 'O'; 2134 else if ((mask & RELOAD_REG_OFFSET) != 0) 2135 *p++ = 'o'; 2136 else if (keep_spaces) 2137 *p++ = ' '; 2138 2139 if ((mask & RELOAD_REG_PRE_INCDEC) != 0) 2140 *p++ = '+'; 2141 else if (keep_spaces) 2142 *p++ = ' '; 2143 2144 if ((mask & RELOAD_REG_PRE_MODIFY) != 0) 2145 *p++ = '+'; 2146 else if (keep_spaces) 2147 *p++ = ' '; 2148 2149 if ((mask & RELOAD_REG_AND_M16) != 0) 2150 *p++ = '&'; 2151 else if (keep_spaces) 2152 *p++ = ' '; 2153 2154 *p = '\0'; 2155 2156 return ret; 2157} 2158 2159/* Print the address masks in a human readble fashion. */ 2160DEBUG_FUNCTION void 2161rs6000_debug_print_mode (ssize_t m) 2162{ 2163 ssize_t rc; 2164 int spaces = 0; 2165 2166 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); 2167 for (rc = 0; rc < N_RELOAD_REG; rc++) 2168 fprintf (stderr, " %s: %s", reload_reg_map[rc].name, 2169 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); 2170 2171 if ((reg_addr[m].reload_store != CODE_FOR_nothing) 2172 || (reg_addr[m].reload_load != CODE_FOR_nothing)) 2173 { 2174 fprintf (stderr, "%*s Reload=%c%c", spaces, "", 2175 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', 2176 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); 2177 spaces = 0; 2178 } 2179 else 2180 spaces += strlen (" Reload=sl"); 2181 2182 if (reg_addr[m].scalar_in_vmx_p) 2183 { 2184 fprintf (stderr, "%*s Upper=y", spaces, ""); 2185 spaces = 0; 2186 } 2187 else 2188 spaces += strlen (" Upper=y"); 2189 2190 if (rs6000_vector_unit[m] != VECTOR_NONE 2191 || rs6000_vector_mem[m] != VECTOR_NONE) 2192 { 2193 fprintf (stderr, "%*s vector: arith=%-10s mem=%s", 2194 spaces, "", 2195 rs6000_debug_vector_unit (rs6000_vector_unit[m]), 2196 rs6000_debug_vector_unit (rs6000_vector_mem[m])); 2197 } 2198 2199 fputs ("\n", stderr); 2200} 2201 2202#define DEBUG_FMT_ID "%-32s= " 2203#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" 2204#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " 2205#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n" 2206 2207/* Print various interesting information with -mdebug=reg. */ 2208static void 2209rs6000_debug_reg_global (void) 2210{ 2211 static const char *const tf[2] = { "false", "true" }; 2212 const char *nl = (const char *)0; 2213 int m; 2214 size_t m1, m2, v; 2215 char costly_num[20]; 2216 char nop_num[20]; 2217 char flags_buffer[40]; 2218 const char *costly_str; 2219 const char *nop_str; 2220 const char *trace_str; 2221 const char *abi_str; 2222 const char *cmodel_str; 2223 struct cl_target_option cl_opts; 2224 2225 /* Modes we want tieable information on. 
*/ 2226 static const machine_mode print_tieable_modes[] = { 2227 QImode, 2228 HImode, 2229 SImode, 2230 DImode, 2231 TImode, 2232 PTImode, 2233 SFmode, 2234 DFmode, 2235 TFmode, 2236 IFmode, 2237 KFmode, 2238 SDmode, 2239 DDmode, 2240 TDmode, 2241 V2SImode, 2242 V2SFmode, 2243 V16QImode, 2244 V8HImode, 2245 V4SImode, 2246 V2DImode, 2247 V1TImode, 2248 V32QImode, 2249 V16HImode, 2250 V8SImode, 2251 V4DImode, 2252 V2TImode, 2253 V4SFmode, 2254 V2DFmode, 2255 V8SFmode, 2256 V4DFmode, 2257 OOmode, 2258 XOmode, 2259 CCmode, 2260 CCUNSmode, 2261 CCEQmode, 2262 CCFPmode, 2263 }; 2264 2265 /* Virtual regs we are interested in. */ 2266 const static struct { 2267 int regno; /* register number. */ 2268 const char *name; /* register name. */ 2269 } virtual_regs[] = { 2270 { STACK_POINTER_REGNUM, "stack pointer:" }, 2271 { TOC_REGNUM, "toc: " }, 2272 { STATIC_CHAIN_REGNUM, "static chain: " }, 2273 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, 2274 { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, 2275 { ARG_POINTER_REGNUM, "arg pointer: " }, 2276 { FRAME_POINTER_REGNUM, "frame pointer:" }, 2277 { FIRST_PSEUDO_REGISTER, "first pseudo: " }, 2278 { FIRST_VIRTUAL_REGISTER, "first virtual:" }, 2279 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, 2280 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, 2281 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, 2282 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, 2283 { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, 2284 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" }, 2285 { LAST_VIRTUAL_REGISTER, "last virtual: " }, 2286 }; 2287 2288 fputs ("\nHard register information:\n", stderr); 2289 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); 2290 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); 2291 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, 2292 LAST_ALTIVEC_REGNO, 2293 "vs"); 2294 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); 2295 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); 2296 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); 2297 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca"); 2298 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); 2299 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); 2300 2301 fputs ("\nVirtual/stack/frame registers:\n", stderr); 2302 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) 2303 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); 2304 2305 fprintf (stderr, 2306 "\n" 2307 "d reg_class = %s\n" 2308 "f reg_class = %s\n" 2309 "v reg_class = %s\n" 2310 "wa reg_class = %s\n" 2311 "we reg_class = %s\n" 2312 "wr reg_class = %s\n" 2313 "wx reg_class = %s\n" 2314 "wA reg_class = %s\n" 2315 "\n", 2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], 2317 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], 2318 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], 2319 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], 2320 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], 2321 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], 2322 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], 2323 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]); 2324 2325 nl = "\n"; 2326 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2327 rs6000_debug_print_mode (m); 2328 2329 fputs ("\n", stderr); 2330 2331 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) 2332 { 2333 machine_mode mode1 = print_tieable_modes[m1]; 2334 bool first_time = true; 2335 2336 nl = (const char *)0; 2337 for (m2 = 0; m2 < 
ARRAY_SIZE (print_tieable_modes); m2++) 2338 { 2339 machine_mode mode2 = print_tieable_modes[m2]; 2340 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2)) 2341 { 2342 if (first_time) 2343 { 2344 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); 2345 nl = "\n"; 2346 first_time = false; 2347 } 2348 2349 fprintf (stderr, " %s", GET_MODE_NAME (mode2)); 2350 } 2351 } 2352 2353 if (!first_time) 2354 fputs ("\n", stderr); 2355 } 2356 2357 if (nl) 2358 fputs (nl, stderr); 2359 2360 if (rs6000_recip_control) 2361 { 2362 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control); 2363 2364 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2365 if (rs6000_recip_bits[m]) 2366 { 2367 fprintf (stderr, 2368 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n", 2369 GET_MODE_NAME (m), 2370 (RS6000_RECIP_AUTO_RE_P (m) 2371 ? "auto" 2372 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")), 2373 (RS6000_RECIP_AUTO_RSQRTE_P (m) 2374 ? "auto" 2375 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none"))); 2376 } 2377 2378 fputs ("\n", stderr); 2379 } 2380 2381 if (rs6000_cpu_index >= 0) 2382 { 2383 const char *name = processor_target_table[rs6000_cpu_index].name; 2384 HOST_WIDE_INT flags 2385 = processor_target_table[rs6000_cpu_index].target_enable; 2386 2387 sprintf (flags_buffer, "-mcpu=%s flags", name); 2388 rs6000_print_isa_options (stderr, 0, flags_buffer, flags); 2389 } 2390 else 2391 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>"); 2392 2393 if (rs6000_tune_index >= 0) 2394 { 2395 const char *name = processor_target_table[rs6000_tune_index].name; 2396 HOST_WIDE_INT flags 2397 = processor_target_table[rs6000_tune_index].target_enable; 2398 2399 sprintf (flags_buffer, "-mtune=%s flags", name); 2400 rs6000_print_isa_options (stderr, 0, flags_buffer, flags); 2401 } 2402 else 2403 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>"); 2404 2405 cl_target_option_save (&cl_opts, &global_options, &global_options_set); 2406 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags", 2407 rs6000_isa_flags); 2408 2409 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit", 2410 rs6000_isa_flags_explicit); 2411 2412 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask", 2413 rs6000_builtin_mask); 2414 2415 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); 2416 2417 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default", 2418 OPTION_TARGET_CPU_DEFAULT ? 
OPTION_TARGET_CPU_DEFAULT : "<none>"); 2419 2420 switch (rs6000_sched_costly_dep) 2421 { 2422 case max_dep_latency: 2423 costly_str = "max_dep_latency"; 2424 break; 2425 2426 case no_dep_costly: 2427 costly_str = "no_dep_costly"; 2428 break; 2429 2430 case all_deps_costly: 2431 costly_str = "all_deps_costly"; 2432 break; 2433 2434 case true_store_to_load_dep_costly: 2435 costly_str = "true_store_to_load_dep_costly"; 2436 break; 2437 2438 case store_to_load_dep_costly: 2439 costly_str = "store_to_load_dep_costly"; 2440 break; 2441 2442 default: 2443 costly_str = costly_num; 2444 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep); 2445 break; 2446 } 2447 2448 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str); 2449 2450 switch (rs6000_sched_insert_nops) 2451 { 2452 case sched_finish_regroup_exact: 2453 nop_str = "sched_finish_regroup_exact"; 2454 break; 2455 2456 case sched_finish_pad_groups: 2457 nop_str = "sched_finish_pad_groups"; 2458 break; 2459 2460 case sched_finish_none: 2461 nop_str = "sched_finish_none"; 2462 break; 2463 2464 default: 2465 nop_str = nop_num; 2466 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops); 2467 break; 2468 } 2469 2470 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str); 2471 2472 switch (rs6000_sdata) 2473 { 2474 default: 2475 case SDATA_NONE: 2476 break; 2477 2478 case SDATA_DATA: 2479 fprintf (stderr, DEBUG_FMT_S, "sdata", "data"); 2480 break; 2481 2482 case SDATA_SYSV: 2483 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv"); 2484 break; 2485 2486 case SDATA_EABI: 2487 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi"); 2488 break; 2489 2490 } 2491 2492 switch (rs6000_traceback) 2493 { 2494 case traceback_default: trace_str = "default"; break; 2495 case traceback_none: trace_str = "none"; break; 2496 case traceback_part: trace_str = "part"; break; 2497 case traceback_full: trace_str = "full"; break; 2498 default: trace_str = "unknown"; break; 2499 } 2500 2501 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str); 2502 2503 switch (rs6000_current_cmodel) 2504 { 2505 case CMODEL_SMALL: cmodel_str = "small"; break; 2506 case CMODEL_MEDIUM: cmodel_str = "medium"; break; 2507 case CMODEL_LARGE: cmodel_str = "large"; break; 2508 default: cmodel_str = "unknown"; break; 2509 } 2510 2511 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str); 2512 2513 switch (rs6000_current_abi) 2514 { 2515 case ABI_NONE: abi_str = "none"; break; 2516 case ABI_AIX: abi_str = "aix"; break; 2517 case ABI_ELFv2: abi_str = "ELFv2"; break; 2518 case ABI_V4: abi_str = "V4"; break; 2519 case ABI_DARWIN: abi_str = "darwin"; break; 2520 default: abi_str = "unknown"; break; 2521 } 2522 2523 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str); 2524 2525 if (rs6000_altivec_abi) 2526 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true"); 2527 2528 if (rs6000_aix_extabi) 2529 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true"); 2530 2531 if (rs6000_darwin64_abi) 2532 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true"); 2533 2534 fprintf (stderr, DEBUG_FMT_S, "soft_float", 2535 (TARGET_SOFT_FLOAT ? "true" : "false")); 2536 2537 if (TARGET_LINK_STACK) 2538 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); 2539 2540 if (TARGET_P8_FUSION) 2541 { 2542 char options[80]; 2543 2544 strcpy (options, "power8"); 2545 if (TARGET_P8_FUSION_SIGN) 2546 strcat (options, ", sign"); 2547 2548 fprintf (stderr, DEBUG_FMT_S, "fusion", options); 2549 } 2550 2551 fprintf (stderr, DEBUG_FMT_S, "plt-format", 2552 TARGET_SECURE_PLT ? 
"secure" : "bss"); 2553 fprintf (stderr, DEBUG_FMT_S, "struct-return", 2554 aix_struct_return ? "aix" : "sysv"); 2555 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]); 2556 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]); 2557 fprintf (stderr, DEBUG_FMT_S, "align_branch", 2558 tf[!!rs6000_align_branch_targets]); 2559 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); 2560 fprintf (stderr, DEBUG_FMT_D, "long_double_size", 2561 rs6000_long_double_type_size); 2562 if (rs6000_long_double_type_size > 64) 2563 { 2564 fprintf (stderr, DEBUG_FMT_S, "long double type", 2565 TARGET_IEEEQUAD ? "IEEE" : "IBM"); 2566 fprintf (stderr, DEBUG_FMT_S, "default long double type", 2567 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM"); 2568 } 2569 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", 2570 (int)rs6000_sched_restricted_insns_priority); 2571 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", 2572 (int)END_BUILTINS); 2573 2574 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX", 2575 (int)TARGET_FLOAT128_ENABLE_TYPE); 2576 2577 if (TARGET_VSX) 2578 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element", 2579 (int)VECTOR_ELEMENT_SCALAR_64BIT); 2580 2581 if (TARGET_DIRECT_MOVE_128) 2582 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", 2583 (int)VECTOR_ELEMENT_MFVSRLD_64BIT); 2584} 2585 2586 2587/* Update the addr mask bits in reg_addr to help secondary reload and go if 2588 legitimate address support to figure out the appropriate addressing to 2589 use. */ 2590 2591static void 2592rs6000_setup_reg_addr_masks (void) 2593{ 2594 ssize_t rc, reg, m, nregs; 2595 addr_mask_type any_addr_mask, addr_mask; 2596 2597 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2598 { 2599 machine_mode m2 = (machine_mode) m; 2600 bool complex_p = false; 2601 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); 2602 size_t msize; 2603 2604 if (COMPLEX_MODE_P (m2)) 2605 { 2606 complex_p = true; 2607 m2 = GET_MODE_INNER (m2); 2608 } 2609 2610 msize = GET_MODE_SIZE (m2); 2611 2612 /* SDmode is special in that we want to access it only via REG+REG 2613 addressing on power7 and above, since we want to use the LFIWZX and 2614 STFIWZX instructions to load it. */ 2615 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); 2616 2617 any_addr_mask = 0; 2618 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) 2619 { 2620 addr_mask = 0; 2621 reg = reload_reg_map[rc].reg; 2622 2623 /* Can mode values go in the GPR/FPR/Altivec registers? */ 2624 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) 2625 { 2626 bool small_int_vsx_p = (small_int_p 2627 && (rc == RELOAD_REG_FPR 2628 || rc == RELOAD_REG_VMX)); 2629 2630 nregs = rs6000_hard_regno_nregs[m][reg]; 2631 addr_mask |= RELOAD_REG_VALID; 2632 2633 /* Indicate if the mode takes more than 1 physical register. If 2634 it takes a single register, indicate it can do REG+REG 2635 addressing. Small integers in VSX registers can only do 2636 REG+REG addressing. */ 2637 if (small_int_vsx_p) 2638 addr_mask |= RELOAD_REG_INDEXED; 2639 else if (nregs > 1 || m == BLKmode || complex_p) 2640 addr_mask |= RELOAD_REG_MULTIPLE; 2641 else 2642 addr_mask |= RELOAD_REG_INDEXED; 2643 2644 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY 2645 addressing. If we allow scalars into Altivec registers, 2646 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. 
2647 2648 For VSX systems, we don't allow update addressing for 2649 DFmode/SFmode if those registers can go in both the 2650 traditional floating point registers and Altivec registers. 2651 The load/store instructions for the Altivec registers do not 2652 have update forms. If we allowed update addressing, it seems 2653 to break IV-OPT code using floating point if the index type is 2654 int instead of long (PR target/81550 and target/84042). */ 2655 2656 if (TARGET_UPDATE 2657 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) 2658 && msize <= 8 2659 && !VECTOR_MODE_P (m2) 2660 && !VECTOR_ALIGNMENT_P (m2) 2661 && !complex_p 2662 && (m != E_DFmode || !TARGET_VSX) 2663 && (m != E_SFmode || !TARGET_P8_VECTOR) 2664 && !small_int_vsx_p) 2665 { 2666 addr_mask |= RELOAD_REG_PRE_INCDEC; 2667 2668 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that 2669 we don't allow PRE_MODIFY for some multi-register 2670 operations. */ 2671 switch (m) 2672 { 2673 default: 2674 addr_mask |= RELOAD_REG_PRE_MODIFY; 2675 break; 2676 2677 case E_DImode: 2678 if (TARGET_POWERPC64) 2679 addr_mask |= RELOAD_REG_PRE_MODIFY; 2680 break; 2681 2682 case E_DFmode: 2683 case E_DDmode: 2684 if (TARGET_HARD_FLOAT) 2685 addr_mask |= RELOAD_REG_PRE_MODIFY; 2686 break; 2687 } 2688 } 2689 } 2690 2691 /* GPR and FPR registers can do REG+OFFSET addressing, except 2692 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing 2693 for 64-bit scalars and 32-bit SFmode to altivec registers. */ 2694 if ((addr_mask != 0) && !indexed_only_p 2695 && msize <= 8 2696 && (rc == RELOAD_REG_GPR 2697 || ((msize == 8 || m2 == SFmode) 2698 && (rc == RELOAD_REG_FPR 2699 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR))))) 2700 addr_mask |= RELOAD_REG_OFFSET; 2701 2702 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0 2703 instructions are enabled. The offset for 128-bit VSX registers is 2704 only 12-bits. While GPRs can handle the full offset range, VSX 2705 registers can only handle the restricted range. */ 2706 else if ((addr_mask != 0) && !indexed_only_p 2707 && msize == 16 && TARGET_P9_VECTOR 2708 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) 2709 || (m2 == TImode && TARGET_VSX))) 2710 { 2711 addr_mask |= RELOAD_REG_OFFSET; 2712 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) 2713 addr_mask |= RELOAD_REG_QUAD_OFFSET; 2714 } 2715 2716 /* Vector pairs can do both indexed and offset loads if the 2717 instructions are enabled, otherwise they can only do offset loads 2718 since it will be broken into two vector moves. Vector quads can 2719 only do offset loads. */ 2720 else if ((addr_mask != 0) && TARGET_MMA 2721 && (m2 == OOmode || m2 == XOmode)) 2722 { 2723 addr_mask |= RELOAD_REG_OFFSET; 2724 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) 2725 { 2726 addr_mask |= RELOAD_REG_QUAD_OFFSET; 2727 if (m2 == OOmode) 2728 addr_mask |= RELOAD_REG_INDEXED; 2729 } 2730 } 2731 2732 /* VMX registers can do (REG & -16) and ((REG+REG) & -16) 2733 addressing on 128-bit types. */ 2734 if (rc == RELOAD_REG_VMX && msize == 16 2735 && (addr_mask & RELOAD_REG_VALID) != 0) 2736 addr_mask |= RELOAD_REG_AND_M16; 2737 2738 reg_addr[m].addr_mask[rc] = addr_mask; 2739 any_addr_mask |= addr_mask; 2740 } 2741 2742 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; 2743 } 2744} 2745 2746 2747/* Initialize the various global tables that are based on register size. */ 2748static void 2749rs6000_init_hard_regno_mode_ok (bool global_init_p) 2750{ 2751 ssize_t r, m, c; 2752 int align64; 2753 int align32; 2754 2755 /* Precalculate REGNO_REG_CLASS. 
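   Note that r0 is placed in GENERAL_REGS rather than BASE_REGS: when r0 is
   used as the base in a memory address it reads as the constant 0 rather
   than the register contents, so it cannot serve as a base register.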
*/ 2756 rs6000_regno_regclass[0] = GENERAL_REGS; 2757 for (r = 1; r < 32; ++r) 2758 rs6000_regno_regclass[r] = BASE_REGS; 2759 2760 for (r = 32; r < 64; ++r) 2761 rs6000_regno_regclass[r] = FLOAT_REGS; 2762 2763 for (r = 64; HARD_REGISTER_NUM_P (r); ++r) 2764 rs6000_regno_regclass[r] = NO_REGS; 2765 2766 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r) 2767 rs6000_regno_regclass[r] = ALTIVEC_REGS; 2768 2769 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS; 2770 for (r = CR1_REGNO; r <= CR7_REGNO; ++r) 2771 rs6000_regno_regclass[r] = CR_REGS; 2772 2773 rs6000_regno_regclass[LR_REGNO] = LINK_REGS; 2774 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; 2775 rs6000_regno_regclass[CA_REGNO] = NO_REGS; 2776 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS; 2777 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS; 2778 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; 2779 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; 2780 2781 /* Precalculate register class to simpler reload register class. We don't 2782 need all of the register classes that are combinations of different 2783 classes, just the simple ones that have constraint letters. */ 2784 for (c = 0; c < N_REG_CLASSES; c++) 2785 reg_class_to_reg_type[c] = NO_REG_TYPE; 2786 2787 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; 2788 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; 2789 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; 2790 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; 2791 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; 2792 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; 2793 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; 2794 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; 2795 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; 2796 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; 2797 2798 if (TARGET_VSX) 2799 { 2800 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; 2801 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; 2802 } 2803 else 2804 { 2805 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; 2806 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; 2807 } 2808 2809 /* Precalculate the valid memory formats as well as the vector information, 2810 this must be set up before the rs6000_hard_regno_nregs_internal calls 2811 below. */ 2812 gcc_assert ((int)VECTOR_NONE == 0); 2813 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); 2814 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem)); 2815 2816 gcc_assert ((int)CODE_FOR_nothing == 0); 2817 memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); 2818 2819 gcc_assert ((int)NO_REGS == 0); 2820 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); 2821 2822 /* The VSX hardware allows native alignment for vectors, but control whether the compiler 2823 believes it can use native alignment or still uses 128-bit alignment. */ 2824 if (TARGET_VSX && !TARGET_VSX_ALIGN_128) 2825 { 2826 align64 = 64; 2827 align32 = 32; 2828 } 2829 else 2830 { 2831 align64 = 128; 2832 align32 = 128; 2833 } 2834 2835 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so 2836 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. 
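   KFmode is always the IEEE 128-bit type, so it gets VSX memory treatment
   whenever the float128 type is enabled; TFmode only gets it when long
   double is itself the IEEE 128-bit format (the FLOAT128_IEEE_P test
   below).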
*/ 2837 if (TARGET_FLOAT128_TYPE) 2838 { 2839 rs6000_vector_mem[KFmode] = VECTOR_VSX; 2840 rs6000_vector_align[KFmode] = 128; 2841 2842 if (FLOAT128_IEEE_P (TFmode)) 2843 { 2844 rs6000_vector_mem[TFmode] = VECTOR_VSX; 2845 rs6000_vector_align[TFmode] = 128; 2846 } 2847 } 2848 2849 /* V2DF mode, VSX only. */ 2850 if (TARGET_VSX) 2851 { 2852 rs6000_vector_unit[V2DFmode] = VECTOR_VSX; 2853 rs6000_vector_mem[V2DFmode] = VECTOR_VSX; 2854 rs6000_vector_align[V2DFmode] = align64; 2855 } 2856 2857 /* V4SF mode, either VSX or Altivec. */ 2858 if (TARGET_VSX) 2859 { 2860 rs6000_vector_unit[V4SFmode] = VECTOR_VSX; 2861 rs6000_vector_mem[V4SFmode] = VECTOR_VSX; 2862 rs6000_vector_align[V4SFmode] = align32; 2863 } 2864 else if (TARGET_ALTIVEC) 2865 { 2866 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; 2867 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; 2868 rs6000_vector_align[V4SFmode] = align32; 2869 } 2870 2871 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads 2872 and stores. */ 2873 if (TARGET_ALTIVEC) 2874 { 2875 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; 2876 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; 2877 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; 2878 rs6000_vector_align[V4SImode] = align32; 2879 rs6000_vector_align[V8HImode] = align32; 2880 rs6000_vector_align[V16QImode] = align32; 2881 2882 if (TARGET_VSX) 2883 { 2884 rs6000_vector_mem[V4SImode] = VECTOR_VSX; 2885 rs6000_vector_mem[V8HImode] = VECTOR_VSX; 2886 rs6000_vector_mem[V16QImode] = VECTOR_VSX; 2887 } 2888 else 2889 { 2890 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; 2891 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; 2892 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; 2893 } 2894 } 2895 2896 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to 2897 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ 2898 if (TARGET_VSX) 2899 { 2900 rs6000_vector_mem[V2DImode] = VECTOR_VSX; 2901 rs6000_vector_unit[V2DImode] 2902 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; 2903 rs6000_vector_align[V2DImode] = align64; 2904 2905 rs6000_vector_mem[V1TImode] = VECTOR_VSX; 2906 rs6000_vector_unit[V1TImode] 2907 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; 2908 rs6000_vector_align[V1TImode] = 128; 2909 } 2910 2911 /* DFmode, see if we want to use the VSX unit. Memory is handled 2912 differently, so don't set rs6000_vector_mem. */ 2913 if (TARGET_VSX) 2914 { 2915 rs6000_vector_unit[DFmode] = VECTOR_VSX; 2916 rs6000_vector_align[DFmode] = 64; 2917 } 2918 2919 /* SFmode, see if we want to use the VSX unit. */ 2920 if (TARGET_P8_VECTOR) 2921 { 2922 rs6000_vector_unit[SFmode] = VECTOR_VSX; 2923 rs6000_vector_align[SFmode] = 32; 2924 } 2925 2926 /* Allow TImode in VSX register and set the VSX memory macros. */ 2927 if (TARGET_VSX) 2928 { 2929 rs6000_vector_mem[TImode] = VECTOR_VSX; 2930 rs6000_vector_align[TImode] = align64; 2931 } 2932 2933 /* Add support for vector pairs and vector quad registers. */ 2934 if (TARGET_MMA) 2935 { 2936 rs6000_vector_unit[OOmode] = VECTOR_NONE; 2937 rs6000_vector_mem[OOmode] = VECTOR_VSX; 2938 rs6000_vector_align[OOmode] = 256; 2939 2940 rs6000_vector_unit[XOmode] = VECTOR_NONE; 2941 rs6000_vector_mem[XOmode] = VECTOR_VSX; 2942 rs6000_vector_align[XOmode] = 512; 2943 } 2944 2945 /* Register class constraints for the constraints that depend on compile 2946 switches. When the VSX code was added, different constraints were added 2947 based on the type (DFmode, V2DFmode, V4SFmode). 
For the vector types, all 2948 of the VSX registers are used. The register classes for scalar floating 2949 point types is set, based on whether we allow that type into the upper 2950 (Altivec) registers. GCC has register classes to target the Altivec 2951 registers for load/store operations, to select using a VSX memory 2952 operation instead of the traditional floating point operation. The 2953 constraints are: 2954 2955 d - Register class to use with traditional DFmode instructions. 2956 f - Register class to use with traditional SFmode instructions. 2957 v - Altivec register. 2958 wa - Any VSX register. 2959 wc - Reserved to represent individual CR bits (used in LLVM). 2960 wn - always NO_REGS. 2961 wr - GPR if 64-bit mode is permitted. 2962 wx - Float register if we can do 32-bit int stores. */ 2963 2964 if (TARGET_HARD_FLOAT) 2965 { 2966 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */ 2967 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */ 2968 } 2969 2970 if (TARGET_VSX) 2971 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; 2972 2973 /* Add conditional constraints based on various options, to allow us to 2974 collapse multiple insn patterns. */ 2975 if (TARGET_ALTIVEC) 2976 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; 2977 2978 if (TARGET_POWERPC64) 2979 { 2980 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; 2981 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; 2982 } 2983 2984 if (TARGET_STFIWX) 2985 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */ 2986 2987 /* Support for new direct moves (ISA 3.0 + 64bit). */ 2988 if (TARGET_DIRECT_MOVE_128) 2989 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; 2990 2991 /* Set up the reload helper and direct move functions. 
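   The reload_store/reload_load fields hold the icodes (e.g.
   CODE_FOR_reload_v2df_di_store below) that the secondary reload support
   uses when a value of the given mode must go through an intermediate
   register, and the reload_gpr_vsx/reload_vsx_gpr fields hold the direct
   move patterns for copying values between the GPRs and the VSX registers
   when direct moves are available.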
*/ 2992 if (TARGET_VSX || TARGET_ALTIVEC) 2993 { 2994 if (TARGET_64BIT) 2995 { 2996 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; 2997 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; 2998 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; 2999 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; 3000 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; 3001 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; 3002 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; 3003 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; 3004 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; 3005 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; 3006 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; 3007 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; 3008 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; 3009 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; 3010 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; 3011 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; 3012 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; 3013 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; 3014 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; 3015 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; 3016 3017 if (FLOAT128_VECTOR_P (KFmode)) 3018 { 3019 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store; 3020 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load; 3021 } 3022 3023 if (FLOAT128_VECTOR_P (TFmode)) 3024 { 3025 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store; 3026 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; 3027 } 3028 3029 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are 3030 available. 
*/ 3031 if (TARGET_NO_SDMODE_STACK) 3032 { 3033 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; 3034 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; 3035 } 3036 3037 if (TARGET_VSX) 3038 { 3039 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; 3040 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; 3041 } 3042 3043 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) 3044 { 3045 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; 3046 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; 3047 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; 3048 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; 3049 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; 3050 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; 3051 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; 3052 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; 3053 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; 3054 3055 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; 3056 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; 3057 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; 3058 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; 3059 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; 3060 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; 3061 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; 3062 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; 3063 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; 3064 3065 if (FLOAT128_VECTOR_P (KFmode)) 3066 { 3067 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf; 3068 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf; 3069 } 3070 3071 if (FLOAT128_VECTOR_P (TFmode)) 3072 { 3073 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf; 3074 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf; 3075 } 3076 3077 if (TARGET_MMA) 3078 { 3079 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store; 3080 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load; 3081 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store; 3082 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load; 3083 } 3084 } 3085 } 3086 else 3087 { 3088 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; 3089 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; 3090 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; 3091 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; 3092 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; 3093 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; 3094 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; 3095 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; 3096 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; 3097 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; 3098 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; 3099 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; 3100 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; 3101 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; 3102 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; 3103 
reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; 3104 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; 3105 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; 3106 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; 3107 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; 3108 3109 if (FLOAT128_VECTOR_P (KFmode)) 3110 { 3111 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store; 3112 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load; 3113 } 3114 3115 if (FLOAT128_IEEE_P (TFmode)) 3116 { 3117 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store; 3118 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; 3119 } 3120 3121 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are 3122 available. */ 3123 if (TARGET_NO_SDMODE_STACK) 3124 { 3125 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; 3126 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; 3127 } 3128 3129 if (TARGET_VSX) 3130 { 3131 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; 3132 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; 3133 } 3134 3135 if (TARGET_DIRECT_MOVE) 3136 { 3137 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; 3138 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; 3139 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; 3140 } 3141 } 3142 3143 reg_addr[DFmode].scalar_in_vmx_p = true; 3144 reg_addr[DImode].scalar_in_vmx_p = true; 3145 3146 if (TARGET_P8_VECTOR) 3147 { 3148 reg_addr[SFmode].scalar_in_vmx_p = true; 3149 reg_addr[SImode].scalar_in_vmx_p = true; 3150 3151 if (TARGET_P9_VECTOR) 3152 { 3153 reg_addr[HImode].scalar_in_vmx_p = true; 3154 reg_addr[QImode].scalar_in_vmx_p = true; 3155 } 3156 } 3157 } 3158 3159 /* Precalculate HARD_REGNO_NREGS. */ 3160 for (r = 0; HARD_REGISTER_NUM_P (r); ++r) 3161 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3162 rs6000_hard_regno_nregs[m][r] 3163 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m); 3164 3165 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */ 3166 for (r = 0; HARD_REGISTER_NUM_P (r); ++r) 3167 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3168 rs6000_hard_regno_mode_ok_p[m][r] 3169 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m); 3170 3171 /* Precalculate CLASS_MAX_NREGS sizes. */ 3172 for (c = 0; c < LIM_REG_CLASSES; ++c) 3173 { 3174 int reg_size; 3175 3176 if (TARGET_VSX && VSX_REG_CLASS_P (c)) 3177 reg_size = UNITS_PER_VSX_WORD; 3178 3179 else if (c == ALTIVEC_REGS) 3180 reg_size = UNITS_PER_ALTIVEC_WORD; 3181 3182 else if (c == FLOAT_REGS) 3183 reg_size = UNITS_PER_FP_WORD; 3184 3185 else 3186 reg_size = UNITS_PER_WORD; 3187 3188 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3189 { 3190 machine_mode m2 = (machine_mode)m; 3191 int reg_size2 = reg_size; 3192 3193 /* TDmode & IBM 128-bit floating point always takes 2 registers, even 3194 in VSX. */ 3195 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m)) 3196 reg_size2 = UNITS_PER_FP_WORD; 3197 3198 rs6000_class_max_nregs[m][c] 3199 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2; 3200 } 3201 } 3202 3203 /* Calculate which modes to automatically generate code to use a the 3204 reciprocal divide and square root instructions. In the future, possibly 3205 automatically generate the instructions even if the user did not specify 3206 -mrecip. The older machines double precision reciprocal sqrt estimate is 3207 not accurate enough. 
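   The HAVE_RE/HAVE_RSQRTE bits set below only record which estimate
   instructions the selected ISA provides; the AUTO_* bits that actually let
   the compiler substitute the estimates for divides and square roots are
   added further down, and only when -mrecip is in effect and the
   finite/no-trapping/reciprocal math flags allow it.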
*/ 3208 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); 3209 if (TARGET_FRES) 3210 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; 3211 if (TARGET_FRE) 3212 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; 3213 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) 3214 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; 3215 if (VECTOR_UNIT_VSX_P (V2DFmode)) 3216 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; 3217 3218 if (TARGET_FRSQRTES) 3219 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3220 if (TARGET_FRSQRTE) 3221 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3222 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) 3223 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3224 if (VECTOR_UNIT_VSX_P (V2DFmode)) 3225 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3226 3227 if (rs6000_recip_control) 3228 { 3229 if (!flag_finite_math_only) 3230 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math", 3231 "-ffast-math"); 3232 if (flag_trapping_math) 3233 warning (0, "%qs requires %qs or %qs", "-mrecip", 3234 "-fno-trapping-math", "-ffast-math"); 3235 if (!flag_reciprocal_math) 3236 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math", 3237 "-ffast-math"); 3238 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) 3239 { 3240 if (RS6000_RECIP_HAVE_RE_P (SFmode) 3241 && (rs6000_recip_control & RECIP_SF_DIV) != 0) 3242 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3243 3244 if (RS6000_RECIP_HAVE_RE_P (DFmode) 3245 && (rs6000_recip_control & RECIP_DF_DIV) != 0) 3246 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3247 3248 if (RS6000_RECIP_HAVE_RE_P (V4SFmode) 3249 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) 3250 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3251 3252 if (RS6000_RECIP_HAVE_RE_P (V2DFmode) 3253 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) 3254 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3255 3256 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) 3257 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) 3258 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3259 3260 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) 3261 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) 3262 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3263 3264 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) 3265 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) 3266 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3267 3268 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) 3269 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) 3270 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3271 } 3272 } 3273 3274 /* Update the addr mask bits in reg_addr to help secondary reload and go if 3275 legitimate address support to figure out the appropriate addressing to 3276 use. 
  */
  rs6000_setup_reg_addr_masks ();

  if (global_init_p || TARGET_DEBUG_TARGET)
    {
      if (TARGET_DEBUG_REG)
	rs6000_debug_reg_global ();

      if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
	fprintf (stderr,
		 "SImode variable mult cost = %d\n"
		 "SImode constant mult cost = %d\n"
		 "SImode short constant mult cost = %d\n"
		 "DImode multiplication cost = %d\n"
		 "SImode division cost = %d\n"
		 "DImode division cost = %d\n"
		 "Simple fp operation cost = %d\n"
		 "DFmode multiplication cost = %d\n"
		 "SFmode division cost = %d\n"
		 "DFmode division cost = %d\n"
		 "cache line size = %d\n"
		 "l1 cache size = %d\n"
		 "l2 cache size = %d\n"
		 "simultaneous prefetches = %d\n"
		 "\n",
		 rs6000_cost->mulsi,
		 rs6000_cost->mulsi_const,
		 rs6000_cost->mulsi_const9,
		 rs6000_cost->muldi,
		 rs6000_cost->divsi,
		 rs6000_cost->divdi,
		 rs6000_cost->fp,
		 rs6000_cost->dmul,
		 rs6000_cost->sdiv,
		 rs6000_cost->ddiv,
		 rs6000_cost->cache_line_size,
		 rs6000_cost->l1_cache_size,
		 rs6000_cost->l2_cache_size,
		 rs6000_cost->simultaneous_prefetches);
    }
}

#if TARGET_MACHO
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */

static void
darwin_rs6000_override_options (void)
{
  /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
     off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN
      && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  if (TARGET_64BIT && ! TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
    }

  /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
     optimisation, and will not work with the most generic case (where the
     symbol is undefined external, but there is no symbol stub).  */
  if (TARGET_64BIT)
    rs6000_default_long_calls = 0;

  /* ld_classic is (so far) still used for kernel (static) code, and supports
     the JBSR longcall / branch islands.  */
  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;

      /* Allow a kext author to do -mkernel -mhard-float.  */
      if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
	rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Make -m64 imply -maltivec.  Darwin's 64-bit ABI includes
     Altivec.  */
  if (!flag_mkernel && !flag_apple_kext
      && TARGET_64BIT
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) has explicitly overridden
     it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
     G4 unless targeting the kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0
      && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
      && ! OPTION_SET_P (rs6000_cpu_index))
    {
      rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
    }
}
#endif

/* If not otherwise specified by a target, make 'long double' equivalent to
   'double'.  */

#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
#endif

/* Return the builtin mask of the various options used that could affect which
   builtins were used.
In the past we used target_flags, but we've run out of 3387 bits, and some options are no longer in target_flags. */ 3388 3389HOST_WIDE_INT 3390rs6000_builtin_mask_calculate (void) 3391{ 3392 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) 3393 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0) 3394 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) 3395 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) 3396 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) 3397 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) 3398 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) 3399 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) 3400 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) 3401 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) 3402 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) 3403 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) 3404 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) 3405 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) 3406 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0) 3407 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) 3408 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) 3409 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) 3410 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) 3411 | ((TARGET_LONG_DOUBLE_128 3412 && TARGET_HARD_FLOAT 3413 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0) 3414 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0) 3415 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0) 3416 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0) 3417 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0)); 3418} 3419 3420/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered 3421 to clobber the XER[CA] bit because clobbering that bit without telling 3422 the compiler worked just fine with versions of GCC before GCC 5, and 3423 breaking a lot of older code in ways that are hard to track down is 3424 not such a great idea. */ 3425 3426static rtx_insn * 3427rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/, 3428 vec<machine_mode> & /*input_modes*/, 3429 vec<const char *> & /*constraints*/, vec<rtx> &clobbers, 3430 HARD_REG_SET &clobbered_regs, location_t /*loc*/) 3431{ 3432 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); 3433 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); 3434 return NULL; 3435} 3436 3437/* This target function is similar to the hook TARGET_OPTION_OVERRIDE 3438 but is called when the optimize level is changed via an attribute or 3439 pragma or when it is reset at the end of the code affected by the 3440 attribute or pragma. It is not called at the beginning of compilation 3441 when TARGET_OPTION_OVERRIDE is called so if you want to perform these 3442 actions then, you should have TARGET_OPTION_OVERRIDE call 3443 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */ 3444 3445static void 3446rs6000_override_options_after_change (void) 3447{ 3448 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and 3449 turns -frename-registers on. */ 3450 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops) 3451 || (OPTION_SET_P (flag_unroll_all_loops) 3452 && flag_unroll_all_loops)) 3453 { 3454 if (!OPTION_SET_P (unroll_only_small_loops)) 3455 unroll_only_small_loops = 0; 3456 if (!OPTION_SET_P (flag_rename_registers)) 3457 flag_rename_registers = 1; 3458 if (!OPTION_SET_P (flag_cunroll_grow_size)) 3459 flag_cunroll_grow_size = 1; 3460 } 3461 else if (!OPTION_SET_P (flag_cunroll_grow_size)) 3462 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; 3463 3464 /* If we are inserting ROP-protect instructions, disable shrink wrap. 
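   (Presumably because the ROP hash save and check must execute in the
   prologue and epilogue on every path through the function, which shrink
   wrapping cannot guarantee.)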
*/ 3465 if (rs6000_rop_protect) 3466 flag_shrink_wrap = 0; 3467} 3468 3469#ifdef TARGET_USES_LINUX64_OPT 3470static void 3471rs6000_linux64_override_options () 3472{ 3473 if (!OPTION_SET_P (rs6000_alignment_flags)) 3474 rs6000_alignment_flags = MASK_ALIGN_NATURAL; 3475 if (rs6000_isa_flags & OPTION_MASK_64BIT) 3476 { 3477 if (DEFAULT_ABI != ABI_AIX) 3478 { 3479 rs6000_current_abi = ABI_AIX; 3480 error (INVALID_64BIT, "call"); 3481 } 3482 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); 3483 if (ELFv2_ABI_CHECK) 3484 { 3485 rs6000_current_abi = ABI_ELFv2; 3486 if (dot_symbols) 3487 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); 3488 } 3489 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) 3490 { 3491 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; 3492 error (INVALID_64BIT, "relocatable"); 3493 } 3494 if (rs6000_isa_flags & OPTION_MASK_EABI) 3495 { 3496 rs6000_isa_flags &= ~OPTION_MASK_EABI; 3497 error (INVALID_64BIT, "eabi"); 3498 } 3499 if (TARGET_PROTOTYPE) 3500 { 3501 target_prototype = 0; 3502 error (INVALID_64BIT, "prototype"); 3503 } 3504 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) 3505 { 3506 rs6000_isa_flags |= OPTION_MASK_POWERPC64; 3507 error ("%<-m64%> requires a PowerPC64 cpu"); 3508 } 3509 if (!OPTION_SET_P (rs6000_current_cmodel)) 3510 SET_CMODEL (CMODEL_MEDIUM); 3511 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0) 3512 { 3513 if (OPTION_SET_P (rs6000_current_cmodel) 3514 && rs6000_current_cmodel != CMODEL_SMALL) 3515 error ("%<-mcmodel%> incompatible with other toc options"); 3516 if (TARGET_MINIMAL_TOC) 3517 SET_CMODEL (CMODEL_SMALL); 3518 else if (TARGET_PCREL 3519 || (PCREL_SUPPORTED_BY_OS 3520 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)) 3521 /* Ignore -mno-minimal-toc. */ 3522 ; 3523 else 3524 SET_CMODEL (CMODEL_SMALL); 3525 } 3526 if (rs6000_current_cmodel != CMODEL_SMALL) 3527 { 3528 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC)) 3529 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM; 3530 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC)) 3531 TARGET_NO_SUM_IN_TOC = 0; 3532 } 3533 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) 3534 { 3535 if (OPTION_SET_P (rs6000_pltseq)) 3536 warning (0, "%qs unsupported for this ABI", 3537 "-mpltseq"); 3538 rs6000_pltseq = false; 3539 } 3540 } 3541 else if (TARGET_64BIT) 3542 error (INVALID_32BIT, "32"); 3543 else 3544 { 3545 if (TARGET_PROFILE_KERNEL) 3546 { 3547 profile_kernel = 0; 3548 error (INVALID_32BIT, "profile-kernel"); 3549 } 3550 if (OPTION_SET_P (rs6000_current_cmodel)) 3551 { 3552 SET_CMODEL (CMODEL_SMALL); 3553 error (INVALID_32BIT, "cmodel"); 3554 } 3555 } 3556} 3557#endif 3558 3559/* Return true if we are using GLIBC, and it supports IEEE 128-bit long double. 3560 This support is only in little endian GLIBC 2.32 or newer. */ 3561static bool 3562glibc_supports_ieee_128bit (void) 3563{ 3564#ifdef OPTION_GLIBC 3565 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN 3566 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032) 3567 return true; 3568#endif /* OPTION_GLIBC. */ 3569 3570 return false; 3571} 3572 3573/* Override command line options. 3574 3575 Combine build-specific configuration information with options 3576 specified on the command line to set various state variables which 3577 influence code generation, optimization, and expansion of built-in 3578 functions. 
Assure that command-line configuration preferences are 3579 compatible with each other and with the build configuration; issue 3580 warnings while adjusting configuration or error messages while 3581 rejecting configuration. 3582 3583 Upon entry to this function: 3584 3585 This function is called once at the beginning of 3586 compilation, and then again at the start and end of compiling 3587 each section of code that has a different configuration, as 3588 indicated, for example, by adding the 3589 3590 __attribute__((__target__("cpu=power9"))) 3591 3592 qualifier to a function definition or, for example, by bracketing 3593 code between 3594 3595 #pragma GCC target("altivec") 3596 3597 and 3598 3599 #pragma GCC reset_options 3600 3601 directives. Parameter global_init_p is true for the initial 3602 invocation, which initializes global variables, and false for all 3603 subsequent invocations. 3604 3605 3606 Various global state information is assumed to be valid. This 3607 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the 3608 default CPU specified at build configure time, TARGET_DEFAULT, 3609 representing the default set of option flags for the default 3610 target, and OPTION_SET_P (rs6000_isa_flags), representing 3611 which options were requested on the command line. 3612 3613 Upon return from this function: 3614 3615 rs6000_isa_flags_explicit has a non-zero bit for each flag that 3616 was set by name on the command line. Additionally, if certain 3617 attributes are automatically enabled or disabled by this function 3618 in order to assure compatibility between options and 3619 configuration, the flags associated with those attributes are 3620 also set. By setting these "explicit bits", we avoid the risk 3621 that other code might accidentally overwrite these particular 3622 attributes with "default values". 3623 3624 The various bits of rs6000_isa_flags are set to indicate the 3625 target options that have been selected for the most current 3626 compilation efforts. This has the effect of also turning on the 3627 associated TARGET_XXX values since these are macros which are 3628 generally defined to test the corresponding bit of the 3629 rs6000_isa_flags variable. 3630 3631 The variable rs6000_builtin_mask is set to represent the target 3632 options for the most current compilation efforts, consistent with 3633 the current contents of rs6000_isa_flags. This variable controls 3634 expansion of built-in functions. 3635 3636 Various other global variables and fields of global structures 3637 (over 50 in all) are initialized to reflect the desired options 3638 for the most current compilation efforts. */ 3639 3640static bool 3641rs6000_option_override_internal (bool global_init_p) 3642{ 3643 bool ret = true; 3644 3645 HOST_WIDE_INT set_masks; 3646 HOST_WIDE_INT ignore_masks; 3647 int cpu_index = -1; 3648 int tune_index; 3649 struct cl_target_option *main_target_opt 3650 = ((global_init_p || target_option_default_node == NULL) 3651 ? NULL : TREE_TARGET_OPTION (target_option_default_node)); 3652 3653 /* Print defaults. */ 3654 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p) 3655 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); 3656 3657 /* Remember the explicit arguments. */ 3658 if (global_init_p) 3659 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags); 3660 3661 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C 3662 library functions, so warn about it. 
The flag may be useful for 3663 performance studies from time to time though, so don't disable it 3664 entirely. */ 3665 if (OPTION_SET_P (rs6000_alignment_flags) 3666 && rs6000_alignment_flags == MASK_ALIGN_POWER 3667 && DEFAULT_ABI == ABI_DARWIN 3668 && TARGET_64BIT) 3669 warning (0, "%qs is not supported for 64-bit Darwin;" 3670 " it is incompatible with the installed C and C++ libraries", 3671 "-malign-power"); 3672 3673 /* Numerous experiment shows that IRA based loop pressure 3674 calculation works better for RTL loop invariant motion on targets 3675 with enough (>= 32) registers. It is an expensive optimization. 3676 So it is on only for peak performance. */ 3677 if (optimize >= 3 && global_init_p 3678 && !OPTION_SET_P (flag_ira_loop_pressure)) 3679 flag_ira_loop_pressure = 1; 3680 3681 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order 3682 for tracebacks to be complete but not if any -fasynchronous-unwind-tables 3683 options were already specified. */ 3684 if (flag_sanitize & SANITIZE_USER_ADDRESS 3685 && !OPTION_SET_P (flag_asynchronous_unwind_tables)) 3686 flag_asynchronous_unwind_tables = 1; 3687 3688 /* -fvariable-expansion-in-unroller is a win for POWER whenever the 3689 loop unroller is active. It is only checked during unrolling, so 3690 we can just set it on by default. */ 3691 if (!OPTION_SET_P (flag_variable_expansion_in_unroller)) 3692 flag_variable_expansion_in_unroller = 1; 3693 3694 /* Set the pointer size. */ 3695 if (TARGET_64BIT) 3696 { 3697 rs6000_pmode = DImode; 3698 rs6000_pointer_size = 64; 3699 } 3700 else 3701 { 3702 rs6000_pmode = SImode; 3703 rs6000_pointer_size = 32; 3704 } 3705 3706 /* Some OSs don't support saving the high part of 64-bit registers on context 3707 switch. Other OSs don't support saving Altivec registers. On those OSs, 3708 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings; 3709 if the user wants either, the user must explicitly specify them and we 3710 won't interfere with the user's specification. */ 3711 3712 set_masks = POWERPC_MASKS; 3713#ifdef OS_MISSING_POWERPC64 3714 if (OS_MISSING_POWERPC64) 3715 set_masks &= ~OPTION_MASK_POWERPC64; 3716#endif 3717#ifdef OS_MISSING_ALTIVEC 3718 if (OS_MISSING_ALTIVEC) 3719 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX 3720 | OTHER_VSX_VECTOR_MASKS); 3721#endif 3722 3723 /* Don't override by the processor default if given explicitly. */ 3724 set_masks &= ~rs6000_isa_flags_explicit; 3725 3726 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed 3727 the cpu in a target attribute or pragma, but did not specify a tuning 3728 option, use the cpu for the tuning option rather than the option specified 3729 with -mtune on the command line. Process a '--with-cpu' configuration 3730 request as an implicit --cpu. */ 3731 if (rs6000_cpu_index >= 0) 3732 cpu_index = rs6000_cpu_index; 3733 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) 3734 cpu_index = main_target_opt->x_rs6000_cpu_index; 3735 else if (OPTION_TARGET_CPU_DEFAULT) 3736 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT); 3737 3738 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the 3739 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits 3740 with those from the cpu, except for options that were explicitly set. If 3741 we don't have a cpu, do not override the target bits set in 3742 TARGET_DEFAULT. 
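   For example (an illustrative scenario, not part of the original comment): on a compiler configured with --with-cpu=power8, a plain compile picks up the power8 ISA bits from processor_target_table, while -mcpu=power9 -mno-vsx replaces those bits with the power9 set but leaves VSX off, because explicitly requested flags were removed from set_masks above.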
*/ 3743 if (cpu_index >= 0) 3744 { 3745 rs6000_cpu_index = cpu_index; 3746 rs6000_isa_flags &= ~set_masks; 3747 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable 3748 & set_masks); 3749 } 3750 else 3751 { 3752 /* If no -mcpu=<xxx>, inherit any default options that were cleared via 3753 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize 3754 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched 3755 to using rs6000_isa_flags, we need to do the initialization here. 3756 3757 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using 3758 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ 3759 HOST_WIDE_INT flags; 3760 if (TARGET_DEFAULT) 3761 flags = TARGET_DEFAULT; 3762 else 3763 { 3764 /* PowerPC 64-bit LE requires at least ISA 2.07. */ 3765 const char *default_cpu = (!TARGET_POWERPC64 3766 ? "powerpc" 3767 : (BYTES_BIG_ENDIAN 3768 ? "powerpc64" 3769 : "powerpc64le")); 3770 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu); 3771 flags = processor_target_table[default_cpu_index].target_enable; 3772 } 3773 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); 3774 } 3775 3776 if (rs6000_tune_index >= 0) 3777 tune_index = rs6000_tune_index; 3778 else if (cpu_index >= 0) 3779 rs6000_tune_index = tune_index = cpu_index; 3780 else 3781 { 3782 size_t i; 3783 enum processor_type tune_proc 3784 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT); 3785 3786 tune_index = -1; 3787 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) 3788 if (processor_target_table[i].processor == tune_proc) 3789 { 3790 tune_index = i; 3791 break; 3792 } 3793 } 3794 3795 if (cpu_index >= 0) 3796 rs6000_cpu = processor_target_table[cpu_index].processor; 3797 else 3798 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT; 3799 3800 gcc_assert (tune_index >= 0); 3801 rs6000_tune = processor_target_table[tune_index].processor; 3802 3803 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 3804 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 3805 || rs6000_cpu == PROCESSOR_PPCE5500) 3806 { 3807 if (TARGET_ALTIVEC) 3808 error ("AltiVec not supported in this target"); 3809 } 3810 3811 /* If we are optimizing big endian systems for space, use the load/store 3812 multiple instructions. */ 3813 if (BYTES_BIG_ENDIAN && optimize_size) 3814 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE; 3815 3816 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750, 3817 because the hardware doesn't support the instructions used in little 3818 endian mode, and causes an alignment trap. The 750 does not cause an 3819 alignment trap (except when the target is unaligned). */ 3820 3821 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE) 3822 { 3823 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE; 3824 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0) 3825 warning (0, "%qs is not supported on little endian systems", 3826 "-mmultiple"); 3827 } 3828 3829 /* If little-endian, default to -mstrict-align on older processors. 3830 Testing for direct_move matches power8 and later. */ 3831 if (!BYTES_BIG_ENDIAN 3832 && !(processor_target_table[tune_index].target_enable 3833 & OPTION_MASK_DIRECT_MOVE)) 3834 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; 3835 3836 /* Add some warnings for VSX. 
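   For instance (editor's example): -mvsx -msoft-float draws the hardware floating point warning below, and -mvsx -mno-altivec the incompatibility warning; in both cases OPTION_MASK_VSX is cleared and marked explicit so the later defaulting code does not quietly turn it back on.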
*/ 3837 if (TARGET_VSX) 3838 { 3839 const char *msg = NULL; 3840 if (!TARGET_HARD_FLOAT) 3841 { 3842 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3843 msg = N_("%<-mvsx%> requires hardware floating point"); 3844 else 3845 { 3846 rs6000_isa_flags &= ~ OPTION_MASK_VSX; 3847 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3848 } 3849 } 3850 else if (TARGET_AVOID_XFORM > 0) 3851 msg = N_("%<-mvsx%> needs indexed addressing"); 3852 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit 3853 & OPTION_MASK_ALTIVEC)) 3854 { 3855 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3856 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible"); 3857 else 3858 msg = N_("%<-mno-altivec%> disables vsx"); 3859 } 3860 3861 if (msg) 3862 { 3863 warning (0, msg); 3864 rs6000_isa_flags &= ~ OPTION_MASK_VSX; 3865 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3866 } 3867 } 3868 3869 /* If hard-float/altivec/vsx were explicitly turned off then don't allow 3870 the -mcpu setting to enable options that conflict. */ 3871 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) 3872 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT 3873 | OPTION_MASK_ALTIVEC 3874 | OPTION_MASK_VSX)) != 0) 3875 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO 3876 | OPTION_MASK_DIRECT_MOVE) 3877 & ~rs6000_isa_flags_explicit); 3878 3879 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 3880 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); 3881 3882#ifdef XCOFF_DEBUGGING_INFO 3883 /* For AIX default to 64-bit DWARF. */ 3884 if (!OPTION_SET_P (dwarf_offset_size)) 3885 dwarf_offset_size = POINTER_SIZE_UNITS; 3886#endif 3887 3888 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn 3889 off all of the options that depend on those flags. */ 3890 ignore_masks = rs6000_disable_incompatible_switches (); 3891 3892 /* For the newer switches (vsx, dfp, etc.) set some of the older options, 3893 unless the user explicitly used the -mno-<option> to disable the code. */ 3894 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC) 3895 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); 3896 else if (TARGET_P9_MINMAX) 3897 { 3898 if (cpu_index >= 0) 3899 { 3900 if (cpu_index == PROCESSOR_POWER9) 3901 { 3902 /* legacy behavior: allow -mcpu=power9 with certain 3903 capabilities explicitly disabled. */ 3904 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); 3905 } 3906 else 3907 error ("power9 target option is incompatible with %<%s=<xxx>%> " 3908 "for <xxx> less than power9", "-mcpu"); 3909 } 3910 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) 3911 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags 3912 & rs6000_isa_flags_explicit)) 3913 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags 3914 were explicitly cleared. 
*/ 3915 error ("%qs incompatible with explicitly disabled options", 3916 "-mpower9-minmax"); 3917 else 3918 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER; 3919 } 3920 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) 3921 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks); 3922 else if (TARGET_VSX) 3923 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks); 3924 else if (TARGET_POPCNTD) 3925 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks); 3926 else if (TARGET_DFP) 3927 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks); 3928 else if (TARGET_CMPB) 3929 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks); 3930 else if (TARGET_FPRND) 3931 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks); 3932 else if (TARGET_POPCNTB) 3933 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks); 3934 else if (TARGET_ALTIVEC) 3935 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); 3936 3937 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a 3938 target attribute or pragma which automatically enables both options, 3939 unless the altivec ABI was set. This is set by default for 64-bit, but 3940 not for 32-bit. Don't move this before the above code using ignore_masks, 3941 since it can reset the cleared VSX/ALTIVEC flag again. */ 3942 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi) 3943 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC) 3944 & ~rs6000_isa_flags_explicit); 3945 3946 if (TARGET_CRYPTO && !TARGET_ALTIVEC) 3947 { 3948 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) 3949 error ("%qs requires %qs", "-mcrypto", "-maltivec"); 3950 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; 3951 } 3952 3953 if (!TARGET_FPRND && TARGET_VSX) 3954 { 3955 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND) 3956 /* TARGET_VSX = 1 implies Power 7 and newer */ 3957 error ("%qs requires %qs", "-mvsx", "-mfprnd"); 3958 rs6000_isa_flags &= ~OPTION_MASK_FPRND; 3959 } 3960 3961 if (TARGET_DIRECT_MOVE && !TARGET_VSX) 3962 { 3963 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) 3964 error ("%qs requires %qs", "-mdirect-move", "-mvsx"); 3965 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; 3966 } 3967 3968 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) 3969 { 3970 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 3971 error ("%qs requires %qs", "-mpower8-vector", "-maltivec"); 3972 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; 3973 } 3974 3975 if (TARGET_P8_VECTOR && !TARGET_VSX) 3976 { 3977 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 3978 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX)) 3979 error ("%qs requires %qs", "-mpower8-vector", "-mvsx"); 3980 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0) 3981 { 3982 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; 3983 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3984 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; 3985 } 3986 else 3987 { 3988 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is 3989 not explicit. */ 3990 rs6000_isa_flags |= OPTION_MASK_VSX; 3991 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3992 } 3993 } 3994 3995 if (TARGET_DFP && !TARGET_HARD_FLOAT) 3996 { 3997 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP) 3998 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float"); 3999 rs6000_isa_flags &= ~OPTION_MASK_DFP; 4000 } 4001 4002 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, 4003 silently turn off quad memory mode. 
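   A hypothetical example: a 32-bit compile (without -mpowerpc64) that passes -mquad-memory gets the warning below and both OPTION_MASK_QUAD_MEMORY and OPTION_MASK_QUAD_MEMORY_ATOMIC are cleared; if quad memory was only enabled implicitly, the flags are dropped with no diagnostic.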
*/ 4004 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) 4005 { 4006 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) 4007 warning (0, N_("%<-mquad-memory%> requires 64-bit mode")); 4008 4009 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) 4010 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode")); 4011 4012 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY 4013 | OPTION_MASK_QUAD_MEMORY_ATOMIC); 4014 } 4015 4016 /* Non-atomic quad memory load/store are disabled for little endian, since 4017 the words are reversed, but atomic operations can still be done by 4018 swapping the words. */ 4019 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN) 4020 { 4021 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) 4022 warning (0, N_("%<-mquad-memory%> is not available in little endian " 4023 "mode")); 4024 4025 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; 4026 } 4027 4028 /* Assume if the user asked for normal quad memory instructions, they want 4029 the atomic versions as well, unless they explicity told us not to use quad 4030 word atomic instructions. */ 4031 if (TARGET_QUAD_MEMORY 4032 && !TARGET_QUAD_MEMORY_ATOMIC 4033 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) 4034 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; 4035 4036 /* If we can shrink-wrap the TOC register save separately, then use 4037 -msave-toc-indirect unless explicitly disabled. */ 4038 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0 4039 && flag_shrink_wrap_separate 4040 && optimize_function_for_speed_p (cfun)) 4041 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT; 4042 4043 /* Enable power8 fusion if we are tuning for power8, even if we aren't 4044 generating power8 instructions. Power9 does not optimize power8 fusion 4045 cases. */ 4046 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) 4047 { 4048 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8) 4049 rs6000_isa_flags |= OPTION_MASK_P8_FUSION; 4050 else 4051 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; 4052 } 4053 4054 /* Setting additional fusion flags turns on base fusion. */ 4055 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN) 4056 { 4057 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) 4058 { 4059 if (TARGET_P8_FUSION_SIGN) 4060 error ("%qs requires %qs", "-mpower8-fusion-sign", 4061 "-mpower8-fusion"); 4062 4063 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; 4064 } 4065 else 4066 rs6000_isa_flags |= OPTION_MASK_P8_FUSION; 4067 } 4068 4069 /* Power8 does not fuse sign extended loads with the addis. If we are 4070 optimizing at high levels for speed, convert a sign extended load into a 4071 zero extending load, and an explicit sign extension. */ 4072 if (TARGET_P8_FUSION 4073 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) 4074 && optimize_function_for_speed_p (cfun) 4075 && optimize >= 3) 4076 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; 4077 4078 /* ISA 3.0 vector instructions include ISA 2.07. */ 4079 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR) 4080 { 4081 /* We prefer to not mention undocumented options in 4082 error messages. However, if users have managed to select 4083 power9-vector without selecting power8-vector, they 4084 already know about undocumented flags. 
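   To sketch the cases handled below (editor's illustration): if both -mpower9-vector and -mno-power8-vector were given explicitly, an error is issued; if only -mno-power8-vector was explicit, power9-vector is quietly dropped; and if only -mpower9-vector was explicit, power8-vector is implicitly enabled and marked explicit.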
*/ 4085 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) && 4086 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)) 4087 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector"); 4088 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0) 4089 { 4090 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; 4091 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 4092 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; 4093 } 4094 else 4095 { 4096 /* OPTION_MASK_P9_VECTOR is explicit and 4097 OPTION_MASK_P8_VECTOR is not explicit. */ 4098 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR; 4099 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; 4100 } 4101 } 4102 4103 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 4104 support. If we only have ISA 2.06 support, and the user did not specify 4105 the switch, leave it set to -1 so the movmisalign patterns are enabled, 4106 but we don't enable the full vectorization support */ 4107 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) 4108 TARGET_ALLOW_MOVMISALIGN = 1; 4109 4110 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) 4111 { 4112 if (TARGET_ALLOW_MOVMISALIGN > 0 4113 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN)) 4114 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx"); 4115 4116 TARGET_ALLOW_MOVMISALIGN = 0; 4117 } 4118 4119 /* Determine when unaligned vector accesses are permitted, and when 4120 they are preferred over masked Altivec loads. Note that if 4121 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then 4122 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is 4123 not true. */ 4124 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4125 { 4126 if (!TARGET_VSX) 4127 { 4128 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) 4129 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx"); 4130 4131 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; 4132 } 4133 4134 else if (!TARGET_ALLOW_MOVMISALIGN) 4135 { 4136 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) 4137 error ("%qs requires %qs", "-munefficient-unaligned-vsx", 4138 "-mallow-movmisalign"); 4139 4140 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; 4141 } 4142 } 4143 4144 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX)) 4145 { 4146 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4147 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX; 4148 else 4149 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX; 4150 } 4151 4152 /* Use long double size to select the appropriate long double. We use 4153 TYPE_PRECISION to differentiate the 3 different long double types. We map 4154 128 into the precision used for TFmode. */ 4155 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64 4156 ? 64 4157 : FLOAT_PRECISION_TFmode); 4158 4159 /* Set long double size before the IEEE 128-bit tests. */ 4160 if (!OPTION_SET_P (rs6000_long_double_type_size)) 4161 { 4162 if (main_target_opt != NULL 4163 && (main_target_opt->x_rs6000_long_double_type_size 4164 != default_long_double_size)) 4165 error ("target attribute or pragma changes %<long double%> size"); 4166 else 4167 rs6000_long_double_type_size = default_long_double_size; 4168 } 4169 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode) 4170 ; /* The option value can be seen when cl_target_option_restore is called. 
*/ 4171 else if (rs6000_long_double_type_size == 128) 4172 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode; 4173 4174 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server 4175 systems will also set long double to be IEEE 128-bit. AIX and Darwin 4176 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so 4177 those systems will not pick up this default. Warn if the user changes the 4178 default unless -Wno-psabi. */ 4179 if (!OPTION_SET_P (rs6000_ieeequad)) 4180 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT; 4181 4182 else if (TARGET_LONG_DOUBLE_128) 4183 { 4184 if (global_options.x_rs6000_ieeequad 4185 && (!TARGET_POPCNTD || !TARGET_VSX)) 4186 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble"); 4187 4188 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT) 4189 { 4190 /* Determine if the user can change the default long double type at 4191 compilation time. You need GLIBC 2.32 or newer to be able to 4192 change the long double type. Only issue one warning. */ 4193 static bool warned_change_long_double; 4194 4195 if (!warned_change_long_double && !glibc_supports_ieee_128bit ()) 4196 { 4197 warned_change_long_double = true; 4198 if (TARGET_IEEEQUAD) 4199 warning (OPT_Wpsabi, "Using IEEE extended precision " 4200 "%<long double%>"); 4201 else 4202 warning (OPT_Wpsabi, "Using IBM extended precision " 4203 "%<long double%>"); 4204 } 4205 } 4206 } 4207 4208 /* Enable the default support for IEEE 128-bit floating point on Linux VSX 4209 systems. In GCC 7, we would enable the IEEE 128-bit floating point 4210 infrastructure (-mfloat128-type) but not enable the actual __float128 type 4211 unless the user explicitly used -mfloat128. In GCC 8, we enable both 4212 the keyword as well as the type. */ 4213 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX; 4214 4215 /* IEEE 128-bit floating point requires VSX support. */ 4216 if (TARGET_FLOAT128_KEYWORD) 4217 { 4218 if (!TARGET_VSX) 4219 { 4220 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) 4221 error ("%qs requires VSX support", "-mfloat128"); 4222 4223 TARGET_FLOAT128_TYPE = 0; 4224 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD 4225 | OPTION_MASK_FLOAT128_HW); 4226 } 4227 else if (!TARGET_FLOAT128_TYPE) 4228 { 4229 TARGET_FLOAT128_TYPE = 1; 4230 warning (0, "The %<-mfloat128%> option may not be fully supported"); 4231 } 4232 } 4233 4234 /* Enable the __float128 keyword under Linux by default. */ 4235 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD 4236 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0) 4237 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD; 4238 4239 /* If we are supporting the float128 type and have full ISA 3.0 support, 4240 enable -mfloat128-hardware by default. However, don't enable the 4241 __float128 keyword if it was explicitly turned off. 64-bit mode is needed 4242 because sometimes the compiler wants to put things in an integer 4243 container, and if we don't have __int128 support, it is impossible.
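   In practice (an illustrative reading of the condition below): a powerpc64le compile with -mcpu=power9 normally satisfies both TARGET_64BIT and ISA_3_0_MASKS_IEEE, so -mfloat128-hardware is switched on by default, whereas a 32-bit compile keeps the software implementation because there is no __int128 container.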
*/ 4244 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT 4245 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE 4246 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW)) 4247 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW; 4248 4249 if (TARGET_FLOAT128_HW 4250 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE) 4251 { 4252 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) 4253 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>"); 4254 4255 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; 4256 } 4257 4258 if (TARGET_FLOAT128_HW && !TARGET_64BIT) 4259 { 4260 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) 4261 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64"); 4262 4263 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; 4264 } 4265 4266 /* Enable -mprefixed by default on power10 systems. */ 4267 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0) 4268 rs6000_isa_flags |= OPTION_MASK_PREFIXED; 4269 4270 /* -mprefixed requires -mcpu=power10 (or later). */ 4271 else if (TARGET_PREFIXED && !TARGET_POWER10) 4272 { 4273 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0) 4274 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10"); 4275 4276 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED; 4277 } 4278 4279 /* -mpcrel requires prefixed load/store addressing. */ 4280 if (TARGET_PCREL && !TARGET_PREFIXED) 4281 { 4282 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0) 4283 error ("%qs requires %qs", "-mpcrel", "-mprefixed"); 4284 4285 rs6000_isa_flags &= ~OPTION_MASK_PCREL; 4286 } 4287 4288 /* Print the options after updating the defaults. */ 4289 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4290 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); 4291 4292 /* E500mc does "better" if we inline more aggressively. Respect the 4293 user's opinion, though. */ 4294 if (rs6000_block_move_inline_limit == 0 4295 && (rs6000_tune == PROCESSOR_PPCE500MC 4296 || rs6000_tune == PROCESSOR_PPCE500MC64 4297 || rs6000_tune == PROCESSOR_PPCE5500 4298 || rs6000_tune == PROCESSOR_PPCE6500)) 4299 rs6000_block_move_inline_limit = 128; 4300 4301 /* store_one_arg depends on expand_block_move to handle at least the 4302 size of reg_parm_stack_space. */ 4303 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) 4304 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32); 4305 4306 if (global_init_p) 4307 { 4308 /* If the appropriate debug option is enabled, replace the target hooks 4309 with debug versions that call the real version and then prints 4310 debugging information. 
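   Editorial note: these debug variants are typically reached through the -mdebug=cost and -mdebug=addr sub-options; each wrapper forwards to the real implementation and reports its arguments and result, so generated code should not change, only the diagnostic output on stderr.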
*/ 4311 if (TARGET_DEBUG_COST) 4312 { 4313 targetm.rtx_costs = rs6000_debug_rtx_costs; 4314 targetm.address_cost = rs6000_debug_address_cost; 4315 targetm.sched.adjust_cost = rs6000_debug_adjust_cost; 4316 } 4317 4318 if (TARGET_DEBUG_ADDR) 4319 { 4320 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p; 4321 targetm.legitimize_address = rs6000_debug_legitimize_address; 4322 rs6000_secondary_reload_class_ptr 4323 = rs6000_debug_secondary_reload_class; 4324 targetm.secondary_memory_needed 4325 = rs6000_debug_secondary_memory_needed; 4326 targetm.can_change_mode_class 4327 = rs6000_debug_can_change_mode_class; 4328 rs6000_preferred_reload_class_ptr 4329 = rs6000_debug_preferred_reload_class; 4330 rs6000_mode_dependent_address_ptr 4331 = rs6000_debug_mode_dependent_address; 4332 } 4333 4334 if (rs6000_veclibabi_name) 4335 { 4336 if (strcmp (rs6000_veclibabi_name, "mass") == 0) 4337 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass; 4338 else 4339 { 4340 error ("unknown vectorization library ABI type in " 4341 "%<-mveclibabi=%s%>", rs6000_veclibabi_name); 4342 ret = false; 4343 } 4344 } 4345 } 4346 4347 /* Enable Altivec ABI for AIX -maltivec. */ 4348 if (TARGET_XCOFF 4349 && (TARGET_ALTIVEC || TARGET_VSX) 4350 && !OPTION_SET_P (rs6000_altivec_abi)) 4351 { 4352 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) 4353 error ("target attribute or pragma changes AltiVec ABI"); 4354 else 4355 rs6000_altivec_abi = 1; 4356 } 4357 4358 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For 4359 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can 4360 be explicitly overridden in either case. */ 4361 if (TARGET_ELF) 4362 { 4363 if (!OPTION_SET_P (rs6000_altivec_abi) 4364 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX)) 4365 { 4366 if (main_target_opt != NULL && 4367 !main_target_opt->x_rs6000_altivec_abi) 4368 error ("target attribute or pragma changes AltiVec ABI"); 4369 else 4370 rs6000_altivec_abi = 1; 4371 } 4372 } 4373 4374 /* Set the Darwin64 ABI as default for 64-bit Darwin. 4375 So far, the only darwin64 targets are also MACH-O. */ 4376 if (TARGET_MACHO 4377 && DEFAULT_ABI == ABI_DARWIN 4378 && TARGET_64BIT) 4379 { 4380 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi) 4381 error ("target attribute or pragma changes darwin64 ABI"); 4382 else 4383 { 4384 rs6000_darwin64_abi = 1; 4385 /* Default to natural alignment, for better performance. */ 4386 rs6000_alignment_flags = MASK_ALIGN_NATURAL; 4387 } 4388 } 4389 4390 /* Place FP constants in the constant pool instead of TOC 4391 if section anchors enabled. */ 4392 if (flag_section_anchors 4393 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC)) 4394 TARGET_NO_FP_IN_TOC = 1; 4395 4396 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4397 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags); 4398 4399#ifdef SUBTARGET_OVERRIDE_OPTIONS 4400 SUBTARGET_OVERRIDE_OPTIONS; 4401#endif 4402#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 4403 SUBSUBTARGET_OVERRIDE_OPTIONS; 4404#endif 4405#ifdef SUB3TARGET_OVERRIDE_OPTIONS 4406 SUB3TARGET_OVERRIDE_OPTIONS; 4407#endif 4408 4409 /* If the ABI has support for PC-relative relocations, enable it by default. 4410 This test depends on the sub-target tests above setting the code model to 4411 medium for ELF v2 systems. 
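   Concretely (an illustrative example, not part of the original comment): on powerpc64le-linux with -mcpu=power10, the ELFv2 subtarget keeps the default -mcmodel=medium, PCREL_SUPPORTED_BY_OS evaluates true, and -mpcrel is enabled unless the user passed an explicit -mno-pcrel.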
*/ 4412 if (PCREL_SUPPORTED_BY_OS 4413 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0) 4414 rs6000_isa_flags |= OPTION_MASK_PCREL; 4415 4416 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until 4417 after the subtarget override options are done. */ 4418 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM) 4419 { 4420 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0) 4421 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium"); 4422 4423 rs6000_isa_flags &= ~OPTION_MASK_PCREL; 4424 } 4425 4426 /* Enable -mmma by default on power10 systems. */ 4427 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) 4428 rs6000_isa_flags |= OPTION_MASK_MMA; 4429 4430 if (TARGET_POWER10 4431 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0) 4432 rs6000_isa_flags |= OPTION_MASK_P10_FUSION; 4433 4434 /* Turn off vector pair/mma options on non-power10 systems. */ 4435 else if (!TARGET_POWER10 && TARGET_MMA) 4436 { 4437 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) 4438 error ("%qs requires %qs", "-mmma", "-mcpu=power10"); 4439 4440 rs6000_isa_flags &= ~OPTION_MASK_MMA; 4441 } 4442 4443 /* MMA requires SIMD support as ISA 3.1 claims and our implementation 4444 such as "*movoo" uses vector pair access which use VSX registers. 4445 So make MMA require VSX support here. */ 4446 if (TARGET_MMA && !TARGET_VSX) 4447 { 4448 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) 4449 error ("%qs requires %qs", "-mmma", "-mvsx"); 4450 rs6000_isa_flags &= ~OPTION_MASK_MMA; 4451 } 4452 4453 if (!TARGET_PCREL && TARGET_PCREL_OPT) 4454 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT; 4455 4456 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4457 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); 4458 4459 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4 4460 && rs6000_tune != PROCESSOR_POWER5 4461 && rs6000_tune != PROCESSOR_POWER6 4462 && rs6000_tune != PROCESSOR_POWER7 4463 && rs6000_tune != PROCESSOR_POWER8 4464 && rs6000_tune != PROCESSOR_POWER9 4465 && rs6000_tune != PROCESSOR_POWER10 4466 && rs6000_tune != PROCESSOR_PPCA2 4467 && rs6000_tune != PROCESSOR_CELL 4468 && rs6000_tune != PROCESSOR_PPC476); 4469 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4 4470 || rs6000_tune == PROCESSOR_POWER5 4471 || rs6000_tune == PROCESSOR_POWER7 4472 || rs6000_tune == PROCESSOR_POWER8); 4473 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4 4474 || rs6000_tune == PROCESSOR_POWER5 4475 || rs6000_tune == PROCESSOR_POWER6 4476 || rs6000_tune == PROCESSOR_POWER7 4477 || rs6000_tune == PROCESSOR_POWER8 4478 || rs6000_tune == PROCESSOR_POWER9 4479 || rs6000_tune == PROCESSOR_POWER10 4480 || rs6000_tune == PROCESSOR_PPCE500MC 4481 || rs6000_tune == PROCESSOR_PPCE500MC64 4482 || rs6000_tune == PROCESSOR_PPCE5500 4483 || rs6000_tune == PROCESSOR_PPCE6500); 4484 4485 /* Allow debug switches to override the above settings. These are set to -1 4486 in rs6000.opt to indicate the user hasn't directly set the switch. */ 4487 if (TARGET_ALWAYS_HINT >= 0) 4488 rs6000_always_hint = TARGET_ALWAYS_HINT; 4489 4490 if (TARGET_SCHED_GROUPS >= 0) 4491 rs6000_sched_groups = TARGET_SCHED_GROUPS; 4492 4493 if (TARGET_ALIGN_BRANCH_TARGETS >= 0) 4494 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; 4495 4496 rs6000_sched_restricted_insns_priority 4497 = (rs6000_sched_groups ? 1 : 0); 4498 4499 /* Handle -msched-costly-dep option. */ 4500 rs6000_sched_costly_dep 4501 = (rs6000_sched_groups ? 
true_store_to_load_dep_costly : no_dep_costly); 4502 4503 if (rs6000_sched_costly_dep_str) 4504 { 4505 if (! strcmp (rs6000_sched_costly_dep_str, "no")) 4506 rs6000_sched_costly_dep = no_dep_costly; 4507 else if (! strcmp (rs6000_sched_costly_dep_str, "all")) 4508 rs6000_sched_costly_dep = all_deps_costly; 4509 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load")) 4510 rs6000_sched_costly_dep = true_store_to_load_dep_costly; 4511 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) 4512 rs6000_sched_costly_dep = store_to_load_dep_costly; 4513 else 4514 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost) 4515 atoi (rs6000_sched_costly_dep_str)); 4516 } 4517 4518 /* Handle -minsert-sched-nops option. */ 4519 rs6000_sched_insert_nops 4520 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none); 4521 4522 if (rs6000_sched_insert_nops_str) 4523 { 4524 if (! strcmp (rs6000_sched_insert_nops_str, "no")) 4525 rs6000_sched_insert_nops = sched_finish_none; 4526 else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) 4527 rs6000_sched_insert_nops = sched_finish_pad_groups; 4528 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) 4529 rs6000_sched_insert_nops = sched_finish_regroup_exact; 4530 else 4531 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion) 4532 atoi (rs6000_sched_insert_nops_str)); 4533 } 4534 4535 /* Handle stack protector */ 4536 if (!OPTION_SET_P (rs6000_stack_protector_guard)) 4537#ifdef TARGET_THREAD_SSP_OFFSET 4538 rs6000_stack_protector_guard = SSP_TLS; 4539#else 4540 rs6000_stack_protector_guard = SSP_GLOBAL; 4541#endif 4542 4543#ifdef TARGET_THREAD_SSP_OFFSET 4544 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; 4545 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; 4546#endif 4547 4548 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str)) 4549 { 4550 char *endp; 4551 const char *str = rs6000_stack_protector_guard_offset_str; 4552 4553 errno = 0; 4554 long offset = strtol (str, &endp, 0); 4555 if (!*str || *endp || errno) 4556 error ("%qs is not a valid number in %qs", str, 4557 "-mstack-protector-guard-offset="); 4558 4559 if (!IN_RANGE (offset, -0x8000, 0x7fff) 4560 || (TARGET_64BIT && (offset & 3))) 4561 error ("%qs is not a valid offset in %qs", str, 4562 "-mstack-protector-guard-offset="); 4563 4564 rs6000_stack_protector_guard_offset = offset; 4565 } 4566 4567 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str)) 4568 { 4569 const char *str = rs6000_stack_protector_guard_reg_str; 4570 int reg = decode_reg_name (str); 4571 4572 if (!IN_RANGE (reg, 1, 31)) 4573 error ("%qs is not a valid base register in %qs", str, 4574 "-mstack-protector-guard-reg="); 4575 4576 rs6000_stack_protector_guard_reg = reg; 4577 } 4578 4579 if (rs6000_stack_protector_guard == SSP_TLS 4580 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) 4581 error ("%qs needs a valid base register", "-mstack-protector-guard=tls"); 4582 4583 if (global_init_p) 4584 { 4585#ifdef TARGET_REGNAMES 4586 /* If the user desires alternate register names, copy in the 4587 alternate names now. */ 4588 if (TARGET_REGNAMES) 4589 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names)); 4590#endif 4591 4592 /* Set aix_struct_return last, after the ABI is determined. 4593 If -maix-struct-return or -msvr4-struct-return was explicitly 4594 used, don't override with the ABI default. 
*/ 4595 if (!OPTION_SET_P (aix_struct_return)) 4596 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET); 4597 4598#if 0 4599 /* IBM XL compiler defaults to unsigned bitfields. */ 4600 if (TARGET_XL_COMPAT) 4601 flag_signed_bitfields = 0; 4602#endif 4603 4604 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD) 4605 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format; 4606 4607 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1); 4608 4609 /* We can only guarantee the availability of DI pseudo-ops when 4610 assembling for 64-bit targets. */ 4611 if (!TARGET_64BIT) 4612 { 4613 targetm.asm_out.aligned_op.di = NULL; 4614 targetm.asm_out.unaligned_op.di = NULL; 4615 } 4616 4617 4618 /* Set branch target alignment, if not optimizing for size. */ 4619 if (!optimize_size) 4620 { 4621 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be 4622 aligned 8byte to avoid misprediction by the branch predictor. */ 4623 if (rs6000_tune == PROCESSOR_TITAN 4624 || rs6000_tune == PROCESSOR_CELL) 4625 { 4626 if (flag_align_functions && !str_align_functions) 4627 str_align_functions = "8"; 4628 if (flag_align_jumps && !str_align_jumps) 4629 str_align_jumps = "8"; 4630 if (flag_align_loops && !str_align_loops) 4631 str_align_loops = "8"; 4632 } 4633 if (rs6000_align_branch_targets) 4634 { 4635 if (flag_align_functions && !str_align_functions) 4636 str_align_functions = "16"; 4637 if (flag_align_jumps && !str_align_jumps) 4638 str_align_jumps = "16"; 4639 if (flag_align_loops && !str_align_loops) 4640 { 4641 can_override_loop_align = 1; 4642 str_align_loops = "16"; 4643 } 4644 } 4645 } 4646 4647 /* Arrange to save and restore machine status around nested functions. */ 4648 init_machine_status = rs6000_init_machine_status; 4649 4650 /* We should always be splitting complex arguments, but we can't break 4651 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ 4652 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) 4653 targetm.calls.split_complex_arg = NULL; 4654 4655 /* The AIX and ELFv1 ABIs define standard function descriptors. */ 4656 if (DEFAULT_ABI == ABI_AIX) 4657 targetm.calls.custom_function_descriptors = 0; 4658 } 4659 4660 /* Initialize rs6000_cost with the appropriate target costs. */ 4661 if (optimize_size) 4662 rs6000_cost = TARGET_POWERPC64 ? 
&size64_cost : &size32_cost; 4663 else 4664 switch (rs6000_tune) 4665 { 4666 case PROCESSOR_RS64A: 4667 rs6000_cost = &rs64a_cost; 4668 break; 4669 4670 case PROCESSOR_MPCCORE: 4671 rs6000_cost = &mpccore_cost; 4672 break; 4673 4674 case PROCESSOR_PPC403: 4675 rs6000_cost = &ppc403_cost; 4676 break; 4677 4678 case PROCESSOR_PPC405: 4679 rs6000_cost = &ppc405_cost; 4680 break; 4681 4682 case PROCESSOR_PPC440: 4683 rs6000_cost = &ppc440_cost; 4684 break; 4685 4686 case PROCESSOR_PPC476: 4687 rs6000_cost = &ppc476_cost; 4688 break; 4689 4690 case PROCESSOR_PPC601: 4691 rs6000_cost = &ppc601_cost; 4692 break; 4693 4694 case PROCESSOR_PPC603: 4695 rs6000_cost = &ppc603_cost; 4696 break; 4697 4698 case PROCESSOR_PPC604: 4699 rs6000_cost = &ppc604_cost; 4700 break; 4701 4702 case PROCESSOR_PPC604e: 4703 rs6000_cost = &ppc604e_cost; 4704 break; 4705 4706 case PROCESSOR_PPC620: 4707 rs6000_cost = &ppc620_cost; 4708 break; 4709 4710 case PROCESSOR_PPC630: 4711 rs6000_cost = &ppc630_cost; 4712 break; 4713 4714 case PROCESSOR_CELL: 4715 rs6000_cost = &ppccell_cost; 4716 break; 4717 4718 case PROCESSOR_PPC750: 4719 case PROCESSOR_PPC7400: 4720 rs6000_cost = &ppc750_cost; 4721 break; 4722 4723 case PROCESSOR_PPC7450: 4724 rs6000_cost = &ppc7450_cost; 4725 break; 4726 4727 case PROCESSOR_PPC8540: 4728 case PROCESSOR_PPC8548: 4729 rs6000_cost = &ppc8540_cost; 4730 break; 4731 4732 case PROCESSOR_PPCE300C2: 4733 case PROCESSOR_PPCE300C3: 4734 rs6000_cost = &ppce300c2c3_cost; 4735 break; 4736 4737 case PROCESSOR_PPCE500MC: 4738 rs6000_cost = &ppce500mc_cost; 4739 break; 4740 4741 case PROCESSOR_PPCE500MC64: 4742 rs6000_cost = &ppce500mc64_cost; 4743 break; 4744 4745 case PROCESSOR_PPCE5500: 4746 rs6000_cost = &ppce5500_cost; 4747 break; 4748 4749 case PROCESSOR_PPCE6500: 4750 rs6000_cost = &ppce6500_cost; 4751 break; 4752 4753 case PROCESSOR_TITAN: 4754 rs6000_cost = &titan_cost; 4755 break; 4756 4757 case PROCESSOR_POWER4: 4758 case PROCESSOR_POWER5: 4759 rs6000_cost = &power4_cost; 4760 break; 4761 4762 case PROCESSOR_POWER6: 4763 rs6000_cost = &power6_cost; 4764 break; 4765 4766 case PROCESSOR_POWER7: 4767 rs6000_cost = &power7_cost; 4768 break; 4769 4770 case PROCESSOR_POWER8: 4771 rs6000_cost = &power8_cost; 4772 break; 4773 4774 case PROCESSOR_POWER9: 4775 rs6000_cost = &power9_cost; 4776 break; 4777 4778 case PROCESSOR_POWER10: 4779 rs6000_cost = &power10_cost; 4780 break; 4781 4782 case PROCESSOR_PPCA2: 4783 rs6000_cost = &ppca2_cost; 4784 break; 4785 4786 default: 4787 gcc_unreachable (); 4788 } 4789 4790 if (global_init_p) 4791 { 4792 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4793 param_simultaneous_prefetches, 4794 rs6000_cost->simultaneous_prefetches); 4795 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4796 param_l1_cache_size, 4797 rs6000_cost->l1_cache_size); 4798 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4799 param_l1_cache_line_size, 4800 rs6000_cost->cache_line_size); 4801 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4802 param_l2_cache_size, 4803 rs6000_cost->l2_cache_size); 4804 4805 /* Increase loop peeling limits based on performance analysis. */ 4806 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4807 param_max_peeled_insns, 400); 4808 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4809 param_max_completely_peeled_insns, 400); 4810 4811 /* The lxvl/stxvl instructions don't perform well before Power10. 
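   The values used here follow the generic --param vect-partial-vector-usage semantics (editor's note, hedged): 1 restricts the length-based partial vectors (lxvl/stxvl) to loop epilogues on Power10, while 0 disables partial vectors entirely on older cores.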
*/ 4812 if (TARGET_POWER10) 4813 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4814 param_vect_partial_vector_usage, 1); 4815 else 4816 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4817 param_vect_partial_vector_usage, 0); 4818 4819 /* Use the 'model' -fsched-pressure algorithm by default. */ 4820 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4821 param_sched_pressure_algorithm, 4822 SCHED_PRESSURE_MODEL); 4823 4824 /* If using typedef char *va_list, signal that 4825 __builtin_va_start (&ap, 0) can be optimized to 4826 ap = __builtin_next_arg (0). */ 4827 if (DEFAULT_ABI != ABI_V4) 4828 targetm.expand_builtin_va_start = NULL; 4829 } 4830 4831 rs6000_override_options_after_change (); 4832 4833 /* If not explicitly specified via option, decide whether to generate indexed 4834 load/store instructions. A value of -1 indicates that the 4835 initial value of this variable has not been overwritten. During 4836 compilation, TARGET_AVOID_XFORM is either 0 or 1. */ 4837 if (TARGET_AVOID_XFORM == -1) 4838 /* Avoid indexed addressing when targeting Power6 in order to avoid the 4839 DERAT mispredict penalty. However the LVE and STVE altivec instructions 4840 need indexed accesses and the type used is the scalar type of the element 4841 being loaded or stored. */ 4842 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB 4843 && !TARGET_ALTIVEC); 4844 4845 /* Set the -mrecip options. */ 4846 if (rs6000_recip_name) 4847 { 4848 char *p = ASTRDUP (rs6000_recip_name); 4849 char *q; 4850 unsigned int mask, i; 4851 bool invert; 4852 4853 while ((q = strtok (p, ",")) != NULL) 4854 { 4855 p = NULL; 4856 if (*q == '!') 4857 { 4858 invert = true; 4859 q++; 4860 } 4861 else 4862 invert = false; 4863 4864 if (!strcmp (q, "default")) 4865 mask = ((TARGET_RECIP_PRECISION) 4866 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION); 4867 else 4868 { 4869 for (i = 0; i < ARRAY_SIZE (recip_options); i++) 4870 if (!strcmp (q, recip_options[i].string)) 4871 { 4872 mask = recip_options[i].mask; 4873 break; 4874 } 4875 4876 if (i == ARRAY_SIZE (recip_options)) 4877 { 4878 error ("unknown option for %<%s=%s%>", "-mrecip", q); 4879 invert = false; 4880 mask = 0; 4881 ret = false; 4882 } 4883 } 4884 4885 if (invert) 4886 rs6000_recip_control &= ~mask; 4887 else 4888 rs6000_recip_control |= mask; 4889 } 4890 } 4891 4892 /* Set the builtin mask of the various options used that could affect which 4893 builtins were used. In the past we used target_flags, but we've run out 4894 of bits, and some options are no longer in target_flags. */ 4895 rs6000_builtin_mask = rs6000_builtin_mask_calculate (); 4896 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) 4897 rs6000_print_builtin_options (stderr, 0, "builtin mask", 4898 rs6000_builtin_mask); 4899 4900 /* Initialize all of the registers. */ 4901 rs6000_init_hard_regno_mode_ok (global_init_p); 4902 4903 /* Save the initial options in case the user does function specific options */ 4904 if (global_init_p) 4905 target_option_default_node = target_option_current_node 4906 = build_target_option_node (&global_options, &global_options_set); 4907 4908 /* If not explicitly specified via option, decide whether to generate the 4909 extra blr's required to preserve the link stack on some cpus (eg, 476). */ 4910 if (TARGET_LINK_STACK == -1) 4911 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic); 4912 4913 /* Deprecate use of -mno-speculate-indirect-jumps. 
*/ 4914 if (!rs6000_speculate_indirect_jumps) 4915 warning (0, "%qs is deprecated and not recommended in any circumstances", 4916 "-mno-speculate-indirect-jumps"); 4917 4918 return ret; 4919} 4920 4921/* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to 4922 define the target cpu type. */ 4923 4924static void 4925rs6000_option_override (void) 4926{ 4927 (void) rs6000_option_override_internal (true); 4928} 4929 4930 4931/* Implement LOOP_ALIGN. */ 4932align_flags 4933rs6000_loop_align (rtx label) 4934{ 4935 basic_block bb; 4936 int ninsns; 4937 4938 /* Don't override loop alignment if -falign-loops was specified. */ 4939 if (!can_override_loop_align) 4940 return align_loops; 4941 4942 bb = BLOCK_FOR_INSN (label); 4943 ninsns = num_loop_insns(bb->loop_father); 4944 4945 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */ 4946 if (ninsns > 4 && ninsns <= 8 4947 && (rs6000_tune == PROCESSOR_POWER4 4948 || rs6000_tune == PROCESSOR_POWER5 4949 || rs6000_tune == PROCESSOR_POWER6 4950 || rs6000_tune == PROCESSOR_POWER7 4951 || rs6000_tune == PROCESSOR_POWER8)) 4952 return align_flags (5); 4953 else 4954 return align_loops; 4955} 4956 4957/* Return true iff, data reference of TYPE can reach vector alignment (16) 4958 after applying N number of iterations. This routine does not determine 4959 how may iterations are required to reach desired alignment. */ 4960 4961static bool 4962rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed) 4963{ 4964 if (is_packed) 4965 return false; 4966 4967 if (TARGET_32BIT) 4968 { 4969 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) 4970 return true; 4971 4972 if (rs6000_alignment_flags == MASK_ALIGN_POWER) 4973 return true; 4974 4975 return false; 4976 } 4977 else 4978 { 4979 if (TARGET_MACHO) 4980 return false; 4981 4982 /* Assuming that all other types are naturally aligned. CHECKME! */ 4983 return true; 4984 } 4985} 4986 4987/* Return true if the vector misalignment factor is supported by the 4988 target. */ 4989static bool 4990rs6000_builtin_support_vector_misalignment (machine_mode mode, 4991 const_tree type, 4992 int misalignment, 4993 bool is_packed) 4994{ 4995 if (TARGET_VSX) 4996 { 4997 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4998 return true; 4999 5000 /* Return if movmisalign pattern is not supported for this mode. */ 5001 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) 5002 return false; 5003 5004 if (misalignment == -1) 5005 { 5006 /* Misalignment factor is unknown at compile time but we know 5007 it's word aligned. */ 5008 if (rs6000_vector_alignment_reachable (type, is_packed)) 5009 { 5010 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); 5011 5012 if (element_size == 64 || element_size == 32) 5013 return true; 5014 } 5015 5016 return false; 5017 } 5018 5019 /* VSX supports word-aligned vector. */ 5020 if (misalignment % 4 == 0) 5021 return true; 5022 } 5023 return false; 5024} 5025 5026/* Implement targetm.vectorize.builtin_vectorization_cost. 
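   The numbers returned below are relative cost units consumed by the vectorizer's profitability model, not cycle counts (an editorial gloss). For instance, a merely word-aligned V4SF unaligned load costs 33 on VSX without efficient unaligned support, which strongly discourages vectorizing such accesses, while the same load costs only 2 once TARGET_EFFICIENT_UNALIGNED_VSX is set.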
*/ 5027static int 5028rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 5029 tree vectype, int misalign) 5030{ 5031 unsigned elements; 5032 tree elem_type; 5033 5034 switch (type_of_cost) 5035 { 5036 case scalar_stmt: 5037 case scalar_store: 5038 case vector_stmt: 5039 case vector_store: 5040 case vec_to_scalar: 5041 case scalar_to_vec: 5042 case cond_branch_not_taken: 5043 return 1; 5044 case scalar_load: 5045 case vector_load: 5046 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 5047 return 2; 5048 5049 case vec_perm: 5050 /* Power7 has only one permute unit, make it a bit expensive. */ 5051 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7) 5052 return 3; 5053 else 5054 return 1; 5055 5056 case vec_promote_demote: 5057 /* Power7 has only one permute/pack unit, make it a bit expensive. */ 5058 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7) 5059 return 4; 5060 else 5061 return 1; 5062 5063 case cond_branch_taken: 5064 return 3; 5065 5066 case unaligned_load: 5067 case vector_gather_load: 5068 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 5069 if (TARGET_EFFICIENT_UNALIGNED_VSX) 5070 return 2; 5071 5072 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) 5073 { 5074 elements = TYPE_VECTOR_SUBPARTS (vectype); 5075 /* See PR102767, consider V1TI to keep consistency. */ 5076 if (elements == 2 || elements == 1) 5077 /* Double word aligned. */ 5078 return 4; 5079 5080 if (elements == 4) 5081 { 5082 switch (misalign) 5083 { 5084 case 8: 5085 /* Double word aligned. */ 5086 return 4; 5087 5088 case -1: 5089 /* Unknown misalignment. */ 5090 case 4: 5091 case 12: 5092 /* Word aligned. */ 5093 return 33; 5094 5095 default: 5096 gcc_unreachable (); 5097 } 5098 } 5099 } 5100 5101 if (TARGET_ALTIVEC) 5102 /* Misaligned loads are not supported. */ 5103 gcc_unreachable (); 5104 5105 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 5106 return 4; 5107 5108 case unaligned_store: 5109 case vector_scatter_store: 5110 if (TARGET_EFFICIENT_UNALIGNED_VSX) 5111 return 1; 5112 5113 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) 5114 { 5115 elements = TYPE_VECTOR_SUBPARTS (vectype); 5116 /* See PR102767, consider V1TI to keep consistency. */ 5117 if (elements == 2 || elements == 1) 5118 /* Double word aligned. */ 5119 return 2; 5120 5121 if (elements == 4) 5122 { 5123 switch (misalign) 5124 { 5125 case 8: 5126 /* Double word aligned. */ 5127 return 2; 5128 5129 case -1: 5130 /* Unknown misalignment. */ 5131 case 4: 5132 case 12: 5133 /* Word aligned. */ 5134 return 23; 5135 5136 default: 5137 gcc_unreachable (); 5138 } 5139 } 5140 } 5141 5142 if (TARGET_ALTIVEC) 5143 /* Misaligned stores are not supported. */ 5144 gcc_unreachable (); 5145 5146 return 2; 5147 5148 case vec_construct: 5149 /* This is a rough approximation assuming non-constant elements 5150 constructed into a vector via element insertion. FIXME: 5151 vec_construct is not granular enough for uniformly good 5152 decisions. If the initialization is a splat, this is 5153 cheaper than we estimate. Improve this someday. */ 5154 elem_type = TREE_TYPE (vectype); 5155 /* 32-bit vectors loaded into registers are stored as double 5156 precision, so we need 2 permutes, 2 converts, and 1 merge 5157 to construct a vector of short floats from them. */ 5158 if (SCALAR_FLOAT_TYPE_P (elem_type) 5159 && TYPE_PRECISION (elem_type) == 32) 5160 return 5; 5161 /* On POWER9, integer vector types are built up in GPRs and then 5162 use a direct move (2 cycles). 
For POWER8 this is even worse, 5163 as we need two direct moves and a merge, and the direct moves 5164 are five cycles. */ 5165 else if (INTEGRAL_TYPE_P (elem_type)) 5166 { 5167 if (TARGET_P9_VECTOR) 5168 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2; 5169 else 5170 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5; 5171 } 5172 else 5173 /* V2DFmode doesn't need a direct move. */ 5174 return 2; 5175 5176 default: 5177 gcc_unreachable (); 5178 } 5179} 5180 5181/* Implement targetm.vectorize.preferred_simd_mode. */ 5182 5183static machine_mode 5184rs6000_preferred_simd_mode (scalar_mode mode) 5185{ 5186 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode)); 5187 5188 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ())) 5189 return vmode.require (); 5190 5191 return word_mode; 5192} 5193 5194class rs6000_cost_data : public vector_costs 5195{ 5196public: 5197 using vector_costs::vector_costs; 5198 5199 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, 5200 stmt_vec_info stmt_info, slp_tree, tree vectype, 5201 int misalign, 5202 vect_cost_model_location where) override; 5203 void finish_cost (const vector_costs *) override; 5204 5205protected: 5206 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info, 5207 vect_cost_model_location, unsigned int); 5208 void density_test (loop_vec_info); 5209 void adjust_vect_cost_per_loop (loop_vec_info); 5210 5211 /* Total number of vectorized stmts (loop only). */ 5212 unsigned m_nstmts = 0; 5213 /* Total number of loads (loop only). */ 5214 unsigned m_nloads = 0; 5215 /* Possible extra penalized cost on vector construction (loop only). */ 5216 unsigned m_extra_ctor_cost = 0; 5217 /* For each vectorized loop, this var holds TRUE iff a non-memory vector 5218 instruction is needed by the vectorization. */ 5219 bool m_vect_nonmem = false; 5220}; 5221 5222/* Test for likely overcommitment of vector hardware resources. If a 5223 loop iteration is relatively large, and too large a percentage of 5224 instructions in the loop are vectorized, the cost model may not 5225 adequately reflect delays from unavailable vector resources. 5226 Penalize the loop body cost for this case. */ 5227 5228void 5229rs6000_cost_data::density_test (loop_vec_info loop_vinfo) 5230{ 5231 /* This density test only cares about the cost of vector version of the 5232 loop, so immediately return if we are passed costing for the scalar 5233 version (namely computing single scalar iteration cost). 
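   As a purely hypothetical instance of the penalty computed further below: with vec_cost = 90 and not_vec_cost = 10, density_pct is (90 * 100) / (90 + 10) = 90%; if that exceeds rs6000_density_pct_threshold and the total of 100 exceeds rs6000_density_size_threshold, the vect_body cost is scaled to 90 * (100 + rs6000_density_penalty) / 100.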
*/ 5234 if (m_costing_for_scalar) 5235 return; 5236 5237 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 5238 basic_block *bbs = get_loop_body (loop); 5239 int nbbs = loop->num_nodes; 5240 int vec_cost = m_costs[vect_body], not_vec_cost = 0; 5241 5242 for (int i = 0; i < nbbs; i++) 5243 { 5244 basic_block bb = bbs[i]; 5245 gimple_stmt_iterator gsi; 5246 5247 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5248 { 5249 gimple *stmt = gsi_stmt (gsi); 5250 if (is_gimple_debug (stmt)) 5251 continue; 5252 5253 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); 5254 5255 if (!STMT_VINFO_RELEVANT_P (stmt_info) 5256 && !STMT_VINFO_IN_PATTERN_P (stmt_info)) 5257 not_vec_cost++; 5258 } 5259 } 5260 5261 free (bbs); 5262 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); 5263 5264 if (density_pct > rs6000_density_pct_threshold 5265 && vec_cost + not_vec_cost > rs6000_density_size_threshold) 5266 { 5267 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100; 5268 if (dump_enabled_p ()) 5269 dump_printf_loc (MSG_NOTE, vect_location, 5270 "density %d%%, cost %d exceeds threshold, penalizing " 5271 "loop body cost by %u%%\n", density_pct, 5272 vec_cost + not_vec_cost, rs6000_density_penalty); 5273 } 5274 5275 /* Check whether we need to penalize the body cost to account 5276 for excess strided or elementwise loads. */ 5277 if (m_extra_ctor_cost > 0) 5278 { 5279 gcc_assert (m_nloads <= m_nstmts); 5280 unsigned int load_pct = (m_nloads * 100) / m_nstmts; 5281 5282 /* It's likely to be bounded by latency and execution resources 5283 from many scalar loads which are strided or elementwise loads 5284 into a vector if both conditions below are found: 5285 1. there are many loads, it's easy to result in a long wait 5286 for load units; 5287 2. load has a big proportion of all vectorized statements, 5288 it's not easy to schedule other statements to spread among 5289 the loads. 5290 One typical case is the innermost loop of the hotspot of SPEC2017 5291 503.bwaves_r without loop interchange. */ 5292 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold 5293 && load_pct > (unsigned int) rs6000_density_load_pct_threshold) 5294 { 5295 m_costs[vect_body] += m_extra_ctor_cost; 5296 if (dump_enabled_p ()) 5297 dump_printf_loc (MSG_NOTE, vect_location, 5298 "Found %u loads and " 5299 "load pct. %u%% exceed " 5300 "the threshold, " 5301 "penalizing loop body " 5302 "cost by extra cost %u " 5303 "for ctor.\n", 5304 m_nloads, load_pct, 5305 m_extra_ctor_cost); 5306 } 5307 } 5308} 5309 5310/* Implement targetm.vectorize.create_costs. */ 5311 5312static vector_costs * 5313rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) 5314{ 5315 return new rs6000_cost_data (vinfo, costing_for_scalar); 5316} 5317 5318/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost. 5319 For some statement, we would like to further fine-grain tweak the cost on 5320 top of rs6000_builtin_vectorization_cost handling which doesn't have any 5321 information on statement operation codes etc. One typical case here is 5322 COND_EXPR, it takes the same cost to simple FXU instruction when evaluating 5323 for scalar cost, but it should be priced more whatever transformed to either 5324 compare + branch or compare + isel instructions. 
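   A small illustrative case of what this means: for a scalar statement like x = a < b ? c : d, the generic cost hook charges the usual scalar_stmt cost of 1 and the helper below adds 2 on top, since the statement will become at least a compare plus a branch or an isel rather than a single FXU instruction.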
*/ 5325 5326static unsigned 5327rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind, 5328 struct _stmt_vec_info *stmt_info) 5329{ 5330 if (kind == scalar_stmt && stmt_info && stmt_info->stmt 5331 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) 5332 { 5333 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt); 5334 if (subcode == COND_EXPR) 5335 return 2; 5336 } 5337 5338 return 0; 5339} 5340 5341/* Helper function for add_stmt_cost. Check each statement cost 5342 entry, gather information and update the target_cost fields 5343 accordingly. */ 5344void 5345rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind, 5346 stmt_vec_info stmt_info, 5347 vect_cost_model_location where, 5348 unsigned int orig_count) 5349{ 5350 5351 /* Check whether we're doing something other than just a copy loop. 5352 Not all such loops may be profitably vectorized; see 5353 rs6000_finish_cost. */ 5354 if (kind == vec_to_scalar 5355 || kind == vec_perm 5356 || kind == vec_promote_demote 5357 || kind == vec_construct 5358 || kind == scalar_to_vec 5359 || (where == vect_body && kind == vector_stmt)) 5360 m_vect_nonmem = true; 5361 5362 /* Gather some information when we are costing the vectorized instruction 5363 for the statements located in a loop body. */ 5364 if (!m_costing_for_scalar 5365 && is_a<loop_vec_info> (m_vinfo) 5366 && where == vect_body) 5367 { 5368 m_nstmts += orig_count; 5369 5370 if (kind == scalar_load || kind == vector_load 5371 || kind == unaligned_load || kind == vector_gather_load) 5372 m_nloads += orig_count; 5373 5374 /* Power processors do not currently have instructions for strided 5375 and elementwise loads, and instead we must generate multiple 5376 scalar loads. This leads to undercounting of the cost. We 5377 account for this by scaling the construction cost by the number 5378 of elements involved, and saving this as extra cost that we may 5379 or may not need to apply. When finalizing the cost of the loop, 5380 the extra penalty is applied when the load density heuristics 5381 are satisfied. */ 5382 if (kind == vec_construct && stmt_info 5383 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type 5384 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE 5385 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP)) 5386 { 5387 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 5388 unsigned int nunits = vect_nunits_for_cost (vectype); 5389 /* As PR103702 shows, it's possible that vectorizer wants to do 5390 costings for only one unit here, it's no need to do any 5391 penalization for it, so simply early return here. */ 5392 if (nunits == 1) 5393 return; 5394 /* i386 port adopts nunits * stmt_cost as the penalized cost 5395 for this kind of penalization, we used to follow it but 5396 found it could result in an unreliable body cost especially 5397 for V16QI/V8HI modes. To make it better, we choose this 5398 new heuristic: for each scalar load, we use 2 as penalized 5399 cost for the case with 2 nunits and use 1 for the other 5400 cases. It's without much supporting theory, mainly 5401 concluded from the broad performance evaluations on Power8, 5402 Power9 and Power10. One possibly related point is that: 5403 vector construction for more units would use more insns, 5404 it has more chances to schedule them better (even run in 5405 parallelly when enough available units at that time), so 5406 it seems reasonable not to penalize that much for them. */ 5407 unsigned int adjusted_cost = (nunits == 2) ? 
2 : 1; 5408 unsigned int extra_cost = nunits * adjusted_cost; 5409 m_extra_ctor_cost += extra_cost; 5410 } 5411 } 5412} 5413 5414unsigned 5415rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind, 5416 stmt_vec_info stmt_info, slp_tree, 5417 tree vectype, int misalign, 5418 vect_cost_model_location where) 5419{ 5420 unsigned retval = 0; 5421 5422 if (flag_vect_cost_model) 5423 { 5424 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype, 5425 misalign); 5426 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info); 5427 /* Statements in an inner loop relative to the loop being 5428 vectorized are weighted more heavily. The value here is 5429 arbitrary and could potentially be improved with analysis. */ 5430 unsigned int orig_count = count; 5431 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost); 5432 m_costs[where] += retval; 5433 5434 update_target_cost_per_stmt (kind, stmt_info, where, orig_count); 5435 } 5436 5437 return retval; 5438} 5439 5440/* For some target specific vectorization cost which can't be handled per stmt, 5441 we check the requisite conditions and adjust the vectorization cost 5442 accordingly if satisfied. One typical example is to model shift cost for 5443 vector with length by counting number of required lengths under condition 5444 LOOP_VINFO_FULLY_WITH_LENGTH_P. */ 5445 5446void 5447rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo) 5448{ 5449 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) 5450 { 5451 rgroup_controls *rgc; 5452 unsigned int num_vectors_m1; 5453 unsigned int shift_cnt = 0; 5454 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc) 5455 if (rgc->type) 5456 /* Each length needs one shift to fill into bits 0-7. */ 5457 shift_cnt += num_vectors_m1 + 1; 5458 5459 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL, 5460 NULL_TREE, 0, vect_body); 5461 } 5462} 5463 5464void 5465rs6000_cost_data::finish_cost (const vector_costs *scalar_costs) 5466{ 5467 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo)) 5468 { 5469 adjust_vect_cost_per_loop (loop_vinfo); 5470 density_test (loop_vinfo); 5471 5472 /* Don't vectorize minimum-vectorization-factor, simple copy loops 5473 that require versioning for any reason. The vectorization is at 5474 best a wash inside the loop, and the versioning checks make 5475 profitability highly unlikely and potentially quite harmful. */ 5476 if (!m_vect_nonmem 5477 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2 5478 && LOOP_REQUIRES_VERSIONING (loop_vinfo)) 5479 m_costs[vect_body] += 10000; 5480 } 5481 5482 vector_costs::finish_cost (scalar_costs); 5483} 5484 5485/* Implement targetm.loop_unroll_adjust. */ 5486 5487static unsigned 5488rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) 5489{ 5490 if (unroll_only_small_loops) 5491 { 5492 /* TODO: These are hardcoded values right now. We probably should use 5493 a PARAM here. */ 5494 if (loop->ninsns <= 6) 5495 return MIN (4, nunroll); 5496 if (loop->ninsns <= 10) 5497 return MIN (2, nunroll); 5498 5499 return 0; 5500 } 5501 5502 return nunroll; 5503} 5504 5505/* Returns a function decl for a vectorized version of the builtin function 5506 with builtin function code FN and the result vector type TYPE, or NULL_TREE 5507 if it is not available. 5508 5509 Implement targetm.vectorize.builtin_vectorized_function. 
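   For example (illustrative only): vectorizing a loop of ceil () calls on doubles with VSX passes CFN_CEIL with V2DFmode for both TYPE_OUT and TYPE_IN, and the switch below hands back rs6000_builtin_decls[RS6000_BIF_XVRDPIP]; when no case matches the modes in use, NULL_TREE is returned and the call stays scalar (or is given to the libmass handler if one is registered).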
*/ 5510 5511static tree 5512rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, 5513 tree type_in) 5514{ 5515 machine_mode in_mode, out_mode; 5516 int in_n, out_n; 5517 5518 if (TARGET_DEBUG_BUILTIN) 5519 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n", 5520 combined_fn_name (combined_fn (fn)), 5521 GET_MODE_NAME (TYPE_MODE (type_out)), 5522 GET_MODE_NAME (TYPE_MODE (type_in))); 5523 5524 /* TODO: Should this be gcc_assert? */ 5525 if (TREE_CODE (type_out) != VECTOR_TYPE 5526 || TREE_CODE (type_in) != VECTOR_TYPE) 5527 return NULL_TREE; 5528 5529 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 5530 out_n = TYPE_VECTOR_SUBPARTS (type_out); 5531 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 5532 in_n = TYPE_VECTOR_SUBPARTS (type_in); 5533 5534 switch (fn) 5535 { 5536 CASE_CFN_COPYSIGN: 5537 if (VECTOR_UNIT_VSX_P (V2DFmode) 5538 && out_mode == DFmode && out_n == 2 5539 && in_mode == DFmode && in_n == 2) 5540 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP]; 5541 if (VECTOR_UNIT_VSX_P (V4SFmode) 5542 && out_mode == SFmode && out_n == 4 5543 && in_mode == SFmode && in_n == 4) 5544 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP]; 5545 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5546 && out_mode == SFmode && out_n == 4 5547 && in_mode == SFmode && in_n == 4) 5548 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF]; 5549 break; 5550 CASE_CFN_CEIL: 5551 if (VECTOR_UNIT_VSX_P (V2DFmode) 5552 && out_mode == DFmode && out_n == 2 5553 && in_mode == DFmode && in_n == 2) 5554 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP]; 5555 if (VECTOR_UNIT_VSX_P (V4SFmode) 5556 && out_mode == SFmode && out_n == 4 5557 && in_mode == SFmode && in_n == 4) 5558 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP]; 5559 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5560 && out_mode == SFmode && out_n == 4 5561 && in_mode == SFmode && in_n == 4) 5562 return rs6000_builtin_decls[RS6000_BIF_VRFIP]; 5563 break; 5564 CASE_CFN_FLOOR: 5565 if (VECTOR_UNIT_VSX_P (V2DFmode) 5566 && out_mode == DFmode && out_n == 2 5567 && in_mode == DFmode && in_n == 2) 5568 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM]; 5569 if (VECTOR_UNIT_VSX_P (V4SFmode) 5570 && out_mode == SFmode && out_n == 4 5571 && in_mode == SFmode && in_n == 4) 5572 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM]; 5573 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5574 && out_mode == SFmode && out_n == 4 5575 && in_mode == SFmode && in_n == 4) 5576 return rs6000_builtin_decls[RS6000_BIF_VRFIM]; 5577 break; 5578 CASE_CFN_FMA: 5579 if (VECTOR_UNIT_VSX_P (V2DFmode) 5580 && out_mode == DFmode && out_n == 2 5581 && in_mode == DFmode && in_n == 2) 5582 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP]; 5583 if (VECTOR_UNIT_VSX_P (V4SFmode) 5584 && out_mode == SFmode && out_n == 4 5585 && in_mode == SFmode && in_n == 4) 5586 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP]; 5587 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5588 && out_mode == SFmode && out_n == 4 5589 && in_mode == SFmode && in_n == 4) 5590 return rs6000_builtin_decls[RS6000_BIF_VMADDFP]; 5591 break; 5592 CASE_CFN_TRUNC: 5593 if (VECTOR_UNIT_VSX_P (V2DFmode) 5594 && out_mode == DFmode && out_n == 2 5595 && in_mode == DFmode && in_n == 2) 5596 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ]; 5597 if (VECTOR_UNIT_VSX_P (V4SFmode) 5598 && out_mode == SFmode && out_n == 4 5599 && in_mode == SFmode && in_n == 4) 5600 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ]; 5601 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5602 && out_mode == SFmode && out_n == 4 5603 && in_mode == SFmode && in_n == 4) 5604 return 
rs6000_builtin_decls[RS6000_BIF_VRFIZ]; 5605 break; 5606 CASE_CFN_NEARBYINT: 5607 if (VECTOR_UNIT_VSX_P (V2DFmode) 5608 && flag_unsafe_math_optimizations 5609 && out_mode == DFmode && out_n == 2 5610 && in_mode == DFmode && in_n == 2) 5611 return rs6000_builtin_decls[RS6000_BIF_XVRDPI]; 5612 if (VECTOR_UNIT_VSX_P (V4SFmode) 5613 && flag_unsafe_math_optimizations 5614 && out_mode == SFmode && out_n == 4 5615 && in_mode == SFmode && in_n == 4) 5616 return rs6000_builtin_decls[RS6000_BIF_XVRSPI]; 5617 break; 5618 CASE_CFN_RINT: 5619 if (VECTOR_UNIT_VSX_P (V2DFmode) 5620 && !flag_trapping_math 5621 && out_mode == DFmode && out_n == 2 5622 && in_mode == DFmode && in_n == 2) 5623 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC]; 5624 if (VECTOR_UNIT_VSX_P (V4SFmode) 5625 && !flag_trapping_math 5626 && out_mode == SFmode && out_n == 4 5627 && in_mode == SFmode && in_n == 4) 5628 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC]; 5629 break; 5630 default: 5631 break; 5632 } 5633 5634 /* Generate calls to libmass if appropriate. */ 5635 if (rs6000_veclib_handler) 5636 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in); 5637 5638 return NULL_TREE; 5639} 5640 5641/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a 5642 library with vectorized intrinsics. */ 5643 5644static tree 5645rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out, 5646 tree type_in) 5647{ 5648 char name[32]; 5649 const char *suffix = NULL; 5650 tree fntype, new_fndecl, bdecl = NULL_TREE; 5651 int n_args = 1; 5652 const char *bname; 5653 machine_mode el_mode, in_mode; 5654 int n, in_n; 5655 5656 /* Libmass is suitable for unsafe math only as it does not correctly support 5657 parts of IEEE with the required precision such as denormals. Only support 5658 it if we have VSX to use the simd d2 or f4 functions. 5659 XXX: Add variable length support. 
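   A sketch of the resulting name mapping, assuming -funsafe-math-optimizations and VSX: a V2DFmode pow is redirected to powd2 and a V4SFmode powf to powf4; the code below simply strips the __builtin_ prefix from the scalar decl's name and appends the "d2" or "4" suffix before building the new decl.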
*/ 5660 if (!flag_unsafe_math_optimizations || !TARGET_VSX) 5661 return NULL_TREE; 5662 5663 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 5664 n = TYPE_VECTOR_SUBPARTS (type_out); 5665 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 5666 in_n = TYPE_VECTOR_SUBPARTS (type_in); 5667 if (el_mode != in_mode 5668 || n != in_n) 5669 return NULL_TREE; 5670 5671 switch (fn) 5672 { 5673 CASE_CFN_ATAN2: 5674 CASE_CFN_HYPOT: 5675 CASE_CFN_POW: 5676 n_args = 2; 5677 gcc_fallthrough (); 5678 5679 CASE_CFN_ACOS: 5680 CASE_CFN_ACOSH: 5681 CASE_CFN_ASIN: 5682 CASE_CFN_ASINH: 5683 CASE_CFN_ATAN: 5684 CASE_CFN_ATANH: 5685 CASE_CFN_CBRT: 5686 CASE_CFN_COS: 5687 CASE_CFN_COSH: 5688 CASE_CFN_ERF: 5689 CASE_CFN_ERFC: 5690 CASE_CFN_EXP2: 5691 CASE_CFN_EXP: 5692 CASE_CFN_EXPM1: 5693 CASE_CFN_LGAMMA: 5694 CASE_CFN_LOG10: 5695 CASE_CFN_LOG1P: 5696 CASE_CFN_LOG2: 5697 CASE_CFN_LOG: 5698 CASE_CFN_SIN: 5699 CASE_CFN_SINH: 5700 CASE_CFN_SQRT: 5701 CASE_CFN_TAN: 5702 CASE_CFN_TANH: 5703 if (el_mode == DFmode && n == 2) 5704 { 5705 bdecl = mathfn_built_in (double_type_node, fn); 5706 suffix = "d2"; /* pow -> powd2 */ 5707 } 5708 else if (el_mode == SFmode && n == 4) 5709 { 5710 bdecl = mathfn_built_in (float_type_node, fn); 5711 suffix = "4"; /* powf -> powf4 */ 5712 } 5713 else 5714 return NULL_TREE; 5715 if (!bdecl) 5716 return NULL_TREE; 5717 break; 5718 5719 default: 5720 return NULL_TREE; 5721 } 5722 5723 gcc_assert (suffix != NULL); 5724 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)); 5725 if (!bname) 5726 return NULL_TREE; 5727 5728 strcpy (name, bname + strlen ("__builtin_")); 5729 strcat (name, suffix); 5730 5731 if (n_args == 1) 5732 fntype = build_function_type_list (type_out, type_in, NULL); 5733 else if (n_args == 2) 5734 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 5735 else 5736 gcc_unreachable (); 5737 5738 /* Build a function declaration for the vectorized function. */ 5739 new_fndecl = build_decl (BUILTINS_LOCATION, 5740 FUNCTION_DECL, get_identifier (name), fntype); 5741 TREE_PUBLIC (new_fndecl) = 1; 5742 DECL_EXTERNAL (new_fndecl) = 1; 5743 DECL_IS_NOVOPS (new_fndecl) = 1; 5744 TREE_READONLY (new_fndecl) = 1; 5745 5746 return new_fndecl; 5747} 5748 5749 5750/* Default CPU string for rs6000*_file_start functions. 
*/ 5751static const char *rs6000_default_cpu; 5752 5753#ifdef USING_ELFOS_H 5754const char *rs6000_machine; 5755 5756const char * 5757rs6000_machine_from_flags (void) 5758{ 5759 /* e300 and e500 */ 5760 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3) 5761 return "e300"; 5762 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548) 5763 return "e500"; 5764 if (rs6000_cpu == PROCESSOR_PPCE500MC) 5765 return "e500mc"; 5766 if (rs6000_cpu == PROCESSOR_PPCE500MC64) 5767 return "e500mc64"; 5768 if (rs6000_cpu == PROCESSOR_PPCE5500) 5769 return "e5500"; 5770 if (rs6000_cpu == PROCESSOR_PPCE6500) 5771 return "e6500"; 5772 5773 /* 400 series */ 5774 if (rs6000_cpu == PROCESSOR_PPC403) 5775 return "\"403\""; 5776 if (rs6000_cpu == PROCESSOR_PPC405) 5777 return "\"405\""; 5778 if (rs6000_cpu == PROCESSOR_PPC440) 5779 return "\"440\""; 5780 if (rs6000_cpu == PROCESSOR_PPC476) 5781 return "\"476\""; 5782 5783 /* A2 */ 5784 if (rs6000_cpu == PROCESSOR_PPCA2) 5785 return "a2"; 5786 5787 /* Cell BE */ 5788 if (rs6000_cpu == PROCESSOR_CELL) 5789 return "cell"; 5790 5791 /* Titan */ 5792 if (rs6000_cpu == PROCESSOR_TITAN) 5793 return "titan"; 5794 5795 /* 500 series and 800 series */ 5796 if (rs6000_cpu == PROCESSOR_MPCCORE) 5797 return "\"821\""; 5798 5799#if 0 5800 /* This (and ppc64 below) are disabled here (for now at least) because 5801 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON 5802 are #define'd as some of these. Untangling that is a job for later. */ 5803 5804 /* 600 series and 700 series, "classic" */ 5805 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 5806 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e 5807 || rs6000_cpu == PROCESSOR_PPC750) 5808 return "ppc"; 5809#endif 5810 5811 /* Classic with AltiVec, "G4" */ 5812 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450) 5813 return "\"7450\""; 5814 5815#if 0 5816 /* The older 64-bit CPUs */ 5817 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630 5818 || rs6000_cpu == PROCESSOR_RS64A) 5819 return "ppc64"; 5820#endif 5821 5822 HOST_WIDE_INT flags = rs6000_isa_flags; 5823 5824 /* Disable the flags that should never influence the .machine selection. */ 5825 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL); 5826 5827 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0) 5828 return "power10"; 5829 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0) 5830 return "power9"; 5831 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0) 5832 return "power8"; 5833 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0) 5834 return "power7"; 5835 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0) 5836 return "power6"; 5837 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0) 5838 return "power5"; 5839 if ((flags & ISA_2_1_MASKS) != 0) 5840 return "power4"; 5841 if ((flags & OPTION_MASK_POWERPC64) != 0) 5842 return "ppc64"; 5843 return "ppc"; 5844} 5845 5846void 5847emit_asm_machine (void) 5848{ 5849 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine); 5850} 5851#endif 5852 5853/* Do anything needed at the start of the asm file. 
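   For instance (hypothetical invocation), compiling with -fverbose-asm and -mcpu=power9 on an ELFv2 Linux target would typically make the code below emit an assembler comment listing the rs6000/powerpc options, a ".machine power9" directive via emit_asm_machine, and a ".abiversion 2" line.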
*/ 5854 5855static void 5856rs6000_file_start (void) 5857{ 5858 char buffer[80]; 5859 const char *start = buffer; 5860 FILE *file = asm_out_file; 5861 5862 rs6000_default_cpu = TARGET_CPU_DEFAULT; 5863 5864 default_file_start (); 5865 5866 if (flag_verbose_asm) 5867 { 5868 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START); 5869 5870 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') 5871 { 5872 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu); 5873 start = ""; 5874 } 5875 5876 if (OPTION_SET_P (rs6000_cpu_index)) 5877 { 5878 fprintf (file, "%s -mcpu=%s", start, 5879 processor_target_table[rs6000_cpu_index].name); 5880 start = ""; 5881 } 5882 5883 if (OPTION_SET_P (rs6000_tune_index)) 5884 { 5885 fprintf (file, "%s -mtune=%s", start, 5886 processor_target_table[rs6000_tune_index].name); 5887 start = ""; 5888 } 5889 5890 if (PPC405_ERRATUM77) 5891 { 5892 fprintf (file, "%s PPC405CR_ERRATUM77", start); 5893 start = ""; 5894 } 5895 5896#ifdef USING_ELFOS_H 5897 switch (rs6000_sdata) 5898 { 5899 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break; 5900 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break; 5901 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break; 5902 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break; 5903 } 5904 5905 if (rs6000_sdata && g_switch_value) 5906 { 5907 fprintf (file, "%s -G %d", start, 5908 g_switch_value); 5909 start = ""; 5910 } 5911#endif 5912 5913 if (*start == '\0') 5914 putc ('\n', file); 5915 } 5916 5917#ifdef USING_ELFOS_H 5918 rs6000_machine = rs6000_machine_from_flags (); 5919 emit_asm_machine (); 5920#endif 5921 5922 if (DEFAULT_ABI == ABI_ELFv2) 5923 fprintf (file, "\t.abiversion 2\n"); 5924} 5925 5926 5927/* Return nonzero if this function is known to have a null epilogue. */ 5928 5929int 5930direct_return (void) 5931{ 5932 if (reload_completed) 5933 { 5934 rs6000_stack_t *info = rs6000_stack_info (); 5935 5936 if (info->first_gp_reg_save == 32 5937 && info->first_fp_reg_save == 64 5938 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1 5939 && ! info->lr_save_p 5940 && ! info->cr_save_p 5941 && info->vrsave_size == 0 5942 && ! info->push_p) 5943 return 1; 5944 } 5945 5946 return 0; 5947} 5948 5949/* Helper for num_insns_constant. Calculate number of instructions to 5950 load VALUE to a single gpr using combinations of addi, addis, ori, 5951 oris, sldi and rldimi instructions. */ 5952 5953static int 5954num_insns_constant_gpr (HOST_WIDE_INT value) 5955{ 5956 /* signed constant loadable with addi */ 5957 if (SIGNED_INTEGER_16BIT_P (value)) 5958 return 1; 5959 5960 /* constant loadable with addis */ 5961 else if ((value & 0xffff) == 0 5962 && (value >> 31 == -1 || value >> 31 == 0)) 5963 return 1; 5964 5965 /* PADDI can support up to 34 bit signed integers. */ 5966 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value)) 5967 return 1; 5968 5969 else if (TARGET_POWERPC64) 5970 { 5971 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; 5972 HOST_WIDE_INT high = value >> 31; 5973 5974 if (high == 0 || high == -1) 5975 return 2; 5976 5977 high >>= 1; 5978 5979 if (low == 0 || low == high) 5980 return num_insns_constant_gpr (high) + 1; 5981 else if (high == 0) 5982 return num_insns_constant_gpr (low) + 1; 5983 else 5984 return (num_insns_constant_gpr (high) 5985 + num_insns_constant_gpr (low) + 1); 5986 } 5987 5988 else 5989 return 2; 5990} 5991 5992/* Helper for num_insns_constant. 
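   (Some hypothetical data points for the per-GPR helper above: 0x7fff fits a single addi/li and 0x12340000 a single addis/lis, so both count as 1; 0x12345678 needs addis followed by ori, so it counts as 2; a general 64-bit constant is built from its two 32-bit halves plus a combining shift/insert and can cost up to five instructions.)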
Allow constants formed by the 5993 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm, 5994 and handle modes that require multiple gprs. */ 5995 5996static int 5997num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode) 5998{ 5999 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 6000 int total = 0; 6001 while (nregs-- > 0) 6002 { 6003 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD); 6004 int insns = num_insns_constant_gpr (low); 6005 if (insns > 2 6006 /* We won't get more than 2 from num_insns_constant_gpr 6007 except when TARGET_POWERPC64 and mode is DImode or 6008 wider, so the register mode must be DImode. */ 6009 && rs6000_is_valid_and_mask (GEN_INT (low), DImode)) 6010 insns = 2; 6011 total += insns; 6012 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing 6013 it all at once would be UB. */ 6014 value >>= (BITS_PER_WORD - 1); 6015 value >>= 1; 6016 } 6017 return total; 6018} 6019 6020/* Return the number of instructions it takes to form a constant in as 6021 many gprs are needed for MODE. */ 6022 6023int 6024num_insns_constant (rtx op, machine_mode mode) 6025{ 6026 HOST_WIDE_INT val; 6027 6028 switch (GET_CODE (op)) 6029 { 6030 case CONST_INT: 6031 val = INTVAL (op); 6032 break; 6033 6034 case CONST_WIDE_INT: 6035 { 6036 int insns = 0; 6037 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++) 6038 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i), 6039 DImode); 6040 return insns; 6041 } 6042 6043 case CONST_DOUBLE: 6044 { 6045 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op); 6046 6047 if (mode == SFmode || mode == SDmode) 6048 { 6049 long l; 6050 6051 if (mode == SDmode) 6052 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l); 6053 else 6054 REAL_VALUE_TO_TARGET_SINGLE (*rv, l); 6055 /* See the first define_split in rs6000.md handling a 6056 const_double_operand. */ 6057 val = l; 6058 mode = SImode; 6059 } 6060 else if (mode == DFmode || mode == DDmode) 6061 { 6062 long l[2]; 6063 6064 if (mode == DDmode) 6065 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l); 6066 else 6067 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l); 6068 6069 /* See the second (32-bit) and third (64-bit) define_split 6070 in rs6000.md handling a const_double_operand. */ 6071 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32; 6072 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL; 6073 mode = DImode; 6074 } 6075 else if (mode == TFmode || mode == TDmode 6076 || mode == KFmode || mode == IFmode) 6077 { 6078 long l[4]; 6079 int insns; 6080 6081 if (mode == TDmode) 6082 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l); 6083 else 6084 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l); 6085 6086 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32; 6087 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL; 6088 insns = num_insns_constant_multi (val, DImode); 6089 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32; 6090 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL; 6091 insns += num_insns_constant_multi (val, DImode); 6092 return insns; 6093 } 6094 else 6095 gcc_unreachable (); 6096 } 6097 break; 6098 6099 default: 6100 gcc_unreachable (); 6101 } 6102 6103 return num_insns_constant_multi (val, mode); 6104} 6105 6106/* Interpret element ELT of the CONST_VECTOR OP as an integer value. 6107 If the mode of OP is MODE_VECTOR_INT, this simply returns the 6108 corresponding element of the vector, but for V4SFmode, the 6109 corresponding "float" is interpreted as an SImode integer. 
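   For example, the V4SFmode element 1.0f comes back as the SImode value 0x3f800000, which is the representation the vspltis* recognizers below compare against.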
*/ 6110 6111HOST_WIDE_INT 6112const_vector_elt_as_int (rtx op, unsigned int elt) 6113{ 6114 rtx tmp; 6115 6116 /* We can't handle V2DImode and V2DFmode vector constants here yet. */ 6117 gcc_assert (GET_MODE (op) != V2DImode 6118 && GET_MODE (op) != V2DFmode); 6119 6120 tmp = CONST_VECTOR_ELT (op, elt); 6121 if (GET_MODE (op) == V4SFmode) 6122 tmp = gen_lowpart (SImode, tmp); 6123 return INTVAL (tmp); 6124} 6125 6126/* Return true if OP can be synthesized with a particular vspltisb, vspltish 6127 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used 6128 depends on STEP and COPIES, one of which will be 1. If COPIES > 1, 6129 all items are set to the same value and contain COPIES replicas of the 6130 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's 6131 operand and the others are set to the value of the operand's msb. */ 6132 6133static bool 6134vspltis_constant (rtx op, unsigned step, unsigned copies) 6135{ 6136 machine_mode mode = GET_MODE (op); 6137 machine_mode inner = GET_MODE_INNER (mode); 6138 6139 unsigned i; 6140 unsigned nunits; 6141 unsigned bitsize; 6142 unsigned mask; 6143 6144 HOST_WIDE_INT val; 6145 HOST_WIDE_INT splat_val; 6146 HOST_WIDE_INT msb_val; 6147 6148 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode) 6149 return false; 6150 6151 nunits = GET_MODE_NUNITS (mode); 6152 bitsize = GET_MODE_BITSIZE (inner); 6153 mask = GET_MODE_MASK (inner); 6154 6155 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); 6156 splat_val = val; 6157 msb_val = val >= 0 ? 0 : -1; 6158 6159 if (val == 0 && step > 1) 6160 { 6161 /* Special case for loading most significant bit with step > 1. 6162 In that case, match 0s in all but step-1s elements, where match 6163 EASY_VECTOR_MSB. */ 6164 for (i = 1; i < nunits; ++i) 6165 { 6166 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; 6167 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); 6168 if ((i & (step - 1)) == step - 1) 6169 { 6170 if (!EASY_VECTOR_MSB (elt_val, inner)) 6171 break; 6172 } 6173 else if (elt_val) 6174 break; 6175 } 6176 if (i == nunits) 6177 return true; 6178 } 6179 6180 /* Construct the value to be splatted, if possible. If not, return 0. */ 6181 for (i = 2; i <= copies; i *= 2) 6182 { 6183 HOST_WIDE_INT small_val; 6184 bitsize /= 2; 6185 small_val = splat_val >> bitsize; 6186 mask >>= bitsize; 6187 if (splat_val != ((HOST_WIDE_INT) 6188 ((unsigned HOST_WIDE_INT) small_val << bitsize) 6189 | (small_val & mask))) 6190 return false; 6191 splat_val = small_val; 6192 inner = smallest_int_mode_for_size (bitsize); 6193 } 6194 6195 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ 6196 if (EASY_VECTOR_15 (splat_val)) 6197 ; 6198 6199 /* Also check if we can splat, and then add the result to itself. Do so if 6200 the value is positive, of if the splat instruction is using OP's mode; 6201 for splat_val < 0, the splat and the add should use the same mode. */ 6202 else if (EASY_VECTOR_15_ADD_SELF (splat_val) 6203 && (splat_val >= 0 || (step == 1 && copies == 1))) 6204 ; 6205 6206 /* Also check if are loading up the most significant bit which can be done by 6207 loading up -1 and shifting the value left by -1. Only do this for 6208 step 1 here, for larger steps it is done earlier. */ 6209 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1) 6210 ; 6211 6212 else 6213 return false; 6214 6215 /* Check if VAL is present in every STEP-th element, and the 6216 other elements are filled with its most significant bit. 
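   An illustrative accept/reject pair for this final check: the V8HImode constant with every element equal to 5 (step 1, copies 1) passes, since 5 satisfies EASY_VECTOR_15 and every element matches, so it can be emitted as a single vspltish; a vector whose remaining elements are not the sign extension (0 or -1) of the splatted value is rejected here.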
*/ 6217 for (i = 1; i < nunits; ++i) 6218 { 6219 HOST_WIDE_INT desired_val; 6220 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i; 6221 if ((i & (step - 1)) == 0) 6222 desired_val = val; 6223 else 6224 desired_val = msb_val; 6225 6226 if (desired_val != const_vector_elt_as_int (op, elt)) 6227 return false; 6228 } 6229 6230 return true; 6231} 6232 6233/* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI 6234 instruction, filling in the bottom elements with 0 or -1. 6235 6236 Return 0 if the constant cannot be generated with VSLDOI. Return positive 6237 for the number of zeroes to shift in, or negative for the number of 0xff 6238 bytes to shift in. 6239 6240 OP is a CONST_VECTOR. */ 6241 6242int 6243vspltis_shifted (rtx op) 6244{ 6245 machine_mode mode = GET_MODE (op); 6246 machine_mode inner = GET_MODE_INNER (mode); 6247 6248 unsigned i, j; 6249 unsigned nunits; 6250 unsigned mask; 6251 6252 HOST_WIDE_INT val; 6253 6254 if (mode != V16QImode && mode != V8HImode && mode != V4SImode) 6255 return false; 6256 6257 /* We need to create pseudo registers to do the shift, so don't recognize 6258 shift vector constants after reload. Don't match it even before RA 6259 after split1 is done, because there won't be further splitting pass 6260 before RA to do the splitting. */ 6261 if (!can_create_pseudo_p () 6262 || (cfun->curr_properties & PROP_rtl_split_insns)) 6263 return false; 6264 6265 nunits = GET_MODE_NUNITS (mode); 6266 mask = GET_MODE_MASK (inner); 6267 6268 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); 6269 6270 /* Check if the value can really be the operand of a vspltis[bhw]. */ 6271 if (EASY_VECTOR_15 (val)) 6272 ; 6273 6274 /* Also check if we are loading up the most significant bit which can be done 6275 by loading up -1 and shifting the value left by -1. */ 6276 else if (EASY_VECTOR_MSB (val, inner)) 6277 ; 6278 6279 else 6280 return 0; 6281 6282 /* Check if VAL is present in every STEP-th element until we find elements 6283 that are 0 or all 1 bits. */ 6284 for (i = 1; i < nunits; ++i) 6285 { 6286 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; 6287 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); 6288 6289 /* If the value isn't the splat value, check for the remaining elements 6290 being 0/-1. */ 6291 if (val != elt_val) 6292 { 6293 if (elt_val == 0) 6294 { 6295 for (j = i+1; j < nunits; ++j) 6296 { 6297 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; 6298 if (const_vector_elt_as_int (op, elt2) != 0) 6299 return 0; 6300 } 6301 6302 return (nunits - i) * GET_MODE_SIZE (inner); 6303 } 6304 6305 else if ((elt_val & mask) == mask) 6306 { 6307 for (j = i+1; j < nunits; ++j) 6308 { 6309 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; 6310 if ((const_vector_elt_as_int (op, elt2) & mask) != mask) 6311 return 0; 6312 } 6313 6314 return -((nunits - i) * GET_MODE_SIZE (inner)); 6315 } 6316 6317 else 6318 return 0; 6319 } 6320 } 6321 6322 /* If all elements are equal, we don't need to do VSLDOI. */ 6323 return 0; 6324} 6325 6326 6327/* Return non-zero (element mode byte size) if OP is of the given MODE 6328 and can be synthesized with a vspltisb, vspltish or vspltisw. */ 6329 6330int 6331easy_altivec_constant (rtx op, machine_mode mode) 6332{ 6333 unsigned step, copies; 6334 6335 if (mode == VOIDmode) 6336 mode = GET_MODE (op); 6337 else if (mode != GET_MODE (op)) 6338 return 0; 6339 6340 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy 6341 constants. 
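   (For reference, the non-zero return value is the element size in bytes, as the comment above the function says: 8 for the V2DImode/V2DFmode all-zero or all-ones cases handled first, 4, 2 or 1 for constants reachable with vspltisw, vspltish or vspltisb respectively, and the inner element size when a VSLDOI-shifted form is recognized.)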
*/ 6342 if (mode == V2DFmode) 6343 return zero_constant (op, mode) ? 8 : 0; 6344 6345 else if (mode == V2DImode) 6346 { 6347 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0)) 6348 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1))) 6349 return 0; 6350 6351 if (zero_constant (op, mode)) 6352 return 8; 6353 6354 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 6355 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) 6356 return 8; 6357 6358 return 0; 6359 } 6360 6361 /* V1TImode is a special container for TImode. Ignore for now. */ 6362 else if (mode == V1TImode) 6363 return 0; 6364 6365 /* Start with a vspltisw. */ 6366 step = GET_MODE_NUNITS (mode) / 4; 6367 copies = 1; 6368 6369 if (vspltis_constant (op, step, copies)) 6370 return 4; 6371 6372 /* Then try with a vspltish. */ 6373 if (step == 1) 6374 copies <<= 1; 6375 else 6376 step >>= 1; 6377 6378 if (vspltis_constant (op, step, copies)) 6379 return 2; 6380 6381 /* And finally a vspltisb. */ 6382 if (step == 1) 6383 copies <<= 1; 6384 else 6385 step >>= 1; 6386 6387 if (vspltis_constant (op, step, copies)) 6388 return 1; 6389 6390 if (vspltis_shifted (op) != 0) 6391 return GET_MODE_SIZE (GET_MODE_INNER (mode)); 6392 6393 return 0; 6394} 6395 6396/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose 6397 result is OP. Abort if it is not possible. */ 6398 6399rtx 6400gen_easy_altivec_constant (rtx op) 6401{ 6402 machine_mode mode = GET_MODE (op); 6403 int nunits = GET_MODE_NUNITS (mode); 6404 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); 6405 unsigned step = nunits / 4; 6406 unsigned copies = 1; 6407 6408 /* Start with a vspltisw. */ 6409 if (vspltis_constant (op, step, copies)) 6410 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); 6411 6412 /* Then try with a vspltish. */ 6413 if (step == 1) 6414 copies <<= 1; 6415 else 6416 step >>= 1; 6417 6418 if (vspltis_constant (op, step, copies)) 6419 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); 6420 6421 /* And finally a vspltisb. */ 6422 if (step == 1) 6423 copies <<= 1; 6424 else 6425 step >>= 1; 6426 6427 if (vspltis_constant (op, step, copies)) 6428 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); 6429 6430 gcc_unreachable (); 6431} 6432 6433/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0 6434 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d). 6435 6436 Return the number of instructions needed (1 or 2) into the address pointed 6437 via NUM_INSNS_PTR. 6438 6439 Return the constant that is being split via CONSTANT_PTR. */ 6440 6441bool 6442xxspltib_constant_p (rtx op, 6443 machine_mode mode, 6444 int *num_insns_ptr, 6445 int *constant_ptr) 6446{ 6447 size_t nunits = GET_MODE_NUNITS (mode); 6448 size_t i; 6449 HOST_WIDE_INT value; 6450 rtx element; 6451 6452 /* Set the returned values to out of bound values. */ 6453 *num_insns_ptr = -1; 6454 *constant_ptr = 256; 6455 6456 if (!TARGET_P9_VECTOR) 6457 return false; 6458 6459 if (mode == VOIDmode) 6460 mode = GET_MODE (op); 6461 6462 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode) 6463 return false; 6464 6465 /* Handle (vec_duplicate <constant>). */ 6466 if (GET_CODE (op) == VEC_DUPLICATE) 6467 { 6468 if (mode != V16QImode && mode != V8HImode && mode != V4SImode 6469 && mode != V2DImode) 6470 return false; 6471 6472 element = XEXP (op, 0); 6473 if (!CONST_INT_P (element)) 6474 return false; 6475 6476 value = INTVAL (element); 6477 if (!IN_RANGE (value, -128, 127)) 6478 return false; 6479 } 6480 6481 /* Handle (const_vector [...]). 
*/ 6482 else if (GET_CODE (op) == CONST_VECTOR) 6483 { 6484 if (mode != V16QImode && mode != V8HImode && mode != V4SImode 6485 && mode != V2DImode) 6486 return false; 6487 6488 element = CONST_VECTOR_ELT (op, 0); 6489 if (!CONST_INT_P (element)) 6490 return false; 6491 6492 value = INTVAL (element); 6493 if (!IN_RANGE (value, -128, 127)) 6494 return false; 6495 6496 for (i = 1; i < nunits; i++) 6497 { 6498 element = CONST_VECTOR_ELT (op, i); 6499 if (!CONST_INT_P (element)) 6500 return false; 6501 6502 if (value != INTVAL (element)) 6503 return false; 6504 } 6505 } 6506 6507 /* Handle integer constants being loaded into the upper part of the VSX 6508 register as a scalar. If the value isn't 0/-1, only allow it if the mode 6509 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */ 6510 else if (CONST_INT_P (op)) 6511 { 6512 if (!SCALAR_INT_MODE_P (mode)) 6513 return false; 6514 6515 value = INTVAL (op); 6516 if (!IN_RANGE (value, -128, 127)) 6517 return false; 6518 6519 if (!IN_RANGE (value, -1, 0)) 6520 { 6521 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID)) 6522 return false; 6523 6524 if (EASY_VECTOR_15 (value)) 6525 return false; 6526 } 6527 } 6528 6529 else 6530 return false; 6531 6532 /* See if we could generate vspltisw/vspltish directly instead of xxspltib + 6533 sign extend. Special case 0/-1 to allow getting any VSX register instead 6534 of an Altivec register. */ 6535 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) 6536 && EASY_VECTOR_15 (value)) 6537 return false; 6538 6539 /* Return # of instructions and the constant byte for XXSPLTIB. */ 6540 if (mode == V16QImode) 6541 *num_insns_ptr = 1; 6542 6543 else if (IN_RANGE (value, -1, 0)) 6544 *num_insns_ptr = 1; 6545 6546 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a 6547 single XXSPLTIW or XXSPLTIDP instruction. */ 6548 else if (vsx_prefixed_constant (op, mode)) 6549 return false; 6550 6551 /* Return XXSPLTIB followed by a sign extend operation to convert the 6552 constant to V8HImode or V4SImode.
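   A sketch of that two-instruction case (byte value chosen arbitrarily): splatting 37 across a V4SImode vector is done as an xxspltib of 37 followed by a byte-to-word vector sign extension (vextsb2w; vupkhsb for V8HImode, as listed in the comment above this function), and *num_insns_ptr is set to 2 below to reflect the pair.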
*/ 6553 else 6554 *num_insns_ptr = 2; 6555 6556 *constant_ptr = (int) value; 6557 return true; 6558} 6559 6560const char * 6561output_vec_const_move (rtx *operands) 6562{ 6563 int shift; 6564 machine_mode mode; 6565 rtx dest, vec; 6566 6567 dest = operands[0]; 6568 vec = operands[1]; 6569 mode = GET_MODE (dest); 6570 6571 if (TARGET_VSX) 6572 { 6573 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest)); 6574 int xxspltib_value = 256; 6575 int num_insns = -1; 6576 6577 if (zero_constant (vec, mode)) 6578 { 6579 if (TARGET_P9_VECTOR) 6580 return "xxspltib %x0,0"; 6581 6582 else if (dest_vmx_p) 6583 return "vspltisw %0,0"; 6584 6585 else 6586 return "xxlxor %x0,%x0,%x0"; 6587 } 6588 6589 if (all_ones_constant (vec, mode)) 6590 { 6591 if (TARGET_P9_VECTOR) 6592 return "xxspltib %x0,255"; 6593 6594 else if (dest_vmx_p) 6595 return "vspltisw %0,-1"; 6596 6597 else if (TARGET_P8_VECTOR) 6598 return "xxlorc %x0,%x0,%x0"; 6599 6600 else 6601 gcc_unreachable (); 6602 } 6603 6604 vec_const_128bit_type vsx_const; 6605 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const)) 6606 { 6607 unsigned imm = constant_generates_lxvkq (&vsx_const); 6608 if (imm) 6609 { 6610 operands[2] = GEN_INT (imm); 6611 return "lxvkq %x0,%2"; 6612 } 6613 6614 imm = constant_generates_xxspltiw (&vsx_const); 6615 if (imm) 6616 { 6617 operands[2] = GEN_INT (imm); 6618 return "xxspltiw %x0,%2"; 6619 } 6620 6621 imm = constant_generates_xxspltidp (&vsx_const); 6622 if (imm) 6623 { 6624 operands[2] = GEN_INT (imm); 6625 return "xxspltidp %x0,%2"; 6626 } 6627 } 6628 6629 if (TARGET_P9_VECTOR 6630 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) 6631 { 6632 if (num_insns == 1) 6633 { 6634 operands[2] = GEN_INT (xxspltib_value & 0xff); 6635 return "xxspltib %x0,%2"; 6636 } 6637 6638 return "#"; 6639 } 6640 } 6641 6642 if (TARGET_ALTIVEC) 6643 { 6644 rtx splat_vec; 6645 6646 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest))); 6647 if (zero_constant (vec, mode)) 6648 return "vspltisw %0,0"; 6649 6650 if (all_ones_constant (vec, mode)) 6651 return "vspltisw %0,-1"; 6652 6653 /* Do we need to construct a value using VSLDOI? */ 6654 shift = vspltis_shifted (vec); 6655 if (shift != 0) 6656 return "#"; 6657 6658 splat_vec = gen_easy_altivec_constant (vec); 6659 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); 6660 operands[1] = XEXP (splat_vec, 0); 6661 if (!EASY_VECTOR_15 (INTVAL (operands[1]))) 6662 return "#"; 6663 6664 switch (GET_MODE (splat_vec)) 6665 { 6666 case E_V4SImode: 6667 return "vspltisw %0,%1"; 6668 6669 case E_V8HImode: 6670 return "vspltish %0,%1"; 6671 6672 case E_V16QImode: 6673 return "vspltisb %0,%1"; 6674 6675 default: 6676 gcc_unreachable (); 6677 } 6678 } 6679 6680 gcc_unreachable (); 6681} 6682 6683/* Initialize vector TARGET to VALS. 
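   In rough terms, the expander below picks the cheapest strategy it can for the given elements: an all-constant vector becomes a zeroing move, an easy splat immediate, or a constant-pool load; V2DF/V2DI values are concatenated or splatted directly in VSX registers; several direct-move paths build small-element vectors entirely in registers; and the fallback at the end stores each field to a stack temporary and reloads the whole vector.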
*/ 6684 6685void 6686rs6000_expand_vector_init (rtx target, rtx vals) 6687{ 6688 machine_mode mode = GET_MODE (target); 6689 machine_mode inner_mode = GET_MODE_INNER (mode); 6690 unsigned int n_elts = GET_MODE_NUNITS (mode); 6691 int n_var = 0, one_var = -1; 6692 bool all_same = true, all_const_zero = true; 6693 rtx x, mem; 6694 unsigned int i; 6695 6696 for (i = 0; i < n_elts; ++i) 6697 { 6698 x = XVECEXP (vals, 0, i); 6699 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) 6700 ++n_var, one_var = i; 6701 else if (x != CONST0_RTX (inner_mode)) 6702 all_const_zero = false; 6703 6704 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 6705 all_same = false; 6706 } 6707 6708 if (n_var == 0) 6709 { 6710 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); 6711 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); 6712 if ((int_vector_p || TARGET_VSX) && all_const_zero) 6713 { 6714 /* Zero register. */ 6715 emit_move_insn (target, CONST0_RTX (mode)); 6716 return; 6717 } 6718 else if (int_vector_p && easy_vector_constant (const_vec, mode)) 6719 { 6720 /* Splat immediate. */ 6721 emit_insn (gen_rtx_SET (target, const_vec)); 6722 return; 6723 } 6724 else 6725 { 6726 /* Load from constant pool. */ 6727 emit_move_insn (target, const_vec); 6728 return; 6729 } 6730 } 6731 6732 /* Double word values on VSX can use xxpermdi or lxvdsx. */ 6733 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) 6734 { 6735 rtx op[2]; 6736 size_t i; 6737 size_t num_elements = all_same ? 1 : 2; 6738 for (i = 0; i < num_elements; i++) 6739 { 6740 op[i] = XVECEXP (vals, 0, i); 6741 /* Just in case there is a SUBREG with a smaller mode, do a 6742 conversion. */ 6743 if (GET_MODE (op[i]) != inner_mode) 6744 { 6745 rtx tmp = gen_reg_rtx (inner_mode); 6746 convert_move (tmp, op[i], 0); 6747 op[i] = tmp; 6748 } 6749 /* Allow load with splat double word. */ 6750 else if (MEM_P (op[i])) 6751 { 6752 if (!all_same) 6753 op[i] = force_reg (inner_mode, op[i]); 6754 } 6755 else if (!REG_P (op[i])) 6756 op[i] = force_reg (inner_mode, op[i]); 6757 } 6758 6759 if (all_same) 6760 { 6761 if (mode == V2DFmode) 6762 emit_insn (gen_vsx_splat_v2df (target, op[0])); 6763 else 6764 emit_insn (gen_vsx_splat_v2di (target, op[0])); 6765 } 6766 else 6767 { 6768 if (mode == V2DFmode) 6769 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1])); 6770 else 6771 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1])); 6772 } 6773 return; 6774 } 6775 6776 /* Special case initializing vector int if we are on 64-bit systems with 6777 direct move or we have the ISA 3.0 instructions. 
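   The point of this special case (informal summary, element values illustrative): with 64-bit direct moves an initializer such as { a, b, c, d } can be assembled entirely in registers through gen_vsx_init_v4si, or splatted with gen_vsx_splat_v4si when all elements are equal, instead of falling through to the store-to-stack-and-reload path at the end of this function.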
*/ 6778 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode) 6779 && TARGET_DIRECT_MOVE_64BIT) 6780 { 6781 if (all_same) 6782 { 6783 rtx element0 = XVECEXP (vals, 0, 0); 6784 if (MEM_P (element0)) 6785 element0 = rs6000_force_indexed_or_indirect_mem (element0); 6786 else 6787 element0 = force_reg (SImode, element0); 6788 6789 if (TARGET_P9_VECTOR) 6790 emit_insn (gen_vsx_splat_v4si (target, element0)); 6791 else 6792 { 6793 rtx tmp = gen_reg_rtx (DImode); 6794 emit_insn (gen_zero_extendsidi2 (tmp, element0)); 6795 emit_insn (gen_vsx_splat_v4si_di (target, tmp)); 6796 } 6797 return; 6798 } 6799 else 6800 { 6801 rtx elements[4]; 6802 size_t i; 6803 6804 for (i = 0; i < 4; i++) 6805 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i)); 6806 6807 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], 6808 elements[2], elements[3])); 6809 return; 6810 } 6811 } 6812 6813 /* With single precision floating point on VSX, know that internally single 6814 precision is actually represented as a double, and either make 2 V2DF 6815 vectors, and convert these vectors to single precision, or do one 6816 conversion, and splat the result to the other elements. */ 6817 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode)) 6818 { 6819 if (all_same) 6820 { 6821 rtx element0 = XVECEXP (vals, 0, 0); 6822 6823 if (TARGET_P9_VECTOR) 6824 { 6825 if (MEM_P (element0)) 6826 element0 = rs6000_force_indexed_or_indirect_mem (element0); 6827 6828 emit_insn (gen_vsx_splat_v4sf (target, element0)); 6829 } 6830 6831 else 6832 { 6833 rtx freg = gen_reg_rtx (V4SFmode); 6834 rtx sreg = force_reg (SFmode, element0); 6835 rtx cvt = (TARGET_XSCVDPSPN 6836 ? gen_vsx_xscvdpspn_scalar (freg, sreg) 6837 : gen_vsx_xscvdpsp_scalar (freg, sreg)); 6838 6839 emit_insn (cvt); 6840 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, 6841 const0_rtx)); 6842 } 6843 } 6844 else 6845 { 6846 if (TARGET_P8_VECTOR && TARGET_POWERPC64) 6847 { 6848 rtx tmp_sf[4]; 6849 rtx tmp_si[4]; 6850 rtx tmp_di[4]; 6851 rtx mrg_di[4]; 6852 for (i = 0; i < 4; i++) 6853 { 6854 tmp_si[i] = gen_reg_rtx (SImode); 6855 tmp_di[i] = gen_reg_rtx (DImode); 6856 mrg_di[i] = gen_reg_rtx (DImode); 6857 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i)); 6858 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i])); 6859 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i])); 6860 } 6861 6862 if (!BYTES_BIG_ENDIAN) 6863 { 6864 std::swap (tmp_di[0], tmp_di[1]); 6865 std::swap (tmp_di[2], tmp_di[3]); 6866 } 6867 6868 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32))); 6869 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1])); 6870 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32))); 6871 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3])); 6872 6873 rtx tmp_v2di = gen_reg_rtx (V2DImode); 6874 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3])); 6875 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di)); 6876 } 6877 else 6878 { 6879 rtx dbl_even = gen_reg_rtx (V2DFmode); 6880 rtx dbl_odd = gen_reg_rtx (V2DFmode); 6881 rtx flt_even = gen_reg_rtx (V4SFmode); 6882 rtx flt_odd = gen_reg_rtx (V4SFmode); 6883 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0)); 6884 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1)); 6885 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2)); 6886 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3)); 6887 6888 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1)); 6889 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3)); 6890 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); 6891 emit_insn 
(gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); 6892 rs6000_expand_extract_even (target, flt_even, flt_odd); 6893 } 6894 } 6895 return; 6896 } 6897 6898 /* Special case initializing vector short/char that are splats if we are on 6899 64-bit systems with direct move. */ 6900 if (all_same && TARGET_DIRECT_MOVE_64BIT 6901 && (mode == V16QImode || mode == V8HImode)) 6902 { 6903 rtx op0 = XVECEXP (vals, 0, 0); 6904 rtx di_tmp = gen_reg_rtx (DImode); 6905 6906 if (!REG_P (op0)) 6907 op0 = force_reg (GET_MODE_INNER (mode), op0); 6908 6909 if (mode == V16QImode) 6910 { 6911 emit_insn (gen_zero_extendqidi2 (di_tmp, op0)); 6912 emit_insn (gen_vsx_vspltb_di (target, di_tmp)); 6913 return; 6914 } 6915 6916 if (mode == V8HImode) 6917 { 6918 emit_insn (gen_zero_extendhidi2 (di_tmp, op0)); 6919 emit_insn (gen_vsx_vsplth_di (target, di_tmp)); 6920 return; 6921 } 6922 } 6923 6924 /* Store value to stack temp. Load vector element. Splat. However, splat 6925 of 64-bit items is not supported on Altivec. */ 6926 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) 6927 { 6928 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); 6929 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), 6930 XVECEXP (vals, 0, 0)); 6931 x = gen_rtx_UNSPEC (VOIDmode, 6932 gen_rtvec (1, const0_rtx), UNSPEC_LVE); 6933 emit_insn (gen_rtx_PARALLEL (VOIDmode, 6934 gen_rtvec (2, 6935 gen_rtx_SET (target, mem), 6936 x))); 6937 x = gen_rtx_VEC_SELECT (inner_mode, target, 6938 gen_rtx_PARALLEL (VOIDmode, 6939 gen_rtvec (1, const0_rtx))); 6940 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x))); 6941 return; 6942 } 6943 6944 /* One field is non-constant. Load constant then overwrite 6945 varying field. */ 6946 if (n_var == 1) 6947 { 6948 rtx copy = copy_rtx (vals); 6949 6950 /* Load constant part of vector, substitute neighboring value for 6951 varying element. */ 6952 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); 6953 rs6000_expand_vector_init (target, copy); 6954 6955 /* Insert variable. */ 6956 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), 6957 GEN_INT (one_var)); 6958 return; 6959 } 6960 6961 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode)) 6962 { 6963 rtx op[16]; 6964 /* Force the values into word_mode registers. */ 6965 for (i = 0; i < n_elts; i++) 6966 { 6967 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i)); 6968 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode; 6969 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0); 6970 } 6971 6972 /* Take unsigned char big endianness on 64bit as example for below 6973 construction, the input values are: A, B, C, D, ..., O, P. */ 6974 6975 if (TARGET_DIRECT_MOVE_128) 6976 { 6977 /* Move to VSX register with vec_concat, each has 2 values. 6978 eg: vr1[0] = { xxxxxxxA, xxxxxxxB }; 6979 vr1[1] = { xxxxxxxC, xxxxxxxD }; 6980 ... 6981 vr1[7] = { xxxxxxxO, xxxxxxxP }; */ 6982 rtx vr1[8]; 6983 for (i = 0; i < n_elts / 2; i++) 6984 { 6985 vr1[i] = gen_reg_rtx (V2DImode); 6986 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2], 6987 op[i * 2 + 1])); 6988 } 6989 6990 /* Pack vectors with 2 values into vectors with 4 values. 
6991 eg: vr2[0] = { xxxAxxxB, xxxCxxxD }; 6992 vr2[1] = { xxxExxxF, xxxGxxxH }; 6993 vr2[1] = { xxxIxxxJ, xxxKxxxL }; 6994 vr2[3] = { xxxMxxxN, xxxOxxxP }; */ 6995 rtx vr2[4]; 6996 for (i = 0; i < n_elts / 4; i++) 6997 { 6998 vr2[i] = gen_reg_rtx (V4SImode); 6999 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2], 7000 vr1[i * 2 + 1])); 7001 } 7002 7003 /* Pack vectors with 4 values into vectors with 8 values. 7004 eg: vr3[0] = { xAxBxCxD, xExFxGxH }; 7005 vr3[1] = { xIxJxKxL, xMxNxOxP }; */ 7006 rtx vr3[2]; 7007 for (i = 0; i < n_elts / 8; i++) 7008 { 7009 vr3[i] = gen_reg_rtx (V8HImode); 7010 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2], 7011 vr2[i * 2 + 1])); 7012 } 7013 7014 /* If it's V8HImode, it's done and return it. */ 7015 if (mode == V8HImode) 7016 { 7017 emit_insn (gen_rtx_SET (target, vr3[0])); 7018 return; 7019 } 7020 7021 /* Pack vectors with 8 values into 16 values. */ 7022 rtx res = gen_reg_rtx (V16QImode); 7023 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1])); 7024 emit_insn (gen_rtx_SET (target, res)); 7025 } 7026 else 7027 { 7028 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL; 7029 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL; 7030 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL; 7031 rtx perm_idx; 7032 7033 /* Set up some common gen routines and values. */ 7034 if (BYTES_BIG_ENDIAN) 7035 { 7036 if (mode == V16QImode) 7037 { 7038 merge_v16qi = gen_altivec_vmrghb; 7039 merge_v8hi = gen_altivec_vmrglh; 7040 } 7041 else 7042 merge_v8hi = gen_altivec_vmrghh; 7043 7044 merge_v4si = gen_altivec_vmrglw; 7045 perm_idx = GEN_INT (3); 7046 } 7047 else 7048 { 7049 if (mode == V16QImode) 7050 { 7051 merge_v16qi = gen_altivec_vmrglb; 7052 merge_v8hi = gen_altivec_vmrghh; 7053 } 7054 else 7055 merge_v8hi = gen_altivec_vmrglh; 7056 7057 merge_v4si = gen_altivec_vmrghw; 7058 perm_idx = GEN_INT (0); 7059 } 7060 7061 /* Move to VSX register with direct move. 7062 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx }; 7063 vr_qi[1] = { xxxxxxxB, xxxxxxxx }; 7064 ... 7065 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */ 7066 rtx vr_qi[16]; 7067 for (i = 0; i < n_elts; i++) 7068 { 7069 vr_qi[i] = gen_reg_rtx (V16QImode); 7070 if (TARGET_POWERPC64) 7071 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i])); 7072 else 7073 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i])); 7074 } 7075 7076 /* Merge/move to vector short. 7077 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB }; 7078 vr_hi[1] = { xxxxxxxx, xxxxxxCD }; 7079 ... 7080 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */ 7081 rtx vr_hi[8]; 7082 for (i = 0; i < 8; i++) 7083 { 7084 rtx tmp = vr_qi[i]; 7085 if (mode == V16QImode) 7086 { 7087 tmp = gen_reg_rtx (V16QImode); 7088 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1])); 7089 } 7090 vr_hi[i] = gen_reg_rtx (V8HImode); 7091 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp)); 7092 } 7093 7094 /* Merge vector short to vector int. 7095 eg: vr_si[0] = { xxxxxxxx, xxxxABCD }; 7096 vr_si[1] = { xxxxxxxx, xxxxEFGH }; 7097 ... 7098 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */ 7099 rtx vr_si[4]; 7100 for (i = 0; i < 4; i++) 7101 { 7102 rtx tmp = gen_reg_rtx (V8HImode); 7103 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1])); 7104 vr_si[i] = gen_reg_rtx (V4SImode); 7105 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp)); 7106 } 7107 7108 /* Merge vector int to vector long. 
7109 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH }; 7110 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */ 7111 rtx vr_di[2]; 7112 for (i = 0; i < 2; i++) 7113 { 7114 rtx tmp = gen_reg_rtx (V4SImode); 7115 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1])); 7116 vr_di[i] = gen_reg_rtx (V2DImode); 7117 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp)); 7118 } 7119 7120 rtx res = gen_reg_rtx (V2DImode); 7121 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx)); 7122 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res))); 7123 } 7124 7125 return; 7126 } 7127 7128 /* Construct the vector in memory one field at a time 7129 and load the whole vector. */ 7130 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 7131 for (i = 0; i < n_elts; i++) 7132 emit_move_insn (adjust_address_nv (mem, inner_mode, 7133 i * GET_MODE_SIZE (inner_mode)), 7134 XVECEXP (vals, 0, i)); 7135 emit_move_insn (target, mem); 7136} 7137 7138/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX 7139 is variable and also counts by vector element size for p9 and above. */ 7140 7141static void 7142rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx) 7143{ 7144 machine_mode mode = GET_MODE (target); 7145 7146 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); 7147 7148 machine_mode inner_mode = GET_MODE (val); 7149 7150 int width = GET_MODE_SIZE (inner_mode); 7151 7152 gcc_assert (width >= 1 && width <= 8); 7153 7154 int shift = exact_log2 (width); 7155 7156 machine_mode idx_mode = GET_MODE (idx); 7157 7158 machine_mode shift_mode; 7159 /* Gen function pointers for shifting left and generation of permutation 7160 control vectors. */ 7161 rtx (*gen_ashl) (rtx, rtx, rtx); 7162 rtx (*gen_pcvr1) (rtx, rtx); 7163 rtx (*gen_pcvr2) (rtx, rtx); 7164 7165 if (TARGET_POWERPC64) 7166 { 7167 shift_mode = DImode; 7168 gen_ashl = gen_ashldi3; 7169 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di 7170 : gen_altivec_lvsr_reg_di; 7171 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di 7172 : gen_altivec_lvsl_reg_di; 7173 } 7174 else 7175 { 7176 shift_mode = SImode; 7177 gen_ashl = gen_ashlsi3; 7178 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si 7179 : gen_altivec_lvsr_reg_si; 7180 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si 7181 : gen_altivec_lvsl_reg_si; 7182 } 7183 /* Generate the IDX for permute shift, width is the vector element size. 7184 idx = idx * width. */ 7185 rtx tmp = gen_reg_rtx (shift_mode); 7186 idx = convert_modes (shift_mode, idx_mode, idx, 1); 7187 7188 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift))); 7189 7190 /* Generate one permutation control vector used for rotating the element 7191 at to-insert position to element zero in target vector. lvsl is 7192 used for big endianness while lvsr is used for little endianness: 7193 lvs[lr] v1,0,idx. */ 7194 rtx pcvr1 = gen_reg_rtx (V16QImode); 7195 emit_insn (gen_pcvr1 (pcvr1, tmp)); 7196 7197 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0); 7198 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, 7199 pcvr1); 7200 emit_insn (perm1); 7201 7202 /* Insert val into element 0 of target vector. */ 7203 rs6000_expand_vector_set (target, val, const0_rtx); 7204 7205 /* Rotate back with a reversed permutation control vector generated from: 7206 lvs[rl] v2,0,idx. 
*/ 7207 rtx pcvr2 = gen_reg_rtx (V16QImode); 7208 emit_insn (gen_pcvr2 (pcvr2, tmp)); 7209 7210 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target, 7211 pcvr2); 7212 emit_insn (perm2); 7213} 7214 7215/* Insert VAL into IDX of TARGET, VAL size is same of the vector element, IDX 7216 is variable and also counts by vector element size for p7 & p8. */ 7217 7218static void 7219rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx) 7220{ 7221 machine_mode mode = GET_MODE (target); 7222 7223 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx)); 7224 7225 machine_mode inner_mode = GET_MODE (val); 7226 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode); 7227 7228 int width = GET_MODE_SIZE (inner_mode); 7229 gcc_assert (width >= 1 && width <= 4); 7230 7231 int shift = exact_log2 (width); 7232 7233 machine_mode idx_mode = GET_MODE (idx); 7234 7235 machine_mode shift_mode; 7236 rtx (*gen_ashl)(rtx, rtx, rtx); 7237 rtx (*gen_add)(rtx, rtx, rtx); 7238 rtx (*gen_sub)(rtx, rtx, rtx); 7239 rtx (*gen_lvsl)(rtx, rtx); 7240 7241 if (TARGET_POWERPC64) 7242 { 7243 shift_mode = DImode; 7244 gen_ashl = gen_ashldi3; 7245 gen_add = gen_adddi3; 7246 gen_sub = gen_subdi3; 7247 gen_lvsl = gen_altivec_lvsl_reg_di; 7248 } 7249 else 7250 { 7251 shift_mode = SImode; 7252 gen_ashl = gen_ashlsi3; 7253 gen_add = gen_addsi3; 7254 gen_sub = gen_subsi3; 7255 gen_lvsl = gen_altivec_lvsl_reg_si; 7256 } 7257 7258 /* idx = idx * width. */ 7259 rtx tmp = gen_reg_rtx (shift_mode); 7260 idx = convert_modes (shift_mode, idx_mode, idx, 1); 7261 7262 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift))); 7263 7264 /* For LE: idx = idx + 8. */ 7265 if (!BYTES_BIG_ENDIAN) 7266 emit_insn (gen_add (tmp, tmp, GEN_INT (8))); 7267 else 7268 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp)); 7269 7270 /* lxv vs33, mask. 7271 DImode: 0xffffffffffffffff0000000000000000 7272 SImode: 0x00000000ffffffff0000000000000000 7273 HImode: 0x000000000000ffff0000000000000000. 7274 QImode: 0x00000000000000ff0000000000000000. */ 7275 rtx mask = gen_reg_rtx (V16QImode); 7276 rtx mask_v2di = gen_reg_rtx (V2DImode); 7277 rtvec v = rtvec_alloc (2); 7278 if (!BYTES_BIG_ENDIAN) 7279 { 7280 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0); 7281 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask); 7282 } 7283 else 7284 { 7285 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask); 7286 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0); 7287 } 7288 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v))); 7289 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0); 7290 emit_insn (gen_rtx_SET (mask, sub_mask)); 7291 7292 /* mtvsrd[wz] f0,tmp_val. 
*/ 7293 rtx tmp_val = gen_reg_rtx (SImode); 7294 if (inner_mode == E_SFmode) 7295 if (TARGET_DIRECT_MOVE_64BIT) 7296 emit_insn (gen_movsi_from_sf (tmp_val, val)); 7297 else 7298 { 7299 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true); 7300 emit_insn (gen_movsf_hardfloat (stack, val)); 7301 rtx stack2 = copy_rtx (stack); 7302 PUT_MODE (stack2, SImode); 7303 emit_move_insn (tmp_val, stack2); 7304 } 7305 else 7306 tmp_val = force_reg (SImode, val); 7307 7308 rtx val_v16qi = gen_reg_rtx (V16QImode); 7309 rtx val_v2di = gen_reg_rtx (V2DImode); 7310 rtvec vec_val = rtvec_alloc (2); 7311 if (!BYTES_BIG_ENDIAN) 7312 { 7313 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0); 7314 RTVEC_ELT (vec_val, 1) = tmp_val; 7315 } 7316 else 7317 { 7318 RTVEC_ELT (vec_val, 0) = tmp_val; 7319 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0); 7320 } 7321 emit_insn ( 7322 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val))); 7323 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0); 7324 emit_insn (gen_rtx_SET (val_v16qi, sub_val)); 7325 7326 /* lvsl 13,0,idx. */ 7327 rtx pcv = gen_reg_rtx (V16QImode); 7328 emit_insn (gen_lvsl (pcv, tmp)); 7329 7330 /* vperm 1,1,1,13. */ 7331 /* vperm 0,0,0,13. */ 7332 rtx val_perm = gen_reg_rtx (V16QImode); 7333 rtx mask_perm = gen_reg_rtx (V16QImode); 7334 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv)); 7335 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv)); 7336 7337 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0); 7338 7339 /* xxsel 34,34,32,33. */ 7340 emit_insn ( 7341 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm)); 7342} 7343 7344/* Set field ELT_RTX of TARGET to VAL. */ 7345 7346void 7347rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx) 7348{ 7349 machine_mode mode = GET_MODE (target); 7350 machine_mode inner_mode = GET_MODE_INNER (mode); 7351 rtx reg = gen_reg_rtx (mode); 7352 rtx mask, mem, x; 7353 int width = GET_MODE_SIZE (inner_mode); 7354 int i; 7355 7356 val = force_reg (GET_MODE (val), val); 7357 7358 if (VECTOR_MEM_VSX_P (mode)) 7359 { 7360 if (!CONST_INT_P (elt_rtx)) 7361 { 7362 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi 7363 when elt_rtx is variable. */ 7364 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8) 7365 { 7366 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx); 7367 return; 7368 } 7369 else if (TARGET_VSX) 7370 { 7371 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx); 7372 return; 7373 } 7374 else 7375 gcc_assert (CONST_INT_P (elt_rtx)); 7376 } 7377 7378 rtx insn = NULL_RTX; 7379 7380 if (mode == V2DFmode) 7381 insn = gen_vsx_set_v2df (target, target, val, elt_rtx); 7382 7383 else if (mode == V2DImode) 7384 insn = gen_vsx_set_v2di (target, target, val, elt_rtx); 7385 7386 else if (TARGET_P9_VECTOR && TARGET_POWERPC64) 7387 { 7388 if (mode == V4SImode) 7389 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx); 7390 else if (mode == V8HImode) 7391 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); 7392 else if (mode == V16QImode) 7393 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); 7394 else if (mode == V4SFmode) 7395 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx); 7396 } 7397 7398 if (insn) 7399 { 7400 emit_insn (insn); 7401 return; 7402 } 7403 } 7404 7405 /* Simplify setting single element vectors like V1TImode. 
*/ 7406 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) 7407 && INTVAL (elt_rtx) == 0) 7408 { 7409 emit_move_insn (target, gen_lowpart (mode, val)); 7410 return; 7411 } 7412 7413 /* Load single variable value. */ 7414 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); 7415 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); 7416 x = gen_rtx_UNSPEC (VOIDmode, 7417 gen_rtvec (1, const0_rtx), UNSPEC_LVE); 7418 emit_insn (gen_rtx_PARALLEL (VOIDmode, 7419 gen_rtvec (2, 7420 gen_rtx_SET (reg, mem), 7421 x))); 7422 7423 /* Linear sequence. */ 7424 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 7425 for (i = 0; i < 16; ++i) 7426 XVECEXP (mask, 0, i) = GEN_INT (i); 7427 7428 /* Set permute mask to insert element into target. */ 7429 for (i = 0; i < width; ++i) 7430 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10); 7431 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0)); 7432 7433 if (BYTES_BIG_ENDIAN) 7434 x = gen_rtx_UNSPEC (mode, 7435 gen_rtvec (3, target, reg, 7436 force_reg (V16QImode, x)), 7437 UNSPEC_VPERM); 7438 else 7439 { 7440 if (TARGET_P9_VECTOR) 7441 x = gen_rtx_UNSPEC (mode, 7442 gen_rtvec (3, reg, target, 7443 force_reg (V16QImode, x)), 7444 UNSPEC_VPERMR); 7445 else 7446 { 7447 /* Invert selector. We prefer to generate VNAND on P8 so 7448 that future fusion opportunities can kick in, but must 7449 generate VNOR elsewhere. */ 7450 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); 7451 rtx iorx = (TARGET_P8_VECTOR 7452 ? gen_rtx_IOR (V16QImode, notx, notx) 7453 : gen_rtx_AND (V16QImode, notx, notx)); 7454 rtx tmp = gen_reg_rtx (V16QImode); 7455 emit_insn (gen_rtx_SET (tmp, iorx)); 7456 7457 /* Permute with operands reversed and adjusted selector. */ 7458 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), 7459 UNSPEC_VPERM); 7460 } 7461 } 7462 7463 emit_insn (gen_rtx_SET (target, x)); 7464} 7465 7466/* Extract field ELT from VEC into TARGET. 
*/ 7467 7468void 7469rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) 7470{ 7471 machine_mode mode = GET_MODE (vec); 7472 machine_mode inner_mode = GET_MODE_INNER (mode); 7473 rtx mem; 7474 7475 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt)) 7476 { 7477 switch (mode) 7478 { 7479 default: 7480 break; 7481 case E_V1TImode: 7482 emit_move_insn (target, gen_lowpart (TImode, vec)); 7483 break; 7484 case E_V2DFmode: 7485 emit_insn (gen_vsx_extract_v2df (target, vec, elt)); 7486 return; 7487 case E_V2DImode: 7488 emit_insn (gen_vsx_extract_v2di (target, vec, elt)); 7489 return; 7490 case E_V4SFmode: 7491 emit_insn (gen_vsx_extract_v4sf (target, vec, elt)); 7492 return; 7493 case E_V16QImode: 7494 if (TARGET_DIRECT_MOVE_64BIT) 7495 { 7496 emit_insn (gen_vsx_extract_v16qi (target, vec, elt)); 7497 return; 7498 } 7499 else 7500 break; 7501 case E_V8HImode: 7502 if (TARGET_DIRECT_MOVE_64BIT) 7503 { 7504 emit_insn (gen_vsx_extract_v8hi (target, vec, elt)); 7505 return; 7506 } 7507 else 7508 break; 7509 case E_V4SImode: 7510 if (TARGET_DIRECT_MOVE_64BIT) 7511 { 7512 emit_insn (gen_vsx_extract_v4si (target, vec, elt)); 7513 return; 7514 } 7515 break; 7516 } 7517 } 7518 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt) 7519 && TARGET_DIRECT_MOVE_64BIT) 7520 { 7521 if (GET_MODE (elt) != DImode) 7522 { 7523 rtx tmp = gen_reg_rtx (DImode); 7524 convert_move (tmp, elt, 0); 7525 elt = tmp; 7526 } 7527 else if (!REG_P (elt)) 7528 elt = force_reg (DImode, elt); 7529 7530 switch (mode) 7531 { 7532 case E_V1TImode: 7533 emit_move_insn (target, gen_lowpart (TImode, vec)); 7534 return; 7535 7536 case E_V2DFmode: 7537 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt)); 7538 return; 7539 7540 case E_V2DImode: 7541 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt)); 7542 return; 7543 7544 case E_V4SFmode: 7545 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt)); 7546 return; 7547 7548 case E_V4SImode: 7549 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt)); 7550 return; 7551 7552 case E_V8HImode: 7553 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt)); 7554 return; 7555 7556 case E_V16QImode: 7557 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt)); 7558 return; 7559 7560 default: 7561 gcc_unreachable (); 7562 } 7563 } 7564 7565 /* Allocate mode-sized buffer. */ 7566 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 7567 7568 emit_move_insn (mem, vec); 7569 if (CONST_INT_P (elt)) 7570 { 7571 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode); 7572 7573 /* Add offset to field within buffer matching vector element. */ 7574 mem = adjust_address_nv (mem, inner_mode, 7575 modulo_elt * GET_MODE_SIZE (inner_mode)); 7576 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); 7577 } 7578 else 7579 { 7580 unsigned int ele_size = GET_MODE_SIZE (inner_mode); 7581 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1); 7582 rtx new_addr = gen_reg_rtx (Pmode); 7583 7584 elt = gen_rtx_AND (Pmode, elt, num_ele_m1); 7585 if (ele_size > 1) 7586 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size)); 7587 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt); 7588 new_addr = change_address (mem, inner_mode, new_addr); 7589 emit_move_insn (target, new_addr); 7590 } 7591} 7592 7593/* Return the offset within a memory object (MEM) of a vector type to a given 7594 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If 7595 the element is constant, we return a constant integer. 
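   For example (illustrative values), with a V4SImode vector (SCALAR_SIZE of
   4) and a constant ELEMENT of 3, the result is simply the constant 12.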
7596 7597 Otherwise, we use a base register temporary to calculate the offset after 7598 masking it to fit within the bounds of the vector and scaling it. The 7599 masking is required by the 64-bit ELF version 2 ABI for the vec_extract 7600 built-in function. */ 7601 7602static rtx 7603get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size) 7604{ 7605 if (CONST_INT_P (element)) 7606 return GEN_INT (INTVAL (element) * scalar_size); 7607 7608 /* All insns should use the 'Q' constraint (address is a single register) if 7609 the element number is not a constant. */ 7610 gcc_assert (satisfies_constraint_Q (mem)); 7611 7612 /* Mask the element to make sure the element number is between 0 and the 7613 maximum number of elements - 1 so that we don't generate an address 7614 outside the vector. */ 7615 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1); 7616 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1); 7617 emit_insn (gen_rtx_SET (base_tmp, and_op)); 7618 7619 /* Shift the element to get the byte offset from the element number. */ 7620 int shift = exact_log2 (scalar_size); 7621 gcc_assert (shift >= 0); 7622 7623 if (shift > 0) 7624 { 7625 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift)); 7626 emit_insn (gen_rtx_SET (base_tmp, shift_op)); 7627 } 7628 7629 return base_tmp; 7630} 7631 7632/* Helper function update PC-relative addresses when we are adjusting a memory 7633 address (ADDR) to a vector to point to a scalar field within the vector with 7634 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can 7635 use the base register temporary (BASE_TMP) to form the address. */ 7636 7637static rtx 7638adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp) 7639{ 7640 rtx new_addr = NULL; 7641 7642 gcc_assert (CONST_INT_P (element_offset)); 7643 7644 if (GET_CODE (addr) == CONST) 7645 addr = XEXP (addr, 0); 7646 7647 if (GET_CODE (addr) == PLUS) 7648 { 7649 rtx op0 = XEXP (addr, 0); 7650 rtx op1 = XEXP (addr, 1); 7651 7652 if (CONST_INT_P (op1)) 7653 { 7654 HOST_WIDE_INT offset 7655 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset); 7656 7657 if (offset == 0) 7658 new_addr = op0; 7659 7660 else 7661 { 7662 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset)); 7663 new_addr = gen_rtx_CONST (Pmode, plus); 7664 } 7665 } 7666 7667 else 7668 { 7669 emit_move_insn (base_tmp, addr); 7670 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); 7671 } 7672 } 7673 7674 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) 7675 { 7676 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset); 7677 new_addr = gen_rtx_CONST (Pmode, plus); 7678 } 7679 7680 else 7681 gcc_unreachable (); 7682 7683 return new_addr; 7684} 7685 7686/* Adjust a memory address (MEM) of a vector type to point to a scalar field 7687 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register 7688 temporary (BASE_TMP) to fixup the address. Return the new memory address 7689 that is valid for reads or writes to a given register (SCALAR_REG). 7690 7691 This function is expected to be called after reload is completed when we are 7692 splitting insns. The temporary BASE_TMP might be set multiple times with 7693 this code. 
*/ 7694 7695rtx 7696rs6000_adjust_vec_address (rtx scalar_reg, 7697 rtx mem, 7698 rtx element, 7699 rtx base_tmp, 7700 machine_mode scalar_mode) 7701{ 7702 unsigned scalar_size = GET_MODE_SIZE (scalar_mode); 7703 rtx addr = XEXP (mem, 0); 7704 rtx new_addr; 7705 7706 gcc_assert (!reg_mentioned_p (base_tmp, addr)); 7707 gcc_assert (!reg_mentioned_p (base_tmp, element)); 7708 7709 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */ 7710 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC); 7711 7712 /* Calculate what we need to add to the address to get the element 7713 address. */ 7714 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size); 7715 7716 /* Create the new address pointing to the element within the vector. If we 7717 are adding 0, we don't have to change the address. */ 7718 if (element_offset == const0_rtx) 7719 new_addr = addr; 7720 7721 /* A simple indirect address can be converted into a reg + offset 7722 address. */ 7723 else if (REG_P (addr) || SUBREG_P (addr)) 7724 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset); 7725 7726 /* For references to local static variables, fold a constant offset into the 7727 address. */ 7728 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset)) 7729 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp); 7730 7731 /* Optimize D-FORM addresses with constant offset with a constant element, to 7732 include the element offset in the address directly. */ 7733 else if (GET_CODE (addr) == PLUS) 7734 { 7735 rtx op0 = XEXP (addr, 0); 7736 rtx op1 = XEXP (addr, 1); 7737 7738 gcc_assert (REG_P (op0) || SUBREG_P (op0)); 7739 if (CONST_INT_P (op1) && CONST_INT_P (element_offset)) 7740 { 7741 /* op0 should never be r0, because r0+offset is not valid. But it 7742 doesn't hurt to make sure it is not r0. */ 7743 gcc_assert (reg_or_subregno (op0) != 0); 7744 7745 /* D-FORM address with constant element number. */ 7746 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); 7747 rtx offset_rtx = GEN_INT (offset); 7748 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); 7749 } 7750 else 7751 { 7752 /* If we don't have a D-FORM address with a constant element number, 7753 add the two elements in the current address. Then add the offset. 7754 7755 Previously, we tried to add the offset to OP1 and change the 7756 address to an X-FORM format adding OP0 and BASE_TMP, but it became 7757 complicated because we had to verify that op1 was not GPR0 and we 7758 had a constant element offset (due to the way ADDI is defined). 7759 By doing the add of OP0 and OP1 first, and then adding in the 7760 offset, it has the benefit that if D-FORM instructions are 7761 allowed, the offset is part of the memory access to the vector 7762 element. */ 7763 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1))); 7764 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); 7765 } 7766 } 7767 7768 else 7769 { 7770 emit_move_insn (base_tmp, addr); 7771 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); 7772 } 7773 7774 /* If the address isn't valid, move the address into the temporary base 7775 register. Some reasons it could not be valid include: 7776 7777 The address offset overflowed the 16 or 34 bit offset size; 7778 We need to use a DS-FORM load, and the bottom 2 bits are non-zero; 7779 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero; 7780 Only X_FORM loads can be done, and the address is D_FORM. 
*/ 7781 7782 enum insn_form iform 7783 = address_to_insn_form (new_addr, scalar_mode, 7784 reg_to_non_prefixed (scalar_reg, scalar_mode)); 7785 7786 if (iform == INSN_FORM_BAD) 7787 { 7788 emit_move_insn (base_tmp, new_addr); 7789 new_addr = base_tmp; 7790 } 7791 7792 return change_address (mem, scalar_mode, new_addr); 7793} 7794 7795/* Split a variable vec_extract operation into the component instructions. */ 7796 7797void 7798rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, 7799 rtx tmp_altivec) 7800{ 7801 machine_mode mode = GET_MODE (src); 7802 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src)); 7803 unsigned scalar_size = GET_MODE_SIZE (scalar_mode); 7804 int byte_shift = exact_log2 (scalar_size); 7805 7806 gcc_assert (byte_shift >= 0); 7807 7808 /* If we are given a memory address, optimize to load just the element. We 7809 don't have to adjust the vector element number on little endian 7810 systems. */ 7811 if (MEM_P (src)) 7812 { 7813 emit_move_insn (dest, 7814 rs6000_adjust_vec_address (dest, src, element, tmp_gpr, 7815 scalar_mode)); 7816 return; 7817 } 7818 7819 else if (REG_P (src) || SUBREG_P (src)) 7820 { 7821 int num_elements = GET_MODE_NUNITS (mode); 7822 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode)); 7823 int bit_shift = 7 - exact_log2 (num_elements); 7824 rtx element2; 7825 unsigned int dest_regno = reg_or_subregno (dest); 7826 unsigned int src_regno = reg_or_subregno (src); 7827 unsigned int element_regno = reg_or_subregno (element); 7828 7829 gcc_assert (REG_P (tmp_gpr)); 7830 7831 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in 7832 a general purpose register. */ 7833 if (TARGET_P9_VECTOR 7834 && (mode == V16QImode || mode == V8HImode || mode == V4SImode) 7835 && INT_REGNO_P (dest_regno) 7836 && ALTIVEC_REGNO_P (src_regno) 7837 && INT_REGNO_P (element_regno)) 7838 { 7839 rtx dest_si = gen_rtx_REG (SImode, dest_regno); 7840 rtx element_si = gen_rtx_REG (SImode, element_regno); 7841 7842 if (mode == V16QImode) 7843 emit_insn (BYTES_BIG_ENDIAN 7844 ? gen_vextublx (dest_si, element_si, src) 7845 : gen_vextubrx (dest_si, element_si, src)); 7846 7847 else if (mode == V8HImode) 7848 { 7849 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); 7850 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); 7851 emit_insn (BYTES_BIG_ENDIAN 7852 ? gen_vextuhlx (dest_si, tmp_gpr_si, src) 7853 : gen_vextuhrx (dest_si, tmp_gpr_si, src)); 7854 } 7855 7856 7857 else 7858 { 7859 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); 7860 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); 7861 emit_insn (BYTES_BIG_ENDIAN 7862 ? gen_vextuwlx (dest_si, tmp_gpr_si, src) 7863 : gen_vextuwrx (dest_si, tmp_gpr_si, src)); 7864 } 7865 7866 return; 7867 } 7868 7869 7870 gcc_assert (REG_P (tmp_altivec)); 7871 7872 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use 7873 an XOR, otherwise we need to subtract. The shift amount is so VSLO 7874 will shift the element into the upper position (adding 3 to convert a 7875 byte shift into a bit shift). */ 7876 if (scalar_size == 8) 7877 { 7878 if (!BYTES_BIG_ENDIAN) 7879 { 7880 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx)); 7881 element2 = tmp_gpr; 7882 } 7883 else 7884 element2 = element; 7885 7886 /* Generate RLDIC directly to shift left 6 bits and retrieve 1 7887 bit. 
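	     The shifted element number is masked with 64, so tmp_gpr ends up
	     holding 0 for an even element number and 64 for an odd one, i.e.
	     the bit offset of the selected doubleword within the vector.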
*/ 7888 emit_insn (gen_rtx_SET (tmp_gpr, 7889 gen_rtx_AND (DImode, 7890 gen_rtx_ASHIFT (DImode, 7891 element2, 7892 GEN_INT (6)), 7893 GEN_INT (64)))); 7894 } 7895 else 7896 { 7897 if (!BYTES_BIG_ENDIAN) 7898 { 7899 rtx num_ele_m1 = GEN_INT (num_elements - 1); 7900 7901 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1)); 7902 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr)); 7903 element2 = tmp_gpr; 7904 } 7905 else 7906 element2 = element; 7907 7908 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift))); 7909 } 7910 7911 /* Get the value into the lower byte of the Altivec register where VSLO 7912 expects it. */ 7913 if (TARGET_P9_VECTOR) 7914 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr)); 7915 else if (can_create_pseudo_p ()) 7916 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr)); 7917 else 7918 { 7919 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7920 emit_move_insn (tmp_di, tmp_gpr); 7921 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di)); 7922 } 7923 7924 /* Do the VSLO to get the value into the final location. */ 7925 switch (mode) 7926 { 7927 case E_V2DFmode: 7928 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec)); 7929 return; 7930 7931 case E_V2DImode: 7932 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec)); 7933 return; 7934 7935 case E_V4SFmode: 7936 { 7937 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7938 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec)); 7939 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); 7940 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, 7941 tmp_altivec)); 7942 7943 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf)); 7944 return; 7945 } 7946 7947 case E_V4SImode: 7948 case E_V8HImode: 7949 case E_V16QImode: 7950 { 7951 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7952 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); 7953 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest)); 7954 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, 7955 tmp_altivec)); 7956 emit_move_insn (tmp_gpr_di, tmp_altivec_di); 7957 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di, 7958 GEN_INT (64 - bits_in_element))); 7959 return; 7960 } 7961 7962 default: 7963 gcc_unreachable (); 7964 } 7965 7966 return; 7967 } 7968 else 7969 gcc_unreachable (); 7970 } 7971 7972/* Return alignment of TYPE. Existing alignment is ALIGN. HOW 7973 selects whether the alignment is abi mandated, optional, or 7974 both abi and optional alignment. */ 7975 7976unsigned int 7977rs6000_data_alignment (tree type, unsigned int align, enum data_align how) 7978{ 7979 if (how != align_opt) 7980 { 7981 if (TREE_CODE (type) == VECTOR_TYPE && align < 128) 7982 align = 128; 7983 } 7984 7985 if (how != align_abi) 7986 { 7987 if (TREE_CODE (type) == ARRAY_TYPE 7988 && TYPE_MODE (TREE_TYPE (type)) == QImode) 7989 { 7990 if (align < BITS_PER_WORD) 7991 align = BITS_PER_WORD; 7992 } 7993 } 7994 7995 return align; 7996} 7997 7998/* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory 7999 instructions simply ignore the low bits; VSX memory instructions 8000 are aligned to 4 or 8 bytes. 
*/ 8001 8002static bool 8003rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) 8004{ 8005 return (STRICT_ALIGNMENT 8006 || (!TARGET_EFFICIENT_UNALIGNED_VSX 8007 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) 8008 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)) 8009 && (int) align < VECTOR_ALIGN (mode))))); 8010} 8011 8012/* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */ 8013 8014unsigned int 8015rs6000_special_adjust_field_align (tree type, unsigned int computed) 8016{ 8017 if (computed <= 32 || TYPE_PACKED (type)) 8018 return computed; 8019 8020 /* Strip initial arrays. */ 8021 while (TREE_CODE (type) == ARRAY_TYPE) 8022 type = TREE_TYPE (type); 8023 8024 /* If RECORD or UNION, recursively find the first field. */ 8025 while (AGGREGATE_TYPE_P (type)) 8026 { 8027 tree field = TYPE_FIELDS (type); 8028 8029 /* Skip all non field decls */ 8030 while (field != NULL 8031 && (TREE_CODE (field) != FIELD_DECL 8032 || DECL_FIELD_ABI_IGNORED (field))) 8033 field = DECL_CHAIN (field); 8034 8035 if (! field) 8036 break; 8037 8038 /* A packed field does not contribute any extra alignment. */ 8039 if (DECL_PACKED (field)) 8040 return computed; 8041 8042 type = TREE_TYPE (field); 8043 8044 /* Strip arrays. */ 8045 while (TREE_CODE (type) == ARRAY_TYPE) 8046 type = TREE_TYPE (type); 8047 } 8048 8049 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node 8050 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode)) 8051 computed = MIN (computed, 32); 8052 8053 return computed; 8054} 8055 8056/* AIX increases natural record alignment to doubleword if the innermost first 8057 field is an FP double while the FP fields remain word aligned. 8058 Only called if TYPE initially is a RECORD or UNION. */ 8059 8060unsigned int 8061rs6000_special_round_type_align (tree type, unsigned int computed, 8062 unsigned int specified) 8063{ 8064 unsigned int align = MAX (computed, specified); 8065 8066 if (TYPE_PACKED (type) || align >= 64) 8067 return align; 8068 8069 /* If RECORD or UNION, recursively find the first field. */ 8070 do 8071 { 8072 tree field = TYPE_FIELDS (type); 8073 8074 /* Skip all non field decls */ 8075 while (field != NULL 8076 && (TREE_CODE (field) != FIELD_DECL 8077 || DECL_FIELD_ABI_IGNORED (field))) 8078 field = DECL_CHAIN (field); 8079 8080 if (! field) 8081 break; 8082 8083 /* A packed field does not contribute any extra alignment. */ 8084 if (DECL_PACKED (field)) 8085 return align; 8086 8087 type = TREE_TYPE (field); 8088 8089 /* Strip arrays. */ 8090 while (TREE_CODE (type) == ARRAY_TYPE) 8091 type = TREE_TYPE (type); 8092 } while (AGGREGATE_TYPE_P (type)); 8093 8094 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node 8095 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode)) 8096 align = MAX (align, 64); 8097 8098 return align; 8099} 8100 8101/* Darwin increases record alignment to the natural alignment of 8102 the first field. */ 8103 8104unsigned int 8105darwin_rs6000_special_round_type_align (tree type, unsigned int computed, 8106 unsigned int specified) 8107{ 8108 unsigned int align = MAX (computed, specified); 8109 8110 if (TYPE_PACKED (type)) 8111 return align; 8112 8113 /* Find the first field, looking down into aggregates. */ 8114 do { 8115 tree field = TYPE_FIELDS (type); 8116 /* Skip all non field decls */ 8117 while (field != NULL 8118 && (TREE_CODE (field) != FIELD_DECL 8119 || DECL_FIELD_ABI_IGNORED (field))) 8120 field = DECL_CHAIN (field); 8121 if (! 
field) 8122 break; 8123 /* A packed field does not contribute any extra alignment. */ 8124 if (DECL_PACKED (field)) 8125 return align; 8126 type = TREE_TYPE (field); 8127 while (TREE_CODE (type) == ARRAY_TYPE) 8128 type = TREE_TYPE (type); 8129 } while (AGGREGATE_TYPE_P (type)); 8130 8131 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node) 8132 align = MAX (align, TYPE_ALIGN (type)); 8133 8134 return align; 8135} 8136 8137/* Return 1 for an operand in small memory on V.4/eabi. */ 8138 8139int 8140small_data_operand (rtx op ATTRIBUTE_UNUSED, 8141 machine_mode mode ATTRIBUTE_UNUSED) 8142{ 8143#if TARGET_ELF 8144 rtx sym_ref; 8145 8146 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA) 8147 return 0; 8148 8149 if (DEFAULT_ABI != ABI_V4) 8150 return 0; 8151 8152 if (SYMBOL_REF_P (op)) 8153 sym_ref = op; 8154 8155 else if (GET_CODE (op) != CONST 8156 || GET_CODE (XEXP (op, 0)) != PLUS 8157 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0)) 8158 || !CONST_INT_P (XEXP (XEXP (op, 0), 1))) 8159 return 0; 8160 8161 else 8162 { 8163 rtx sum = XEXP (op, 0); 8164 HOST_WIDE_INT summand; 8165 8166 /* We have to be careful here, because it is the referenced address 8167 that must be 32k from _SDA_BASE_, not just the symbol. */ 8168 summand = INTVAL (XEXP (sum, 1)); 8169 if (summand < 0 || summand > g_switch_value) 8170 return 0; 8171 8172 sym_ref = XEXP (sum, 0); 8173 } 8174 8175 return SYMBOL_REF_SMALL_P (sym_ref); 8176#else 8177 return 0; 8178#endif 8179} 8180 8181/* Return true if either operand is a general purpose register. */ 8182 8183bool 8184gpr_or_gpr_p (rtx op0, rtx op1) 8185{ 8186 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0))) 8187 || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); 8188} 8189 8190/* Return true if this is a move direct operation between GPR registers and 8191 floating point/VSX registers. */ 8192 8193bool 8194direct_move_p (rtx op0, rtx op1) 8195{ 8196 if (!REG_P (op0) || !REG_P (op1)) 8197 return false; 8198 8199 if (!TARGET_DIRECT_MOVE) 8200 return false; 8201 8202 int regno0 = REGNO (op0); 8203 int regno1 = REGNO (op1); 8204 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1)) 8205 return false; 8206 8207 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1)) 8208 return true; 8209 8210 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1)) 8211 return true; 8212 8213 return false; 8214} 8215 8216/* Return true if the ADDR is an acceptable address for a quad memory 8217 operation of mode MODE (either LQ/STQ for general purpose registers, or 8218 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address 8219 is intended for LQ/STQ. If it is false, the address is intended for the ISA 8220 3.0 LXV/STXV instruction. */ 8221 8222bool 8223quad_address_p (rtx addr, machine_mode mode, bool strict) 8224{ 8225 rtx op0, op1; 8226 8227 if (GET_MODE_SIZE (mode) < 16) 8228 return false; 8229 8230 if (legitimate_indirect_address_p (addr, strict)) 8231 return true; 8232 8233 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode)) 8234 return false; 8235 8236 /* Is this a valid prefixed address? If the bottom four bits of the offset 8237 are non-zero, we could use a prefixed instruction (which does not have the 8238 DQ-form constraint that the traditional instruction had) instead of 8239 forcing the unaligned offset to a GPR. 
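     For example, an offset of 20 cannot be encoded in the DQ-form LXV
     instruction (the offset must be a multiple of 16), but it can be encoded
     directly in the prefixed PLXV instruction.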
*/ 8240 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ)) 8241 return true; 8242 8243 if (GET_CODE (addr) != PLUS) 8244 return false; 8245 8246 op0 = XEXP (addr, 0); 8247 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict)) 8248 return false; 8249 8250 op1 = XEXP (addr, 1); 8251 if (!CONST_INT_P (op1)) 8252 return false; 8253 8254 return quad_address_offset_p (INTVAL (op1)); 8255} 8256 8257/* Return true if this is a load or store quad operation. This function does 8258 not handle the atomic quad memory instructions. */ 8259 8260bool 8261quad_load_store_p (rtx op0, rtx op1) 8262{ 8263 bool ret; 8264 8265 if (!TARGET_QUAD_MEMORY) 8266 ret = false; 8267 8268 else if (REG_P (op0) && MEM_P (op1)) 8269 ret = (quad_int_reg_operand (op0, GET_MODE (op0)) 8270 && quad_memory_operand (op1, GET_MODE (op1)) 8271 && !reg_overlap_mentioned_p (op0, op1)); 8272 8273 else if (MEM_P (op0) && REG_P (op1)) 8274 ret = (quad_memory_operand (op0, GET_MODE (op0)) 8275 && quad_int_reg_operand (op1, GET_MODE (op1))); 8276 8277 else 8278 ret = false; 8279 8280 if (TARGET_DEBUG_ADDR) 8281 { 8282 fprintf (stderr, "\n========== quad_load_store, return %s\n", 8283 ret ? "true" : "false"); 8284 debug_rtx (gen_rtx_SET (op0, op1)); 8285 } 8286 8287 return ret; 8288} 8289 8290/* Given an address, return a constant offset term if one exists. */ 8291 8292static rtx 8293address_offset (rtx op) 8294{ 8295 if (GET_CODE (op) == PRE_INC 8296 || GET_CODE (op) == PRE_DEC) 8297 op = XEXP (op, 0); 8298 else if (GET_CODE (op) == PRE_MODIFY 8299 || GET_CODE (op) == LO_SUM) 8300 op = XEXP (op, 1); 8301 8302 if (GET_CODE (op) == CONST) 8303 op = XEXP (op, 0); 8304 8305 if (GET_CODE (op) == PLUS) 8306 op = XEXP (op, 1); 8307 8308 if (CONST_INT_P (op)) 8309 return op; 8310 8311 return NULL_RTX; 8312} 8313 8314/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for 8315 the mode. If we can't find (or don't know) the alignment of the symbol 8316 we assume (optimistically) that it's sufficiently aligned [??? maybe we 8317 should be pessimistic]. Offsets are validated in the same way as for 8318 reg + offset. */ 8319static bool 8320darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode) 8321{ 8322 /* We should not get here with this. */ 8323 gcc_checking_assert (! mode_supports_dq_form (mode)); 8324 8325 if (GET_CODE (x) == CONST) 8326 x = XEXP (x, 0); 8327 8328 /* If we are building PIC code, then any symbol must be wrapped in an 8329 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */ 8330 bool machopic_offs_p = false; 8331 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) 8332 { 8333 x = XVECEXP (x, 0, 0); 8334 machopic_offs_p = true; 8335 } 8336 8337 rtx sym = NULL_RTX; 8338 unsigned HOST_WIDE_INT offset = 0; 8339 8340 if (GET_CODE (x) == PLUS) 8341 { 8342 sym = XEXP (x, 0); 8343 if (! SYMBOL_REF_P (sym)) 8344 return false; 8345 if (!CONST_INT_P (XEXP (x, 1))) 8346 return false; 8347 offset = INTVAL (XEXP (x, 1)); 8348 } 8349 else if (SYMBOL_REF_P (x)) 8350 sym = x; 8351 else if (CONST_INT_P (x)) 8352 offset = INTVAL (x); 8353 else if (GET_CODE (x) == LABEL_REF) 8354 offset = 0; // We assume code labels are Pmode aligned 8355 else 8356 return false; // not sure what we have here. 8357 8358 /* If we don't know the alignment of the thing to which the symbol refers, 8359 we assume optimistically it is "enough". 8360 ??? maybe we should be pessimistic instead. 
*/ 8361 unsigned align = 0; 8362 8363 if (sym) 8364 { 8365 tree decl = SYMBOL_REF_DECL (sym); 8366 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */ 8367 if (TARGET_MACHO && flag_pic && !machopic_offs_p) 8368 return false; 8369#if TARGET_MACHO 8370 if (MACHO_SYMBOL_INDIRECTION_P (sym)) 8371 /* The decl in an indirection symbol is the original one, which might 8372 be less aligned than the indirection. Our indirections are always 8373 pointer-aligned. */ 8374 ; 8375 else 8376#endif 8377 if (decl && DECL_ALIGN (decl)) 8378 align = DECL_ALIGN_UNIT (decl); 8379 } 8380 8381 unsigned int extra = 0; 8382 switch (mode) 8383 { 8384 case E_DFmode: 8385 case E_DDmode: 8386 case E_DImode: 8387 /* If we are using VSX scalar loads, restrict ourselves to reg+reg 8388 addressing. */ 8389 if (VECTOR_MEM_VSX_P (mode)) 8390 return false; 8391 8392 if (!TARGET_POWERPC64) 8393 extra = 4; 8394 else if ((offset & 3) || (align & 3)) 8395 return false; 8396 break; 8397 8398 case E_TFmode: 8399 case E_IFmode: 8400 case E_KFmode: 8401 case E_TDmode: 8402 case E_TImode: 8403 case E_PTImode: 8404 extra = 8; 8405 if (!TARGET_POWERPC64) 8406 extra = 12; 8407 else if ((offset & 3) || (align & 3)) 8408 return false; 8409 break; 8410 8411 default: 8412 break; 8413 } 8414 8415 /* We only care if the access(es) would cause a change to the high part. */ 8416 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 8417 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 8418} 8419 8420/* Return true if the MEM operand is a memory operand suitable for use 8421 with a (full width, possibly multiple) gpr load/store. On 8422 powerpc64 this means the offset must be divisible by 4. 8423 Implements 'Y' constraint. 8424 8425 Accept direct, indexed, offset, lo_sum and tocref. Since this is 8426 a constraint function we know the operand has satisfied a suitable 8427 memory predicate. 8428 8429 Offsetting a lo_sum should not be allowed, except where we know by 8430 alignment that a 32k boundary is not crossed. Note that by 8431 "offsetting" here we mean a further offset to access parts of the 8432 MEM. It's fine to have a lo_sum where the inner address is offset 8433 from a sym, since the same sym+offset will appear in the high part 8434 of the address calculation. */ 8435 8436bool 8437mem_operand_gpr (rtx op, machine_mode mode) 8438{ 8439 unsigned HOST_WIDE_INT offset; 8440 int extra; 8441 rtx addr = XEXP (op, 0); 8442 8443 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */ 8444 if (TARGET_UPDATE 8445 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 8446 && mode_supports_pre_incdec_p (mode) 8447 && legitimate_indirect_address_p (XEXP (addr, 0), false)) 8448 return true; 8449 8450 /* Allow prefixed instructions if supported. If the bottom two bits of the 8451 offset are non-zero, we could use a prefixed instruction (which does not 8452 have the DS-form constraint that the traditional instruction had) instead 8453 of forcing the unaligned offset to a GPR. */ 8454 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS)) 8455 return true; 8456 8457 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is 8458 really OK. Doing this early avoids teaching all the other machinery 8459 about them. */ 8460 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM) 8461 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode); 8462 8463 /* Only allow offsettable addresses. See PRs 83969 and 84279. 
*/ 8464 if (!rs6000_offsettable_memref_p (op, mode, false)) 8465 return false; 8466 8467 op = address_offset (addr); 8468 if (op == NULL_RTX) 8469 return true; 8470 8471 offset = INTVAL (op); 8472 if (TARGET_POWERPC64 && (offset & 3) != 0) 8473 return false; 8474 8475 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; 8476 if (extra < 0) 8477 extra = 0; 8478 8479 if (GET_CODE (addr) == LO_SUM) 8480 /* For lo_sum addresses, we must allow any offset except one that 8481 causes a wrap, so test only the low 16 bits. */ 8482 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 8483 8484 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 8485} 8486 8487/* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr, 8488 enforce an offset divisible by 4 even for 32-bit. */ 8489 8490bool 8491mem_operand_ds_form (rtx op, machine_mode mode) 8492{ 8493 unsigned HOST_WIDE_INT offset; 8494 int extra; 8495 rtx addr = XEXP (op, 0); 8496 8497 /* Allow prefixed instructions if supported. If the bottom two bits of the 8498 offset are non-zero, we could use a prefixed instruction (which does not 8499 have the DS-form constraint that the traditional instruction had) instead 8500 of forcing the unaligned offset to a GPR. */ 8501 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS)) 8502 return true; 8503 8504 if (!offsettable_address_p (false, mode, addr)) 8505 return false; 8506 8507 op = address_offset (addr); 8508 if (op == NULL_RTX) 8509 return true; 8510 8511 offset = INTVAL (op); 8512 if ((offset & 3) != 0) 8513 return false; 8514 8515 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; 8516 if (extra < 0) 8517 extra = 0; 8518 8519 if (GET_CODE (addr) == LO_SUM) 8520 /* For lo_sum addresses, we must allow any offset except one that 8521 causes a wrap, so test only the low 16 bits. */ 8522 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 8523 8524 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 8525} 8526 8527/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */ 8528 8529static bool 8530reg_offset_addressing_ok_p (machine_mode mode) 8531{ 8532 switch (mode) 8533 { 8534 case E_V16QImode: 8535 case E_V8HImode: 8536 case E_V4SFmode: 8537 case E_V4SImode: 8538 case E_V2DFmode: 8539 case E_V2DImode: 8540 case E_V1TImode: 8541 case E_TImode: 8542 case E_TFmode: 8543 case E_KFmode: 8544 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the 8545 ISA 3.0 vector d-form addressing mode was added. While TImode is not 8546 a vector mode, if we want to use the VSX registers to move it around, 8547 we need to restrict ourselves to reg+reg addressing. Similarly for 8548 IEEE 128-bit floating point that is passed in a single vector 8549 register. */ 8550 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) 8551 return mode_supports_dq_form (mode); 8552 break; 8553 8554 /* The vector pair/quad types support offset addressing if the 8555 underlying vectors support offset addressing. */ 8556 case E_OOmode: 8557 case E_XOmode: 8558 return TARGET_MMA; 8559 8560 case E_SDmode: 8561 /* If we can do direct load/stores of SDmode, restrict it to reg+reg 8562 addressing for the LFIWZX and STFIWX instructions. 
*/ 8563 if (TARGET_NO_SDMODE_STACK) 8564 return false; 8565 break; 8566 8567 default: 8568 break; 8569 } 8570 8571 return true; 8572} 8573 8574static bool 8575virtual_stack_registers_memory_p (rtx op) 8576{ 8577 int regnum; 8578 8579 if (REG_P (op)) 8580 regnum = REGNO (op); 8581 8582 else if (GET_CODE (op) == PLUS 8583 && REG_P (XEXP (op, 0)) 8584 && CONST_INT_P (XEXP (op, 1))) 8585 regnum = REGNO (XEXP (op, 0)); 8586 8587 else 8588 return false; 8589 8590 return (regnum >= FIRST_VIRTUAL_REGISTER 8591 && regnum <= LAST_VIRTUAL_POINTER_REGISTER); 8592} 8593 8594/* Return true if a MODE sized memory accesses to OP plus OFFSET 8595 is known to not straddle a 32k boundary. This function is used 8596 to determine whether -mcmodel=medium code can use TOC pointer 8597 relative addressing for OP. This means the alignment of the TOC 8598 pointer must also be taken into account, and unfortunately that is 8599 only 8 bytes. */ 8600 8601#ifndef POWERPC64_TOC_POINTER_ALIGNMENT 8602#define POWERPC64_TOC_POINTER_ALIGNMENT 8 8603#endif 8604 8605static bool 8606offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, 8607 machine_mode mode) 8608{ 8609 tree decl; 8610 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; 8611 8612 if (!SYMBOL_REF_P (op)) 8613 return false; 8614 8615 /* ISA 3.0 vector d-form addressing is restricted, don't allow 8616 SYMBOL_REF. */ 8617 if (mode_supports_dq_form (mode)) 8618 return false; 8619 8620 dsize = GET_MODE_SIZE (mode); 8621 decl = SYMBOL_REF_DECL (op); 8622 if (!decl) 8623 { 8624 if (dsize == 0) 8625 return false; 8626 8627 /* -fsection-anchors loses the original SYMBOL_REF_DECL when 8628 replacing memory addresses with an anchor plus offset. We 8629 could find the decl by rummaging around in the block->objects 8630 VEC for the given offset but that seems like too much work. */ 8631 dalign = BITS_PER_UNIT; 8632 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) 8633 && SYMBOL_REF_ANCHOR_P (op) 8634 && SYMBOL_REF_BLOCK (op) != NULL) 8635 { 8636 struct object_block *block = SYMBOL_REF_BLOCK (op); 8637 8638 dalign = block->alignment; 8639 offset += SYMBOL_REF_BLOCK_OFFSET (op); 8640 } 8641 else if (CONSTANT_POOL_ADDRESS_P (op)) 8642 { 8643 /* It would be nice to have get_pool_align().. */ 8644 machine_mode cmode = get_pool_mode (op); 8645 8646 dalign = GET_MODE_ALIGNMENT (cmode); 8647 } 8648 } 8649 else if (DECL_P (decl)) 8650 { 8651 dalign = DECL_ALIGN (decl); 8652 8653 if (dsize == 0) 8654 { 8655 /* Allow BLKmode when the entire object is known to not 8656 cross a 32k boundary. */ 8657 if (!DECL_SIZE_UNIT (decl)) 8658 return false; 8659 8660 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl))) 8661 return false; 8662 8663 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl)); 8664 if (dsize > 32768) 8665 return false; 8666 8667 dalign /= BITS_PER_UNIT; 8668 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) 8669 dalign = POWERPC64_TOC_POINTER_ALIGNMENT; 8670 return dalign >= dsize; 8671 } 8672 } 8673 else 8674 gcc_unreachable (); 8675 8676 /* Find how many bits of the alignment we know for this access. 
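     For example, a decl aligned to 8 bytes that is accessed at offset 4 only
     guarantees 4-byte alignment for the access; the lowest set bit of the
     offset limits the alignment we can rely on.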
*/ 8677 dalign /= BITS_PER_UNIT; 8678 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) 8679 dalign = POWERPC64_TOC_POINTER_ALIGNMENT; 8680 mask = dalign - 1; 8681 lsb = offset & -offset; 8682 mask &= lsb - 1; 8683 dalign = mask + 1; 8684 8685 return dalign >= dsize; 8686} 8687 8688static bool 8689constant_pool_expr_p (rtx op) 8690{ 8691 rtx base, offset; 8692 8693 split_const (op, &base, &offset); 8694 return (SYMBOL_REF_P (base) 8695 && CONSTANT_POOL_ADDRESS_P (base) 8696 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode)); 8697} 8698 8699/* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null, 8700 use that as the register to put the HIGH value into if register allocation 8701 is already done. */ 8702 8703rtx 8704create_TOC_reference (rtx symbol, rtx largetoc_reg) 8705{ 8706 rtx tocrel, tocreg, hi; 8707 8708 gcc_assert (TARGET_TOC); 8709 8710 if (TARGET_DEBUG_ADDR) 8711 { 8712 if (SYMBOL_REF_P (symbol)) 8713 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n", 8714 XSTR (symbol, 0)); 8715 else 8716 { 8717 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n", 8718 GET_RTX_NAME (GET_CODE (symbol))); 8719 debug_rtx (symbol); 8720 } 8721 } 8722 8723 if (!can_create_pseudo_p ()) 8724 df_set_regs_ever_live (TOC_REGISTER, true); 8725 8726 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER); 8727 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL); 8728 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ()) 8729 return tocrel; 8730 8731 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel)); 8732 if (largetoc_reg != NULL) 8733 { 8734 emit_move_insn (largetoc_reg, hi); 8735 hi = largetoc_reg; 8736 } 8737 return gen_rtx_LO_SUM (Pmode, hi, tocrel); 8738} 8739 8740/* These are only used to pass through from print_operand/print_operand_address 8741 to rs6000_output_addr_const_extra over the intervening function 8742 output_addr_const which is not target code. */ 8743static const_rtx tocrel_base_oac, tocrel_offset_oac; 8744 8745/* Return true if OP is a toc pointer relative address (the output 8746 of create_TOC_reference). If STRICT, do not match non-split 8747 -mcmodel=large/medium toc pointer relative addresses. If the pointers 8748 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and 8749 TOCREL_OFFSET_RET respectively. */ 8750 8751bool 8752toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret, 8753 const_rtx *tocrel_offset_ret) 8754{ 8755 if (!TARGET_TOC) 8756 return false; 8757 8758 if (TARGET_CMODEL != CMODEL_SMALL) 8759 { 8760 /* When strict ensure we have everything tidy. */ 8761 if (strict 8762 && !(GET_CODE (op) == LO_SUM 8763 && REG_P (XEXP (op, 0)) 8764 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))) 8765 return false; 8766 8767 /* When not strict, allow non-split TOC addresses and also allow 8768 (lo_sum (high ..)) TOC addresses created during reload. 
*/ 8769 if (GET_CODE (op) == LO_SUM) 8770 op = XEXP (op, 1); 8771 } 8772 8773 const_rtx tocrel_base = op; 8774 const_rtx tocrel_offset = const0_rtx; 8775 8776 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op))) 8777 { 8778 tocrel_base = XEXP (op, 0); 8779 tocrel_offset = XEXP (op, 1); 8780 } 8781 8782 if (tocrel_base_ret) 8783 *tocrel_base_ret = tocrel_base; 8784 if (tocrel_offset_ret) 8785 *tocrel_offset_ret = tocrel_offset; 8786 8787 return (GET_CODE (tocrel_base) == UNSPEC 8788 && XINT (tocrel_base, 1) == UNSPEC_TOCREL 8789 && REG_P (XVECEXP (tocrel_base, 0, 1)) 8790 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER); 8791} 8792 8793/* Return true if X is a constant pool address, and also for cmodel=medium 8794 if X is a toc-relative address known to be offsettable within MODE. */ 8795 8796bool 8797legitimate_constant_pool_address_p (const_rtx x, machine_mode mode, 8798 bool strict) 8799{ 8800 const_rtx tocrel_base, tocrel_offset; 8801 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset) 8802 && (TARGET_CMODEL != CMODEL_MEDIUM 8803 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0)) 8804 || mode == QImode 8805 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0), 8806 INTVAL (tocrel_offset), mode))); 8807} 8808 8809static bool 8810legitimate_small_data_p (machine_mode mode, rtx x) 8811{ 8812 return (DEFAULT_ABI == ABI_V4 8813 && !flag_pic && !TARGET_TOC 8814 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST) 8815 && small_data_operand (x, mode)); 8816} 8817 8818bool 8819rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, 8820 bool strict, bool worst_case) 8821{ 8822 unsigned HOST_WIDE_INT offset; 8823 unsigned int extra; 8824 8825 if (GET_CODE (x) != PLUS) 8826 return false; 8827 if (!REG_P (XEXP (x, 0))) 8828 return false; 8829 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) 8830 return false; 8831 if (mode_supports_dq_form (mode)) 8832 return quad_address_p (x, mode, strict); 8833 if (!reg_offset_addressing_ok_p (mode)) 8834 return virtual_stack_registers_memory_p (x); 8835 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) 8836 return true; 8837 if (!CONST_INT_P (XEXP (x, 1))) 8838 return false; 8839 8840 offset = INTVAL (XEXP (x, 1)); 8841 extra = 0; 8842 switch (mode) 8843 { 8844 case E_DFmode: 8845 case E_DDmode: 8846 case E_DImode: 8847 /* If we are using VSX scalar loads, restrict ourselves to reg+reg 8848 addressing. 
*/ 8849 if (VECTOR_MEM_VSX_P (mode)) 8850 return false; 8851 8852 if (!worst_case) 8853 break; 8854 if (!TARGET_POWERPC64) 8855 extra = 4; 8856 else if (offset & 3) 8857 return false; 8858 break; 8859 8860 case E_TFmode: 8861 case E_IFmode: 8862 case E_KFmode: 8863 case E_TDmode: 8864 case E_TImode: 8865 case E_PTImode: 8866 extra = 8; 8867 if (!worst_case) 8868 break; 8869 if (!TARGET_POWERPC64) 8870 extra = 12; 8871 else if (offset & 3) 8872 return false; 8873 break; 8874 8875 default: 8876 break; 8877 } 8878 8879 if (TARGET_PREFIXED) 8880 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra); 8881 else 8882 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 8883} 8884 8885bool 8886legitimate_indexed_address_p (rtx x, int strict) 8887{ 8888 rtx op0, op1; 8889 8890 if (GET_CODE (x) != PLUS) 8891 return false; 8892 8893 op0 = XEXP (x, 0); 8894 op1 = XEXP (x, 1); 8895 8896 return (REG_P (op0) && REG_P (op1) 8897 && ((INT_REG_OK_FOR_BASE_P (op0, strict) 8898 && INT_REG_OK_FOR_INDEX_P (op1, strict)) 8899 || (INT_REG_OK_FOR_BASE_P (op1, strict) 8900 && INT_REG_OK_FOR_INDEX_P (op0, strict)))); 8901} 8902 8903bool 8904avoiding_indexed_address_p (machine_mode mode) 8905{ 8906 unsigned int msize = GET_MODE_SIZE (mode); 8907 8908 /* Avoid indexed addressing for modes that have non-indexed load/store 8909 instruction forms. On power10, vector pairs have an indexed 8910 form, but vector quads don't. */ 8911 if (msize > 16) 8912 return msize != 32; 8913 8914 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode)); 8915} 8916 8917bool 8918legitimate_indirect_address_p (rtx x, int strict) 8919{ 8920 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict); 8921} 8922 8923bool 8924macho_lo_sum_memory_operand (rtx x, machine_mode mode) 8925{ 8926 if (!TARGET_MACHO || !flag_pic 8927 || mode != SImode || !MEM_P (x)) 8928 return false; 8929 x = XEXP (x, 0); 8930 8931 if (GET_CODE (x) != LO_SUM) 8932 return false; 8933 if (!REG_P (XEXP (x, 0))) 8934 return false; 8935 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0)) 8936 return false; 8937 x = XEXP (x, 1); 8938 8939 return CONSTANT_P (x); 8940} 8941 8942static bool 8943legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) 8944{ 8945 if (GET_CODE (x) != LO_SUM) 8946 return false; 8947 if (!REG_P (XEXP (x, 0))) 8948 return false; 8949 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) 8950 return false; 8951 /* quad word addresses are restricted, and we can't use LO_SUM. */ 8952 if (mode_supports_dq_form (mode)) 8953 return false; 8954 x = XEXP (x, 1); 8955 8956 if (TARGET_ELF) 8957 { 8958 bool large_toc_ok; 8959 8960 if (DEFAULT_ABI == ABI_V4 && flag_pic) 8961 return false; 8962 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls 8963 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS 8964 recognizes some LO_SUM addresses as valid although this 8965 function says opposite. In most cases, LRA through different 8966 transformations can generate correct code for address reloads. 8967 It cannot manage only some LO_SUM cases. So we need to add 8968 code here saying that some addresses are still valid. */ 8969 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL 8970 && small_toc_ref (x, VOIDmode)); 8971 if (TARGET_TOC && ! large_toc_ok) 8972 return false; 8973 if (GET_MODE_NUNITS (mode) != 1) 8974 return false; 8975 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 8976 && !(/* ??? Assume floating point reg based on mode? 
*/ 8977 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 8978 return false; 8979 8980 return CONSTANT_P (x) || large_toc_ok; 8981 } 8982 else if (TARGET_MACHO) 8983 { 8984 if (GET_MODE_NUNITS (mode) != 1) 8985 return false; 8986 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 8987 && !(/* see above */ 8988 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 8989 return false; 8990#if TARGET_MACHO 8991 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic) 8992 return CONSTANT_P (x); 8993#endif 8994 /* Macho-O PIC code from here. */ 8995 if (GET_CODE (x) == CONST) 8996 x = XEXP (x, 0); 8997 8998 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */ 8999 if (SYMBOL_REF_P (x)) 9000 return false; 9001 9002 /* So this is OK if the wrapped object is const. */ 9003 if (GET_CODE (x) == UNSPEC 9004 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) 9005 return CONSTANT_P (XVECEXP (x, 0, 0)); 9006 return CONSTANT_P (x); 9007 } 9008 return false; 9009} 9010 9011 9012/* Try machine-dependent ways of modifying an illegitimate address 9013 to be legitimate. If we find one, return the new, valid address. 9014 This is used from only one place: `memory_address' in explow.cc. 9015 9016 OLDX is the address as it was before break_out_memory_refs was 9017 called. In some cases it is useful to look at this to decide what 9018 needs to be done. 9019 9020 It is always safe for this function to do nothing. It exists to 9021 recognize opportunities to optimize the output. 9022 9023 On RS/6000, first check for the sum of a register with a constant 9024 integer that is out of range. If so, generate code to add the 9025 constant with the low-order 16 bits masked to the register and force 9026 this result into another register (this can be done with `cau'). 9027 Then generate an address of REG+(CONST&0xffff), allowing for the 9028 possibility of bit 16 being a one. 9029 9030 Then check for the sum of a register and something not constant, try to 9031 load the other things into a register and return the sum. */ 9032 9033static rtx 9034rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 9035 machine_mode mode) 9036{ 9037 unsigned int extra; 9038 9039 if (!reg_offset_addressing_ok_p (mode) 9040 || mode_supports_dq_form (mode)) 9041 { 9042 if (virtual_stack_registers_memory_p (x)) 9043 return x; 9044 9045 /* In theory we should not be seeing addresses of the form reg+0, 9046 but just in case it is generated, optimize it away. */ 9047 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) 9048 return force_reg (Pmode, XEXP (x, 0)); 9049 9050 /* For TImode with load/store quad, restrict addresses to just a single 9051 pointer, so it works with both GPRs and VSX registers. */ 9052 /* Make sure both operands are registers. */ 9053 else if (GET_CODE (x) == PLUS 9054 && (mode != TImode || !TARGET_VSX)) 9055 return gen_rtx_PLUS (Pmode, 9056 force_reg (Pmode, XEXP (x, 0)), 9057 force_reg (Pmode, XEXP (x, 1))); 9058 else 9059 return force_reg (Pmode, x); 9060 } 9061 if (SYMBOL_REF_P (x) && !TARGET_MACHO) 9062 { 9063 enum tls_model model = SYMBOL_REF_TLS_MODEL (x); 9064 if (model != 0) 9065 return rs6000_legitimize_tls_address (x, model); 9066 } 9067 9068 extra = 0; 9069 switch (mode) 9070 { 9071 case E_TFmode: 9072 case E_TDmode: 9073 case E_TImode: 9074 case E_PTImode: 9075 case E_IFmode: 9076 case E_KFmode: 9077 /* As in legitimate_offset_address_p we do not assume 9078 worst-case. The mode here is just a hint as to the registers 9079 used. A TImode is usually in gprs, but may actually be in 9080 fprs. 
Leave worst-case scenario for reload to handle via 9081 insn constraints. PTImode is only GPRs. */ 9082 extra = 8; 9083 break; 9084 default: 9085 break; 9086 } 9087 9088 if (GET_CODE (x) == PLUS 9089 && REG_P (XEXP (x, 0)) 9090 && CONST_INT_P (XEXP (x, 1)) 9091 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) 9092 >= 0x10000 - extra)) 9093 { 9094 HOST_WIDE_INT high_int, low_int; 9095 rtx sum; 9096 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000; 9097 if (low_int >= 0x8000 - extra) 9098 low_int = 0; 9099 high_int = INTVAL (XEXP (x, 1)) - low_int; 9100 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), 9101 gen_int_mode (high_int, Pmode)), 0); 9102 return plus_constant (Pmode, sum, low_int); 9103 } 9104 else if (GET_CODE (x) == PLUS 9105 && REG_P (XEXP (x, 0)) 9106 && !CONST_INT_P (XEXP (x, 1)) 9107 && GET_MODE_NUNITS (mode) == 1 9108 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD 9109 || (/* ??? Assume floating point reg based on mode? */ 9110 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 9111 && !avoiding_indexed_address_p (mode)) 9112 { 9113 return gen_rtx_PLUS (Pmode, XEXP (x, 0), 9114 force_reg (Pmode, force_operand (XEXP (x, 1), 0))); 9115 } 9116 else if ((TARGET_ELF 9117#if TARGET_MACHO 9118 || !MACHO_DYNAMIC_NO_PIC_P 9119#endif 9120 ) 9121 && TARGET_32BIT 9122 && TARGET_NO_TOC_OR_PCREL 9123 && !flag_pic 9124 && !CONST_INT_P (x) 9125 && !CONST_WIDE_INT_P (x) 9126 && !CONST_DOUBLE_P (x) 9127 && CONSTANT_P (x) 9128 && GET_MODE_NUNITS (mode) == 1 9129 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD 9130 || (/* ??? Assume floating point reg based on mode? */ 9131 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))) 9132 { 9133 rtx reg = gen_reg_rtx (Pmode); 9134 if (TARGET_ELF) 9135 emit_insn (gen_elf_high (reg, x)); 9136 else 9137 emit_insn (gen_macho_high (Pmode, reg, x)); 9138 return gen_rtx_LO_SUM (Pmode, reg, x); 9139 } 9140 else if (TARGET_TOC 9141 && SYMBOL_REF_P (x) 9142 && constant_pool_expr_p (x) 9143 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode)) 9144 return create_TOC_reference (x, NULL_RTX); 9145 else 9146 return x; 9147} 9148 9149/* Debug version of rs6000_legitimize_address. */ 9150static rtx 9151rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) 9152{ 9153 rtx ret; 9154 rtx_insn *insns; 9155 9156 start_sequence (); 9157 ret = rs6000_legitimize_address (x, oldx, mode); 9158 insns = get_insns (); 9159 end_sequence (); 9160 9161 if (ret != x) 9162 { 9163 fprintf (stderr, 9164 "\nrs6000_legitimize_address: mode %s, old code %s, " 9165 "new code %s, modified\n", 9166 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)), 9167 GET_RTX_NAME (GET_CODE (ret))); 9168 9169 fprintf (stderr, "Original address:\n"); 9170 debug_rtx (x); 9171 9172 fprintf (stderr, "oldx:\n"); 9173 debug_rtx (oldx); 9174 9175 fprintf (stderr, "New address:\n"); 9176 debug_rtx (ret); 9177 9178 if (insns) 9179 { 9180 fprintf (stderr, "Insns added:\n"); 9181 debug_rtx_list (insns, 20); 9182 } 9183 } 9184 else 9185 { 9186 fprintf (stderr, 9187 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n", 9188 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x))); 9189 9190 debug_rtx (x); 9191 } 9192 9193 if (insns) 9194 emit_insn (insns); 9195 9196 return ret; 9197} 9198 9199/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. 9200 We need to emit DTP-relative relocations. 
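   For example, for SIZE == 4 on an ELF target the output for a symbol SYM
   is ".long SYM@dtprel+0x8000".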
*/ 9201 9202static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 9203static void 9204rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x) 9205{ 9206 switch (size) 9207 { 9208 case 4: 9209 fputs ("\t.long\t", file); 9210 break; 9211 case 8: 9212 fputs (DOUBLE_INT_ASM_OP, file); 9213 break; 9214 default: 9215 gcc_unreachable (); 9216 } 9217 output_addr_const (file, x); 9218 if (TARGET_ELF) 9219 fputs ("@dtprel+0x8000", file); 9220} 9221 9222/* Return true if X is a symbol that refers to real (rather than emulated) 9223 TLS. */ 9224 9225static bool 9226rs6000_real_tls_symbol_ref_p (rtx x) 9227{ 9228 return (SYMBOL_REF_P (x) 9229 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL); 9230} 9231 9232/* In the name of slightly smaller debug output, and to cater to 9233 general assembler lossage, recognize various UNSPEC sequences 9234 and turn them back into a direct symbol reference. */ 9235 9236static rtx 9237rs6000_delegitimize_address (rtx orig_x) 9238{ 9239 rtx x, y, offset; 9240 9241 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It 9242 encodes loading up the high part of the address of a TOC reference along 9243 with a load of a GPR using the same base register used for the load. We 9244 return the original SYMBOL_REF. 9245 9246 (set (reg:INT1 <reg> 9247 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR))) 9248 9249 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These 9250 UNSPECs include the external SYMBOL_REF along with the value being loaded. 9251 We return the original SYMBOL_REF. 9252 9253 (parallel [(set (reg:DI <base-reg>) 9254 (unspec:DI [(symbol_ref <symbol>) 9255 (const_int <marker>)] 9256 UNSPEC_PCREL_OPT_LD_ADDR)) 9257 (set (reg:DI <load-reg>) 9258 (unspec:DI [(const_int 0)] 9259 UNSPEC_PCREL_OPT_LD_DATA))]) 9260 9261 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the 9262 GPR being loaded is the same as the GPR used to hold the external address. 9263 9264 (set (reg:DI <base-reg>) 9265 (unspec:DI [(symbol_ref <symbol>) 9266 (const_int <marker>)] 9267 UNSPEC_PCREL_OPT_LD_SAME_REG)) 9268 9269 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This 9270 UNSPEC include the external SYMBOL_REF along with the value being loaded. 9271 We return the original SYMBOL_REF. 9272 9273 (parallel [(set (reg:DI <base-reg>) 9274 (unspec:DI [(symbol_ref <symbol>) 9275 (const_int <marker>)] 9276 UNSPEC_PCREL_OPT_ST_ADDR)) 9277 (use (reg <store-reg>))]) */ 9278 9279 if (GET_CODE (orig_x) == UNSPEC) 9280 switch (XINT (orig_x, 1)) 9281 { 9282 case UNSPEC_FUSION_GPR: 9283 case UNSPEC_PCREL_OPT_LD_ADDR: 9284 case UNSPEC_PCREL_OPT_LD_SAME_REG: 9285 case UNSPEC_PCREL_OPT_ST_ADDR: 9286 orig_x = XVECEXP (orig_x, 0, 0); 9287 break; 9288 9289 default: 9290 break; 9291 } 9292 9293 orig_x = delegitimize_mem_from_attrs (orig_x); 9294 9295 x = orig_x; 9296 if (MEM_P (x)) 9297 x = XEXP (x, 0); 9298 9299 y = x; 9300 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM) 9301 y = XEXP (y, 1); 9302 9303 offset = NULL_RTX; 9304 if (GET_CODE (y) == PLUS 9305 && GET_MODE (y) == Pmode 9306 && CONST_INT_P (XEXP (y, 1))) 9307 { 9308 offset = XEXP (y, 1); 9309 y = XEXP (y, 0); 9310 } 9311 9312 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL) 9313 { 9314 y = XVECEXP (y, 0, 0); 9315 9316#ifdef HAVE_AS_TLS 9317 /* Do not associate thread-local symbols with the original 9318 constant pool symbol. 
*/ 9319 if (TARGET_XCOFF 9320 && SYMBOL_REF_P (y) 9321 && CONSTANT_POOL_ADDRESS_P (y) 9322 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y))) 9323 return orig_x; 9324#endif 9325 9326 if (offset != NULL_RTX) 9327 y = gen_rtx_PLUS (Pmode, y, offset); 9328 if (!MEM_P (orig_x)) 9329 return y; 9330 else 9331 return replace_equiv_address_nv (orig_x, y); 9332 } 9333 9334 if (TARGET_MACHO 9335 && GET_CODE (orig_x) == LO_SUM 9336 && GET_CODE (XEXP (orig_x, 1)) == CONST) 9337 { 9338 y = XEXP (XEXP (orig_x, 1), 0); 9339 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET) 9340 return XVECEXP (y, 0, 0); 9341 } 9342 9343 return orig_x; 9344} 9345 9346/* Return true if X shouldn't be emitted into the debug info. 9347 The linker doesn't like .toc section references from 9348 .debug_* sections, so reject .toc section symbols. */ 9349 9350static bool 9351rs6000_const_not_ok_for_debug_p (rtx x) 9352{ 9353 if (GET_CODE (x) == UNSPEC) 9354 return true; 9355 if (SYMBOL_REF_P (x) 9356 && CONSTANT_POOL_ADDRESS_P (x)) 9357 { 9358 rtx c = get_pool_constant (x); 9359 machine_mode cmode = get_pool_mode (x); 9360 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode)) 9361 return true; 9362 } 9363 9364 return false; 9365} 9366 9367/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ 9368 9369static bool 9370rs6000_legitimate_combined_insn (rtx_insn *insn) 9371{ 9372 int icode = INSN_CODE (insn); 9373 9374 /* Reject creating doloop insns. Combine should not be allowed 9375 to create these for a number of reasons: 9376 1) In a nested loop, if combine creates one of these in an 9377 outer loop and the register allocator happens to allocate ctr 9378 to the outer loop insn, then the inner loop can't use ctr. 9379 Inner loops ought to be more highly optimized. 9380 2) Combine often wants to create one of these from what was 9381 originally a three insn sequence, first combining the three 9382 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not 9383 allocated ctr, the splitter takes us back to the three insn 9384 sequence. It's better to stop combine at the two insn 9385 sequence. 9386 3) Faced with not being able to allocate ctr for ctrsi/ctrdi 9387 insns, the register allocator sometimes uses floating point 9388 or vector registers for the pseudo. Since ctrsi/ctrdi is a 9389 jump insn and output reloads are not implemented for jumps, 9390 the ctrsi/ctrdi splitters need to handle all possible cases. 9391 That's a pain, and it gets to be seriously difficult when a 9392 splitter that runs after reload needs memory to transfer from 9393 a gpr to fpr. See PR70098 and PR71763 which are not fixed 9394 for the difficult case. It's better to not create problems 9395 in the first place. */ 9396 if (icode != CODE_FOR_nothing 9397 && (icode == CODE_FOR_bdz_si 9398 || icode == CODE_FOR_bdz_di 9399 || icode == CODE_FOR_bdnz_si 9400 || icode == CODE_FOR_bdnz_di 9401 || icode == CODE_FOR_bdztf_si 9402 || icode == CODE_FOR_bdztf_di 9403 || icode == CODE_FOR_bdnztf_si 9404 || icode == CODE_FOR_bdnztf_di)) 9405 return false; 9406 9407 return true; 9408} 9409 9410/* Construct the SYMBOL_REF for the tls_get_addr function. */ 9411 9412static GTY(()) rtx rs6000_tls_symbol; 9413static rtx 9414rs6000_tls_get_addr (void) 9415{ 9416 if (!rs6000_tls_symbol) 9417 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr"); 9418 9419 return rs6000_tls_symbol; 9420} 9421 9422/* Construct the SYMBOL_REF for TLS GOT references. 
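   The symbol is _GLOBAL_OFFSET_TABLE_, created once and cached in
   rs6000_got_symbol; the 32-bit TLS sequences below use it, for instance,
   when the GOT pointer has to be materialized explicitly.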
*/ 9423 9424static GTY(()) rtx rs6000_got_symbol; 9425rtx 9426rs6000_got_sym (void) 9427{ 9428 if (!rs6000_got_symbol) 9429 { 9430 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 9431 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL; 9432 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL; 9433 } 9434 9435 return rs6000_got_symbol; 9436} 9437 9438/* AIX Thread-Local Address support. */ 9439 9440static rtx 9441rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model) 9442{ 9443 rtx sym, mem, tocref, tlsreg, tmpreg, dest; 9444 const char *name; 9445 char *tlsname; 9446 9447 /* Place addr into TOC constant pool. */ 9448 sym = force_const_mem (GET_MODE (addr), addr); 9449 9450 /* Output the TOC entry and create the MEM referencing the value. */ 9451 if (constant_pool_expr_p (XEXP (sym, 0)) 9452 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode)) 9453 { 9454 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX); 9455 mem = gen_const_mem (Pmode, tocref); 9456 set_mem_alias_set (mem, get_TOC_alias_set ()); 9457 } 9458 else 9459 return sym; 9460 9461 /* Use global-dynamic for local-dynamic. */ 9462 if (model == TLS_MODEL_GLOBAL_DYNAMIC 9463 || model == TLS_MODEL_LOCAL_DYNAMIC) 9464 { 9465 /* Create new TOC reference for @m symbol. */ 9466 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0); 9467 tlsname = XALLOCAVEC (char, strlen (name) + 1); 9468 strcpy (tlsname, "*LCM"); 9469 strcat (tlsname, name + 3); 9470 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname)); 9471 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL; 9472 tocref = create_TOC_reference (modaddr, NULL_RTX); 9473 rtx modmem = gen_const_mem (Pmode, tocref); 9474 set_mem_alias_set (modmem, get_TOC_alias_set ()); 9475 9476 rtx modreg = gen_reg_rtx (Pmode); 9477 emit_insn (gen_rtx_SET (modreg, modmem)); 9478 9479 tmpreg = gen_reg_rtx (Pmode); 9480 emit_insn (gen_rtx_SET (tmpreg, mem)); 9481 9482 dest = gen_reg_rtx (Pmode); 9483 if (TARGET_32BIT) 9484 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg)); 9485 else 9486 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg)); 9487 return dest; 9488 } 9489 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */ 9490 else if (TARGET_32BIT) 9491 { 9492 tlsreg = gen_reg_rtx (SImode); 9493 emit_insn (gen_tls_get_tpointer (tlsreg)); 9494 } 9495 else 9496 { 9497 tlsreg = gen_rtx_REG (DImode, 13); 9498 xcoff_tls_exec_model_detected = true; 9499 } 9500 9501 /* Load the TOC value into temporary register. */ 9502 tmpreg = gen_reg_rtx (Pmode); 9503 emit_insn (gen_rtx_SET (tmpreg, mem)); 9504 set_unique_reg_note (get_last_insn (), REG_EQUAL, 9505 gen_rtx_MINUS (Pmode, addr, tlsreg)); 9506 9507 /* Add TOC symbol value to TLS pointer. */ 9508 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg)); 9509 9510 return dest; 9511} 9512 9513/* Passes the tls arg value for global dynamic and local dynamic 9514 emit_library_call_value in rs6000_legitimize_tls_address to 9515 rs6000_call_aix and rs6000_call_sysv. This is used to emit the 9516 marker relocs put on __tls_get_addr calls. */ 9517static rtx global_tlsarg; 9518 9519/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 9520 this (thread-local) address. 
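   Which sequence is emitted depends on the access model chosen for ADDR:
   local-exec forms a tprel offset from the thread pointer (r13 in 64-bit
   mode, r2 in 32-bit mode), initial-exec loads that offset from the GOT and
   adds it to the thread pointer, and global-dynamic / local-dynamic end up
   calling __tls_get_addr. XCOFF targets are handled by
   rs6000_legitimize_tls_address_aix above.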
*/ 9521 9522static rtx 9523rs6000_legitimize_tls_address (rtx addr, enum tls_model model) 9524{ 9525 rtx dest, insn; 9526 9527 if (TARGET_XCOFF) 9528 return rs6000_legitimize_tls_address_aix (addr, model); 9529 9530 dest = gen_reg_rtx (Pmode); 9531 if (model == TLS_MODEL_LOCAL_EXEC 9532 && (rs6000_tls_size == 16 || rs6000_pcrel_p ())) 9533 { 9534 rtx tlsreg; 9535 9536 if (TARGET_64BIT) 9537 { 9538 tlsreg = gen_rtx_REG (Pmode, 13); 9539 insn = gen_tls_tprel_64 (dest, tlsreg, addr); 9540 } 9541 else 9542 { 9543 tlsreg = gen_rtx_REG (Pmode, 2); 9544 insn = gen_tls_tprel_32 (dest, tlsreg, addr); 9545 } 9546 emit_insn (insn); 9547 } 9548 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32) 9549 { 9550 rtx tlsreg, tmp; 9551 9552 tmp = gen_reg_rtx (Pmode); 9553 if (TARGET_64BIT) 9554 { 9555 tlsreg = gen_rtx_REG (Pmode, 13); 9556 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr); 9557 } 9558 else 9559 { 9560 tlsreg = gen_rtx_REG (Pmode, 2); 9561 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr); 9562 } 9563 emit_insn (insn); 9564 if (TARGET_64BIT) 9565 insn = gen_tls_tprel_lo_64 (dest, tmp, addr); 9566 else 9567 insn = gen_tls_tprel_lo_32 (dest, tmp, addr); 9568 emit_insn (insn); 9569 } 9570 else 9571 { 9572 rtx got, tga, tmp1, tmp2; 9573 9574 /* We currently use relocations like @got@tlsgd for tls, which 9575 means the linker will handle allocation of tls entries, placing 9576 them in the .got section. So use a pointer to the .got section, 9577 not one to secondary TOC sections used by 64-bit -mminimal-toc, 9578 or to secondary GOT sections used by 32-bit -fPIC. */ 9579 if (rs6000_pcrel_p ()) 9580 got = const0_rtx; 9581 else if (TARGET_64BIT) 9582 got = gen_rtx_REG (Pmode, 2); 9583 else 9584 { 9585 if (flag_pic == 1) 9586 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 9587 else 9588 { 9589 rtx gsym = rs6000_got_sym (); 9590 got = gen_reg_rtx (Pmode); 9591 if (flag_pic == 0) 9592 rs6000_emit_move (got, gsym, Pmode); 9593 else 9594 { 9595 rtx mem, lab; 9596 9597 tmp1 = gen_reg_rtx (Pmode); 9598 tmp2 = gen_reg_rtx (Pmode); 9599 mem = gen_const_mem (Pmode, tmp1); 9600 lab = gen_label_rtx (); 9601 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab)); 9602 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); 9603 if (TARGET_LINK_STACK) 9604 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4))); 9605 emit_move_insn (tmp2, mem); 9606 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2)); 9607 set_unique_reg_note (last, REG_EQUAL, gsym); 9608 } 9609 } 9610 } 9611 9612 if (model == TLS_MODEL_GLOBAL_DYNAMIC) 9613 { 9614 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got), 9615 UNSPEC_TLSGD); 9616 tga = rs6000_tls_get_addr (); 9617 rtx argreg = gen_rtx_REG (Pmode, 3); 9618 emit_insn (gen_rtx_SET (argreg, arg)); 9619 global_tlsarg = arg; 9620 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode); 9621 global_tlsarg = NULL_RTX; 9622 9623 /* Make a note so that the result of this call can be CSEd. 
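   The note is a REG_EQUAL whose value is UNSPEC_TLS_GET_ADDR wrapped around
   a copy of the UNSPEC_TLSGD argument, so two __tls_get_addr results for the
   same symbol can be recognized as equal.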
*/ 9624 rtvec vec = gen_rtvec (1, copy_rtx (arg)); 9625 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR); 9626 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns); 9627 } 9628 else if (model == TLS_MODEL_LOCAL_DYNAMIC) 9629 { 9630 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD); 9631 tga = rs6000_tls_get_addr (); 9632 tmp1 = gen_reg_rtx (Pmode); 9633 rtx argreg = gen_rtx_REG (Pmode, 3); 9634 emit_insn (gen_rtx_SET (argreg, arg)); 9635 global_tlsarg = arg; 9636 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode); 9637 global_tlsarg = NULL_RTX; 9638 9639 /* Make a note so that the result of this call can be CSEd. */ 9640 rtvec vec = gen_rtvec (1, copy_rtx (arg)); 9641 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR); 9642 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns); 9643 9644 if (rs6000_tls_size == 16 || rs6000_pcrel_p ()) 9645 { 9646 if (TARGET_64BIT) 9647 insn = gen_tls_dtprel_64 (dest, tmp1, addr); 9648 else 9649 insn = gen_tls_dtprel_32 (dest, tmp1, addr); 9650 } 9651 else if (rs6000_tls_size == 32) 9652 { 9653 tmp2 = gen_reg_rtx (Pmode); 9654 if (TARGET_64BIT) 9655 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr); 9656 else 9657 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr); 9658 emit_insn (insn); 9659 if (TARGET_64BIT) 9660 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr); 9661 else 9662 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr); 9663 } 9664 else 9665 { 9666 tmp2 = gen_reg_rtx (Pmode); 9667 if (TARGET_64BIT) 9668 insn = gen_tls_got_dtprel_64 (tmp2, got, addr); 9669 else 9670 insn = gen_tls_got_dtprel_32 (tmp2, got, addr); 9671 emit_insn (insn); 9672 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1)); 9673 } 9674 emit_insn (insn); 9675 } 9676 else 9677 { 9678 /* IE, or 64-bit offset LE. */ 9679 tmp2 = gen_reg_rtx (Pmode); 9680 if (TARGET_64BIT) 9681 insn = gen_tls_got_tprel_64 (tmp2, got, addr); 9682 else 9683 insn = gen_tls_got_tprel_32 (tmp2, got, addr); 9684 emit_insn (insn); 9685 if (rs6000_pcrel_p ()) 9686 { 9687 if (TARGET_64BIT) 9688 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr); 9689 else 9690 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr); 9691 } 9692 else if (TARGET_64BIT) 9693 insn = gen_tls_tls_64 (dest, tmp2, addr); 9694 else 9695 insn = gen_tls_tls_32 (dest, tmp2, addr); 9696 emit_insn (insn); 9697 } 9698 } 9699 9700 return dest; 9701} 9702 9703/* Only create the global variable for the stack protect guard if we are using 9704 the global flavor of that guard. */ 9705static tree 9706rs6000_init_stack_protect_guard (void) 9707{ 9708 if (rs6000_stack_protector_guard == SSP_GLOBAL) 9709 return default_stack_protect_guard (); 9710 9711 return NULL_TREE; 9712} 9713 9714/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ 9715 9716static bool 9717rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 9718{ 9719 if (GET_CODE (x) == HIGH 9720 && GET_CODE (XEXP (x, 0)) == UNSPEC) 9721 return true; 9722 9723 /* A TLS symbol in the TOC cannot contain a sum. */ 9724 if (GET_CODE (x) == CONST 9725 && GET_CODE (XEXP (x, 0)) == PLUS 9726 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0)) 9727 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0) 9728 return true; 9729 9730 /* Allow AIX TOC TLS symbols in the constant pool, 9731 but not ELF TLS symbols. 
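   (rs6000_legitimize_tls_address_aix above relies on being able to force a
   TLS address into the TOC with force_const_mem, so the rejection here is
   limited to ELF targets.)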
*/ 9732 return TARGET_ELF && tls_referenced_p (x); 9733} 9734 9735/* Return true iff the given SYMBOL_REF refers to a constant pool entry 9736 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF 9737 can be addressed relative to the toc pointer. */ 9738 9739static bool 9740use_toc_relative_ref (rtx sym, machine_mode mode) 9741{ 9742 return ((constant_pool_expr_p (sym) 9743 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), 9744 get_pool_mode (sym))) 9745 || (TARGET_CMODEL == CMODEL_MEDIUM 9746 && SYMBOL_REF_LOCAL_P (sym) 9747 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT)); 9748} 9749 9750/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression 9751 that is a valid memory address for an instruction. 9752 The MODE argument is the machine mode for the MEM expression 9753 that wants to use this address. 9754 9755 On the RS/6000, there are four valid address: a SYMBOL_REF that 9756 refers to a constant pool entry of an address (or the sum of it 9757 plus a constant), a short (16-bit signed) constant plus a register, 9758 the sum of two registers, or a register indirect, possibly with an 9759 auto-increment. For DFmode, DDmode and DImode with a constant plus 9760 register, we must ensure that both words are addressable or PowerPC64 9761 with offset word aligned. 9762 9763 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs, 9764 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used 9765 because adjacent memory cells are accessed by adding word-sized offsets 9766 during assembly output. */ 9767static bool 9768rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict) 9769{ 9770 bool reg_offset_p = reg_offset_addressing_ok_p (mode); 9771 bool quad_offset_p = mode_supports_dq_form (mode); 9772 9773 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) 9774 return 0; 9775 9776 /* Handle unaligned altivec lvx/stvx type addresses. */ 9777 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) 9778 && GET_CODE (x) == AND 9779 && CONST_INT_P (XEXP (x, 1)) 9780 && INTVAL (XEXP (x, 1)) == -16) 9781 { 9782 x = XEXP (x, 0); 9783 return (legitimate_indirect_address_p (x, reg_ok_strict) 9784 || legitimate_indexed_address_p (x, reg_ok_strict) 9785 || virtual_stack_registers_memory_p (x)); 9786 } 9787 9788 if (legitimate_indirect_address_p (x, reg_ok_strict)) 9789 return 1; 9790 if (TARGET_UPDATE 9791 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) 9792 && mode_supports_pre_incdec_p (mode) 9793 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) 9794 return 1; 9795 9796 /* Handle prefixed addresses (PC-relative or 34-bit offset). */ 9797 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT)) 9798 return 1; 9799 9800 /* Handle restricted vector d-form offsets in ISA 3.0. */ 9801 if (quad_offset_p) 9802 { 9803 if (quad_address_p (x, mode, reg_ok_strict)) 9804 return 1; 9805 } 9806 else if (virtual_stack_registers_memory_p (x)) 9807 return 1; 9808 9809 else if (reg_offset_p) 9810 { 9811 if (legitimate_small_data_p (mode, x)) 9812 return 1; 9813 if (legitimate_constant_pool_address_p (x, mode, 9814 reg_ok_strict || lra_in_progress)) 9815 return 1; 9816 } 9817 9818 /* For TImode, if we have TImode in VSX registers, only allow register 9819 indirect addresses. This will allow the values to go in either GPRs 9820 or VSX registers without reloading. The vector types would tend to 9821 go into VSX registers, so we allow REG+REG, while TImode seems 9822 somewhat split, in that some uses are GPR based, and some VSX based. 
*/ 9823 /* FIXME: We could loosen this by changing the following to 9824 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX) 9825 but currently we cannot allow REG+REG addressing for TImode. See 9826 PR72827 for complete details on how this ends up hoodwinking DSE. */ 9827 if (mode == TImode && TARGET_VSX) 9828 return 0; 9829 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ 9830 if (! reg_ok_strict 9831 && reg_offset_p 9832 && GET_CODE (x) == PLUS 9833 && REG_P (XEXP (x, 0)) 9834 && (XEXP (x, 0) == virtual_stack_vars_rtx 9835 || XEXP (x, 0) == arg_pointer_rtx) 9836 && CONST_INT_P (XEXP (x, 1))) 9837 return 1; 9838 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false)) 9839 return 1; 9840 if (!FLOAT128_2REG_P (mode) 9841 && (TARGET_HARD_FLOAT 9842 || TARGET_POWERPC64 9843 || (mode != DFmode && mode != DDmode)) 9844 && (TARGET_POWERPC64 || mode != DImode) 9845 && (mode != TImode || VECTOR_MEM_VSX_P (TImode)) 9846 && mode != PTImode 9847 && !avoiding_indexed_address_p (mode) 9848 && legitimate_indexed_address_p (x, reg_ok_strict)) 9849 return 1; 9850 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY 9851 && mode_supports_pre_modify_p (mode) 9852 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict) 9853 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), 9854 reg_ok_strict, false) 9855 || (!avoiding_indexed_address_p (mode) 9856 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict))) 9857 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) 9858 { 9859 /* There is no prefixed version of the load/store with update. */ 9860 rtx addr = XEXP (x, 1); 9861 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT); 9862 } 9863 if (reg_offset_p && !quad_offset_p 9864 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict)) 9865 return 1; 9866 return 0; 9867} 9868 9869/* Debug version of rs6000_legitimate_address_p. */ 9870static bool 9871rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, 9872 bool reg_ok_strict) 9873{ 9874 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); 9875 fprintf (stderr, 9876 "\nrs6000_legitimate_address_p: return = %s, mode = %s, " 9877 "strict = %d, reload = %s, code = %s\n", 9878 ret ? "true" : "false", 9879 GET_MODE_NAME (mode), 9880 reg_ok_strict, 9881 (reload_completed ? "after" : "before"), 9882 GET_RTX_NAME (GET_CODE (x))); 9883 debug_rtx (x); 9884 9885 return ret; 9886} 9887 9888/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */ 9889 9890static bool 9891rs6000_mode_dependent_address_p (const_rtx addr, 9892 addr_space_t as ATTRIBUTE_UNUSED) 9893{ 9894 return rs6000_mode_dependent_address_ptr (addr); 9895} 9896 9897/* Go to LABEL if ADDR (a legitimate address expression) 9898 has an effect that depends on the machine mode it is used for. 9899 9900 On the RS/6000 this is true of all integral offsets (since AltiVec 9901 and VSX modes don't allow them) or is a pre-increment or decrement. 9902 9903 ??? Except that due to conceptual problems in offsettable_address_p 9904 we can't really report the problems of integral offsets. So leave 9905 this assuming that the adjustable offset must be valid for the 9906 sub-words of a TFmode operand, which is what we had before. */ 9907 9908static bool 9909rs6000_mode_dependent_address (const_rtx addr) 9910{ 9911 switch (GET_CODE (addr)) 9912 { 9913 case PLUS: 9914 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx 9915 is considered a legitimate address before reload, so there 9916 are no offset restrictions in that case. 
Note that this 9917 condition is safe in strict mode because any address involving 9918 virtual_stack_vars_rtx or arg_pointer_rtx would already have 9919 been rejected as illegitimate. */ 9920 if (XEXP (addr, 0) != virtual_stack_vars_rtx 9921 && XEXP (addr, 0) != arg_pointer_rtx 9922 && CONST_INT_P (XEXP (addr, 1))) 9923 { 9924 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); 9925 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12; 9926 if (TARGET_PREFIXED) 9927 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra); 9928 else 9929 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra); 9930 } 9931 break; 9932 9933 case LO_SUM: 9934 /* Anything in the constant pool is sufficiently aligned that 9935 all bytes have the same high part address. */ 9936 return !legitimate_constant_pool_address_p (addr, QImode, false); 9937 9938 /* Auto-increment cases are now treated generically in recog.cc. */ 9939 case PRE_MODIFY: 9940 return TARGET_UPDATE; 9941 9942 /* AND is only allowed in Altivec loads. */ 9943 case AND: 9944 return true; 9945 9946 default: 9947 break; 9948 } 9949 9950 return false; 9951} 9952 9953/* Debug version of rs6000_mode_dependent_address. */ 9954static bool 9955rs6000_debug_mode_dependent_address (const_rtx addr) 9956{ 9957 bool ret = rs6000_mode_dependent_address (addr); 9958 9959 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n", 9960 ret ? "true" : "false"); 9961 debug_rtx (addr); 9962 9963 return ret; 9964} 9965 9966/* Implement FIND_BASE_TERM. */ 9967 9968rtx 9969rs6000_find_base_term (rtx op) 9970{ 9971 rtx base; 9972 9973 base = op; 9974 if (GET_CODE (base) == CONST) 9975 base = XEXP (base, 0); 9976 if (GET_CODE (base) == PLUS) 9977 base = XEXP (base, 0); 9978 if (GET_CODE (base) == UNSPEC) 9979 switch (XINT (base, 1)) 9980 { 9981 case UNSPEC_TOCREL: 9982 case UNSPEC_MACHOPIC_OFFSET: 9983 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term 9984 for aliasing purposes. */ 9985 return XVECEXP (base, 0, 0); 9986 } 9987 9988 return op; 9989} 9990 9991/* More elaborate version of recog's offsettable_memref_p predicate 9992 that works around the ??? note of rs6000_mode_dependent_address. 9993 In particular it accepts 9994 9995 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8]))) 9996 9997 in 32-bit mode, that the recog predicate rejects. */ 9998 9999static bool 10000rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict) 10001{ 10002 bool worst_case; 10003 10004 if (!MEM_P (op)) 10005 return false; 10006 10007 /* First mimic offsettable_memref_p. */ 10008 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0))) 10009 return true; 10010 10011 /* offsettable_address_p invokes rs6000_mode_dependent_address, but 10012 the latter predicate knows nothing about the mode of the memory 10013 reference and, therefore, assumes that it is the largest supported 10014 mode (TFmode). As a consequence, legitimate offsettable memory 10015 references are rejected. rs6000_legitimate_offset_address_p contains 10016 the correct logic for the PLUS case of rs6000_mode_dependent_address, 10017 at least with a little bit of help here given that we know the 10018 actual registers used. */ 10019 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT) 10020 || GET_MODE_SIZE (reg_mode) == 4); 10021 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), 10022 strict, worst_case); 10023} 10024 10025/* Determine the reassociation width to be used in reassociate_bb. 
10026 This takes into account how many parallel operations we 10027 can actually do of a given type, and also the latency. 10028 P8: 10029 int add/sub 6/cycle 10030 mul 2/cycle 10031 vect add/sub/mul 2/cycle 10032 fp add/sub/mul 2/cycle 10033 dfp 1/cycle 10034*/ 10035 10036static int 10037rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, 10038 machine_mode mode) 10039{ 10040 switch (rs6000_tune) 10041 { 10042 case PROCESSOR_POWER8: 10043 case PROCESSOR_POWER9: 10044 case PROCESSOR_POWER10: 10045 if (DECIMAL_FLOAT_MODE_P (mode)) 10046 return 1; 10047 if (VECTOR_MODE_P (mode)) 10048 return 4; 10049 if (INTEGRAL_MODE_P (mode)) 10050 return 1; 10051 if (FLOAT_MODE_P (mode)) 10052 return 4; 10053 break; 10054 default: 10055 break; 10056 } 10057 return 1; 10058} 10059 10060/* Change register usage conditional on target flags. */ 10061static void 10062rs6000_conditional_register_usage (void) 10063{ 10064 int i; 10065 10066 if (TARGET_DEBUG_TARGET) 10067 fprintf (stderr, "rs6000_conditional_register_usage called\n"); 10068 10069 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */ 10070 if (TARGET_64BIT) 10071 fixed_regs[13] = call_used_regs[13] = 1; 10072 10073 /* Conditionally disable FPRs. */ 10074 if (TARGET_SOFT_FLOAT) 10075 for (i = 32; i < 64; i++) 10076 fixed_regs[i] = call_used_regs[i] = 1; 10077 10078 /* The TOC register is not killed across calls in a way that is 10079 visible to the compiler. */ 10080 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 10081 call_used_regs[2] = 0; 10082 10083 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2) 10084 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 10085 10086 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1) 10087 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] 10088 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 10089 10090 if (DEFAULT_ABI == ABI_DARWIN && flag_pic) 10091 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] 10092 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 10093 10094 if (TARGET_TOC && TARGET_MINIMAL_TOC) 10095 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 10096 10097 if (!TARGET_ALTIVEC && !TARGET_VSX) 10098 { 10099 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) 10100 fixed_regs[i] = call_used_regs[i] = 1; 10101 call_used_regs[VRSAVE_REGNO] = 1; 10102 } 10103 10104 if (TARGET_ALTIVEC || TARGET_VSX) 10105 global_regs[VSCR_REGNO] = 1; 10106 10107 if (TARGET_ALTIVEC_ABI) 10108 { 10109 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i) 10110 call_used_regs[i] = 1; 10111 10112 /* AIX reserves VR20:31 in non-extended ABI mode. */ 10113 if (TARGET_XCOFF && !rs6000_aix_extabi) 10114 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i) 10115 fixed_regs[i] = call_used_regs[i] = 1; 10116 } 10117} 10118 10119 10120/* Output insns to set DEST equal to the constant SOURCE as a series of 10121 lis, ori and shl instructions and return TRUE. */ 10122 10123bool 10124rs6000_emit_set_const (rtx dest, rtx source) 10125{ 10126 machine_mode mode = GET_MODE (dest); 10127 rtx temp, set; 10128 rtx_insn *insn; 10129 HOST_WIDE_INT c; 10130 10131 gcc_checking_assert (CONST_INT_P (source)); 10132 c = INTVAL (source); 10133 switch (mode) 10134 { 10135 case E_QImode: 10136 case E_HImode: 10137 emit_insn (gen_rtx_SET (dest, source)); 10138 return true; 10139 10140 case E_SImode: 10141 temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (SImode); 10142 10143 emit_insn (gen_rtx_SET (copy_rtx (temp), 10144 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff))); 10145 emit_insn (gen_rtx_SET (dest, 10146 gen_rtx_IOR (SImode, copy_rtx (temp), 10147 GEN_INT (c & 0xffff)))); 10148 break; 10149 10150 case E_DImode: 10151 if (!TARGET_POWERPC64) 10152 { 10153 rtx hi, lo; 10154 10155 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0, 10156 DImode); 10157 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, 10158 DImode); 10159 emit_move_insn (hi, GEN_INT (c >> 32)); 10160 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000; 10161 emit_move_insn (lo, GEN_INT (c)); 10162 } 10163 else 10164 rs6000_emit_set_long_const (dest, c); 10165 break; 10166 10167 default: 10168 gcc_unreachable (); 10169 } 10170 10171 insn = get_last_insn (); 10172 set = single_set (insn); 10173 if (! CONSTANT_P (SET_SRC (set))) 10174 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c)); 10175 10176 return true; 10177} 10178 10179/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. 10180 Output insns to set DEST equal to the constant C as a series of 10181 lis, ori and shl instructions. */ 10182 10183static void 10184rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) 10185{ 10186 rtx temp; 10187 HOST_WIDE_INT ud1, ud2, ud3, ud4; 10188 10189 ud1 = c & 0xffff; 10190 c = c >> 16; 10191 ud2 = c & 0xffff; 10192 c = c >> 16; 10193 ud3 = c & 0xffff; 10194 c = c >> 16; 10195 ud4 = c & 0xffff; 10196 10197 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) 10198 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) 10199 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); 10200 10201 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) 10202 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) 10203 { 10204 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 10205 10206 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, 10207 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); 10208 if (ud1 != 0) 10209 emit_move_insn (dest, 10210 gen_rtx_IOR (DImode, copy_rtx (temp), 10211 GEN_INT (ud1))); 10212 } 10213 else if (ud3 == 0 && ud4 == 0) 10214 { 10215 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 10216 10217 gcc_assert (ud2 & 0x8000); 10218 emit_move_insn (copy_rtx (temp), 10219 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); 10220 if (ud1 != 0) 10221 emit_move_insn (copy_rtx (temp), 10222 gen_rtx_IOR (DImode, copy_rtx (temp), 10223 GEN_INT (ud1))); 10224 emit_move_insn (dest, 10225 gen_rtx_ZERO_EXTEND (DImode, 10226 gen_lowpart (SImode, 10227 copy_rtx (temp)))); 10228 } 10229 else if (ud1 == ud3 && ud2 == ud4) 10230 { 10231 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 10232 HOST_WIDE_INT num = (ud2 << 16) | ud1; 10233 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000); 10234 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); 10235 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); 10236 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); 10237 } 10238 else if ((ud4 == 0xffff && (ud3 & 0x8000)) 10239 || (ud4 == 0 && ! (ud3 & 0x8000))) 10240 { 10241 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 10242 10243 emit_move_insn (copy_rtx (temp), 10244 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000)); 10245 if (ud2 != 0) 10246 emit_move_insn (copy_rtx (temp), 10247 gen_rtx_IOR (DImode, copy_rtx (temp), 10248 GEN_INT (ud2))); 10249 emit_move_insn (ud1 != 0 ? 
copy_rtx (temp) : dest, 10250 gen_rtx_ASHIFT (DImode, copy_rtx (temp), 10251 GEN_INT (16))); 10252 if (ud1 != 0) 10253 emit_move_insn (dest, 10254 gen_rtx_IOR (DImode, copy_rtx (temp), 10255 GEN_INT (ud1))); 10256 } 10257 else 10258 { 10259 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 10260 10261 emit_move_insn (copy_rtx (temp), 10262 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000)); 10263 if (ud3 != 0) 10264 emit_move_insn (copy_rtx (temp), 10265 gen_rtx_IOR (DImode, copy_rtx (temp), 10266 GEN_INT (ud3))); 10267 10268 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest, 10269 gen_rtx_ASHIFT (DImode, copy_rtx (temp), 10270 GEN_INT (32))); 10271 if (ud2 != 0) 10272 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, 10273 gen_rtx_IOR (DImode, copy_rtx (temp), 10274 GEN_INT (ud2 << 16))); 10275 if (ud1 != 0) 10276 emit_move_insn (dest, 10277 gen_rtx_IOR (DImode, copy_rtx (temp), 10278 GEN_INT (ud1))); 10279 } 10280} 10281 10282/* Helper for the following. Get rid of [r+r] memory refs 10283 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */ 10284 10285static void 10286rs6000_eliminate_indexed_memrefs (rtx operands[2]) 10287{ 10288 if (MEM_P (operands[0]) 10289 && !REG_P (XEXP (operands[0], 0)) 10290 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), 10291 GET_MODE (operands[0]), false)) 10292 operands[0] 10293 = replace_equiv_address (operands[0], 10294 copy_addr_to_reg (XEXP (operands[0], 0))); 10295 10296 if (MEM_P (operands[1]) 10297 && !REG_P (XEXP (operands[1], 0)) 10298 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0), 10299 GET_MODE (operands[1]), false)) 10300 operands[1] 10301 = replace_equiv_address (operands[1], 10302 copy_addr_to_reg (XEXP (operands[1], 0))); 10303} 10304 10305/* Generate a vector of constants to permute MODE for a little-endian 10306 storage operation by swapping the two halves of a vector. */ 10307static rtvec 10308rs6000_const_vec (machine_mode mode) 10309{ 10310 int i, subparts; 10311 rtvec v; 10312 10313 switch (mode) 10314 { 10315 case E_V1TImode: 10316 subparts = 1; 10317 break; 10318 case E_V2DFmode: 10319 case E_V2DImode: 10320 subparts = 2; 10321 break; 10322 case E_V4SFmode: 10323 case E_V4SImode: 10324 subparts = 4; 10325 break; 10326 case E_V8HImode: 10327 subparts = 8; 10328 break; 10329 case E_V16QImode: 10330 subparts = 16; 10331 break; 10332 default: 10333 gcc_unreachable(); 10334 } 10335 10336 v = rtvec_alloc (subparts); 10337 10338 for (i = 0; i < subparts / 2; ++i) 10339 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2); 10340 for (i = subparts / 2; i < subparts; ++i) 10341 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2); 10342 10343 return v; 10344} 10345 10346/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or 10347 store operation. */ 10348void 10349rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode) 10350{ 10351 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode)); 10352 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode)); 10353 10354 /* Scalar permutations are easier to express in integer modes rather than 10355 floating-point modes, so cast them here. We use V1TImode instead 10356 of TImode to ensure that the values don't go through GPRs. 
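   For the remaining vector modes the selector built from rs6000_const_vec
   swaps the two 64-bit halves of the vector; for V4SImode, for example, it
   selects elements { 2, 3, 0, 1 }.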
*/ 10357 if (FLOAT128_VECTOR_P (mode)) 10358 { 10359 dest = gen_lowpart (V1TImode, dest); 10360 source = gen_lowpart (V1TImode, source); 10361 mode = V1TImode; 10362 } 10363 10364 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single 10365 scalar. */ 10366 if (mode == TImode || mode == V1TImode) 10367 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source, 10368 GEN_INT (64)))); 10369 else 10370 { 10371 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); 10372 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par))); 10373 } 10374} 10375 10376/* Emit a little-endian load from vector memory location SOURCE to VSX 10377 register DEST in mode MODE. The load is done with two permuting 10378 insns that represent an lxvd2x and xxpermdi. */ 10379void 10380rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) 10381{ 10382 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, 10383 V1TImode). */ 10384 if (mode == TImode || mode == V1TImode) 10385 { 10386 mode = V2DImode; 10387 dest = gen_lowpart (V2DImode, dest); 10388 source = adjust_address (source, V2DImode, 0); 10389 } 10390 10391 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; 10392 rs6000_emit_le_vsx_permute (tmp, source, mode); 10393 rs6000_emit_le_vsx_permute (dest, tmp, mode); 10394} 10395 10396/* Emit a little-endian store to vector memory location DEST from VSX 10397 register SOURCE in mode MODE. The store is done with two permuting 10398 insns that represent an xxpermdi and an stxvd2x. */ 10399void 10400rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) 10401{ 10402 /* This should never be called after LRA. */ 10403 gcc_assert (can_create_pseudo_p ()); 10404 10405 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, 10406 V1TImode). */ 10407 if (mode == TImode || mode == V1TImode) 10408 { 10409 mode = V2DImode; 10410 dest = adjust_address (dest, V2DImode, 0); 10411 source = gen_lowpart (V2DImode, source); 10412 } 10413 10414 rtx tmp = gen_reg_rtx_and_attrs (source); 10415 rs6000_emit_le_vsx_permute (tmp, source, mode); 10416 rs6000_emit_le_vsx_permute (dest, tmp, mode); 10417} 10418 10419/* Emit a sequence representing a little-endian VSX load or store, 10420 moving data from SOURCE to DEST in mode MODE. This is done 10421 separately from rs6000_emit_move to ensure it is called only 10422 during expand. LE VSX loads and stores introduced later are 10423 handled with a split. The expand-time RTL generation allows 10424 us to optimize away redundant pairs of register-permutes. */ 10425void 10426rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) 10427{ 10428 gcc_assert (!BYTES_BIG_ENDIAN 10429 && VECTOR_MEM_VSX_P (mode) 10430 && !TARGET_P9_VECTOR 10431 && !gpr_or_gpr_p (dest, source) 10432 && (MEM_P (source) ^ MEM_P (dest))); 10433 10434 if (MEM_P (source)) 10435 { 10436 gcc_assert (REG_P (dest) || SUBREG_P (dest)); 10437 rs6000_emit_le_vsx_load (dest, source, mode); 10438 } 10439 else 10440 { 10441 if (!REG_P (source)) 10442 source = force_reg (mode, source); 10443 rs6000_emit_le_vsx_store (dest, source, mode); 10444 } 10445} 10446 10447/* Return whether an SFmode or SImode move can be done without converting one 10448 mode to another. This arises when we have: 10449 10450 (SUBREG:SF (REG:SI ...)) 10451 (SUBREG:SI (REG:SF ...)) 10452 10453 and one of the values is in a floating point/vector register, where SFmode 10454 scalars are stored in DFmode format. 
*/ 10455 10456bool 10457valid_sf_si_move (rtx dest, rtx src, machine_mode mode) 10458{ 10459 if (TARGET_ALLOW_SF_SUBREG) 10460 return true; 10461 10462 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) 10463 return true; 10464 10465 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode)) 10466 return true; 10467 10468 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */ 10469 if (SUBREG_P (dest)) 10470 { 10471 rtx dest_subreg = SUBREG_REG (dest); 10472 rtx src_subreg = SUBREG_REG (src); 10473 return GET_MODE (dest_subreg) == GET_MODE (src_subreg); 10474 } 10475 10476 return false; 10477} 10478 10479 10480/* Helper function to change moves with: 10481 10482 (SUBREG:SF (REG:SI)) and 10483 (SUBREG:SI (REG:SF)) 10484 10485 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode 10486 values are stored as DFmode values in the VSX registers. We need to convert 10487 the bits before we can use a direct move or operate on the bits in the 10488 vector register as an integer type. 10489 10490 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */ 10491 10492static bool 10493rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) 10494{ 10495 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed 10496 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) 10497 && SUBREG_P (source) && sf_subreg_operand (source, mode)) 10498 { 10499 rtx inner_source = SUBREG_REG (source); 10500 machine_mode inner_mode = GET_MODE (inner_source); 10501 10502 if (mode == SImode && inner_mode == SFmode) 10503 { 10504 emit_insn (gen_movsi_from_sf (dest, inner_source)); 10505 return true; 10506 } 10507 10508 if (mode == SFmode && inner_mode == SImode) 10509 { 10510 emit_insn (gen_movsf_from_si (dest, inner_source)); 10511 return true; 10512 } 10513 } 10514 10515 return false; 10516} 10517 10518/* Emit a move from SOURCE to DEST in mode MODE. */ 10519void 10520rs6000_emit_move (rtx dest, rtx source, machine_mode mode) 10521{ 10522 rtx operands[2]; 10523 operands[0] = dest; 10524 operands[1] = source; 10525 10526 if (TARGET_DEBUG_ADDR) 10527 { 10528 fprintf (stderr, 10529 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, " 10530 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n", 10531 GET_MODE_NAME (mode), 10532 lra_in_progress, 10533 reload_completed, 10534 can_create_pseudo_p ()); 10535 debug_rtx (dest); 10536 fprintf (stderr, "source:\n"); 10537 debug_rtx (source); 10538 } 10539 10540 /* Check that we get CONST_WIDE_INT only when we should. */ 10541 if (CONST_WIDE_INT_P (operands[1]) 10542 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) 10543 gcc_unreachable (); 10544 10545#ifdef HAVE_AS_GNU_ATTRIBUTE 10546 /* If we use a long double type, set the flags in .gnu_attribute that say 10547 what the long double type is. This is to allow the linker's warning 10548 message for the wrong long double to be useful, even if the function does 10549 not do a call (for example, doing a 128-bit add on power9 if the long 10550 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are 10551 used when they aren't the default long double type. 
*/ 10552 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)) 10553 { 10554 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode)) 10555 rs6000_passes_float = rs6000_passes_long_double = true; 10556 10557 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode)) 10558 rs6000_passes_float = rs6000_passes_long_double = true; 10559 } 10560#endif 10561 10562 /* See if we need to special case SImode/SFmode SUBREG moves. */ 10563 if ((mode == SImode || mode == SFmode) && SUBREG_P (source) 10564 && rs6000_emit_move_si_sf_subreg (dest, source, mode)) 10565 return; 10566 10567 /* Check if GCC is setting up a block move that will end up using FP 10568 registers as temporaries. We must make sure this is acceptable. */ 10569 if (MEM_P (operands[0]) 10570 && MEM_P (operands[1]) 10571 && mode == DImode 10572 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0])) 10573 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1]))) 10574 && ! (rs6000_slow_unaligned_access (SImode, 10575 (MEM_ALIGN (operands[0]) > 32 10576 ? 32 : MEM_ALIGN (operands[0]))) 10577 || rs6000_slow_unaligned_access (SImode, 10578 (MEM_ALIGN (operands[1]) > 32 10579 ? 32 : MEM_ALIGN (operands[1])))) 10580 && ! MEM_VOLATILE_P (operands [0]) 10581 && ! MEM_VOLATILE_P (operands [1])) 10582 { 10583 emit_move_insn (adjust_address (operands[0], SImode, 0), 10584 adjust_address (operands[1], SImode, 0)); 10585 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), 10586 adjust_address (copy_rtx (operands[1]), SImode, 4)); 10587 return; 10588 } 10589 10590 if (can_create_pseudo_p () && MEM_P (operands[0]) 10591 && !gpc_reg_operand (operands[1], mode)) 10592 operands[1] = force_reg (mode, operands[1]); 10593 10594 /* Recognize the case where operand[1] is a reference to thread-local 10595 data and load its address to a register. */ 10596 if (tls_referenced_p (operands[1])) 10597 { 10598 enum tls_model model; 10599 rtx tmp = operands[1]; 10600 rtx addend = NULL; 10601 10602 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 10603 { 10604 addend = XEXP (XEXP (tmp, 0), 1); 10605 tmp = XEXP (XEXP (tmp, 0), 0); 10606 } 10607 10608 gcc_assert (SYMBOL_REF_P (tmp)); 10609 model = SYMBOL_REF_TLS_MODEL (tmp); 10610 gcc_assert (model != 0); 10611 10612 tmp = rs6000_legitimize_tls_address (tmp, model); 10613 if (addend) 10614 { 10615 tmp = gen_rtx_PLUS (mode, tmp, addend); 10616 tmp = force_operand (tmp, operands[0]); 10617 } 10618 operands[1] = tmp; 10619 } 10620 10621 /* 128-bit constant floating-point values on Darwin should really be loaded 10622 as two parts. However, this premature splitting is a problem when DFmode 10623 values can go into Altivec registers. */ 10624 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode) 10625 && !reg_addr[DFmode].scalar_in_vmx_p) 10626 { 10627 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), 10628 simplify_gen_subreg (DFmode, operands[1], mode, 0), 10629 DFmode); 10630 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 10631 GET_MODE_SIZE (DFmode)), 10632 simplify_gen_subreg (DFmode, operands[1], mode, 10633 GET_MODE_SIZE (DFmode)), 10634 DFmode); 10635 return; 10636 } 10637 10638 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), 10639 p1:SD) if p1 is not of floating point class and p0 is spilled as 10640 we can have no analogous movsd_store for this. 
*/ 10641 if (lra_in_progress && mode == DDmode 10642 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0]) 10643 && reg_preferred_class (REGNO (operands[0])) == NO_REGS 10644 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])) 10645 && GET_MODE (SUBREG_REG (operands[1])) == SDmode) 10646 { 10647 enum reg_class cl; 10648 int regno = REGNO (SUBREG_REG (operands[1])); 10649 10650 if (!HARD_REGISTER_NUM_P (regno)) 10651 { 10652 cl = reg_preferred_class (regno); 10653 regno = reg_renumber[regno]; 10654 if (regno < 0) 10655 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; 10656 } 10657 if (regno >= 0 && ! FP_REGNO_P (regno)) 10658 { 10659 mode = SDmode; 10660 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); 10661 operands[1] = SUBREG_REG (operands[1]); 10662 } 10663 } 10664 if (lra_in_progress 10665 && mode == SDmode 10666 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0]) 10667 && reg_preferred_class (REGNO (operands[0])) == NO_REGS 10668 && (REG_P (operands[1]) 10669 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))))) 10670 { 10671 int regno = reg_or_subregno (operands[1]); 10672 enum reg_class cl; 10673 10674 if (!HARD_REGISTER_NUM_P (regno)) 10675 { 10676 cl = reg_preferred_class (regno); 10677 gcc_assert (cl != NO_REGS); 10678 regno = reg_renumber[regno]; 10679 if (regno < 0) 10680 regno = ira_class_hard_regs[cl][0]; 10681 } 10682 if (FP_REGNO_P (regno)) 10683 { 10684 if (GET_MODE (operands[0]) != DDmode) 10685 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); 10686 emit_insn (gen_movsd_store (operands[0], operands[1])); 10687 } 10688 else if (INT_REGNO_P (regno)) 10689 emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); 10690 else 10691 gcc_unreachable(); 10692 return; 10693 } 10694 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD 10695 p:DD)) if p0 is not of floating point class and p1 is spilled as 10696 we can have no analogous movsd_load for this. */ 10697 if (lra_in_progress && mode == DDmode 10698 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0])) 10699 && GET_MODE (SUBREG_REG (operands[0])) == SDmode 10700 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1]) 10701 && reg_preferred_class (REGNO (operands[1])) == NO_REGS) 10702 { 10703 enum reg_class cl; 10704 int regno = REGNO (SUBREG_REG (operands[0])); 10705 10706 if (!HARD_REGISTER_NUM_P (regno)) 10707 { 10708 cl = reg_preferred_class (regno); 10709 regno = reg_renumber[regno]; 10710 if (regno < 0) 10711 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; 10712 } 10713 if (regno >= 0 && ! 
FP_REGNO_P (regno)) 10714 { 10715 mode = SDmode; 10716 operands[0] = SUBREG_REG (operands[0]); 10717 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); 10718 } 10719 } 10720 if (lra_in_progress 10721 && mode == SDmode 10722 && (REG_P (operands[0]) 10723 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0])))) 10724 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1]) 10725 && reg_preferred_class (REGNO (operands[1])) == NO_REGS) 10726 { 10727 int regno = reg_or_subregno (operands[0]); 10728 enum reg_class cl; 10729 10730 if (!HARD_REGISTER_NUM_P (regno)) 10731 { 10732 cl = reg_preferred_class (regno); 10733 gcc_assert (cl != NO_REGS); 10734 regno = reg_renumber[regno]; 10735 if (regno < 0) 10736 regno = ira_class_hard_regs[cl][0]; 10737 } 10738 if (FP_REGNO_P (regno)) 10739 { 10740 if (GET_MODE (operands[1]) != DDmode) 10741 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); 10742 emit_insn (gen_movsd_load (operands[0], operands[1])); 10743 } 10744 else if (INT_REGNO_P (regno)) 10745 emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); 10746 else 10747 gcc_unreachable(); 10748 return; 10749 } 10750 10751 /* FIXME: In the long term, this switch statement should go away 10752 and be replaced by a sequence of tests based on things like 10753 mode == Pmode. */ 10754 switch (mode) 10755 { 10756 case E_HImode: 10757 case E_QImode: 10758 if (CONSTANT_P (operands[1]) 10759 && !CONST_INT_P (operands[1])) 10760 operands[1] = force_const_mem (mode, operands[1]); 10761 break; 10762 10763 case E_TFmode: 10764 case E_TDmode: 10765 case E_IFmode: 10766 case E_KFmode: 10767 if (FLOAT128_2REG_P (mode)) 10768 rs6000_eliminate_indexed_memrefs (operands); 10769 /* fall through */ 10770 10771 case E_DFmode: 10772 case E_DDmode: 10773 case E_SFmode: 10774 case E_SDmode: 10775 if (CONSTANT_P (operands[1]) 10776 && ! easy_fp_constant (operands[1], mode)) 10777 operands[1] = force_const_mem (mode, operands[1]); 10778 break; 10779 10780 case E_V16QImode: 10781 case E_V8HImode: 10782 case E_V4SFmode: 10783 case E_V4SImode: 10784 case E_V2DFmode: 10785 case E_V2DImode: 10786 case E_V1TImode: 10787 if (CONSTANT_P (operands[1]) 10788 && !easy_vector_constant (operands[1], mode)) 10789 operands[1] = force_const_mem (mode, operands[1]); 10790 break; 10791 10792 case E_OOmode: 10793 case E_XOmode: 10794 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0) 10795 error ("%qs is an opaque type, and you cannot set it to other values", 10796 (mode == OOmode) ? "__vector_pair" : "__vector_quad"); 10797 break; 10798 10799 case E_SImode: 10800 case E_DImode: 10801 /* Use default pattern for address of ELF small data */ 10802 if (TARGET_ELF 10803 && mode == Pmode 10804 && DEFAULT_ABI == ABI_V4 10805 && (SYMBOL_REF_P (operands[1]) 10806 || GET_CODE (operands[1]) == CONST) 10807 && small_data_operand (operands[1], mode)) 10808 { 10809 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10810 return; 10811 } 10812 10813 /* Use the default pattern for loading up PC-relative addresses. */ 10814 if (TARGET_PCREL && mode == Pmode 10815 && pcrel_local_or_external_address (operands[1], Pmode)) 10816 { 10817 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10818 return; 10819 } 10820 10821 if (DEFAULT_ABI == ABI_V4 10822 && mode == Pmode && mode == SImode 10823 && flag_pic == 1 && got_operand (operands[1], mode)) 10824 { 10825 emit_insn (gen_movsi_got (operands[0], operands[1])); 10826 return; 10827 } 10828 10829 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN) 10830 && TARGET_NO_TOC_OR_PCREL 10831 && ! 
flag_pic 10832 && mode == Pmode 10833 && CONSTANT_P (operands[1]) 10834 && GET_CODE (operands[1]) != HIGH 10835 && !CONST_INT_P (operands[1])) 10836 { 10837 rtx target = (!can_create_pseudo_p () 10838 ? operands[0] 10839 : gen_reg_rtx (mode)); 10840 10841 /* If this is a function address on -mcall-aixdesc, 10842 convert it to the address of the descriptor. */ 10843 if (DEFAULT_ABI == ABI_AIX 10844 && SYMBOL_REF_P (operands[1]) 10845 && XSTR (operands[1], 0)[0] == '.') 10846 { 10847 const char *name = XSTR (operands[1], 0); 10848 rtx new_ref; 10849 while (*name == '.') 10850 name++; 10851 new_ref = gen_rtx_SYMBOL_REF (Pmode, name); 10852 CONSTANT_POOL_ADDRESS_P (new_ref) 10853 = CONSTANT_POOL_ADDRESS_P (operands[1]); 10854 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]); 10855 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]); 10856 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]); 10857 operands[1] = new_ref; 10858 } 10859 10860 if (DEFAULT_ABI == ABI_DARWIN) 10861 { 10862#if TARGET_MACHO 10863 /* This is not PIC code, but could require the subset of 10864 indirections used by mdynamic-no-pic. */ 10865 if (MACHO_DYNAMIC_NO_PIC_P) 10866 { 10867 /* Take care of any required data indirection. */ 10868 operands[1] = rs6000_machopic_legitimize_pic_address ( 10869 operands[1], mode, operands[0]); 10870 if (operands[0] != operands[1]) 10871 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10872 return; 10873 } 10874#endif 10875 emit_insn (gen_macho_high (Pmode, target, operands[1])); 10876 emit_insn (gen_macho_low (Pmode, operands[0], 10877 target, operands[1])); 10878 return; 10879 } 10880 10881 emit_insn (gen_elf_high (target, operands[1])); 10882 emit_insn (gen_elf_low (operands[0], target, operands[1])); 10883 return; 10884 } 10885 10886 /* If this is a SYMBOL_REF that refers to a constant pool entry, 10887 and we have put it in the TOC, we just need to make a TOC-relative 10888 reference to it. */ 10889 if (TARGET_TOC 10890 && SYMBOL_REF_P (operands[1]) 10891 && use_toc_relative_ref (operands[1], mode)) 10892 operands[1] = create_TOC_reference (operands[1], operands[0]); 10893 else if (mode == Pmode 10894 && CONSTANT_P (operands[1]) 10895 && GET_CODE (operands[1]) != HIGH 10896 && ((REG_P (operands[0]) 10897 && FP_REGNO_P (REGNO (operands[0]))) 10898 || !CONST_INT_P (operands[1]) 10899 || (num_insns_constant (operands[1], mode) 10900 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2))) 10901 && !toc_relative_expr_p (operands[1], false, NULL, NULL) 10902 && (TARGET_CMODEL == CMODEL_SMALL 10903 || can_create_pseudo_p () 10904 || (REG_P (operands[0]) 10905 && INT_REG_OK_FOR_BASE_P (operands[0], true)))) 10906 { 10907 10908#if TARGET_MACHO 10909 /* Darwin uses a special PIC legitimizer. */ 10910 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT) 10911 { 10912 operands[1] = 10913 rs6000_machopic_legitimize_pic_address (operands[1], mode, 10914 operands[0]); 10915 if (operands[0] != operands[1]) 10916 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10917 return; 10918 } 10919#endif 10920 10921 /* If we are to limit the number of things we put in the TOC and 10922 this is a symbol plus a constant we can add in one insn, 10923 just put the symbol in the TOC and add the constant. 
*/ 10924 if (GET_CODE (operands[1]) == CONST 10925 && TARGET_NO_SUM_IN_TOC 10926 && GET_CODE (XEXP (operands[1], 0)) == PLUS 10927 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) 10928 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF 10929 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0))) 10930 && ! side_effects_p (operands[0])) 10931 { 10932 rtx sym = 10933 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0)); 10934 rtx other = XEXP (XEXP (operands[1], 0), 1); 10935 10936 sym = force_reg (mode, sym); 10937 emit_insn (gen_add3_insn (operands[0], sym, other)); 10938 return; 10939 } 10940 10941 operands[1] = force_const_mem (mode, operands[1]); 10942 10943 if (TARGET_TOC 10944 && SYMBOL_REF_P (XEXP (operands[1], 0)) 10945 && use_toc_relative_ref (XEXP (operands[1], 0), mode)) 10946 { 10947 rtx tocref = create_TOC_reference (XEXP (operands[1], 0), 10948 operands[0]); 10949 operands[1] = gen_const_mem (mode, tocref); 10950 set_mem_alias_set (operands[1], get_TOC_alias_set ()); 10951 } 10952 } 10953 break; 10954 10955 case E_TImode: 10956 if (!VECTOR_MEM_VSX_P (TImode)) 10957 rs6000_eliminate_indexed_memrefs (operands); 10958 break; 10959 10960 case E_PTImode: 10961 rs6000_eliminate_indexed_memrefs (operands); 10962 break; 10963 10964 default: 10965 fatal_insn ("bad move", gen_rtx_SET (dest, source)); 10966 } 10967 10968 /* Above, we may have called force_const_mem which may have returned 10969 an invalid address. If we can, fix this up; otherwise, reload will 10970 have to deal with it. */ 10971 if (MEM_P (operands[1])) 10972 operands[1] = validize_mem (operands[1]); 10973 10974 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10975} 10976 10977 10978/* Set up AIX/Darwin/64-bit Linux quad floating point routines. */ 10979static void 10980init_float128_ibm (machine_mode mode) 10981{ 10982 if (!TARGET_XL_COMPAT) 10983 { 10984 set_optab_libfunc (add_optab, mode, "__gcc_qadd"); 10985 set_optab_libfunc (sub_optab, mode, "__gcc_qsub"); 10986 set_optab_libfunc (smul_optab, mode, "__gcc_qmul"); 10987 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv"); 10988 10989 if (!TARGET_HARD_FLOAT) 10990 { 10991 set_optab_libfunc (neg_optab, mode, "__gcc_qneg"); 10992 set_optab_libfunc (eq_optab, mode, "__gcc_qeq"); 10993 set_optab_libfunc (ne_optab, mode, "__gcc_qne"); 10994 set_optab_libfunc (gt_optab, mode, "__gcc_qgt"); 10995 set_optab_libfunc (ge_optab, mode, "__gcc_qge"); 10996 set_optab_libfunc (lt_optab, mode, "__gcc_qlt"); 10997 set_optab_libfunc (le_optab, mode, "__gcc_qle"); 10998 set_optab_libfunc (unord_optab, mode, "__gcc_qunord"); 10999 11000 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq"); 11001 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq"); 11002 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos"); 11003 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod"); 11004 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi"); 11005 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou"); 11006 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq"); 11007 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq"); 11008 } 11009 } 11010 else 11011 { 11012 set_optab_libfunc (add_optab, mode, "_xlqadd"); 11013 set_optab_libfunc (sub_optab, mode, "_xlqsub"); 11014 set_optab_libfunc (smul_optab, mode, "_xlqmul"); 11015 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv"); 11016 } 11017 11018 /* Add various conversions for IFmode to use the traditional TFmode 11019 names. 
*/ 11020 if (mode == IFmode) 11021 { 11022 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf"); 11023 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf"); 11024 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf"); 11025 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd"); 11026 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd"); 11027 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd"); 11028 11029 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi"); 11030 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi"); 11031 11032 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf"); 11033 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf"); 11034 11035 if (TARGET_POWERPC64) 11036 { 11037 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti"); 11038 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti"); 11039 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf"); 11040 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf"); 11041 } 11042 } 11043} 11044 11045/* Set up IEEE 128-bit floating point routines. Use different names if the 11046 arguments can be passed in a vector register. The historical PowerPC 11047 implementation of IEEE 128-bit floating point used _q_<op> for the names, so 11048 continue to use that if we aren't using vector registers to pass IEEE 11049 128-bit floating point. */ 11050 11051static void 11052init_float128_ieee (machine_mode mode) 11053{ 11054 if (FLOAT128_VECTOR_P (mode)) 11055 { 11056 set_optab_libfunc (add_optab, mode, "__addkf3"); 11057 set_optab_libfunc (sub_optab, mode, "__subkf3"); 11058 set_optab_libfunc (neg_optab, mode, "__negkf2"); 11059 set_optab_libfunc (smul_optab, mode, "__mulkf3"); 11060 set_optab_libfunc (sdiv_optab, mode, "__divkf3"); 11061 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2"); 11062 set_optab_libfunc (abs_optab, mode, "__abskf2"); 11063 set_optab_libfunc (powi_optab, mode, "__powikf2"); 11064 11065 set_optab_libfunc (eq_optab, mode, "__eqkf2"); 11066 set_optab_libfunc (ne_optab, mode, "__nekf2"); 11067 set_optab_libfunc (gt_optab, mode, "__gtkf2"); 11068 set_optab_libfunc (ge_optab, mode, "__gekf2"); 11069 set_optab_libfunc (lt_optab, mode, "__ltkf2"); 11070 set_optab_libfunc (le_optab, mode, "__lekf2"); 11071 set_optab_libfunc (unord_optab, mode, "__unordkf2"); 11072 11073 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2"); 11074 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2"); 11075 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2"); 11076 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2"); 11077 11078 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2"); 11079 if (mode != TFmode && FLOAT128_IBM_P (TFmode)) 11080 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2"); 11081 11082 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2"); 11083 if (mode != TFmode && FLOAT128_IBM_P (TFmode)) 11084 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2"); 11085 11086 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf"); 11087 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf"); 11088 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf"); 11089 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd"); 11090 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd"); 11091 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd"); 11092 11093 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi"); 11094 
set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi"); 11095 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi"); 11096 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi"); 11097 11098 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf"); 11099 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf"); 11100 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf"); 11101 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf"); 11102 11103 if (TARGET_POWERPC64) 11104 { 11105 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw"); 11106 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw"); 11107 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw"); 11108 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw"); 11109 } 11110 } 11111 11112 else 11113 { 11114 set_optab_libfunc (add_optab, mode, "_q_add"); 11115 set_optab_libfunc (sub_optab, mode, "_q_sub"); 11116 set_optab_libfunc (neg_optab, mode, "_q_neg"); 11117 set_optab_libfunc (smul_optab, mode, "_q_mul"); 11118 set_optab_libfunc (sdiv_optab, mode, "_q_div"); 11119 if (TARGET_PPC_GPOPT) 11120 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt"); 11121 11122 set_optab_libfunc (eq_optab, mode, "_q_feq"); 11123 set_optab_libfunc (ne_optab, mode, "_q_fne"); 11124 set_optab_libfunc (gt_optab, mode, "_q_fgt"); 11125 set_optab_libfunc (ge_optab, mode, "_q_fge"); 11126 set_optab_libfunc (lt_optab, mode, "_q_flt"); 11127 set_optab_libfunc (le_optab, mode, "_q_fle"); 11128 11129 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq"); 11130 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq"); 11131 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos"); 11132 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod"); 11133 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi"); 11134 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou"); 11135 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq"); 11136 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq"); 11137 } 11138} 11139 11140static void 11141rs6000_init_libfuncs (void) 11142{ 11143 /* __float128 support. */ 11144 if (TARGET_FLOAT128_TYPE) 11145 { 11146 init_float128_ibm (IFmode); 11147 init_float128_ieee (KFmode); 11148 } 11149 11150 /* AIX/Darwin/64-bit Linux quad floating point routines. */ 11151 if (TARGET_LONG_DOUBLE_128) 11152 { 11153 if (!TARGET_IEEEQUAD) 11154 init_float128_ibm (TFmode); 11155 11156 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */ 11157 else 11158 init_float128_ieee (TFmode); 11159 } 11160} 11161 11162/* Emit a potentially record-form instruction, setting DST from SRC. 11163 If DOT is 0, that is all; otherwise, set CCREG to the result of the 11164 signed comparison of DST with zero. If DOT is 1, the generated RTL 11165 doesn't care about the DST result; if DOT is 2, it does. If CCREG 11166 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and 11167 a separate COMPARE. 
*/ 11168 11169void 11170rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) 11171{ 11172 if (dot == 0) 11173 { 11174 emit_move_insn (dst, src); 11175 return; 11176 } 11177 11178 if (cc_reg_not_cr0_operand (ccreg, CCmode)) 11179 { 11180 emit_move_insn (dst, src); 11181 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx)); 11182 return; 11183 } 11184 11185 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx)); 11186 if (dot == 1) 11187 { 11188 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst); 11189 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber))); 11190 } 11191 else 11192 { 11193 rtx set = gen_rtx_SET (dst, src); 11194 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set))); 11195 } 11196} 11197 11198 11199/* A validation routine: say whether CODE, a condition code, and MODE 11200 match. The other alternatives either don't make sense or should 11201 never be generated. */ 11202 11203void 11204validate_condition_mode (enum rtx_code code, machine_mode mode) 11205{ 11206 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE 11207 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE) 11208 && GET_MODE_CLASS (mode) == MODE_CC); 11209 11210 /* These don't make sense. */ 11211 gcc_assert ((code != GT && code != LT && code != GE && code != LE) 11212 || mode != CCUNSmode); 11213 11214 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU) 11215 || mode == CCUNSmode); 11216 11217 gcc_assert (mode == CCFPmode 11218 || (code != ORDERED && code != UNORDERED 11219 && code != UNEQ && code != LTGT 11220 && code != UNGT && code != UNLT 11221 && code != UNGE && code != UNLE)); 11222 11223 /* These are invalid; the information is not there. */ 11224 gcc_assert (mode != CCEQmode || code == EQ || code == NE); 11225} 11226 11227 11228/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, 11229 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is 11230 not zero, store there the bit offset (counted from the right) where 11231 the single stretch of 1 bits begins; and similarly for B, the bit 11232 offset where it ends. */ 11233 11234bool 11235rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode) 11236{ 11237 unsigned HOST_WIDE_INT val = INTVAL (mask); 11238 unsigned HOST_WIDE_INT bit; 11239 int nb, ne; 11240 int n = GET_MODE_PRECISION (mode); 11241 11242 if (mode != DImode && mode != SImode) 11243 return false; 11244 11245 if (INTVAL (mask) >= 0) 11246 { 11247 bit = val & -val; 11248 ne = exact_log2 (bit); 11249 nb = exact_log2 (val + bit); 11250 } 11251 else if (val + 1 == 0) 11252 { 11253 nb = n; 11254 ne = 0; 11255 } 11256 else if (val & 1) 11257 { 11258 val = ~val; 11259 bit = val & -val; 11260 nb = exact_log2 (bit); 11261 ne = exact_log2 (val + bit); 11262 } 11263 else 11264 { 11265 bit = val & -val; 11266 ne = exact_log2 (bit); 11267 if (val + bit == 0) 11268 nb = n; 11269 else 11270 nb = 0; 11271 } 11272 11273 nb--; 11274 11275 if (nb < 0 || ne < 0 || nb >= n || ne >= n) 11276 return false; 11277 11278 if (b) 11279 *b = nb; 11280 if (e) 11281 *e = ne; 11282 11283 return true; 11284} 11285 11286bool 11287rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode) 11288{ 11289 int nb, ne; 11290 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0) 11291 { 11292 if (TARGET_64BIT) 11293 return true; 11294 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is 11295 <= 0x7fffffff. 
*/ 11296 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff; 11297 } 11298 11299 return false; 11300} 11301 11302/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl, 11303 or rldicr instruction, to implement an AND with it in mode MODE. */ 11304 11305bool 11306rs6000_is_valid_and_mask (rtx mask, machine_mode mode) 11307{ 11308 int nb, ne; 11309 11310 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 11311 return false; 11312 11313 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that 11314 does not wrap. */ 11315 if (mode == DImode) 11316 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb)); 11317 11318 /* For SImode, rlwinm can do everything. */ 11319 if (mode == SImode) 11320 return (nb < 32 && ne < 32); 11321 11322 return false; 11323} 11324 11325/* Return the instruction template for an AND with mask in mode MODE, with 11326 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 11327 11328const char * 11329rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot) 11330{ 11331 int nb, ne; 11332 11333 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode)) 11334 gcc_unreachable (); 11335 11336 if (mode == DImode && ne == 0) 11337 { 11338 operands[3] = GEN_INT (63 - nb); 11339 if (dot) 11340 return "rldicl. %0,%1,0,%3"; 11341 return "rldicl %0,%1,0,%3"; 11342 } 11343 11344 if (mode == DImode && nb == 63) 11345 { 11346 operands[3] = GEN_INT (63 - ne); 11347 if (dot) 11348 return "rldicr. %0,%1,0,%3"; 11349 return "rldicr %0,%1,0,%3"; 11350 } 11351 11352 if (nb < 32 && ne < 32) 11353 { 11354 operands[3] = GEN_INT (31 - nb); 11355 operands[4] = GEN_INT (31 - ne); 11356 if (dot) 11357 return "rlwinm. %0,%1,0,%3,%4"; 11358 return "rlwinm %0,%1,0,%3,%4"; 11359 } 11360 11361 gcc_unreachable (); 11362} 11363 11364/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm, 11365 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with 11366 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */ 11367 11368bool 11369rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode) 11370{ 11371 int nb, ne; 11372 11373 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 11374 return false; 11375 11376 int n = GET_MODE_PRECISION (mode); 11377 int sh = -1; 11378 11379 if (CONST_INT_P (XEXP (shift, 1))) 11380 { 11381 sh = INTVAL (XEXP (shift, 1)); 11382 if (sh < 0 || sh >= n) 11383 return false; 11384 } 11385 11386 rtx_code code = GET_CODE (shift); 11387 11388 /* Convert any shift by 0 to a rotate, to simplify below code. */ 11389 if (sh == 0) 11390 code = ROTATE; 11391 11392 /* Convert rotate to simple shift if we can, to make analysis simpler. */ 11393 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) 11394 code = ASHIFT; 11395 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) 11396 { 11397 code = LSHIFTRT; 11398 sh = n - sh; 11399 } 11400 11401 /* DImode rotates need rld*. */ 11402 if (mode == DImode && code == ROTATE) 11403 return (nb == 63 || ne == 0 || ne == sh); 11404 11405 /* SImode rotates need rlw*. */ 11406 if (mode == SImode && code == ROTATE) 11407 return (nb < 32 && ne < 32 && sh < 32); 11408 11409 /* Wrap-around masks are only okay for rotates. */ 11410 if (ne > nb) 11411 return false; 11412 11413 /* Variable shifts are only okay for rotates. */ 11414 if (sh < 0) 11415 return false; 11416 11417 /* Don't allow ASHIFT if the mask is wrong for that. */ 11418 if (code == ASHIFT && ne < sh) 11419 return false; 11420 11421 /* If we can do it with an rlw*, we can do it. 
Don't allow LSHIFTRT 11422 if the mask is wrong for that. */ 11423 if (nb < 32 && ne < 32 && sh < 32 11424 && !(code == LSHIFTRT && nb >= 32 - sh)) 11425 return true; 11426 11427 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT 11428 if the mask is wrong for that. */ 11429 if (code == LSHIFTRT) 11430 sh = 64 - sh; 11431 if (nb == 63 || ne == 0 || ne == sh) 11432 return !(code == LSHIFTRT && nb >= sh); 11433 11434 return false; 11435} 11436 11437/* Return the instruction template for a shift with mask in mode MODE, with 11438 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 11439 11440const char * 11441rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot) 11442{ 11443 int nb, ne; 11444 11445 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) 11446 gcc_unreachable (); 11447 11448 if (mode == DImode && ne == 0) 11449 { 11450 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 11451 operands[2] = GEN_INT (64 - INTVAL (operands[2])); 11452 operands[3] = GEN_INT (63 - nb); 11453 if (dot) 11454 return "rld%I2cl. %0,%1,%2,%3"; 11455 return "rld%I2cl %0,%1,%2,%3"; 11456 } 11457 11458 if (mode == DImode && nb == 63) 11459 { 11460 operands[3] = GEN_INT (63 - ne); 11461 if (dot) 11462 return "rld%I2cr. %0,%1,%2,%3"; 11463 return "rld%I2cr %0,%1,%2,%3"; 11464 } 11465 11466 if (mode == DImode 11467 && GET_CODE (operands[4]) != LSHIFTRT 11468 && CONST_INT_P (operands[2]) 11469 && ne == INTVAL (operands[2])) 11470 { 11471 operands[3] = GEN_INT (63 - nb); 11472 if (dot) 11473 return "rld%I2c. %0,%1,%2,%3"; 11474 return "rld%I2c %0,%1,%2,%3"; 11475 } 11476 11477 if (nb < 32 && ne < 32) 11478 { 11479 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 11480 operands[2] = GEN_INT (32 - INTVAL (operands[2])); 11481 operands[3] = GEN_INT (31 - nb); 11482 operands[4] = GEN_INT (31 - ne); 11483 /* This insn can also be a 64-bit rotate with mask that really makes 11484 it just a shift right (with mask); the %h below are to adjust for 11485 that situation (shift count is >= 32 in that case). */ 11486 if (dot) 11487 return "rlw%I2nm. %0,%1,%h2,%3,%4"; 11488 return "rlw%I2nm %0,%1,%h2,%3,%4"; 11489 } 11490 11491 gcc_unreachable (); 11492} 11493 11494/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or 11495 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE, 11496 ASHIFT, or LSHIFTRT) in mode MODE. */ 11497 11498bool 11499rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode) 11500{ 11501 int nb, ne; 11502 11503 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 11504 return false; 11505 11506 int n = GET_MODE_PRECISION (mode); 11507 11508 int sh = INTVAL (XEXP (shift, 1)); 11509 if (sh < 0 || sh >= n) 11510 return false; 11511 11512 rtx_code code = GET_CODE (shift); 11513 11514 /* Convert any shift by 0 to a rotate, to simplify below code. */ 11515 if (sh == 0) 11516 code = ROTATE; 11517 11518 /* Convert rotate to simple shift if we can, to make analysis simpler. */ 11519 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) 11520 code = ASHIFT; 11521 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) 11522 { 11523 code = LSHIFTRT; 11524 sh = n - sh; 11525 } 11526 11527 /* DImode rotates need rldimi. */ 11528 if (mode == DImode && code == ROTATE) 11529 return (ne == sh); 11530 11531 /* SImode rotates need rlwimi. */ 11532 if (mode == SImode && code == ROTATE) 11533 return (nb < 32 && ne < 32 && sh < 32); 11534 11535 /* Wrap-around masks are only okay for rotates. 
*/ 11536 if (ne > nb) 11537 return false; 11538 11539 /* Don't allow ASHIFT if the mask is wrong for that. */ 11540 if (code == ASHIFT && ne < sh) 11541 return false; 11542 11543 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT 11544 if the mask is wrong for that. */ 11545 if (nb < 32 && ne < 32 && sh < 32 11546 && !(code == LSHIFTRT && nb >= 32 - sh)) 11547 return true; 11548 11549 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT 11550 if the mask is wrong for that. */ 11551 if (code == LSHIFTRT) 11552 sh = 64 - sh; 11553 if (ne == sh) 11554 return !(code == LSHIFTRT && nb >= sh); 11555 11556 return false; 11557} 11558 11559/* Return the instruction template for an insert with mask in mode MODE, with 11560 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 11561 11562const char * 11563rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot) 11564{ 11565 int nb, ne; 11566 11567 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) 11568 gcc_unreachable (); 11569 11570 /* Prefer rldimi because rlwimi is cracked. */ 11571 if (TARGET_POWERPC64 11572 && (!dot || mode == DImode) 11573 && GET_CODE (operands[4]) != LSHIFTRT 11574 && ne == INTVAL (operands[2])) 11575 { 11576 operands[3] = GEN_INT (63 - nb); 11577 if (dot) 11578 return "rldimi. %0,%1,%2,%3"; 11579 return "rldimi %0,%1,%2,%3"; 11580 } 11581 11582 if (nb < 32 && ne < 32) 11583 { 11584 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 11585 operands[2] = GEN_INT (32 - INTVAL (operands[2])); 11586 operands[3] = GEN_INT (31 - nb); 11587 operands[4] = GEN_INT (31 - ne); 11588 if (dot) 11589 return "rlwimi. %0,%1,%2,%3,%4"; 11590 return "rlwimi %0,%1,%2,%3,%4"; 11591 } 11592 11593 gcc_unreachable (); 11594} 11595 11596/* Return whether an AND with C (a CONST_INT) in mode MODE can be done 11597 using two machine instructions. */ 11598 11599bool 11600rs6000_is_valid_2insn_and (rtx c, machine_mode mode) 11601{ 11602 /* There are two kinds of AND we can handle with two insns: 11603 1) those we can do with two rl* insn; 11604 2) ori[s];xori[s]. 11605 11606 We do not handle that last case yet. */ 11607 11608 /* If there is just one stretch of ones, we can do it. */ 11609 if (rs6000_is_valid_mask (c, NULL, NULL, mode)) 11610 return true; 11611 11612 /* Otherwise, fill in the lowest "hole"; if we can do the result with 11613 one insn, we can do the whole thing with two. */ 11614 unsigned HOST_WIDE_INT val = INTVAL (c); 11615 unsigned HOST_WIDE_INT bit1 = val & -val; 11616 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; 11617 unsigned HOST_WIDE_INT val1 = (val + bit1) & val; 11618 unsigned HOST_WIDE_INT bit3 = val1 & -val1; 11619 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode); 11620} 11621 11622/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS. 11623 If EXPAND is true, split rotate-and-mask instructions we generate to 11624 their constituent parts as well (this is used during expand); if DOT 11625 is 1, make the last insn a record-form instruction clobbering the 11626 destination GPR and setting the CC reg (from operands[3]); if 2, set 11627 that GPR as well as the CC reg. */ 11628 11629void 11630rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot) 11631{ 11632 gcc_assert (!(expand && dot)); 11633 11634 unsigned HOST_WIDE_INT val = INTVAL (operands[2]); 11635 11636 /* If it is one stretch of ones, it is DImode; shift left, mask, then 11637 shift right. 
This generates better code than doing the masks without 11638 shifts, or shifting first right and then left. */ 11639 int nb, ne; 11640 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne) 11641 { 11642 gcc_assert (mode == DImode); 11643 11644 int shift = 63 - nb; 11645 if (expand) 11646 { 11647 rtx tmp1 = gen_reg_rtx (DImode); 11648 rtx tmp2 = gen_reg_rtx (DImode); 11649 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift))); 11650 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift))); 11651 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift))); 11652 } 11653 else 11654 { 11655 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift)); 11656 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift)); 11657 emit_move_insn (operands[0], tmp); 11658 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift)); 11659 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 11660 } 11661 return; 11662 } 11663 11664 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1 11665 that does the rest. */ 11666 unsigned HOST_WIDE_INT bit1 = val & -val; 11667 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; 11668 unsigned HOST_WIDE_INT val1 = (val + bit1) & val; 11669 unsigned HOST_WIDE_INT bit3 = val1 & -val1; 11670 11671 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1; 11672 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2; 11673 11674 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode)); 11675 11676 /* Two "no-rotate"-and-mask instructions, for SImode. */ 11677 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode)) 11678 { 11679 gcc_assert (mode == SImode); 11680 11681 rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; 11682 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1)); 11683 emit_move_insn (reg, tmp); 11684 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); 11685 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 11686 return; 11687 } 11688 11689 gcc_assert (mode == DImode); 11690 11691 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm 11692 insns; we have to do the first in SImode, because it wraps. */ 11693 if (mask2 <= 0xffffffff 11694 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode)) 11695 { 11696 rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; 11697 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]), 11698 GEN_INT (mask1)); 11699 rtx reg_low = gen_lowpart (SImode, reg); 11700 emit_move_insn (reg_low, tmp); 11701 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); 11702 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 11703 return; 11704 } 11705 11706 /* Two rld* insns: rotate, clear the hole in the middle (which now is 11707 at the top end), rotate back and clear the other hole. */ 11708 int right = exact_log2 (bit3); 11709 int left = 64 - right; 11710 11711 /* Rotate the mask too. 
*/ 11712 mask1 = (mask1 >> right) | ((bit2 - 1) << left); 11713 11714 if (expand) 11715 { 11716 rtx tmp1 = gen_reg_rtx (DImode); 11717 rtx tmp2 = gen_reg_rtx (DImode); 11718 rtx tmp3 = gen_reg_rtx (DImode); 11719 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left))); 11720 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1))); 11721 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right))); 11722 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2))); 11723 } 11724 else 11725 { 11726 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left)); 11727 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1)); 11728 emit_move_insn (operands[0], tmp); 11729 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right)); 11730 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2)); 11731 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 11732 } 11733} 11734 11735/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates 11736 for lfq and stfq insns iff the registers are hard registers. */ 11737 11738int 11739registers_ok_for_quad_peep (rtx reg1, rtx reg2) 11740{ 11741 /* We might have been passed a SUBREG. */ 11742 if (!REG_P (reg1) || !REG_P (reg2)) 11743 return 0; 11744 11745 /* We might have been passed non floating point registers. */ 11746 if (!FP_REGNO_P (REGNO (reg1)) 11747 || !FP_REGNO_P (REGNO (reg2))) 11748 return 0; 11749 11750 return (REGNO (reg1) == REGNO (reg2) - 1); 11751} 11752 11753/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn. 11754 addr1 and addr2 must be in consecutive memory locations 11755 (addr2 == addr1 + 8). */ 11756 11757int 11758mems_ok_for_quad_peep (rtx mem1, rtx mem2) 11759{ 11760 rtx addr1, addr2; 11761 unsigned int reg1, reg2; 11762 int offset1, offset2; 11763 11764 /* The mems cannot be volatile. */ 11765 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 11766 return 0; 11767 11768 addr1 = XEXP (mem1, 0); 11769 addr2 = XEXP (mem2, 0); 11770 11771 /* Extract an offset (if used) from the first addr. */ 11772 if (GET_CODE (addr1) == PLUS) 11773 { 11774 /* If not a REG, return zero. */ 11775 if (!REG_P (XEXP (addr1, 0))) 11776 return 0; 11777 else 11778 { 11779 reg1 = REGNO (XEXP (addr1, 0)); 11780 /* The offset must be constant! */ 11781 if (!CONST_INT_P (XEXP (addr1, 1))) 11782 return 0; 11783 offset1 = INTVAL (XEXP (addr1, 1)); 11784 } 11785 } 11786 else if (!REG_P (addr1)) 11787 return 0; 11788 else 11789 { 11790 reg1 = REGNO (addr1); 11791 /* This was a simple (mem (reg)) expression. Offset is 0. */ 11792 offset1 = 0; 11793 } 11794 11795 /* And now for the second addr. */ 11796 if (GET_CODE (addr2) == PLUS) 11797 { 11798 /* If not a REG, return zero. */ 11799 if (!REG_P (XEXP (addr2, 0))) 11800 return 0; 11801 else 11802 { 11803 reg2 = REGNO (XEXP (addr2, 0)); 11804 /* The offset must be constant. */ 11805 if (!CONST_INT_P (XEXP (addr2, 1))) 11806 return 0; 11807 offset2 = INTVAL (XEXP (addr2, 1)); 11808 } 11809 } 11810 else if (!REG_P (addr2)) 11811 return 0; 11812 else 11813 { 11814 reg2 = REGNO (addr2); 11815 /* This was a simple (mem (reg)) expression. Offset is 0. */ 11816 offset2 = 0; 11817 } 11818 11819 /* Both of these must have the same base register. */ 11820 if (reg1 != reg2) 11821 return 0; 11822 11823 /* The offset for the second addr must be 8 more than the first addr. */ 11824 if (offset2 != offset1 + 8) 11825 return 0; 11826 11827 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq 11828 instructions. */ 11829 return 1; 11830} 11831 11832/* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. 
For SDmode values we 11833 need to use DDmode; in all other cases we can use the same mode. */ 11834 static machine_mode 11835 rs6000_secondary_memory_needed_mode (machine_mode mode) 11836 { 11837 if (lra_in_progress && mode == SDmode) 11838 return DDmode; 11839 return mode; 11840 } 11841 11842 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work 11843 on traditional floating point registers, and the VMRGOW/VMRGEW instructions 11844 only work on the traditional altivec registers, note if an altivec register 11845 was chosen. */ 11846 11847 static enum rs6000_reg_type 11848 register_to_reg_type (rtx reg, bool *is_altivec) 11849 { 11850 HOST_WIDE_INT regno; 11851 enum reg_class rclass; 11852 11853 if (SUBREG_P (reg)) 11854 reg = SUBREG_REG (reg); 11855 11856 if (!REG_P (reg)) 11857 return NO_REG_TYPE; 11858 11859 regno = REGNO (reg); 11860 if (!HARD_REGISTER_NUM_P (regno)) 11861 { 11862 if (!lra_in_progress && !reload_completed) 11863 return PSEUDO_REG_TYPE; 11864 11865 regno = true_regnum (reg); 11866 if (regno < 0 || !HARD_REGISTER_NUM_P (regno)) 11867 return PSEUDO_REG_TYPE; 11868 } 11869 11870 gcc_assert (regno >= 0); 11871 11872 if (is_altivec && ALTIVEC_REGNO_P (regno)) 11873 *is_altivec = true; 11874 11875 rclass = rs6000_regno_regclass[regno]; 11876 return reg_class_to_reg_type[(int)rclass]; 11877 } 11878 11879 /* Helper function to return the cost of adding a TOC entry address. */ 11880 11881 static inline int 11882 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) 11883 { 11884 int ret; 11885 11886 if (TARGET_CMODEL != CMODEL_SMALL) 11887 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; 11888 11889 else 11890 ret = (TARGET_MINIMAL_TOC) ? 6 : 3; 11891 11892 return ret; 11893 } 11894 11895 /* Helper function for rs6000_secondary_reload to determine whether the memory 11896 address (ADDR) with a given register class (RCLASS) and machine mode (MODE) 11897 needs reloading. Return negative if the memory is not handled by the memory 11898 helper functions (so a different reload method should be tried), 0 if no additional 11899 instructions are needed, and positive to give the extra cost for the 11900 memory. */ 11901 11902 static int 11903 rs6000_secondary_reload_memory (rtx addr, 11904 enum reg_class rclass, 11905 machine_mode mode) 11906 { 11907 int extra_cost = 0; 11908 rtx reg, and_arg, plus_arg0, plus_arg1; 11909 addr_mask_type addr_mask; 11910 const char *type = NULL; 11911 const char *fail_msg = NULL; 11912 11913 if (GPR_REG_CLASS_P (rclass)) 11914 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; 11915 11916 else if (rclass == FLOAT_REGS) 11917 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; 11918 11919 else if (rclass == ALTIVEC_REGS) 11920 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; 11921 11922 /* For the combined VSX_REGS, turn off Altivec AND -16. */ 11923 else if (rclass == VSX_REGS) 11924 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] 11925 & ~RELOAD_REG_AND_M16); 11926 11927 /* If the register allocator hasn't made up its mind yet on the register 11928 class to use, settle on defaults to use. */ 11929 else if (rclass == NO_REGS) 11930 { 11931 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY] 11932 & ~RELOAD_REG_AND_M16); 11933 11934 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0) 11935 addr_mask &= ~(RELOAD_REG_INDEXED 11936 | RELOAD_REG_PRE_INCDEC 11937 | RELOAD_REG_PRE_MODIFY); 11938 } 11939 11940 else 11941 addr_mask = 0; 11942
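/* At this point ADDR_MASK describes the addressing forms (RELOAD_REG_OFFSET, RELOAD_REG_INDEXED, the pre-increment/pre-modify forms, and so on) that the chosen register class supports for MODE; the checks below compare those bits against the actual form of ADDR. */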
*/ 11944 if ((addr_mask & RELOAD_REG_VALID) == 0) 11945 { 11946 if (TARGET_DEBUG_ADDR) 11947 { 11948 fprintf (stderr, 11949 "rs6000_secondary_reload_memory: mode = %s, class = %s, " 11950 "not valid in class\n", 11951 GET_MODE_NAME (mode), reg_class_names[rclass]); 11952 debug_rtx (addr); 11953 } 11954 11955 return -1; 11956 } 11957 11958 switch (GET_CODE (addr)) 11959 { 11960 /* Does the register class supports auto update forms for this mode? We 11961 don't need a scratch register, since the powerpc only supports 11962 PRE_INC, PRE_DEC, and PRE_MODIFY. */ 11963 case PRE_INC: 11964 case PRE_DEC: 11965 reg = XEXP (addr, 0); 11966 if (!base_reg_operand (addr, GET_MODE (reg))) 11967 { 11968 fail_msg = "no base register #1"; 11969 extra_cost = -1; 11970 } 11971 11972 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) 11973 { 11974 extra_cost = 1; 11975 type = "update"; 11976 } 11977 break; 11978 11979 case PRE_MODIFY: 11980 reg = XEXP (addr, 0); 11981 plus_arg1 = XEXP (addr, 1); 11982 if (!base_reg_operand (reg, GET_MODE (reg)) 11983 || GET_CODE (plus_arg1) != PLUS 11984 || !rtx_equal_p (reg, XEXP (plus_arg1, 0))) 11985 { 11986 fail_msg = "bad PRE_MODIFY"; 11987 extra_cost = -1; 11988 } 11989 11990 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) 11991 { 11992 extra_cost = 1; 11993 type = "update"; 11994 } 11995 break; 11996 11997 /* Do we need to simulate AND -16 to clear the bottom address bits used 11998 in VMX load/stores? Only allow the AND for vector sizes. */ 11999 case AND: 12000 and_arg = XEXP (addr, 0); 12001 if (GET_MODE_SIZE (mode) != 16 12002 || !CONST_INT_P (XEXP (addr, 1)) 12003 || INTVAL (XEXP (addr, 1)) != -16) 12004 { 12005 fail_msg = "bad Altivec AND #1"; 12006 extra_cost = -1; 12007 } 12008 12009 if (rclass != ALTIVEC_REGS) 12010 { 12011 if (legitimate_indirect_address_p (and_arg, false)) 12012 extra_cost = 1; 12013 12014 else if (legitimate_indexed_address_p (and_arg, false)) 12015 extra_cost = 2; 12016 12017 else 12018 { 12019 fail_msg = "bad Altivec AND #2"; 12020 extra_cost = -1; 12021 } 12022 12023 type = "and"; 12024 } 12025 break; 12026 12027 /* If this is an indirect address, make sure it is a base register. */ 12028 case REG: 12029 case SUBREG: 12030 if (!legitimate_indirect_address_p (addr, false)) 12031 { 12032 extra_cost = 1; 12033 type = "move"; 12034 } 12035 break; 12036 12037 /* If this is an indexed address, make sure the register class can handle 12038 indexed addresses for this mode. */ 12039 case PLUS: 12040 plus_arg0 = XEXP (addr, 0); 12041 plus_arg1 = XEXP (addr, 1); 12042 12043 /* (plus (plus (reg) (constant)) (constant)) is generated during 12044 push_reload processing, so handle it now. */ 12045 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1)) 12046 { 12047 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12048 { 12049 extra_cost = 1; 12050 type = "offset"; 12051 } 12052 } 12053 12054 /* (plus (plus (reg) (constant)) (reg)) is also generated during 12055 push_reload processing, so handle it now. 
*/ 12056 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1)) 12057 { 12058 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 12059 { 12060 extra_cost = 1; 12061 type = "indexed #2"; 12062 } 12063 } 12064 12065 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0))) 12066 { 12067 fail_msg = "no base register #2"; 12068 extra_cost = -1; 12069 } 12070 12071 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1))) 12072 { 12073 if ((addr_mask & RELOAD_REG_INDEXED) == 0 12074 || !legitimate_indexed_address_p (addr, false)) 12075 { 12076 extra_cost = 1; 12077 type = "indexed"; 12078 } 12079 } 12080 12081 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0 12082 && CONST_INT_P (plus_arg1)) 12083 { 12084 if (!quad_address_offset_p (INTVAL (plus_arg1))) 12085 { 12086 extra_cost = 1; 12087 type = "vector d-form offset"; 12088 } 12089 } 12090 12091 /* Make sure the register class can handle offset addresses. */ 12092 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) 12093 { 12094 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12095 { 12096 extra_cost = 1; 12097 type = "offset #2"; 12098 } 12099 } 12100 12101 else 12102 { 12103 fail_msg = "bad PLUS"; 12104 extra_cost = -1; 12105 } 12106 12107 break; 12108 12109 case LO_SUM: 12110 /* Quad offsets are restricted and can't handle normal addresses. */ 12111 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 12112 { 12113 extra_cost = -1; 12114 type = "vector d-form lo_sum"; 12115 } 12116 12117 else if (!legitimate_lo_sum_address_p (mode, addr, false)) 12118 { 12119 fail_msg = "bad LO_SUM"; 12120 extra_cost = -1; 12121 } 12122 12123 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12124 { 12125 extra_cost = 1; 12126 type = "lo_sum"; 12127 } 12128 break; 12129 12130 /* Static addresses need to create a TOC entry. */ 12131 case CONST: 12132 case SYMBOL_REF: 12133 case LABEL_REF: 12134 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 12135 { 12136 extra_cost = -1; 12137 type = "vector d-form lo_sum #2"; 12138 } 12139 12140 else 12141 { 12142 type = "address"; 12143 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask); 12144 } 12145 break; 12146 12147 /* TOC references look like offsetable memory. */ 12148 case UNSPEC: 12149 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL) 12150 { 12151 fail_msg = "bad UNSPEC"; 12152 extra_cost = -1; 12153 } 12154 12155 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 12156 { 12157 extra_cost = -1; 12158 type = "vector d-form lo_sum #3"; 12159 } 12160 12161 else if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12162 { 12163 extra_cost = 1; 12164 type = "toc reference"; 12165 } 12166 break; 12167 12168 default: 12169 { 12170 fail_msg = "bad address"; 12171 extra_cost = -1; 12172 } 12173 } 12174 12175 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */) 12176 { 12177 if (extra_cost < 0) 12178 fprintf (stderr, 12179 "rs6000_secondary_reload_memory error: mode = %s, " 12180 "class = %s, addr_mask = '%s', %s\n", 12181 GET_MODE_NAME (mode), 12182 reg_class_names[rclass], 12183 rs6000_debug_addr_mask (addr_mask, false), 12184 (fail_msg != NULL) ? fail_msg : "<bad address>"); 12185 12186 else 12187 fprintf (stderr, 12188 "rs6000_secondary_reload_memory: mode = %s, class = %s, " 12189 "addr_mask = '%s', extra cost = %d, %s\n", 12190 GET_MODE_NAME (mode), 12191 reg_class_names[rclass], 12192 rs6000_debug_addr_mask (addr_mask, false), 12193 extra_cost, 12194 (type) ? 
type : "<none>"); 12195 12196 debug_rtx (addr); 12197 } 12198 12199 return extra_cost; 12200} 12201 12202/* Helper function for rs6000_secondary_reload to return true if a move to a 12203 different register classe is really a simple move. */ 12204 12205static bool 12206rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, 12207 enum rs6000_reg_type from_type, 12208 machine_mode mode) 12209{ 12210 int size = GET_MODE_SIZE (mode); 12211 12212 /* Add support for various direct moves available. In this function, we only 12213 look at cases where we don't need any extra registers, and one or more 12214 simple move insns are issued. Originally small integers are not allowed 12215 in FPR/VSX registers. Single precision binary floating is not a simple 12216 move because we need to convert to the single precision memory layout. 12217 The 4-byte SDmode can be moved. TDmode values are disallowed since they 12218 need special direct move handling, which we do not support yet. */ 12219 if (TARGET_DIRECT_MOVE 12220 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) 12221 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) 12222 { 12223 if (TARGET_POWERPC64) 12224 { 12225 /* ISA 2.07: MTVSRD or MVFVSRD. */ 12226 if (size == 8) 12227 return true; 12228 12229 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */ 12230 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode) 12231 return true; 12232 } 12233 12234 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ 12235 if (TARGET_P8_VECTOR) 12236 { 12237 if (mode == SImode) 12238 return true; 12239 12240 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) 12241 return true; 12242 } 12243 12244 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ 12245 if (mode == SDmode) 12246 return true; 12247 } 12248 12249 /* Move to/from SPR. */ 12250 else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) 12251 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) 12252 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) 12253 return true; 12254 12255 return false; 12256} 12257 12258/* Direct move helper function for rs6000_secondary_reload, handle all of the 12259 special direct moves that involve allocating an extra register, return the 12260 insn code of the helper function if there is such a function or 12261 CODE_FOR_nothing if not. */ 12262 12263static bool 12264rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, 12265 enum rs6000_reg_type from_type, 12266 machine_mode mode, 12267 secondary_reload_info *sri, 12268 bool altivec_p) 12269{ 12270 bool ret = false; 12271 enum insn_code icode = CODE_FOR_nothing; 12272 int cost = 0; 12273 int size = GET_MODE_SIZE (mode); 12274 12275 if (TARGET_POWERPC64 && size == 16) 12276 { 12277 /* Handle moving 128-bit values from GPRs to VSX point registers on 12278 ISA 2.07 (power8, power9) when running in 64-bit mode using 12279 XXPERMDI to glue the two 64-bit values back together. */ 12280 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) 12281 { 12282 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ 12283 icode = reg_addr[mode].reload_vsx_gpr; 12284 } 12285 12286 /* Handle moving 128-bit values from VSX point registers to GPRs on 12287 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the 12288 bottom 64-bit value. */ 12289 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) 12290 { 12291 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. 
12296 else if (TARGET_POWERPC64 && mode == SFmode) 12297 { 12298 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) 12299 { 12300 cost = 3; /* xscvdpspn, mfvsrd, and. */ 12301 icode = reg_addr[mode].reload_gpr_vsx; 12302 } 12303 12304 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) 12305 { 12306 cost = 2; /* mtvsrwz, xscvspdpn. */ 12307 icode = reg_addr[mode].reload_vsx_gpr; 12308 } 12309 } 12310 12311 else if (!TARGET_POWERPC64 && size == 8) 12312 { 12313 /* Handle moving 64-bit values from GPRs to floating point registers on 12314 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two 12315 32-bit values back together. Altivec register classes must be handled 12316 specially since a different instruction is used, and the secondary 12317 reload support requires a single instruction class in the scratch 12318 register constraint. However, right now TFmode is not allowed in 12319 Altivec registers, so the pattern will never match. */ 12320 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) 12321 { 12322 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ 12323 icode = reg_addr[mode].reload_fpr_gpr; 12324 } 12325 } 12326 12327 if (icode != CODE_FOR_nothing) 12328 { 12329 ret = true; 12330 if (sri) 12331 { 12332 sri->icode = icode; 12333 sri->extra_cost = cost; 12334 } 12335 } 12336 12337 return ret; 12338 } 12339 12340 /* Return whether a move between two register classes can be done either 12341 directly (simple move) or via a pattern that uses a single extra temporary 12342 (using ISA 2.07's direct move in this case). */ 12343 12344 static bool 12345 rs6000_secondary_reload_move (enum rs6000_reg_type to_type, 12346 enum rs6000_reg_type from_type, 12347 machine_mode mode, 12348 secondary_reload_info *sri, 12349 bool altivec_p) 12350 { 12351 /* Fall back to load/store reloads if either type is not a register. */ 12352 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) 12353 return false; 12354 12355 /* If we haven't allocated registers yet, assume the move can be done for the 12356 standard register types. */ 12357 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) 12358 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) 12359 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) 12360 return true; 12361 12362 /* Moves to the same set of registers are simple moves for non-specialized 12363 registers. */ 12364 if (to_type == from_type && IS_STD_REG_TYPE (to_type)) 12365 return true; 12366 12367 /* Check whether a simple move can be done directly. */ 12368 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) 12369 { 12370 if (sri) 12371 { 12372 sri->icode = CODE_FOR_nothing; 12373 sri->extra_cost = 0; 12374 } 12375 return true; 12376 } 12377 12378 /* Now check if we can do it in a few steps. */ 12379 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, 12380 altivec_p); 12381 } 12382 12383 /* Inform reload about cases where moving X with a mode MODE to a register in 12384 RCLASS requires an extra scratch or immediate register. Return the class 12385 needed for the immediate register. 12386 12387 For VSX and Altivec, we may need a register to convert sp+offset into 12388 reg+sp. 12389 12390 For misaligned 64-bit gpr loads and stores we need a register to 12391 convert an offset address to indirect. 
*/ 12392 12393static reg_class_t 12394rs6000_secondary_reload (bool in_p, 12395 rtx x, 12396 reg_class_t rclass_i, 12397 machine_mode mode, 12398 secondary_reload_info *sri) 12399{ 12400 enum reg_class rclass = (enum reg_class) rclass_i; 12401 reg_class_t ret = ALL_REGS; 12402 enum insn_code icode; 12403 bool default_p = false; 12404 bool done_p = false; 12405 12406 /* Allow subreg of memory before/during reload. */ 12407 bool memory_p = (MEM_P (x) 12408 || (!reload_completed && SUBREG_P (x) 12409 && MEM_P (SUBREG_REG (x)))); 12410 12411 sri->icode = CODE_FOR_nothing; 12412 sri->t_icode = CODE_FOR_nothing; 12413 sri->extra_cost = 0; 12414 icode = ((in_p) 12415 ? reg_addr[mode].reload_load 12416 : reg_addr[mode].reload_store); 12417 12418 if (REG_P (x) || register_operand (x, mode)) 12419 { 12420 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; 12421 bool altivec_p = (rclass == ALTIVEC_REGS); 12422 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); 12423 12424 if (!in_p) 12425 std::swap (to_type, from_type); 12426 12427 /* Can we do a direct move of some sort? */ 12428 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, 12429 altivec_p)) 12430 { 12431 icode = (enum insn_code)sri->icode; 12432 default_p = false; 12433 done_p = true; 12434 ret = NO_REGS; 12435 } 12436 } 12437 12438 /* Make sure 0.0 is not reloaded or forced into memory. */ 12439 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) 12440 { 12441 ret = NO_REGS; 12442 default_p = false; 12443 done_p = true; 12444 } 12445 12446 /* If this is a scalar floating point value and we want to load it into the 12447 traditional Altivec registers, do it via a move via a traditional floating 12448 point register, unless we have D-form addressing. Also make sure that 12449 non-zero constants use a FPR. */ 12450 if (!done_p && reg_addr[mode].scalar_in_vmx_p 12451 && !mode_supports_vmx_dform (mode) 12452 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS) 12453 && (memory_p || CONST_DOUBLE_P (x))) 12454 { 12455 ret = FLOAT_REGS; 12456 default_p = false; 12457 done_p = true; 12458 } 12459 12460 /* Handle reload of load/stores if we have reload helper functions. */ 12461 if (!done_p && icode != CODE_FOR_nothing && memory_p) 12462 { 12463 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass, 12464 mode); 12465 12466 if (extra_cost >= 0) 12467 { 12468 done_p = true; 12469 ret = NO_REGS; 12470 if (extra_cost > 0) 12471 { 12472 sri->extra_cost = extra_cost; 12473 sri->icode = icode; 12474 } 12475 } 12476 } 12477 12478 /* Handle unaligned loads and stores of integer registers. */ 12479 if (!done_p && TARGET_POWERPC64 12480 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE 12481 && memory_p 12482 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) 12483 { 12484 rtx addr = XEXP (x, 0); 12485 rtx off = address_offset (addr); 12486 12487 if (off != NULL_RTX) 12488 { 12489 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; 12490 unsigned HOST_WIDE_INT offset = INTVAL (off); 12491 12492 /* We need a secondary reload when our legitimate_address_p 12493 says the address is good (as otherwise the entire address 12494 will be reloaded), and the offset is not a multiple of 12495 four or we have an address wrap. Address wrap will only 12496 occur for LO_SUMs since legitimate_offset_address_p 12497 rejects addresses for 16-byte mems that will wrap. */ 12498 if (GET_CODE (addr) == LO_SUM 12499 ? 
(1 /* legitimate_address_p allows any offset for lo_sum */ 12500 && ((offset & 3) != 0 12501 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)) 12502 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ 12503 && (offset & 3) != 0)) 12504 { 12505 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ 12506 if (in_p) 12507 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load 12508 : CODE_FOR_reload_di_load); 12509 else 12510 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store 12511 : CODE_FOR_reload_di_store); 12512 sri->extra_cost = 2; 12513 ret = NO_REGS; 12514 done_p = true; 12515 } 12516 else 12517 default_p = true; 12518 } 12519 else 12520 default_p = true; 12521 } 12522 12523 if (!done_p && !TARGET_POWERPC64 12524 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE 12525 && memory_p 12526 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 12527 { 12528 rtx addr = XEXP (x, 0); 12529 rtx off = address_offset (addr); 12530 12531 if (off != NULL_RTX) 12532 { 12533 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; 12534 unsigned HOST_WIDE_INT offset = INTVAL (off); 12535 12536 /* We need a secondary reload when our legitimate_address_p 12537 says the address is good (as otherwise the entire address 12538 will be reloaded), and we have a wrap. 12539 12540 legitimate_lo_sum_address_p allows LO_SUM addresses to 12541 have any offset so test for wrap in the low 16 bits. 12542 12543 legitimate_offset_address_p checks for the range 12544 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7] 12545 for mode size of 16. We wrap at [0x7ffc,0x7fff] and 12546 [0x7ff4,0x7fff] respectively, so test for the 12547 intersection of these ranges, [0x7ffc,0x7fff] and 12548 [0x7ff4,0x7ff7] respectively. 12549 12550 Note that the address we see here may have been 12551 manipulated by legitimize_reload_address. */ 12552 if (GET_CODE (addr) == LO_SUM 12553 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra 12554 : offset - (0x8000 - extra) < UNITS_PER_WORD) 12555 { 12556 if (in_p) 12557 sri->icode = CODE_FOR_reload_si_load; 12558 else 12559 sri->icode = CODE_FOR_reload_si_store; 12560 sri->extra_cost = 2; 12561 ret = NO_REGS; 12562 done_p = true; 12563 } 12564 else 12565 default_p = true; 12566 } 12567 else 12568 default_p = true; 12569 } 12570 12571 if (!done_p) 12572 default_p = true; 12573 12574 if (default_p) 12575 ret = default_secondary_reload (in_p, x, rclass, mode, sri); 12576 12577 gcc_assert (ret != ALL_REGS); 12578 12579 if (TARGET_DEBUG_ADDR) 12580 { 12581 fprintf (stderr, 12582 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, " 12583 "mode = %s", 12584 reg_class_names[ret], 12585 in_p ? "true" : "false", 12586 reg_class_names[rclass], 12587 GET_MODE_NAME (mode)); 12588 12589 if (reload_completed) 12590 fputs (", after reload", stderr); 12591 12592 if (!done_p) 12593 fputs (", done_p not set", stderr); 12594 12595 if (default_p) 12596 fputs (", default secondary reload", stderr); 12597 12598 if (sri->icode != CODE_FOR_nothing) 12599 fprintf (stderr, ", reload func = %s, extra cost = %d", 12600 insn_data[sri->icode].name, sri->extra_cost); 12601 12602 else if (sri->extra_cost > 0) 12603 fprintf (stderr, ", extra cost = %d", sri->extra_cost); 12604 12605 fputs ("\n", stderr); 12606 debug_rtx (x); 12607 } 12608 12609 return ret; 12610} 12611 12612/* Better tracing for rs6000_secondary_reload_inner. 
*/ 12613 12614static void 12615rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, 12616 bool store_p) 12617{ 12618 rtx set, clobber; 12619 12620 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX); 12621 12622 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line, 12623 store_p ? "store" : "load"); 12624 12625 if (store_p) 12626 set = gen_rtx_SET (mem, reg); 12627 else 12628 set = gen_rtx_SET (reg, mem); 12629 12630 clobber = gen_rtx_CLOBBER (VOIDmode, scratch); 12631 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); 12632} 12633 12634static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool) 12635 ATTRIBUTE_NORETURN; 12636 12637static void 12638rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, 12639 bool store_p) 12640{ 12641 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p); 12642 gcc_unreachable (); 12643} 12644 12645/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have 12646 reload helper functions. These were identified in 12647 rs6000_secondary_reload_memory, and if reload decided to use the secondary 12648 reload, it calls the insns: 12649 reload_<RELOAD:mode>_<P:mptrsize>_store 12650 reload_<RELOAD:mode>_<P:mptrsize>_load 12651 12652 which in turn calls this function, to do whatever is necessary to create 12653 valid addresses. */ 12654 12655void 12656rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) 12657{ 12658 int regno = true_regnum (reg); 12659 machine_mode mode = GET_MODE (reg); 12660 addr_mask_type addr_mask; 12661 rtx addr; 12662 rtx new_addr; 12663 rtx op_reg, op0, op1; 12664 rtx and_op; 12665 rtx cc_clobber; 12666 rtvec rv; 12667 12668 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem) 12669 || !base_reg_operand (scratch, GET_MODE (scratch))) 12670 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12671 12672 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)) 12673 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; 12674 12675 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO)) 12676 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; 12677 12678 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)) 12679 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; 12680 12681 else 12682 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12683 12684 /* Make sure the mode is valid in this register class. */ 12685 if ((addr_mask & RELOAD_REG_VALID) == 0) 12686 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12687 12688 if (TARGET_DEBUG_ADDR) 12689 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); 12690 12691 new_addr = addr = XEXP (mem, 0); 12692 switch (GET_CODE (addr)) 12693 { 12694 /* Does the register class support auto update forms for this mode? If 12695 not, do the update now. We don't need a scratch register, since the 12696 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. 
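For example, when the class lacks update forms, (mem:DI (pre_inc (reg base))) is handled by emitting an explicit add of GET_MODE_SIZE (8 here) to the base register and then using a plain (mem:DI (reg base)) access.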
*/ 12697 case PRE_INC: 12698 case PRE_DEC: 12699 op_reg = XEXP (addr, 0); 12700 if (!base_reg_operand (op_reg, Pmode)) 12701 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12702 12703 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) 12704 { 12705 int delta = GET_MODE_SIZE (mode); 12706 if (GET_CODE (addr) == PRE_DEC) 12707 delta = -delta; 12708 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta))); 12709 new_addr = op_reg; 12710 } 12711 break; 12712 12713 case PRE_MODIFY: 12714 op0 = XEXP (addr, 0); 12715 op1 = XEXP (addr, 1); 12716 if (!base_reg_operand (op0, Pmode) 12717 || GET_CODE (op1) != PLUS 12718 || !rtx_equal_p (op0, XEXP (op1, 0))) 12719 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12720 12721 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) 12722 { 12723 emit_insn (gen_rtx_SET (op0, op1)); 12724 new_addr = reg; 12725 } 12726 break; 12727 12728 /* Do we need to simulate AND -16 to clear the bottom address bits used 12729 in VMX load/stores? */ 12730 case AND: 12731 op0 = XEXP (addr, 0); 12732 op1 = XEXP (addr, 1); 12733 if ((addr_mask & RELOAD_REG_AND_M16) == 0) 12734 { 12735 if (REG_P (op0) || SUBREG_P (op0)) 12736 op_reg = op0; 12737 12738 else if (GET_CODE (op1) == PLUS) 12739 { 12740 emit_insn (gen_rtx_SET (scratch, op1)); 12741 op_reg = scratch; 12742 } 12743 12744 else 12745 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12746 12747 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1); 12748 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode)); 12749 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber); 12750 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv)); 12751 new_addr = scratch; 12752 } 12753 break; 12754 12755 /* If this is an indirect address, make sure it is a base register. */ 12756 case REG: 12757 case SUBREG: 12758 if (!base_reg_operand (addr, GET_MODE (addr))) 12759 { 12760 emit_insn (gen_rtx_SET (scratch, addr)); 12761 new_addr = scratch; 12762 } 12763 break; 12764 12765 /* If this is an indexed address, make sure the register class can handle 12766 indexed addresses for this mode. */ 12767 case PLUS: 12768 op0 = XEXP (addr, 0); 12769 op1 = XEXP (addr, 1); 12770 if (!base_reg_operand (op0, Pmode)) 12771 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12772 12773 else if (int_reg_operand (op1, Pmode)) 12774 { 12775 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 12776 { 12777 emit_insn (gen_rtx_SET (scratch, addr)); 12778 new_addr = scratch; 12779 } 12780 } 12781 12782 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1)) 12783 { 12784 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0) 12785 || !quad_address_p (addr, mode, false)) 12786 { 12787 emit_insn (gen_rtx_SET (scratch, addr)); 12788 new_addr = scratch; 12789 } 12790 } 12791 12792 /* Make sure the register class can handle offset addresses. 
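If RELOAD_REG_OFFSET is not set in the address mask for this class, copy the whole reg+offset address into the scratch base register and address the memory indirectly through it.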
*/ 12793 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) 12794 { 12795 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12796 { 12797 emit_insn (gen_rtx_SET (scratch, addr)); 12798 new_addr = scratch; 12799 } 12800 } 12801 12802 else 12803 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12804 12805 break; 12806 12807 case LO_SUM: 12808 op0 = XEXP (addr, 0); 12809 op1 = XEXP (addr, 1); 12810 if (!base_reg_operand (op0, Pmode)) 12811 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12812 12813 else if (int_reg_operand (op1, Pmode)) 12814 { 12815 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 12816 { 12817 emit_insn (gen_rtx_SET (scratch, addr)); 12818 new_addr = scratch; 12819 } 12820 } 12821 12822 /* Quad offsets are restricted and can't handle normal addresses. */ 12823 else if (mode_supports_dq_form (mode)) 12824 { 12825 emit_insn (gen_rtx_SET (scratch, addr)); 12826 new_addr = scratch; 12827 } 12828 12829 /* Make sure the register class can handle offset addresses. */ 12830 else if (legitimate_lo_sum_address_p (mode, addr, false)) 12831 { 12832 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12833 { 12834 emit_insn (gen_rtx_SET (scratch, addr)); 12835 new_addr = scratch; 12836 } 12837 } 12838 12839 else 12840 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12841 12842 break; 12843 12844 case SYMBOL_REF: 12845 case CONST: 12846 case LABEL_REF: 12847 rs6000_emit_move (scratch, addr, Pmode); 12848 new_addr = scratch; 12849 break; 12850 12851 default: 12852 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12853 } 12854 12855 /* Adjust the address if it changed. */ 12856 if (addr != new_addr) 12857 { 12858 mem = replace_equiv_address_nv (mem, new_addr); 12859 if (TARGET_DEBUG_ADDR) 12860 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); 12861 } 12862 12863 /* Now create the move. */ 12864 if (store_p) 12865 emit_insn (gen_rtx_SET (mem, reg)); 12866 else 12867 emit_insn (gen_rtx_SET (reg, mem)); 12868 12869 return; 12870} 12871 12872/* Convert reloads involving 64-bit gprs and misaligned offset 12873 addressing, or multiple 32-bit gprs and offsets that are too large, 12874 to use indirect addressing. */ 12875 12876void 12877rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p) 12878{ 12879 int regno = true_regnum (reg); 12880 enum reg_class rclass; 12881 rtx addr; 12882 rtx scratch_or_premodify = scratch; 12883 12884 if (TARGET_DEBUG_ADDR) 12885 { 12886 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n", 12887 store_p ? 
"store" : "load"); 12888 fprintf (stderr, "reg:\n"); 12889 debug_rtx (reg); 12890 fprintf (stderr, "mem:\n"); 12891 debug_rtx (mem); 12892 fprintf (stderr, "scratch:\n"); 12893 debug_rtx (scratch); 12894 } 12895 12896 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno)); 12897 gcc_assert (MEM_P (mem)); 12898 rclass = REGNO_REG_CLASS (regno); 12899 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS); 12900 addr = XEXP (mem, 0); 12901 12902 if (GET_CODE (addr) == PRE_MODIFY) 12903 { 12904 gcc_assert (REG_P (XEXP (addr, 0)) 12905 && GET_CODE (XEXP (addr, 1)) == PLUS 12906 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0)); 12907 scratch_or_premodify = XEXP (addr, 0); 12908 addr = XEXP (addr, 1); 12909 } 12910 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); 12911 12912 rs6000_emit_move (scratch_or_premodify, addr, Pmode); 12913 12914 mem = replace_equiv_address_nv (mem, scratch_or_premodify); 12915 12916 /* Now create the move. */ 12917 if (store_p) 12918 emit_insn (gen_rtx_SET (mem, reg)); 12919 else 12920 emit_insn (gen_rtx_SET (reg, mem)); 12921 12922 return; 12923} 12924 12925/* Given an rtx X being reloaded into a reg required to be 12926 in class CLASS, return the class of reg to actually use. 12927 In general this is just CLASS; but on some machines 12928 in some cases it is preferable to use a more restrictive class. 12929 12930 On the RS/6000, we have to return NO_REGS when we want to reload a 12931 floating-point CONST_DOUBLE to force it to be copied to memory. 12932 12933 We also don't want to reload integer values into floating-point 12934 registers if we can at all help it. In fact, this can 12935 cause reload to die, if it tries to generate a reload of CTR 12936 into a FP register and discovers it doesn't have the memory location 12937 required. 12938 12939 ??? Would it be a good idea to have reload do the converse, that is 12940 try to reload floating modes into FP registers if possible? 12941 */ 12942 12943static enum reg_class 12944rs6000_preferred_reload_class (rtx x, enum reg_class rclass) 12945{ 12946 machine_mode mode = GET_MODE (x); 12947 bool is_constant = CONSTANT_P (x); 12948 12949 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred 12950 reload class for it. */ 12951 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS) 12952 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0) 12953 return NO_REGS; 12954 12955 if ((rclass == FLOAT_REGS || rclass == VSX_REGS) 12956 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0) 12957 return NO_REGS; 12958 12959 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow 12960 the reloading of address expressions using PLUS into floating point 12961 registers. */ 12962 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS) 12963 { 12964 if (is_constant) 12965 { 12966 /* Zero is always allowed in all VSX registers. */ 12967 if (x == CONST0_RTX (mode)) 12968 return rclass; 12969 12970 /* If this is a vector constant that can be formed with a few Altivec 12971 instructions, we want altivec registers. */ 12972 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) 12973 return ALTIVEC_REGS; 12974 12975 /* If this is an integer constant that can easily be loaded into 12976 vector registers, allow it. */ 12977 if (CONST_INT_P (x)) 12978 { 12979 HOST_WIDE_INT value = INTVAL (x); 12980 12981 /* ISA 2.07 can generate -1 in all registers with XXLORC. 
ISA 12982 2.06 can generate it in the Altivec registers with 12983 VSPLTI<x>. */ 12984 if (value == -1) 12985 { 12986 if (TARGET_P8_VECTOR) 12987 return rclass; 12988 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS) 12989 return ALTIVEC_REGS; 12990 else 12991 return NO_REGS; 12992 } 12993 12994 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and 12995 a sign extend in the Altivec registers. */ 12996 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR 12997 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) 12998 return ALTIVEC_REGS; 12999 } 13000 13001 /* Force constant to memory. */ 13002 return NO_REGS; 13003 } 13004 13005 /* D-form addressing can easily reload the value. */ 13006 if (mode_supports_vmx_dform (mode) 13007 || mode_supports_dq_form (mode)) 13008 return rclass; 13009 13010 /* If this is a scalar floating point value and we don't have D-form 13011 addressing, prefer the traditional floating point registers so that we 13012 can use D-form (register+offset) addressing. */ 13013 if (rclass == VSX_REGS 13014 && (mode == SFmode || GET_MODE_SIZE (mode) == 8)) 13015 return FLOAT_REGS; 13016 13017 /* Prefer the Altivec registers if Altivec is handling the vector 13018 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec 13019 loads. */ 13020 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) 13021 || mode == V1TImode) 13022 return ALTIVEC_REGS; 13023 13024 return rclass; 13025 } 13026 13027 if (is_constant || GET_CODE (x) == PLUS) 13028 { 13029 if (reg_class_subset_p (GENERAL_REGS, rclass)) 13030 return GENERAL_REGS; 13031 if (reg_class_subset_p (BASE_REGS, rclass)) 13032 return BASE_REGS; 13033 return NO_REGS; 13034 } 13035 13036 /* For the vector pair and vector quad modes, prefer their natural register 13037 (VSX or FPR) rather than GPR registers. For other integer types, prefer 13038 the GPR registers. */ 13039 if (rclass == GEN_OR_FLOAT_REGS) 13040 { 13041 if (mode == OOmode) 13042 return VSX_REGS; 13043 13044 if (mode == XOmode) 13045 return FLOAT_REGS; 13046 13047 if (GET_MODE_CLASS (mode) == MODE_INT) 13048 return GENERAL_REGS; 13049 } 13050 13051 return rclass; 13052} 13053 13054/* Debug version of rs6000_preferred_reload_class. */ 13055static enum reg_class 13056rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) 13057{ 13058 enum reg_class ret = rs6000_preferred_reload_class (x, rclass); 13059 13060 fprintf (stderr, 13061 "\nrs6000_preferred_reload_class, return %s, rclass = %s, " 13062 "mode = %s, x:\n", 13063 reg_class_names[ret], reg_class_names[rclass], 13064 GET_MODE_NAME (GET_MODE (x))); 13065 debug_rtx (x); 13066 13067 return ret; 13068} 13069 13070/* If we are copying between FP or AltiVec registers and anything else, we need 13071 a memory location. The exception is when we are targeting ppc64 and the 13072 move to/from fpr to gpr instructions are available. Also, under VSX, you 13073 can copy vector registers from the FP register set to the Altivec register 13074 set and vice versa. 
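The direct move check below covers those register-to-register cases; memory is reported as needed only when one side is a floating point or vector register type and no such move exists.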
*/ 13075 13076static bool 13077rs6000_secondary_memory_needed (machine_mode mode, 13078 reg_class_t from_class, 13079 reg_class_t to_class) 13080{ 13081 enum rs6000_reg_type from_type, to_type; 13082 bool altivec_p = ((from_class == ALTIVEC_REGS) 13083 || (to_class == ALTIVEC_REGS)); 13084 13085 /* If a simple/direct move is available, we don't need secondary memory */ 13086 from_type = reg_class_to_reg_type[(int)from_class]; 13087 to_type = reg_class_to_reg_type[(int)to_class]; 13088 13089 if (rs6000_secondary_reload_move (to_type, from_type, mode, 13090 (secondary_reload_info *)0, altivec_p)) 13091 return false; 13092 13093 /* If we have a floating point or vector register class, we need to use 13094 memory to transfer the data. */ 13095 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) 13096 return true; 13097 13098 return false; 13099} 13100 13101/* Debug version of rs6000_secondary_memory_needed. */ 13102static bool 13103rs6000_debug_secondary_memory_needed (machine_mode mode, 13104 reg_class_t from_class, 13105 reg_class_t to_class) 13106{ 13107 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class); 13108 13109 fprintf (stderr, 13110 "rs6000_secondary_memory_needed, return: %s, from_class = %s, " 13111 "to_class = %s, mode = %s\n", 13112 ret ? "true" : "false", 13113 reg_class_names[from_class], 13114 reg_class_names[to_class], 13115 GET_MODE_NAME (mode)); 13116 13117 return ret; 13118} 13119 13120/* Return the register class of a scratch register needed to copy IN into 13121 or out of a register in RCLASS in MODE. If it can be done directly, 13122 NO_REGS is returned. */ 13123 13124static enum reg_class 13125rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode, 13126 rtx in) 13127{ 13128 int regno; 13129 13130 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN 13131#if TARGET_MACHO 13132 && MACHOPIC_INDIRECT 13133#endif 13134 )) 13135 { 13136 /* We cannot copy a symbolic operand directly into anything 13137 other than BASE_REGS for TARGET_ELF. So indicate that a 13138 register from BASE_REGS is needed as an intermediate 13139 register. 13140 13141 On Darwin, pic addresses require a load from memory, which 13142 needs a base register. */ 13143 if (rclass != BASE_REGS 13144 && (SYMBOL_REF_P (in) 13145 || GET_CODE (in) == HIGH 13146 || GET_CODE (in) == LABEL_REF 13147 || GET_CODE (in) == CONST)) 13148 return BASE_REGS; 13149 } 13150 13151 if (REG_P (in)) 13152 { 13153 regno = REGNO (in); 13154 if (!HARD_REGISTER_NUM_P (regno)) 13155 { 13156 regno = true_regnum (in); 13157 if (!HARD_REGISTER_NUM_P (regno)) 13158 regno = -1; 13159 } 13160 } 13161 else if (SUBREG_P (in)) 13162 { 13163 regno = true_regnum (in); 13164 if (!HARD_REGISTER_NUM_P (regno)) 13165 regno = -1; 13166 } 13167 else 13168 regno = -1; 13169 13170 /* If we have VSX register moves, prefer moving scalar values between 13171 Altivec registers and GPR by going via an FPR (and then via memory) 13172 instead of reloading the secondary memory address for Altivec moves. */ 13173 if (TARGET_VSX 13174 && GET_MODE_SIZE (mode) < 16 13175 && !mode_supports_vmx_dform (mode) 13176 && (((rclass == GENERAL_REGS || rclass == BASE_REGS) 13177 && (regno >= 0 && ALTIVEC_REGNO_P (regno))) 13178 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS) 13179 && (regno >= 0 && INT_REGNO_P (regno))))) 13180 return FLOAT_REGS; 13181 13182 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS 13183 into anything. 
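Such copies therefore need no intermediate register, so return NO_REGS.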
*/ 13184 if (rclass == GENERAL_REGS || rclass == BASE_REGS 13185 || (regno >= 0 && INT_REGNO_P (regno))) 13186 return NO_REGS; 13187 13188 /* Constants, memory, and VSX registers can go into VSX registers (both the 13189 traditional floating point and the altivec registers). */ 13190 if (rclass == VSX_REGS 13191 && (regno == -1 || VSX_REGNO_P (regno))) 13192 return NO_REGS; 13193 13194 /* Constants, memory, and FP registers can go into FP registers. */ 13195 if ((regno == -1 || FP_REGNO_P (regno)) 13196 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS)) 13197 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; 13198 13199 /* Memory, and AltiVec registers can go into AltiVec registers. */ 13200 if ((regno == -1 || ALTIVEC_REGNO_P (regno)) 13201 && rclass == ALTIVEC_REGS) 13202 return NO_REGS; 13203 13204 /* We can copy among the CR registers. */ 13205 if ((rclass == CR_REGS || rclass == CR0_REGS) 13206 && regno >= 0 && CR_REGNO_P (regno)) 13207 return NO_REGS; 13208 13209 /* Otherwise, we need GENERAL_REGS. */ 13210 return GENERAL_REGS; 13211} 13212 13213/* Debug version of rs6000_secondary_reload_class. */ 13214static enum reg_class 13215rs6000_debug_secondary_reload_class (enum reg_class rclass, 13216 machine_mode mode, rtx in) 13217{ 13218 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in); 13219 fprintf (stderr, 13220 "\nrs6000_secondary_reload_class, return %s, rclass = %s, " 13221 "mode = %s, input rtx:\n", 13222 reg_class_names[ret], reg_class_names[rclass], 13223 GET_MODE_NAME (mode)); 13224 debug_rtx (in); 13225 13226 return ret; 13227} 13228 13229/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 13230 13231static bool 13232rs6000_can_change_mode_class (machine_mode from, 13233 machine_mode to, 13234 reg_class_t rclass) 13235{ 13236 unsigned from_size = GET_MODE_SIZE (from); 13237 unsigned to_size = GET_MODE_SIZE (to); 13238 13239 if (from_size != to_size) 13240 { 13241 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; 13242 13243 if (reg_classes_intersect_p (xclass, rclass)) 13244 { 13245 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to); 13246 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from); 13247 bool to_float128_vector_p = FLOAT128_VECTOR_P (to); 13248 bool from_float128_vector_p = FLOAT128_VECTOR_P (from); 13249 13250 /* Don't allow 64-bit types to overlap with 128-bit types that take a 13251 single register under VSX because the scalar part of the register 13252 is in the upper 64-bits, and not the lower 64-bits. Types like 13253 TFmode/TDmode that take 2 scalar register can overlap. 128-bit 13254 IEEE floating point can't overlap, and neither can small 13255 values. */ 13256 13257 if (to_float128_vector_p && from_float128_vector_p) 13258 return true; 13259 13260 else if (to_float128_vector_p || from_float128_vector_p) 13261 return false; 13262 13263 /* TDmode in floating-mode registers must always go into a register 13264 pair with the most significant word in the even-numbered register 13265 to match ISA requirements. In little-endian mode, this does not 13266 match subreg numbering, so we cannot allow subregs. */ 13267 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode)) 13268 return false; 13269 13270 /* Allow SD<->DD changes, since SDmode values are stored in 13271 the low half of the DDmode, just like target-independent 13272 code expects. We need to allow at least SD->DD since 13273 rs6000_secondary_memory_needed_mode asks for that change 13274 to be made for SD reloads. 
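Beyond those cases, both modes must be at least register sized (8 bytes), and an 8-byte mode may only overlap a larger mode that exactly fills whole 8-byte registers.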
*/ 13275 if ((to == DDmode && from == SDmode) 13276 || (to == SDmode && from == DDmode)) 13277 return true; 13278 13279 if (from_size < 8 || to_size < 8) 13280 return false; 13281 13282 if (from_size == 8 && (8 * to_nregs) != to_size) 13283 return false; 13284 13285 if (to_size == 8 && (8 * from_nregs) != from_size) 13286 return false; 13287 13288 return true; 13289 } 13290 else 13291 return true; 13292 } 13293 13294 /* Since the VSX register set includes traditional floating point registers 13295 and altivec registers, just check for the size being different instead of 13296 trying to check whether the modes are vector modes. Otherwise it won't 13297 allow say DF and DI to change classes. For types like TFmode and TDmode 13298 that take 2 64-bit registers, rather than a single 128-bit register, don't 13299 allow subregs of those types to other 128 bit types. */ 13300 if (TARGET_VSX && VSX_REG_CLASS_P (rclass)) 13301 { 13302 unsigned num_regs = (from_size + 15) / 16; 13303 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs 13304 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs) 13305 return false; 13306 13307 return (from_size == 8 || from_size == 16); 13308 } 13309 13310 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS 13311 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1) 13312 return false; 13313 13314 return true; 13315} 13316 13317/* Debug version of rs6000_can_change_mode_class. */ 13318static bool 13319rs6000_debug_can_change_mode_class (machine_mode from, 13320 machine_mode to, 13321 reg_class_t rclass) 13322{ 13323 bool ret = rs6000_can_change_mode_class (from, to, rclass); 13324 13325 fprintf (stderr, 13326 "rs6000_can_change_mode_class, return %s, from = %s, " 13327 "to = %s, rclass = %s\n", 13328 ret ? "true" : "false", 13329 GET_MODE_NAME (from), GET_MODE_NAME (to), 13330 reg_class_names[rclass]); 13331 13332 return ret; 13333} 13334 13335/* Return a string to do a move operation of 128 bits of data. */ 13336 13337const char * 13338rs6000_output_move_128bit (rtx operands[]) 13339{ 13340 rtx dest = operands[0]; 13341 rtx src = operands[1]; 13342 machine_mode mode = GET_MODE (dest); 13343 int dest_regno; 13344 int src_regno; 13345 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; 13346 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; 13347 13348 if (REG_P (dest)) 13349 { 13350 dest_regno = REGNO (dest); 13351 dest_gpr_p = INT_REGNO_P (dest_regno); 13352 dest_fp_p = FP_REGNO_P (dest_regno); 13353 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); 13354 dest_vsx_p = dest_fp_p | dest_vmx_p; 13355 } 13356 else 13357 { 13358 dest_regno = -1; 13359 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; 13360 } 13361 13362 if (REG_P (src)) 13363 { 13364 src_regno = REGNO (src); 13365 src_gpr_p = INT_REGNO_P (src_regno); 13366 src_fp_p = FP_REGNO_P (src_regno); 13367 src_vmx_p = ALTIVEC_REGNO_P (src_regno); 13368 src_vsx_p = src_fp_p | src_vmx_p; 13369 } 13370 else 13371 { 13372 src_regno = -1; 13373 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; 13374 } 13375 13376 /* Register moves. */ 13377 if (dest_regno >= 0 && src_regno >= 0) 13378 { 13379 if (dest_gpr_p) 13380 { 13381 if (src_gpr_p) 13382 return "#"; 13383 13384 if (TARGET_DIRECT_MOVE_128 && src_vsx_p) 13385 return (WORDS_BIG_ENDIAN 13386 ? 
"mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" 13387 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); 13388 13389 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) 13390 return "#"; 13391 } 13392 13393 else if (TARGET_VSX && dest_vsx_p) 13394 { 13395 if (src_vsx_p) 13396 return "xxlor %x0,%x1,%x1"; 13397 13398 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) 13399 return (WORDS_BIG_ENDIAN 13400 ? "mtvsrdd %x0,%1,%L1" 13401 : "mtvsrdd %x0,%L1,%1"); 13402 13403 else if (TARGET_DIRECT_MOVE && src_gpr_p) 13404 return "#"; 13405 } 13406 13407 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) 13408 return "vor %0,%1,%1"; 13409 13410 else if (dest_fp_p && src_fp_p) 13411 return "#"; 13412 } 13413 13414 /* Loads. */ 13415 else if (dest_regno >= 0 && MEM_P (src)) 13416 { 13417 if (dest_gpr_p) 13418 { 13419 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) 13420 return "lq %0,%1"; 13421 else 13422 return "#"; 13423 } 13424 13425 else if (TARGET_ALTIVEC && dest_vmx_p 13426 && altivec_indexed_or_indirect_operand (src, mode)) 13427 return "lvx %0,%y1"; 13428 13429 else if (TARGET_VSX && dest_vsx_p) 13430 { 13431 if (mode_supports_dq_form (mode) 13432 && quad_address_p (XEXP (src, 0), mode, true)) 13433 return "lxv %x0,%1"; 13434 13435 else if (TARGET_P9_VECTOR) 13436 return "lxvx %x0,%y1"; 13437 13438 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) 13439 return "lxvw4x %x0,%y1"; 13440 13441 else 13442 return "lxvd2x %x0,%y1"; 13443 } 13444 13445 else if (TARGET_ALTIVEC && dest_vmx_p) 13446 return "lvx %0,%y1"; 13447 13448 else if (dest_fp_p) 13449 return "#"; 13450 } 13451 13452 /* Stores. */ 13453 else if (src_regno >= 0 && MEM_P (dest)) 13454 { 13455 if (src_gpr_p) 13456 { 13457 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) 13458 return "stq %1,%0"; 13459 else 13460 return "#"; 13461 } 13462 13463 else if (TARGET_ALTIVEC && src_vmx_p 13464 && altivec_indexed_or_indirect_operand (dest, mode)) 13465 return "stvx %1,%y0"; 13466 13467 else if (TARGET_VSX && src_vsx_p) 13468 { 13469 if (mode_supports_dq_form (mode) 13470 && quad_address_p (XEXP (dest, 0), mode, true)) 13471 return "stxv %x1,%0"; 13472 13473 else if (TARGET_P9_VECTOR) 13474 return "stxvx %x1,%y0"; 13475 13476 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) 13477 return "stxvw4x %x1,%y0"; 13478 13479 else 13480 return "stxvd2x %x1,%y0"; 13481 } 13482 13483 else if (TARGET_ALTIVEC && src_vmx_p) 13484 return "stvx %1,%y0"; 13485 13486 else if (src_fp_p) 13487 return "#"; 13488 } 13489 13490 /* Constants. */ 13491 else if (dest_regno >= 0 13492 && (CONST_INT_P (src) 13493 || CONST_WIDE_INT_P (src) 13494 || CONST_DOUBLE_P (src) 13495 || GET_CODE (src) == CONST_VECTOR)) 13496 { 13497 if (dest_gpr_p) 13498 return "#"; 13499 13500 else if ((dest_vmx_p && TARGET_ALTIVEC) 13501 || (dest_vsx_p && TARGET_VSX)) 13502 return output_vec_const_move (operands); 13503 } 13504 13505 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); 13506} 13507 13508/* Validate a 128-bit move. */ 13509bool 13510rs6000_move_128bit_ok_p (rtx operands[]) 13511{ 13512 machine_mode mode = GET_MODE (operands[0]); 13513 return (gpc_reg_operand (operands[0], mode) 13514 || gpc_reg_operand (operands[1], mode)); 13515} 13516 13517/* Return true if a 128-bit move needs to be split. 
*/ 13518bool 13519rs6000_split_128bit_ok_p (rtx operands[]) 13520{ 13521 if (!reload_completed) 13522 return false; 13523 13524 if (!gpr_or_gpr_p (operands[0], operands[1])) 13525 return false; 13526 13527 if (quad_load_store_p (operands[0], operands[1])) 13528 return false; 13529 13530 return true; 13531} 13532 13533 13534/* Given a comparison operation, return the bit number in CCR to test. We 13535 know this is a valid comparison. 13536 13537 SCC_P is 1 if this is for an scc. That means that %D will have been 13538 used instead of %C, so the bits will be in different places. 13539 13540 Return -1 if OP isn't a valid comparison for some reason. */ 13541 13542int 13543ccr_bit (rtx op, int scc_p) 13544{ 13545 enum rtx_code code = GET_CODE (op); 13546 machine_mode cc_mode; 13547 int cc_regnum; 13548 int base_bit; 13549 rtx reg; 13550 13551 if (!COMPARISON_P (op)) 13552 return -1; 13553 13554 reg = XEXP (op, 0); 13555 13556 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg))) 13557 return -1; 13558 13559 cc_mode = GET_MODE (reg); 13560 cc_regnum = REGNO (reg); 13561 base_bit = 4 * (cc_regnum - CR0_REGNO); 13562 13563 validate_condition_mode (code, cc_mode); 13564 13565 /* When generating a sCOND operation, only positive conditions are 13566 allowed. */ 13567 if (scc_p) 13568 switch (code) 13569 { 13570 case EQ: 13571 case GT: 13572 case LT: 13573 case UNORDERED: 13574 case GTU: 13575 case LTU: 13576 break; 13577 default: 13578 return -1; 13579 } 13580 13581 switch (code) 13582 { 13583 case NE: 13584 return scc_p ? base_bit + 3 : base_bit + 2; 13585 case EQ: 13586 return base_bit + 2; 13587 case GT: case GTU: case UNLE: 13588 return base_bit + 1; 13589 case LT: case LTU: case UNGE: 13590 return base_bit; 13591 case ORDERED: case UNORDERED: 13592 return base_bit + 3; 13593 13594 case GE: case GEU: 13595 /* If scc, we will have done a cror to put the bit in the 13596 unordered position. So test that bit. For integer, this is ! LT 13597 unless this is an scc insn. */ 13598 return scc_p ? base_bit + 3 : base_bit; 13599 13600 case LE: case LEU: 13601 return scc_p ? base_bit + 3 : base_bit + 1; 13602 13603 default: 13604 return -1; 13605 } 13606} 13607 13608/* Return the GOT register. */ 13609 13610rtx 13611rs6000_got_register (rtx value ATTRIBUTE_UNUSED) 13612{ 13613 /* The second flow pass currently (June 1999) can't update 13614 regs_ever_live without disturbing other parts of the compiler, so 13615 update it here to make the prolog/epilogue code happy. */ 13616 if (!can_create_pseudo_p () 13617 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) 13618 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true); 13619 13620 crtl->uses_pic_offset_table = 1; 13621 13622 return pic_offset_table_rtx; 13623} 13624 13625#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode) 13626 13627/* Write out a function code label. */ 13628 13629void 13630rs6000_output_function_entry (FILE *file, const char *fname) 13631{ 13632 if (fname[0] != '.') 13633 { 13634 switch (DEFAULT_ABI) 13635 { 13636 default: 13637 gcc_unreachable (); 13638 13639 case ABI_AIX: 13640 if (DOT_SYMBOLS) 13641 putc ('.', file); 13642 else 13643 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); 13644 break; 13645 13646 case ABI_ELFv2: 13647 case ABI_V4: 13648 case ABI_DARWIN: 13649 break; 13650 } 13651 } 13652 13653 RS6000_OUTPUT_BASENAME (file, fname); 13654} 13655 13656/* Print an operand. Recognize special options, documented below. 
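Each case letter below implements a '%<letter>' operand modifier used by the output templates in the machine description.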
*/ 13657 13658#if TARGET_ELF 13659/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is 13660 only introduced by the linker, when applying the sda21 13661 relocation. */ 13662#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel") 13663#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13) 13664#else 13665#define SMALL_DATA_RELOC "sda21" 13666#define SMALL_DATA_REG 0 13667#endif 13668 13669void 13670print_operand (FILE *file, rtx x, int code) 13671{ 13672 int i; 13673 unsigned HOST_WIDE_INT uval; 13674 13675 switch (code) 13676 { 13677 /* %a is output_address. */ 13678 13679 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise 13680 output_operand. */ 13681 13682 case 'A': 13683 /* Write the MMA accumulator number associated with VSX register X. */ 13684 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0) 13685 output_operand_lossage ("invalid %%A value"); 13686 else 13687 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4); 13688 return; 13689 13690 case 'D': 13691 /* Like 'J' but get to the GT bit only. */ 13692 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13693 { 13694 output_operand_lossage ("invalid %%D value"); 13695 return; 13696 } 13697 13698 /* Bit 1 is GT bit. */ 13699 i = 4 * (REGNO (x) - CR0_REGNO) + 1; 13700 13701 /* Add one for shift count in rlinm for scc. */ 13702 fprintf (file, "%d", i + 1); 13703 return; 13704 13705 case 'e': 13706 /* If the low 16 bits are 0, but some other bit is set, write 's'. */ 13707 if (! INT_P (x)) 13708 { 13709 output_operand_lossage ("invalid %%e value"); 13710 return; 13711 } 13712 13713 uval = INTVAL (x); 13714 if ((uval & 0xffff) == 0 && uval != 0) 13715 putc ('s', file); 13716 return; 13717 13718 case 'E': 13719 /* X is a CR register. Print the number of the EQ bit of the CR */ 13720 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13721 output_operand_lossage ("invalid %%E value"); 13722 else 13723 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2); 13724 return; 13725 13726 case 'f': 13727 /* X is a CR register. Print the shift count needed to move it 13728 to the high-order four bits. */ 13729 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13730 output_operand_lossage ("invalid %%f value"); 13731 else 13732 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO)); 13733 return; 13734 13735 case 'F': 13736 /* Similar, but print the count for the rotate in the opposite 13737 direction. */ 13738 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13739 output_operand_lossage ("invalid %%F value"); 13740 else 13741 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO)); 13742 return; 13743 13744 case 'G': 13745 /* X is a constant integer. If it is negative, print "m", 13746 otherwise print "z". This is to make an aze or ame insn. */ 13747 if (!CONST_INT_P (x)) 13748 output_operand_lossage ("invalid %%G value"); 13749 else if (INTVAL (x) >= 0) 13750 putc ('z', file); 13751 else 13752 putc ('m', file); 13753 return; 13754 13755 case 'h': 13756 /* If constant, output low-order five bits. Otherwise, write 13757 normally. */ 13758 if (INT_P (x)) 13759 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31); 13760 else 13761 print_operand (file, x, 0); 13762 return; 13763 13764 case 'H': 13765 /* If constant, output low-order six bits. Otherwise, write 13766 normally. */ 13767 if (INT_P (x)) 13768 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63); 13769 else 13770 print_operand (file, x, 0); 13771 return; 13772 13773 case 'I': 13774 /* Print `i' if this is a constant, else nothing. 
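This is typically used in templates such as "add%I2 %0,%1,%2" so that a constant operand selects the immediate form of the instruction.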
*/ 13775 if (INT_P (x)) 13776 putc ('i', file); 13777 return; 13778 13779 case 'j': 13780 /* Write the bit number in CCR for jump. */ 13781 i = ccr_bit (x, 0); 13782 if (i == -1) 13783 output_operand_lossage ("invalid %%j code"); 13784 else 13785 fprintf (file, "%d", i); 13786 return; 13787 13788 case 'J': 13789 /* Similar, but add one for shift count in rlinm for scc and pass 13790 scc flag to `ccr_bit'. */ 13791 i = ccr_bit (x, 1); 13792 if (i == -1) 13793 output_operand_lossage ("invalid %%J code"); 13794 else 13795 /* If we want bit 31, write a shift count of zero, not 32. */ 13796 fprintf (file, "%d", i == 31 ? 0 : i + 1); 13797 return; 13798 13799 case 'k': 13800 /* X must be a constant. Write the 1's complement of the 13801 constant. */ 13802 if (! INT_P (x)) 13803 output_operand_lossage ("invalid %%k value"); 13804 else 13805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); 13806 return; 13807 13808 case 'K': 13809 /* X must be a symbolic constant on ELF. Write an 13810 expression suitable for an 'addi' that adds in the low 16 13811 bits of the MEM. */ 13812 if (GET_CODE (x) == CONST) 13813 { 13814 if (GET_CODE (XEXP (x, 0)) != PLUS 13815 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0)) 13816 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) 13817 || !CONST_INT_P (XEXP (XEXP (x, 0), 1))) 13818 output_operand_lossage ("invalid %%K value"); 13819 } 13820 print_operand_address (file, x); 13821 fputs ("@l", file); 13822 return; 13823 13824 /* %l is output_asm_label. */ 13825 13826 case 'L': 13827 /* Write second word of DImode or DFmode reference. Works on register 13828 or non-indexed memory only. */ 13829 if (REG_P (x)) 13830 fputs (reg_names[REGNO (x) + 1], file); 13831 else if (MEM_P (x)) 13832 { 13833 machine_mode mode = GET_MODE (x); 13834 /* Handle possible auto-increment. Since it is pre-increment and 13835 we have already done it, we can just use an offset of word. */ 13836 if (GET_CODE (XEXP (x, 0)) == PRE_INC 13837 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 13838 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 13839 UNITS_PER_WORD)); 13840 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 13841 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 13842 UNITS_PER_WORD)); 13843 else 13844 output_address (mode, XEXP (adjust_address_nv (x, SImode, 13845 UNITS_PER_WORD), 13846 0)); 13847 13848 if (small_data_operand (x, GET_MODE (x))) 13849 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 13850 reg_names[SMALL_DATA_REG]); 13851 } 13852 return; 13853 13854 case 'N': /* Unused */ 13855 /* Write the number of elements in the vector times 4. */ 13856 if (GET_CODE (x) != PARALLEL) 13857 output_operand_lossage ("invalid %%N value"); 13858 else 13859 fprintf (file, "%d", XVECLEN (x, 0) * 4); 13860 return; 13861 13862 case 'O': /* Unused */ 13863 /* Similar, but subtract 1 first. */ 13864 if (GET_CODE (x) != PARALLEL) 13865 output_operand_lossage ("invalid %%O value"); 13866 else 13867 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4); 13868 return; 13869 13870 case 'p': 13871 /* X is a CONST_INT that is a power of two. Output the logarithm. */ 13872 if (! INT_P (x) 13873 || INTVAL (x) < 0 13874 || (i = exact_log2 (INTVAL (x))) < 0) 13875 output_operand_lossage ("invalid %%p value"); 13876 else 13877 fprintf (file, "%d", i); 13878 return; 13879 13880 case 'P': 13881 /* The operand must be an indirect memory reference. The result 13882 is the register name. 
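For example, for (mem (reg 9)) this prints just the name of register 9, with no offset or index.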
*/ 13883 if (!MEM_P (x) || !REG_P (XEXP (x, 0)) 13884 || REGNO (XEXP (x, 0)) >= 32) 13885 output_operand_lossage ("invalid %%P value"); 13886 else 13887 fputs (reg_names[REGNO (XEXP (x, 0))], file); 13888 return; 13889 13890 case 'q': 13891 /* This outputs the logical code corresponding to a boolean 13892 expression. The expression may have one or both operands 13893 negated (if one, only the first one). For condition register 13894 logical operations, it will also treat the negated 13895 CR codes as NOTs, but not handle NOTs of them. */ 13896 { 13897 const char *const *t = 0; 13898 const char *s; 13899 enum rtx_code code = GET_CODE (x); 13900 static const char * const tbl[3][3] = { 13901 { "and", "andc", "nor" }, 13902 { "or", "orc", "nand" }, 13903 { "xor", "eqv", "xor" } }; 13904 13905 if (code == AND) 13906 t = tbl[0]; 13907 else if (code == IOR) 13908 t = tbl[1]; 13909 else if (code == XOR) 13910 t = tbl[2]; 13911 else 13912 output_operand_lossage ("invalid %%q value"); 13913 13914 if (GET_CODE (XEXP (x, 0)) != NOT) 13915 s = t[0]; 13916 else 13917 { 13918 if (GET_CODE (XEXP (x, 1)) == NOT) 13919 s = t[2]; 13920 else 13921 s = t[1]; 13922 } 13923 13924 fputs (s, file); 13925 } 13926 return; 13927 13928 case 'Q': 13929 if (! TARGET_MFCRF) 13930 return; 13931 fputc (',', file); 13932 /* FALLTHRU */ 13933 13934 case 'R': 13935 /* X is a CR register. Print the mask for `mtcrf'. */ 13936 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13937 output_operand_lossage ("invalid %%R value"); 13938 else 13939 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO)); 13940 return; 13941 13942 case 's': 13943 /* Low 5 bits of 32 - value */ 13944 if (! INT_P (x)) 13945 output_operand_lossage ("invalid %%s value"); 13946 else 13947 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31); 13948 return; 13949 13950 case 't': 13951 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */ 13952 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13953 { 13954 output_operand_lossage ("invalid %%t value"); 13955 return; 13956 } 13957 13958 /* Bit 3 is OV bit. */ 13959 i = 4 * (REGNO (x) - CR0_REGNO) + 3; 13960 13961 /* If we want bit 31, write a shift count of zero, not 32. */ 13962 fprintf (file, "%d", i == 31 ? 0 : i + 1); 13963 return; 13964 13965 case 'T': 13966 /* Print the symbolic name of a branch target register. */ 13967 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 13968 x = XVECEXP (x, 0, 0); 13969 if (!REG_P (x) || (REGNO (x) != LR_REGNO 13970 && REGNO (x) != CTR_REGNO)) 13971 output_operand_lossage ("invalid %%T value"); 13972 else if (REGNO (x) == LR_REGNO) 13973 fputs ("lr", file); 13974 else 13975 fputs ("ctr", file); 13976 return; 13977 13978 case 'u': 13979 /* High-order or low-order 16 bits of constant, whichever is non-zero, 13980 for use in unsigned operand. */ 13981 if (! INT_P (x)) 13982 { 13983 output_operand_lossage ("invalid %%u value"); 13984 return; 13985 } 13986 13987 uval = INTVAL (x); 13988 if ((uval & 0xffff) == 0) 13989 uval >>= 16; 13990 13991 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff); 13992 return; 13993 13994 case 'v': 13995 /* High-order 16 bits of constant for use in signed operand. */ 13996 if (! INT_P (x)) 13997 output_operand_lossage ("invalid %%v value"); 13998 else 13999 fprintf (file, HOST_WIDE_INT_PRINT_HEX, 14000 (INTVAL (x) >> 16) & 0xffff); 14001 return; 14002 14003 case 'U': 14004 /* Print `u' if this has an auto-increment or auto-decrement. 
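This is commonly paired with '%X' in load/store templates to select the update (and indexed) form of the mnemonic, e.g. lwz becoming lwzu.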
*/ 14005 if (MEM_P (x) 14006 && (GET_CODE (XEXP (x, 0)) == PRE_INC 14007 || GET_CODE (XEXP (x, 0)) == PRE_DEC 14008 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)) 14009 putc ('u', file); 14010 return; 14011 14012 case 'V': 14013 /* Print the trap code for this operand. */ 14014 switch (GET_CODE (x)) 14015 { 14016 case EQ: 14017 fputs ("eq", file); /* 4 */ 14018 break; 14019 case NE: 14020 fputs ("ne", file); /* 24 */ 14021 break; 14022 case LT: 14023 fputs ("lt", file); /* 16 */ 14024 break; 14025 case LE: 14026 fputs ("le", file); /* 20 */ 14027 break; 14028 case GT: 14029 fputs ("gt", file); /* 8 */ 14030 break; 14031 case GE: 14032 fputs ("ge", file); /* 12 */ 14033 break; 14034 case LTU: 14035 fputs ("llt", file); /* 2 */ 14036 break; 14037 case LEU: 14038 fputs ("lle", file); /* 6 */ 14039 break; 14040 case GTU: 14041 fputs ("lgt", file); /* 1 */ 14042 break; 14043 case GEU: 14044 fputs ("lge", file); /* 5 */ 14045 break; 14046 default: 14047 output_operand_lossage ("invalid %%V value"); 14048 } 14049 break; 14050 14051 case 'w': 14052 /* If constant, low-order 16 bits of constant, signed. Otherwise, write 14053 normally. */ 14054 if (INT_P (x)) 14055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 14056 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000); 14057 else 14058 print_operand (file, x, 0); 14059 return; 14060 14061 case 'x': 14062 /* X is a FPR or Altivec register used in a VSX context. */ 14063 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x))) 14064 output_operand_lossage ("invalid %%x value"); 14065 else 14066 { 14067 int reg = REGNO (x); 14068 int vsx_reg = (FP_REGNO_P (reg) 14069 ? reg - 32 14070 : reg - FIRST_ALTIVEC_REGNO + 32); 14071 14072#ifdef TARGET_REGNAMES 14073 if (TARGET_REGNAMES) 14074 fprintf (file, "%%vs%d", vsx_reg); 14075 else 14076#endif 14077 fprintf (file, "%d", vsx_reg); 14078 } 14079 return; 14080 14081 case 'X': 14082 if (MEM_P (x) 14083 && (legitimate_indexed_address_p (XEXP (x, 0), 0) 14084 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY 14085 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0)))) 14086 putc ('x', file); 14087 return; 14088 14089 case 'Y': 14090 /* Like 'L', for third word of TImode/PTImode */ 14091 if (REG_P (x)) 14092 fputs (reg_names[REGNO (x) + 2], file); 14093 else if (MEM_P (x)) 14094 { 14095 machine_mode mode = GET_MODE (x); 14096 if (GET_CODE (XEXP (x, 0)) == PRE_INC 14097 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 14098 output_address (mode, plus_constant (Pmode, 14099 XEXP (XEXP (x, 0), 0), 8)); 14100 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 14101 output_address (mode, plus_constant (Pmode, 14102 XEXP (XEXP (x, 0), 0), 8)); 14103 else 14104 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0)); 14105 if (small_data_operand (x, GET_MODE (x))) 14106 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 14107 reg_names[SMALL_DATA_REG]); 14108 } 14109 return; 14110 14111 case 'z': 14112 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 14113 x = XVECEXP (x, 0, 1); 14114 /* X is a SYMBOL_REF. Write out the name preceded by a 14115 period and without any trailing data in brackets. Used for function 14116 names. If we are configured for System V (or the embedded ABI) on 14117 the PowerPC, do not emit the period, since those systems do not use 14118 TOCs and the like. */ 14119 if (!SYMBOL_REF_P (x)) 14120 { 14121 output_operand_lossage ("invalid %%z value"); 14122 return; 14123 } 14124 14125 /* For macho, check to see if we need a stub. 
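A function that is undefined locally and called through Mach-O indirect PIC must be referenced via its branch stub, so substitute the stub's name for the symbol.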
*/ 14126 if (TARGET_MACHO) 14127 { 14128 const char *name = XSTR (x, 0); 14129#if TARGET_MACHO 14130 if (darwin_symbol_stubs 14131 && MACHOPIC_INDIRECT 14132 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 14133 name = machopic_indirection_name (x, /*stub_p=*/true); 14134#endif 14135 assemble_name (file, name); 14136 } 14137 else if (!DOT_SYMBOLS) 14138 assemble_name (file, XSTR (x, 0)); 14139 else 14140 rs6000_output_function_entry (file, XSTR (x, 0)); 14141 return; 14142 14143 case 'Z': 14144 /* Like 'L', for last word of TImode/PTImode. */ 14145 if (REG_P (x)) 14146 fputs (reg_names[REGNO (x) + 3], file); 14147 else if (MEM_P (x)) 14148 { 14149 machine_mode mode = GET_MODE (x); 14150 if (GET_CODE (XEXP (x, 0)) == PRE_INC 14151 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 14152 output_address (mode, plus_constant (Pmode, 14153 XEXP (XEXP (x, 0), 0), 12)); 14154 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 14155 output_address (mode, plus_constant (Pmode, 14156 XEXP (XEXP (x, 0), 0), 12)); 14157 else 14158 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0)); 14159 if (small_data_operand (x, GET_MODE (x))) 14160 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 14161 reg_names[SMALL_DATA_REG]); 14162 } 14163 return; 14164 14165 /* Print AltiVec memory operand. */ 14166 case 'y': 14167 { 14168 rtx tmp; 14169 14170 gcc_assert (MEM_P (x)); 14171 14172 tmp = XEXP (x, 0); 14173 14174 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x)) 14175 && GET_CODE (tmp) == AND 14176 && CONST_INT_P (XEXP (tmp, 1)) 14177 && INTVAL (XEXP (tmp, 1)) == -16) 14178 tmp = XEXP (tmp, 0); 14179 else if (VECTOR_MEM_VSX_P (GET_MODE (x)) 14180 && GET_CODE (tmp) == PRE_MODIFY) 14181 tmp = XEXP (tmp, 1); 14182 if (REG_P (tmp)) 14183 fprintf (file, "0,%s", reg_names[REGNO (tmp)]); 14184 else 14185 { 14186 if (GET_CODE (tmp) != PLUS 14187 || !REG_P (XEXP (tmp, 0)) 14188 || !REG_P (XEXP (tmp, 1))) 14189 { 14190 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint"); 14191 break; 14192 } 14193 14194 if (REGNO (XEXP (tmp, 0)) == 0) 14195 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ], 14196 reg_names[ REGNO (XEXP (tmp, 0)) ]); 14197 else 14198 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ], 14199 reg_names[ REGNO (XEXP (tmp, 1)) ]); 14200 } 14201 break; 14202 } 14203 14204 case 0: 14205 if (REG_P (x)) 14206 fprintf (file, "%s", reg_names[REGNO (x)]); 14207 else if (MEM_P (x)) 14208 { 14209 /* We need to handle PRE_INC and PRE_DEC here, since we need to 14210 know the width from the mode. */ 14211 if (GET_CODE (XEXP (x, 0)) == PRE_INC) 14212 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), 14213 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); 14214 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC) 14215 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)), 14216 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); 14217 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 14218 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); 14219 else 14220 output_address (GET_MODE (x), XEXP (x, 0)); 14221 } 14222 else if (toc_relative_expr_p (x, false, 14223 &tocrel_base_oac, &tocrel_offset_oac)) 14224 /* This hack along with a corresponding hack in 14225 rs6000_output_addr_const_extra arranges to output addends 14226 where the assembler expects to find them. eg. 14227 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4) 14228 without this hack would be output as "x@toc+4". We 14229 want "x+4@toc". 
*/ 14230 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac)); 14231 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD) 14232 output_addr_const (file, XVECEXP (x, 0, 0)); 14233 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 14234 output_addr_const (file, XVECEXP (x, 0, 1)); 14235 else 14236 output_addr_const (file, x); 14237 return; 14238 14239 case '&': 14240 if (const char *name = get_some_local_dynamic_name ()) 14241 assemble_name (file, name); 14242 else 14243 output_operand_lossage ("'%%&' used without any " 14244 "local dynamic TLS references"); 14245 return; 14246 14247 default: 14248 output_operand_lossage ("invalid %%xn code"); 14249 } 14250} 14251 14252/* Print the address of an operand. */ 14253 14254void 14255print_operand_address (FILE *file, rtx x) 14256{ 14257 if (REG_P (x)) 14258 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]); 14259 14260 /* Is it a PC-relative address? */ 14261 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode)) 14262 { 14263 HOST_WIDE_INT offset; 14264 14265 if (GET_CODE (x) == CONST) 14266 x = XEXP (x, 0); 14267 14268 if (GET_CODE (x) == PLUS) 14269 { 14270 offset = INTVAL (XEXP (x, 1)); 14271 x = XEXP (x, 0); 14272 } 14273 else 14274 offset = 0; 14275 14276 output_addr_const (file, x); 14277 14278 if (offset) 14279 fprintf (file, "%+" PRId64, offset); 14280 14281 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x)) 14282 fprintf (file, "@got"); 14283 14284 fprintf (file, "@pcrel"); 14285 } 14286 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST 14287 || GET_CODE (x) == LABEL_REF) 14288 { 14289 output_addr_const (file, x); 14290 if (small_data_operand (x, GET_MODE (x))) 14291 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 14292 reg_names[SMALL_DATA_REG]); 14293 else 14294 gcc_assert (!TARGET_TOC); 14295 } 14296 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) 14297 && REG_P (XEXP (x, 1))) 14298 { 14299 if (REGNO (XEXP (x, 0)) == 0) 14300 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ], 14301 reg_names[ REGNO (XEXP (x, 0)) ]); 14302 else 14303 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ], 14304 reg_names[ REGNO (XEXP (x, 1)) ]); 14305 } 14306 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) 14307 && CONST_INT_P (XEXP (x, 1))) 14308 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", 14309 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]); 14310#if TARGET_MACHO 14311 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) 14312 && CONSTANT_P (XEXP (x, 1))) 14313 { 14314 fprintf (file, "lo16("); 14315 output_addr_const (file, XEXP (x, 1)); 14316 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); 14317 } 14318#endif 14319#if TARGET_ELF 14320 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) 14321 && CONSTANT_P (XEXP (x, 1))) 14322 { 14323 output_addr_const (file, XEXP (x, 1)); 14324 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); 14325 } 14326#endif 14327 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac)) 14328 { 14329 /* This hack along with a corresponding hack in 14330 rs6000_output_addr_const_extra arranges to output addends 14331 where the assembler expects to find them. eg. 14332 (lo_sum (reg 9) 14333 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8)) 14334 without this hack would be output as "x@toc+8@l(9)". We 14335 want "x+8@toc@l(9)". 
*/ 14336 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac)); 14337 if (GET_CODE (x) == LO_SUM) 14338 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]); 14339 else 14340 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]); 14341 } 14342 else 14343 output_addr_const (file, x); 14344} 14345 14346/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ 14347 14348bool 14349rs6000_output_addr_const_extra (FILE *file, rtx x) 14350{ 14351 if (GET_CODE (x) == UNSPEC) 14352 switch (XINT (x, 1)) 14353 { 14354 case UNSPEC_TOCREL: 14355 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0)) 14356 && REG_P (XVECEXP (x, 0, 1)) 14357 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER); 14358 output_addr_const (file, XVECEXP (x, 0, 0)); 14359 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx) 14360 { 14361 if (INTVAL (tocrel_offset_oac) >= 0) 14362 fprintf (file, "+"); 14363 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac)); 14364 } 14365 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC)) 14366 { 14367 putc ('-', file); 14368 assemble_name (file, toc_label_name); 14369 need_toc_init = 1; 14370 } 14371 else if (TARGET_ELF) 14372 fputs ("@toc", file); 14373 return true; 14374 14375#if TARGET_MACHO 14376 case UNSPEC_MACHOPIC_OFFSET: 14377 output_addr_const (file, XVECEXP (x, 0, 0)); 14378 putc ('-', file); 14379 machopic_output_function_base_name (file); 14380 return true; 14381#endif 14382 } 14383 return false; 14384} 14385 14386/* Target hook for assembling integer objects. The PowerPC version has 14387 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP 14388 is defined. It also needs to handle DI-mode objects on 64-bit 14389 targets. */ 14390 14391static bool 14392rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p) 14393{ 14394#ifdef RELOCATABLE_NEEDS_FIXUP 14395 /* Special handling for SI values. */ 14396 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p) 14397 { 14398 static int recurse = 0; 14399 14400 /* For -mrelocatable, we mark all addresses that need to be fixed up in 14401 the .fixup section. Since the TOC section is already relocated, we 14402 don't need to mark it here. We used to skip the text section, but it 14403 should never be valid for relocated addresses to be placed in the text 14404 section. */ 14405 if (DEFAULT_ABI == ABI_V4 14406 && (TARGET_RELOCATABLE || flag_pic > 1) 14407 && in_section != toc_section 14408 && !recurse 14409 && !CONST_SCALAR_INT_P (x) 14410 && CONSTANT_P (x)) 14411 { 14412 char buf[256]; 14413 14414 recurse = 1; 14415 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno); 14416 fixuplabelno++; 14417 ASM_OUTPUT_LABEL (asm_out_file, buf); 14418 fprintf (asm_out_file, "\t.long\t("); 14419 output_addr_const (asm_out_file, x); 14420 fprintf (asm_out_file, ")@fixup\n"); 14421 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n"); 14422 ASM_OUTPUT_ALIGN (asm_out_file, 2); 14423 fprintf (asm_out_file, "\t.long\t"); 14424 assemble_name (asm_out_file, buf); 14425 fprintf (asm_out_file, "\n\t.previous\n"); 14426 recurse = 0; 14427 return true; 14428 } 14429 /* Remove initial .'s to turn a -mcall-aixdesc function 14430 address into the address of the descriptor, not the function 14431 itself. */ 14432 else if (SYMBOL_REF_P (x) 14433 && XSTR (x, 0)[0] == '.' 
14434 && DEFAULT_ABI == ABI_AIX) 14435 { 14436 const char *name = XSTR (x, 0); 14437 while (*name == '.') 14438 name++; 14439 14440 fprintf (asm_out_file, "\t.long\t%s\n", name); 14441 return true; 14442 } 14443 } 14444#endif /* RELOCATABLE_NEEDS_FIXUP */ 14445 return default_assemble_integer (x, size, aligned_p); 14446} 14447 14448/* Return a template string for assembly to emit when making an 14449 external call. FUNOP is the call mem argument operand number. */ 14450 14451static const char * 14452rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall) 14453{ 14454 /* -Wformat-overflow workaround, without which gcc thinks that %u 14455 might produce 10 digits. */ 14456 gcc_assert (funop <= MAX_RECOG_OPERANDS); 14457 14458 char arg[12]; 14459 arg[0] = 0; 14460 if (GET_CODE (operands[funop + 1]) == UNSPEC) 14461 { 14462 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD) 14463 sprintf (arg, "(%%%u@tlsgd)", funop + 1); 14464 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD) 14465 sprintf (arg, "(%%&@tlsld)"); 14466 } 14467 14468 /* The magic 32768 offset here corresponds to the offset of 14469 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */ 14470 char z[11]; 14471 sprintf (z, "%%z%u%s", funop, 14472 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2 14473 ? "+32768" : "")); 14474 14475 static char str[32]; /* 1 spare */ 14476 if (rs6000_pcrel_p ()) 14477 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg); 14478 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 14479 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg, 14480 sibcall ? "" : "\n\tnop"); 14481 else if (DEFAULT_ABI == ABI_V4) 14482 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg, 14483 flag_pic ? "@plt" : ""); 14484#if TARGET_MACHO 14485 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */ 14486 else if (DEFAULT_ABI == ABI_DARWIN) 14487 { 14488 /* The cookie is in operand func+2. */ 14489 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT); 14490 int cookie = INTVAL (operands[funop + 2]); 14491 if (cookie & CALL_LONG) 14492 { 14493 tree funname = get_identifier (XSTR (operands[funop], 0)); 14494 tree labelname = get_prev_label (funname); 14495 gcc_checking_assert (labelname && !sibcall); 14496 14497 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl' 14498 instruction will reach 'foo', otherwise link as 'bl L42'". 14499 "L42" should be a 'branch island', that will do a far jump to 14500 'foo'. Branch islands are generated in 14501 macho_branch_islands(). */ 14502 sprintf (str, "jbsr %%z%u,%.10s", funop, 14503 IDENTIFIER_POINTER (labelname)); 14504 } 14505 else 14506 /* Same as AIX or ELFv2, except to keep backwards compat, no nop 14507 after the call. */ 14508 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg); 14509 } 14510#endif 14511 else 14512 gcc_unreachable (); 14513 return str; 14514} 14515 14516const char * 14517rs6000_call_template (rtx *operands, unsigned int funop) 14518{ 14519 return rs6000_call_template_1 (operands, funop, false); 14520} 14521 14522const char * 14523rs6000_sibcall_template (rtx *operands, unsigned int funop) 14524{ 14525 return rs6000_call_template_1 (operands, funop, true); 14526} 14527 14528/* As above, for indirect calls. */ 14529 14530static const char * 14531rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop, 14532 bool sibcall) 14533{ 14534 /* -Wformat-overflow workaround, without which gcc thinks that %u 14535 might produce 10 digits. 
Note that -Wformat-overflow will not 14536 currently warn here for str[], so do not rely on a warning to 14537 ensure str[] is correctly sized. */ 14538 gcc_assert (funop <= MAX_RECOG_OPERANDS); 14539 14540 /* Currently, funop is either 0 or 1. The maximum string is always 14541 a !speculate 64-bit __tls_get_addr call. 14542 14543 ABI_ELFv2, pcrel: 14544 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14545 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t 14546 . 9 crset 2\n\t 14547 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14548 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t 14549 . 8 beq%T1l- 14550 .--- 14551 .142 14552 14553 ABI_AIX: 14554 . 9 ld 2,%3\n\t 14555 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14556 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t 14557 . 9 crset 2\n\t 14558 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14559 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t 14560 . 10 beq%T1l-\n\t 14561 . 10 ld 2,%4(1) 14562 .--- 14563 .151 14564 14565 ABI_ELFv2: 14566 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14567 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t 14568 . 9 crset 2\n\t 14569 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14570 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t 14571 . 10 beq%T1l-\n\t 14572 . 10 ld 2,%3(1) 14573 .--- 14574 .142 14575 14576 ABI_V4: 14577 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14578 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t 14579 . 9 crset 2\n\t 14580 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 14581 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t 14582 . 8 beq%T1l- 14583 .--- 14584 .141 */ 14585 static char str[160]; /* 8 spare */ 14586 char *s = str; 14587 const char *ptrload = TARGET_64BIT ? "d" : "wz"; 14588 14589 if (DEFAULT_ABI == ABI_AIX) 14590 s += sprintf (s, 14591 "l%s 2,%%%u\n\t", 14592 ptrload, funop + 3); 14593 14594 /* We don't need the extra code to stop indirect call speculation if 14595 calling via LR. */ 14596 bool speculate = (TARGET_MACHO 14597 || rs6000_speculate_indirect_jumps 14598 || (REG_P (operands[funop]) 14599 && REGNO (operands[funop]) == LR_REGNO)); 14600 14601 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC) 14602 { 14603 const char *rel64 = TARGET_64BIT ? "64" : ""; 14604 char tls[29]; 14605 tls[0] = 0; 14606 if (GET_CODE (operands[funop + 1]) == UNSPEC) 14607 { 14608 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD) 14609 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t", 14610 rel64, funop + 1); 14611 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD) 14612 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t", 14613 rel64); 14614 } 14615 14616 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : ""; 14617 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT 14618 && flag_pic == 2 ? 
"+32768" : ""); 14619 if (!speculate) 14620 { 14621 s += sprintf (s, 14622 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t", 14623 tls, rel64, notoc, funop, addend); 14624 s += sprintf (s, "crset 2\n\t"); 14625 } 14626 s += sprintf (s, 14627 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t", 14628 tls, rel64, notoc, funop, addend); 14629 } 14630 else if (!speculate) 14631 s += sprintf (s, "crset 2\n\t"); 14632 14633 if (rs6000_pcrel_p ()) 14634 { 14635 if (speculate) 14636 sprintf (s, "b%%T%ul", funop); 14637 else 14638 sprintf (s, "beq%%T%ul-", funop); 14639 } 14640 else if (DEFAULT_ABI == ABI_AIX) 14641 { 14642 if (speculate) 14643 sprintf (s, 14644 "b%%T%ul\n\t" 14645 "l%s 2,%%%u(1)", 14646 funop, ptrload, funop + 4); 14647 else 14648 sprintf (s, 14649 "beq%%T%ul-\n\t" 14650 "l%s 2,%%%u(1)", 14651 funop, ptrload, funop + 4); 14652 } 14653 else if (DEFAULT_ABI == ABI_ELFv2) 14654 { 14655 if (speculate) 14656 sprintf (s, 14657 "b%%T%ul\n\t" 14658 "l%s 2,%%%u(1)", 14659 funop, ptrload, funop + 3); 14660 else 14661 sprintf (s, 14662 "beq%%T%ul-\n\t" 14663 "l%s 2,%%%u(1)", 14664 funop, ptrload, funop + 3); 14665 } 14666 else 14667 { 14668 if (speculate) 14669 sprintf (s, 14670 "b%%T%u%s", 14671 funop, sibcall ? "" : "l"); 14672 else 14673 sprintf (s, 14674 "beq%%T%u%s-%s", 14675 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : ""); 14676 } 14677 return str; 14678} 14679 14680const char * 14681rs6000_indirect_call_template (rtx *operands, unsigned int funop) 14682{ 14683 return rs6000_indirect_call_template_1 (operands, funop, false); 14684} 14685 14686const char * 14687rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop) 14688{ 14689 return rs6000_indirect_call_template_1 (operands, funop, true); 14690} 14691 14692#if HAVE_AS_PLTSEQ 14693/* Output indirect call insns. WHICH identifies the type of sequence. */ 14694const char * 14695rs6000_pltseq_template (rtx *operands, int which) 14696{ 14697 const char *rel64 = TARGET_64BIT ? "64" : ""; 14698 char tls[30]; 14699 tls[0] = 0; 14700 if (GET_CODE (operands[3]) == UNSPEC) 14701 { 14702 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4'; 14703 if (XINT (operands[3], 1) == UNSPEC_TLSGD) 14704 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t", 14705 off, rel64); 14706 else if (XINT (operands[3], 1) == UNSPEC_TLSLD) 14707 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t", 14708 off, rel64); 14709 } 14710 14711 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4); 14712 static char str[96]; /* 10 spare */ 14713 char off = WORDS_BIG_ENDIAN ? '2' : '4'; 14714 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT 14715 && flag_pic == 2 ? "+32768" : ""); 14716 switch (which) 14717 { 14718 case RS6000_PLTSEQ_TOCSAVE: 14719 sprintf (str, 14720 "st%s\n\t" 14721 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2", 14722 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)", 14723 tls, rel64); 14724 break; 14725 case RS6000_PLTSEQ_PLT16_HA: 14726 if (DEFAULT_ABI == ABI_V4 && !flag_pic) 14727 sprintf (str, 14728 "lis %%0,0\n\t" 14729 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2", 14730 tls, off, rel64); 14731 else 14732 sprintf (str, 14733 "addis %%0,%%1,0\n\t" 14734 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s", 14735 tls, off, rel64, addend); 14736 break; 14737 case RS6000_PLTSEQ_PLT16_LO: 14738 sprintf (str, 14739 "l%s %%0,0(%%1)\n\t" 14740 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s", 14741 TARGET_64BIT ? "d" : "wz", 14742 tls, off, rel64, TARGET_64BIT ? 
"_DS" : "", addend); 14743 break; 14744 case RS6000_PLTSEQ_MTCTR: 14745 sprintf (str, 14746 "mtctr %%1\n\t" 14747 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s", 14748 tls, rel64, addend); 14749 break; 14750 case RS6000_PLTSEQ_PLT_PCREL34: 14751 sprintf (str, 14752 "pl%s %%0,0(0),1\n\t" 14753 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2", 14754 TARGET_64BIT ? "d" : "wz", 14755 tls, rel64); 14756 break; 14757 default: 14758 gcc_unreachable (); 14759 } 14760 return str; 14761} 14762#endif 14763 14764#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO 14765/* Emit an assembler directive to set symbol visibility for DECL to 14766 VISIBILITY_TYPE. */ 14767 14768static void 14769rs6000_assemble_visibility (tree decl, int vis) 14770{ 14771 if (TARGET_XCOFF) 14772 return; 14773 14774 /* Functions need to have their entry point symbol visibility set as 14775 well as their descriptor symbol visibility. */ 14776 if (DEFAULT_ABI == ABI_AIX 14777 && DOT_SYMBOLS 14778 && TREE_CODE (decl) == FUNCTION_DECL) 14779 { 14780 static const char * const visibility_types[] = { 14781 NULL, "protected", "hidden", "internal" 14782 }; 14783 14784 const char *name, *type; 14785 14786 name = ((* targetm.strip_name_encoding) 14787 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); 14788 type = visibility_types[vis]; 14789 14790 fprintf (asm_out_file, "\t.%s\t%s\n", type, name); 14791 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name); 14792 } 14793 else 14794 default_assemble_visibility (decl, vis); 14795} 14796#endif 14797 14798/* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function 14799 entry. If RECORD_P is true and the target supports named sections, 14800 the location of the NOPs will be recorded in a special object section 14801 called "__patchable_function_entries". This routine may be called 14802 twice per function to put NOPs before and after the function 14803 entry. */ 14804 14805void 14806rs6000_print_patchable_function_entry (FILE *file, 14807 unsigned HOST_WIDE_INT patch_area_size, 14808 bool record_p) 14809{ 14810 unsigned int flags = SECTION_WRITE | SECTION_RELRO; 14811 /* When .opd section is emitted, the function symbol 14812 default_print_patchable_function_entry_1 is emitted into the .opd section 14813 while the patchable area is emitted into the function section. 14814 Don't use SECTION_LINK_ORDER in that case. */ 14815 if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) 14816 && HAVE_GAS_SECTION_LINK_ORDER) 14817 flags |= SECTION_LINK_ORDER; 14818 default_print_patchable_function_entry_1 (file, patch_area_size, record_p, 14819 flags); 14820} 14821 14822enum rtx_code 14823rs6000_reverse_condition (machine_mode mode, enum rtx_code code) 14824{ 14825 /* Reversal of FP compares takes care -- an ordered compare 14826 becomes an unordered compare and vice versa. */ 14827 if (mode == CCFPmode 14828 && (!flag_finite_math_only 14829 || code == UNLT || code == UNLE || code == UNGT || code == UNGE 14830 || code == UNEQ || code == LTGT)) 14831 return reverse_condition_maybe_unordered (code); 14832 else 14833 return reverse_condition (code); 14834} 14835 14836/* Generate a compare for CODE. Return a brand-new rtx that 14837 represents the result of the compare. 
 */

static rtx
rs6000_generate_compare (rtx cmp, machine_mode mode)
{
  machine_mode comp_mode;
  rtx compare_result;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    comp_mode = CCmode;
  else if (FLOAT_MODE_P (mode))
    comp_mode = CCFPmode;
  else if (code == GTU || code == LTU
	   || code == GEU || code == LEU)
    comp_mode = CCUNSmode;
  else if ((code == EQ || code == NE)
	   && unsigned_reg_p (op0)
	   && (unsigned_reg_p (op1)
	       || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
    /* These are unsigned values, perhaps there will be a later
       ordering compare that can be shared with this one.  */
    comp_mode = CCUNSmode;
  else
    comp_mode = CCmode;

  /* If we have an unsigned compare, make sure we don't have a signed value as
     an immediate.  */
  if (comp_mode == CCUNSmode && CONST_INT_P (op1)
      && INTVAL (op1) < 0)
    {
      op0 = copy_rtx_if_shared (op0);
      op1 = force_reg (GET_MODE (op0), op1);
      cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
    }

  /* First, the compare.  */
  compare_result = gen_reg_rtx (comp_mode);

  /* IEEE 128-bit support in VSX registers when we do not have hardware
     support.  */
  if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
    {
      rtx libfunc = NULL_RTX;
      bool check_nan = false;
      rtx dest;

      switch (code)
	{
	case EQ:
	case NE:
	  libfunc = optab_libfunc (eq_optab, mode);
	  break;

	case GT:
	case GE:
	  libfunc = optab_libfunc (ge_optab, mode);
	  break;

	case LT:
	case LE:
	  libfunc = optab_libfunc (le_optab, mode);
	  break;

	case UNORDERED:
	case ORDERED:
	  libfunc = optab_libfunc (unord_optab, mode);
	  code = (code == UNORDERED) ? NE : EQ;
	  break;

	case UNGE:
	case UNGT:
	  check_nan = true;
	  libfunc = optab_libfunc (ge_optab, mode);
	  code = (code == UNGE) ? GE : GT;
	  break;

	case UNLE:
	case UNLT:
	  check_nan = true;
	  libfunc = optab_libfunc (le_optab, mode);
	  code = (code == UNLE) ? LE : LT;
	  break;

	case UNEQ:
	case LTGT:
	  check_nan = true;
	  libfunc = optab_libfunc (eq_optab, mode);
	  code = (code == UNEQ) ? EQ : NE;
	  break;

	default:
	  gcc_unreachable ();
	}

      gcc_assert (libfunc);

      if (!check_nan)
	dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
					SImode, op0, mode, op1, mode);

      /* The library signals an exception for signalling NaNs, so we need to
	 handle isgreater, etc. by first checking isordered.  */
      else
	{
	  rtx ne_rtx, normal_dest, unord_dest;
	  rtx unord_func = optab_libfunc (unord_optab, mode);
	  rtx join_label = gen_label_rtx ();
	  rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
	  rtx unord_cmp = gen_reg_rtx (comp_mode);

	  /* Test for either value being a NaN.  */
	  gcc_assert (unord_func);
	  unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
						SImode, op0, mode, op1, mode);

	  /* Set value (1) if either value is a NaN, and jump to the join
	     label.
*/ 14958 dest = gen_reg_rtx (SImode); 14959 emit_move_insn (dest, const1_rtx); 14960 emit_insn (gen_rtx_SET (unord_cmp, 14961 gen_rtx_COMPARE (comp_mode, unord_dest, 14962 const0_rtx))); 14963 14964 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx); 14965 emit_jump_insn (gen_rtx_SET (pc_rtx, 14966 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, 14967 join_ref, 14968 pc_rtx))); 14969 14970 /* Do the normal comparison, knowing that the values are not 14971 NaNs. */ 14972 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, 14973 SImode, op0, mode, op1, mode); 14974 14975 emit_insn (gen_cstoresi4 (dest, 14976 gen_rtx_fmt_ee (code, SImode, normal_dest, 14977 const0_rtx), 14978 normal_dest, const0_rtx)); 14979 14980 /* Join NaN and non-Nan paths. Compare dest against 0. */ 14981 emit_label (join_label); 14982 code = NE; 14983 } 14984 14985 emit_insn (gen_rtx_SET (compare_result, 14986 gen_rtx_COMPARE (comp_mode, dest, const0_rtx))); 14987 } 14988 14989 else 14990 { 14991 /* Generate XLC-compatible TFmode compare as PARALLEL with extra 14992 CLOBBERs to match cmptf_internal2 pattern. */ 14993 if (comp_mode == CCFPmode && TARGET_XL_COMPAT 14994 && FLOAT128_IBM_P (GET_MODE (op0)) 14995 && TARGET_HARD_FLOAT) 14996 emit_insn (gen_rtx_PARALLEL (VOIDmode, 14997 gen_rtvec (10, 14998 gen_rtx_SET (compare_result, 14999 gen_rtx_COMPARE (comp_mode, op0, op1)), 15000 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15001 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15002 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15003 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15004 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15005 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15006 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15007 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 15008 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode))))); 15009 else if (GET_CODE (op1) == UNSPEC 15010 && XINT (op1, 1) == UNSPEC_SP_TEST) 15011 { 15012 rtx op1b = XVECEXP (op1, 0, 0); 15013 comp_mode = CCEQmode; 15014 compare_result = gen_reg_rtx (CCEQmode); 15015 if (TARGET_64BIT) 15016 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b)); 15017 else 15018 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b)); 15019 } 15020 else 15021 emit_insn (gen_rtx_SET (compare_result, 15022 gen_rtx_COMPARE (comp_mode, op0, op1))); 15023 } 15024 15025 validate_condition_mode (code, GET_MODE (compare_result)); 15026 15027 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx); 15028} 15029 15030 15031/* Return the diagnostic message string if the binary operation OP is 15032 not permitted on TYPE1 and TYPE2, NULL otherwise. */ 15033 15034static const char* 15035rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED, 15036 const_tree type1, 15037 const_tree type2) 15038{ 15039 machine_mode mode1 = TYPE_MODE (type1); 15040 machine_mode mode2 = TYPE_MODE (type2); 15041 15042 /* For complex modes, use the inner type. */ 15043 if (COMPLEX_MODE_P (mode1)) 15044 mode1 = GET_MODE_INNER (mode1); 15045 15046 if (COMPLEX_MODE_P (mode2)) 15047 mode2 = GET_MODE_INNER (mode2); 15048 15049 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended 15050 double to intermix unless -mfloat128-convert. 
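   For example (a purely illustrative source-level sketch): given

       __float128 a;	(IEEE 128-bit binary float)
       __ibm128 b;	(IBM double-double)

   an expression such as "a + b" gets the diagnostic below unless
   -mfloat128-convert is in effect, while "a + a" or "b + b" is always
   accepted because the two modes match.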
*/ 15051 if (mode1 == mode2) 15052 return NULL; 15053 15054 if (!TARGET_FLOAT128_CVT) 15055 { 15056 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2)) 15057 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2))) 15058 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating " 15059 "point types"); 15060 } 15061 15062 return NULL; 15063} 15064 15065 15066/* Expand floating point conversion to/from __float128 and __ibm128. */ 15067 15068void 15069rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) 15070{ 15071 machine_mode dest_mode = GET_MODE (dest); 15072 machine_mode src_mode = GET_MODE (src); 15073 convert_optab cvt = unknown_optab; 15074 bool do_move = false; 15075 rtx libfunc = NULL_RTX; 15076 rtx dest2; 15077 typedef rtx (*rtx_2func_t) (rtx, rtx); 15078 rtx_2func_t hw_convert = (rtx_2func_t)0; 15079 size_t kf_or_tf; 15080 15081 struct hw_conv_t { 15082 rtx_2func_t from_df; 15083 rtx_2func_t from_sf; 15084 rtx_2func_t from_si_sign; 15085 rtx_2func_t from_si_uns; 15086 rtx_2func_t from_di_sign; 15087 rtx_2func_t from_di_uns; 15088 rtx_2func_t to_df; 15089 rtx_2func_t to_sf; 15090 rtx_2func_t to_si_sign; 15091 rtx_2func_t to_si_uns; 15092 rtx_2func_t to_di_sign; 15093 rtx_2func_t to_di_uns; 15094 } hw_conversions[2] = { 15095 /* convertions to/from KFmode */ 15096 { 15097 gen_extenddfkf2_hw, /* KFmode <- DFmode. */ 15098 gen_extendsfkf2_hw, /* KFmode <- SFmode. */ 15099 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */ 15100 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */ 15101 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */ 15102 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */ 15103 gen_trunckfdf2_hw, /* DFmode <- KFmode. */ 15104 gen_trunckfsf2_hw, /* SFmode <- KFmode. */ 15105 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */ 15106 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */ 15107 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */ 15108 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */ 15109 }, 15110 15111 /* convertions to/from TFmode */ 15112 { 15113 gen_extenddftf2_hw, /* TFmode <- DFmode. */ 15114 gen_extendsftf2_hw, /* TFmode <- SFmode. */ 15115 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */ 15116 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */ 15117 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */ 15118 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */ 15119 gen_trunctfdf2_hw, /* DFmode <- TFmode. */ 15120 gen_trunctfsf2_hw, /* SFmode <- TFmode. */ 15121 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */ 15122 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */ 15123 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */ 15124 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */ 15125 }, 15126 }; 15127 15128 if (dest_mode == src_mode) 15129 gcc_unreachable (); 15130 15131 /* Eliminate memory operations. */ 15132 if (MEM_P (src)) 15133 src = force_reg (src_mode, src); 15134 15135 if (MEM_P (dest)) 15136 { 15137 rtx tmp = gen_reg_rtx (dest_mode); 15138 rs6000_expand_float128_convert (tmp, src, unsigned_p); 15139 rs6000_emit_move (dest, tmp, dest_mode); 15140 return; 15141 } 15142 15143 /* Convert to IEEE 128-bit floating point. 
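   For example (sketch): a DFmode source selects
   hw_conversions[kf_or_tf].from_df (gen_extenddfkf2_hw or
   gen_extenddftf2_hw) below when hardware IEEE 128-bit support is
   available, and the sext_optab libcall path otherwise.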
*/ 15144 if (FLOAT128_IEEE_P (dest_mode)) 15145 { 15146 if (dest_mode == KFmode) 15147 kf_or_tf = 0; 15148 else if (dest_mode == TFmode) 15149 kf_or_tf = 1; 15150 else 15151 gcc_unreachable (); 15152 15153 switch (src_mode) 15154 { 15155 case E_DFmode: 15156 cvt = sext_optab; 15157 hw_convert = hw_conversions[kf_or_tf].from_df; 15158 break; 15159 15160 case E_SFmode: 15161 cvt = sext_optab; 15162 hw_convert = hw_conversions[kf_or_tf].from_sf; 15163 break; 15164 15165 case E_KFmode: 15166 case E_IFmode: 15167 case E_TFmode: 15168 if (FLOAT128_IBM_P (src_mode)) 15169 cvt = sext_optab; 15170 else 15171 do_move = true; 15172 break; 15173 15174 case E_SImode: 15175 if (unsigned_p) 15176 { 15177 cvt = ufloat_optab; 15178 hw_convert = hw_conversions[kf_or_tf].from_si_uns; 15179 } 15180 else 15181 { 15182 cvt = sfloat_optab; 15183 hw_convert = hw_conversions[kf_or_tf].from_si_sign; 15184 } 15185 break; 15186 15187 case E_DImode: 15188 if (unsigned_p) 15189 { 15190 cvt = ufloat_optab; 15191 hw_convert = hw_conversions[kf_or_tf].from_di_uns; 15192 } 15193 else 15194 { 15195 cvt = sfloat_optab; 15196 hw_convert = hw_conversions[kf_or_tf].from_di_sign; 15197 } 15198 break; 15199 15200 default: 15201 gcc_unreachable (); 15202 } 15203 } 15204 15205 /* Convert from IEEE 128-bit floating point. */ 15206 else if (FLOAT128_IEEE_P (src_mode)) 15207 { 15208 if (src_mode == KFmode) 15209 kf_or_tf = 0; 15210 else if (src_mode == TFmode) 15211 kf_or_tf = 1; 15212 else 15213 gcc_unreachable (); 15214 15215 switch (dest_mode) 15216 { 15217 case E_DFmode: 15218 cvt = trunc_optab; 15219 hw_convert = hw_conversions[kf_or_tf].to_df; 15220 break; 15221 15222 case E_SFmode: 15223 cvt = trunc_optab; 15224 hw_convert = hw_conversions[kf_or_tf].to_sf; 15225 break; 15226 15227 case E_KFmode: 15228 case E_IFmode: 15229 case E_TFmode: 15230 if (FLOAT128_IBM_P (dest_mode)) 15231 cvt = trunc_optab; 15232 else 15233 do_move = true; 15234 break; 15235 15236 case E_SImode: 15237 if (unsigned_p) 15238 { 15239 cvt = ufix_optab; 15240 hw_convert = hw_conversions[kf_or_tf].to_si_uns; 15241 } 15242 else 15243 { 15244 cvt = sfix_optab; 15245 hw_convert = hw_conversions[kf_or_tf].to_si_sign; 15246 } 15247 break; 15248 15249 case E_DImode: 15250 if (unsigned_p) 15251 { 15252 cvt = ufix_optab; 15253 hw_convert = hw_conversions[kf_or_tf].to_di_uns; 15254 } 15255 else 15256 { 15257 cvt = sfix_optab; 15258 hw_convert = hw_conversions[kf_or_tf].to_di_sign; 15259 } 15260 break; 15261 15262 default: 15263 gcc_unreachable (); 15264 } 15265 } 15266 15267 /* Both IBM format. */ 15268 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode)) 15269 do_move = true; 15270 15271 else 15272 gcc_unreachable (); 15273 15274 /* Handle conversion between TFmode/KFmode/IFmode. */ 15275 if (do_move) 15276 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src))); 15277 15278 /* Handle conversion if we have hardware support. */ 15279 else if (TARGET_FLOAT128_HW && hw_convert) 15280 emit_insn ((hw_convert) (dest, src)); 15281 15282 /* Call an external function to do the conversion. 
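   That is, look up the library routine registered for CVT and emit a call
   to it; e.g. (hedged sketch, the exact names depend on how the libfuncs
   were registered) extending double to __float128 without hardware support
   becomes a call to a helper along the lines of __extenddfkf2.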
*/ 15283 else if (cvt != unknown_optab) 15284 { 15285 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode); 15286 gcc_assert (libfunc != NULL_RTX); 15287 15288 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 15289 src, src_mode); 15290 15291 gcc_assert (dest2 != NULL_RTX); 15292 if (!rtx_equal_p (dest, dest2)) 15293 emit_move_insn (dest, dest2); 15294 } 15295 15296 else 15297 gcc_unreachable (); 15298 15299 return; 15300} 15301 15302 15303/* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH 15304 can be used as that dest register. Return the dest register. */ 15305 15306rtx 15307rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch) 15308{ 15309 if (op2 == const0_rtx) 15310 return op1; 15311 15312 if (GET_CODE (scratch) == SCRATCH) 15313 scratch = gen_reg_rtx (mode); 15314 15315 if (logical_operand (op2, mode)) 15316 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2))); 15317 else 15318 emit_insn (gen_rtx_SET (scratch, 15319 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2)))); 15320 15321 return scratch; 15322} 15323 15324/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that 15325 requires this. The result is mode MODE. */ 15326rtx 15327rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x) 15328{ 15329 rtx cond[2]; 15330 int n = 0; 15331 if (code == LTGT || code == LE || code == UNLT) 15332 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx); 15333 if (code == LTGT || code == GE || code == UNGT) 15334 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx); 15335 if (code == LE || code == GE || code == UNEQ) 15336 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx); 15337 if (code == UNLT || code == UNGT || code == UNEQ) 15338 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx); 15339 15340 gcc_assert (n == 2); 15341 15342 rtx cc = gen_reg_rtx (CCEQmode); 15343 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]); 15344 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x)); 15345 15346 return cc; 15347} 15348 15349void 15350rs6000_emit_sCOND (machine_mode mode, rtx operands[]) 15351{ 15352 rtx condition_rtx = rs6000_generate_compare (operands[1], mode); 15353 rtx_code cond_code = GET_CODE (condition_rtx); 15354 15355 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode) 15356 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW)) 15357 ; 15358 else if (cond_code == NE 15359 || cond_code == GE || cond_code == LE 15360 || cond_code == GEU || cond_code == LEU 15361 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE) 15362 { 15363 rtx not_result = gen_reg_rtx (CCEQmode); 15364 rtx not_op, rev_cond_rtx; 15365 machine_mode cc_mode; 15366 15367 cc_mode = GET_MODE (XEXP (condition_rtx, 0)); 15368 15369 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code), 15370 SImode, XEXP (condition_rtx, 0), const0_rtx); 15371 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); 15372 emit_insn (gen_rtx_SET (not_result, not_op)); 15373 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); 15374 } 15375 15376 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0)); 15377 if (op_mode == VOIDmode) 15378 op_mode = GET_MODE (XEXP (operands[1], 1)); 15379 15380 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode))) 15381 { 15382 PUT_MODE (condition_rtx, DImode); 15383 convert_move (operands[0], condition_rtx, 0); 15384 } 15385 else 15386 { 15387 PUT_MODE (condition_rtx, SImode); 15388 emit_insn (gen_rtx_SET (operands[0], 
condition_rtx)); 15389 } 15390} 15391 15392/* Emit a branch of kind CODE to location LOC. */ 15393 15394void 15395rs6000_emit_cbranch (machine_mode mode, rtx operands[]) 15396{ 15397 rtx condition_rtx = rs6000_generate_compare (operands[0], mode); 15398 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); 15399 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx); 15400 emit_jump_insn (gen_rtx_SET (pc_rtx, ite)); 15401} 15402 15403/* Return the string to output a conditional branch to LABEL, which is 15404 the operand template of the label, or NULL if the branch is really a 15405 conditional return. 15406 15407 OP is the conditional expression. XEXP (OP, 0) is assumed to be a 15408 condition code register and its mode specifies what kind of 15409 comparison we made. 15410 15411 REVERSED is nonzero if we should reverse the sense of the comparison. 15412 15413 INSN is the insn. */ 15414 15415char * 15416output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn) 15417{ 15418 static char string[64]; 15419 enum rtx_code code = GET_CODE (op); 15420 rtx cc_reg = XEXP (op, 0); 15421 machine_mode mode = GET_MODE (cc_reg); 15422 int cc_regno = REGNO (cc_reg) - CR0_REGNO; 15423 int need_longbranch = label != NULL && get_attr_length (insn) == 8; 15424 int really_reversed = reversed ^ need_longbranch; 15425 char *s = string; 15426 const char *ccode; 15427 const char *pred; 15428 rtx note; 15429 15430 validate_condition_mode (code, mode); 15431 15432 /* Work out which way this really branches. We could use 15433 reverse_condition_maybe_unordered here always but this 15434 makes the resulting assembler clearer. */ 15435 if (really_reversed) 15436 { 15437 /* Reversal of FP compares takes care -- an ordered compare 15438 becomes an unordered compare and vice versa. */ 15439 if (mode == CCFPmode) 15440 code = reverse_condition_maybe_unordered (code); 15441 else 15442 code = reverse_condition (code); 15443 } 15444 15445 switch (code) 15446 { 15447 /* Not all of these are actually distinct opcodes, but 15448 we distinguish them for clarity of the resulting assembler. */ 15449 case NE: case LTGT: 15450 ccode = "ne"; break; 15451 case EQ: case UNEQ: 15452 ccode = "eq"; break; 15453 case GE: case GEU: 15454 ccode = "ge"; break; 15455 case GT: case GTU: case UNGT: 15456 ccode = "gt"; break; 15457 case LE: case LEU: 15458 ccode = "le"; break; 15459 case LT: case LTU: case UNLT: 15460 ccode = "lt"; break; 15461 case UNORDERED: ccode = "un"; break; 15462 case ORDERED: ccode = "nu"; break; 15463 case UNGE: ccode = "nl"; break; 15464 case UNLE: ccode = "ng"; break; 15465 default: 15466 gcc_unreachable (); 15467 } 15468 15469 /* Maybe we have a guess as to how likely the branch is. */ 15470 pred = ""; 15471 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX); 15472 if (note != NULL_RTX) 15473 { 15474 /* PROB is the difference from 50%. */ 15475 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0)) 15476 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2; 15477 15478 /* Only hint for highly probable/improbable branches on newer cpus when 15479 we have real profile data, as static prediction overrides processor 15480 dynamic prediction. For older cpus we may as well always hint, but 15481 assume not taken for branches that are very close to 50% as a 15482 mispredicted taken branch is more expensive than a 15483 mispredicted not-taken branch. 
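   In the output the hint is the '+' or '-' suffix appended to the branch
   mnemonic below; e.g. (illustrative, the exact CR register spelling
   depends on the target) a branch predicted strongly taken may be printed
   as "bgt+ cr0,.L5" and one predicted not taken as "bgt- cr0,.L5".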
*/ 15484 if (rs6000_always_hint 15485 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48 15486 && (profile_status_for_fn (cfun) != PROFILE_GUESSED) 15487 && br_prob_note_reliable_p (note))) 15488 { 15489 if (abs (prob) > REG_BR_PROB_BASE / 20 15490 && ((prob > 0) ^ need_longbranch)) 15491 pred = "+"; 15492 else 15493 pred = "-"; 15494 } 15495 } 15496 15497 if (label == NULL) 15498 s += sprintf (s, "b%slr%s ", ccode, pred); 15499 else 15500 s += sprintf (s, "b%s%s ", ccode, pred); 15501 15502 /* We need to escape any '%' characters in the reg_names string. 15503 Assume they'd only be the first character.... */ 15504 if (reg_names[cc_regno + CR0_REGNO][0] == '%') 15505 *s++ = '%'; 15506 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]); 15507 15508 if (label != NULL) 15509 { 15510 /* If the branch distance was too far, we may have to use an 15511 unconditional branch to go the distance. */ 15512 if (need_longbranch) 15513 s += sprintf (s, ",$+8\n\tb %s", label); 15514 else 15515 s += sprintf (s, ",%s", label); 15516 } 15517 15518 return string; 15519} 15520 15521/* Return insn for VSX or Altivec comparisons. */ 15522 15523static rtx 15524rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1) 15525{ 15526 rtx mask; 15527 machine_mode mode = GET_MODE (op0); 15528 15529 switch (code) 15530 { 15531 default: 15532 break; 15533 15534 case GE: 15535 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 15536 return NULL_RTX; 15537 /* FALLTHRU */ 15538 15539 case EQ: 15540 case GT: 15541 case GTU: 15542 case ORDERED: 15543 case UNORDERED: 15544 case UNEQ: 15545 case LTGT: 15546 mask = gen_reg_rtx (mode); 15547 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1))); 15548 return mask; 15549 } 15550 15551 return NULL_RTX; 15552} 15553 15554/* Emit vector compare for operands OP0 and OP1 using code RCODE. 15555 DMODE is expected destination mode. This is a recursive function. */ 15556 15557static rtx 15558rs6000_emit_vector_compare (enum rtx_code rcode, 15559 rtx op0, rtx op1, 15560 machine_mode dmode) 15561{ 15562 rtx mask; 15563 bool swap_operands = false; 15564 bool try_again = false; 15565 15566 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode)); 15567 gcc_assert (GET_MODE (op0) == GET_MODE (op1)); 15568 15569 /* See if the comparison works as is. */ 15570 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); 15571 if (mask) 15572 return mask; 15573 15574 switch (rcode) 15575 { 15576 case LT: 15577 rcode = GT; 15578 swap_operands = true; 15579 try_again = true; 15580 break; 15581 case LTU: 15582 rcode = GTU; 15583 swap_operands = true; 15584 try_again = true; 15585 break; 15586 case NE: 15587 case UNLE: 15588 case UNLT: 15589 case UNGE: 15590 case UNGT: 15591 /* Invert condition and try again. 15592 e.g., A != B becomes ~(A==B). 
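	 On the machine this ends up as the reversed vector compare followed
	 by a complement via one_cmpl_optab (e.g. a vnor/xxlnor-style
	 instruction; hedged sketch only).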
*/ 15593 { 15594 enum rtx_code rev_code; 15595 enum insn_code nor_code; 15596 rtx mask2; 15597 15598 rev_code = reverse_condition_maybe_unordered (rcode); 15599 if (rev_code == UNKNOWN) 15600 return NULL_RTX; 15601 15602 nor_code = optab_handler (one_cmpl_optab, dmode); 15603 if (nor_code == CODE_FOR_nothing) 15604 return NULL_RTX; 15605 15606 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode); 15607 if (!mask2) 15608 return NULL_RTX; 15609 15610 mask = gen_reg_rtx (dmode); 15611 emit_insn (GEN_FCN (nor_code) (mask, mask2)); 15612 return mask; 15613 } 15614 break; 15615 case GE: 15616 case GEU: 15617 case LE: 15618 case LEU: 15619 /* Try GT/GTU/LT/LTU OR EQ */ 15620 { 15621 rtx c_rtx, eq_rtx; 15622 enum insn_code ior_code; 15623 enum rtx_code new_code; 15624 15625 switch (rcode) 15626 { 15627 case GE: 15628 new_code = GT; 15629 break; 15630 15631 case GEU: 15632 new_code = GTU; 15633 break; 15634 15635 case LE: 15636 new_code = LT; 15637 break; 15638 15639 case LEU: 15640 new_code = LTU; 15641 break; 15642 15643 default: 15644 gcc_unreachable (); 15645 } 15646 15647 ior_code = optab_handler (ior_optab, dmode); 15648 if (ior_code == CODE_FOR_nothing) 15649 return NULL_RTX; 15650 15651 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode); 15652 if (!c_rtx) 15653 return NULL_RTX; 15654 15655 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode); 15656 if (!eq_rtx) 15657 return NULL_RTX; 15658 15659 mask = gen_reg_rtx (dmode); 15660 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); 15661 return mask; 15662 } 15663 break; 15664 default: 15665 return NULL_RTX; 15666 } 15667 15668 if (try_again) 15669 { 15670 if (swap_operands) 15671 std::swap (op0, op1); 15672 15673 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); 15674 if (mask) 15675 return mask; 15676 } 15677 15678 /* You only get two chances. */ 15679 return NULL_RTX; 15680} 15681 15682/* Emit vector conditional expression. DEST is destination. OP_TRUE and 15683 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two 15684 operands for the relation operation COND. */ 15685 15686int 15687rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, 15688 rtx cond, rtx cc_op0, rtx cc_op1) 15689{ 15690 machine_mode dest_mode = GET_MODE (dest); 15691 machine_mode mask_mode = GET_MODE (cc_op0); 15692 enum rtx_code rcode = GET_CODE (cond); 15693 rtx mask; 15694 bool invert_move = false; 15695 15696 if (VECTOR_UNIT_NONE_P (dest_mode)) 15697 return 0; 15698 15699 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode) 15700 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode)); 15701 15702 switch (rcode) 15703 { 15704 /* Swap operands if we can, and fall back to doing the operation as 15705 specified, and doing a NOR to invert the test. */ 15706 case NE: 15707 case UNLE: 15708 case UNLT: 15709 case UNGE: 15710 case UNGT: 15711 /* Invert condition and try again. 15712 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */ 15713 invert_move = true; 15714 rcode = reverse_condition_maybe_unordered (rcode); 15715 if (rcode == UNKNOWN) 15716 return 0; 15717 break; 15718 15719 case GE: 15720 case LE: 15721 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT) 15722 { 15723 /* Invert condition to avoid compound test. */ 15724 invert_move = true; 15725 rcode = reverse_condition (rcode); 15726 } 15727 break; 15728 15729 case GTU: 15730 case GEU: 15731 case LTU: 15732 case LEU: 15733 15734 /* Invert condition to avoid compound test if necessary. 
 */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (mask_mode != dest_mode)
    mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  rtx tmp = gen_rtx_IOR (dest_mode,
			 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
				      op_false),
			 gen_rtx_AND (dest_mode, mask, op_true));
  emit_insn (gen_rtx_SET (dest, tmp));
  return 1;
}

/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
   maximum or minimum with "C" semantics.

   Unless you use -ffast-math, you can't use these instructions to replace
   conditions that implicitly reverse the condition because the comparison
   might generate a NaN or signed zero.

   I.e. the following can be replaced all of the time
	ret = (op1 >  op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 >= op2) ? op1 : op2	; generate xsmaxcdp
	ret = (op1 <  op2) ? op1 : op2	; generate xsmincdp
	ret = (op1 <= op2) ? op1 : op2	; generate xsmincdp

   The following can be replaced only if -ffast-math is used:
	ret = (op1 <  op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 <= op2) ? op2 : op1	; generate xsmaxcdp
	ret = (op1 >  op2) ? op2 : op1	; generate xsmincdp
	ret = (op1 >= op2) ? op2 : op1	; generate xsmincdp

   Move TRUE_COND to DEST if OP of the operands of the last comparison is
   nonzero/true, FALSE_COND if it is zero/false.

   Return false if we can't generate the appropriate minimum or maximum, and
   true if we did emit the minimum or maximum.
*/ 15826 15827static bool 15828rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15829{ 15830 enum rtx_code code = GET_CODE (op); 15831 rtx op0 = XEXP (op, 0); 15832 rtx op1 = XEXP (op, 1); 15833 machine_mode compare_mode = GET_MODE (op0); 15834 machine_mode result_mode = GET_MODE (dest); 15835 15836 if (result_mode != compare_mode) 15837 return false; 15838 15839 /* See the comments of this function, it simply expects GE/GT/LE/LT in 15840 the checks, but for the reversible equivalent UNLT/UNLE/UNGT/UNGE, 15841 we need to do the reversions first to make the following checks 15842 support fewer cases, like: 15843 15844 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1; 15845 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1; 15846 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1; 15847 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1; 15848 15849 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed 15850 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not 15851 have to check for fast-math or the like. */ 15852 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT) 15853 { 15854 code = reverse_condition_maybe_unordered (code); 15855 std::swap (true_cond, false_cond); 15856 } 15857 15858 bool max_p; 15859 if (code == GE || code == GT) 15860 max_p = true; 15861 else if (code == LE || code == LT) 15862 max_p = false; 15863 else 15864 return false; 15865 15866 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond)) 15867 ; 15868 15869 /* Only when NaNs and signed-zeros are not in effect, smax could be 15870 used for `op0 < op1 ? op1 : op0`, and smin could be used for 15871 `op0 > op1 ? op1 : op0`. */ 15872 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond) 15873 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode)) 15874 max_p = !max_p; 15875 15876 else 15877 return false; 15878 15879 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1); 15880 return true; 15881} 15882 15883/* Possibly emit a floating point conditional move by generating a compare that 15884 sets a mask instruction and a XXSEL select instruction. 15885 15886 Move TRUE_COND to DEST if OP of the operands of the last comparison is 15887 nonzero/true, FALSE_COND if it is zero/false. 15888 15889 Return false if the operation cannot be generated, and true if we could 15890 generate the instruction. */ 15891 15892static bool 15893rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15894{ 15895 enum rtx_code code = GET_CODE (op); 15896 rtx op0 = XEXP (op, 0); 15897 rtx op1 = XEXP (op, 1); 15898 machine_mode compare_mode = GET_MODE (op0); 15899 machine_mode result_mode = GET_MODE (dest); 15900 rtx compare_rtx; 15901 rtx cmove_rtx; 15902 rtx clobber_rtx; 15903 15904 if (!can_create_pseudo_p ()) 15905 return 0; 15906 15907 /* We allow the comparison to be either SFmode/DFmode and the true/false 15908 condition to be either SFmode/DFmode. I.e. we allow: 15909 15910 float a, b; 15911 double c, d, r; 15912 15913 r = (a == b) ? c : d; 15914 15915 and: 15916 15917 double a, b; 15918 float c, d, r; 15919 15920 r = (a == b) ? c : d; 15921 15922 but we don't allow intermixing the IEEE 128-bit floating point types with 15923 the 32/64-bit scalar types. 
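   As a hedged sketch of the output (operand order shown for illustration
   only): on a power9 target, "r = (a > b) ? c : d" with doubles becomes a
   mask-generating compare followed by a select, roughly

       xscmpgtdp vs0,vsA,vsB
       xxsel     vsR,vsD,vsC,vs0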
*/ 15924 15925 if (!(compare_mode == result_mode 15926 || (compare_mode == SFmode && result_mode == DFmode) 15927 || (compare_mode == DFmode && result_mode == SFmode))) 15928 return false; 15929 15930 switch (code) 15931 { 15932 case EQ: 15933 case GE: 15934 case GT: 15935 break; 15936 15937 case NE: 15938 case LT: 15939 case LE: 15940 code = swap_condition (code); 15941 std::swap (op0, op1); 15942 break; 15943 15944 default: 15945 return false; 15946 } 15947 15948 /* Generate: [(parallel [(set (dest) 15949 (if_then_else (op (cmp1) (cmp2)) 15950 (true) 15951 (false))) 15952 (clobber (scratch))])]. */ 15953 15954 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1); 15955 cmove_rtx = gen_rtx_SET (dest, 15956 gen_rtx_IF_THEN_ELSE (result_mode, 15957 compare_rtx, 15958 true_cond, 15959 false_cond)); 15960 15961 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)); 15962 emit_insn (gen_rtx_PARALLEL (VOIDmode, 15963 gen_rtvec (2, cmove_rtx, clobber_rtx))); 15964 15965 return true; 15966} 15967 15968/* Helper function to return true if the target has instructions to do a 15969 compare and set mask instruction that can be used with XXSEL to implement a 15970 conditional move. It is also assumed that such a target also supports the 15971 "C" minimum and maximum instructions. */ 15972 15973static bool 15974have_compare_and_set_mask (machine_mode mode) 15975{ 15976 switch (mode) 15977 { 15978 case E_SFmode: 15979 case E_DFmode: 15980 return TARGET_P9_MINMAX; 15981 15982 case E_KFmode: 15983 case E_TFmode: 15984 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode); 15985 15986 default: 15987 break; 15988 } 15989 15990 return false; 15991} 15992 15993/* Emit a conditional move: move TRUE_COND to DEST if OP of the 15994 operands of the last comparison is nonzero/true, FALSE_COND if it 15995 is zero/false. Return 0 if the hardware has no such operation. */ 15996 15997bool 15998rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15999{ 16000 enum rtx_code code = GET_CODE (op); 16001 rtx op0 = XEXP (op, 0); 16002 rtx op1 = XEXP (op, 1); 16003 machine_mode compare_mode = GET_MODE (op0); 16004 machine_mode result_mode = GET_MODE (dest); 16005 rtx temp; 16006 bool is_against_zero; 16007 16008 /* These modes should always match. */ 16009 if (GET_MODE (op1) != compare_mode 16010 /* In the isel case however, we can use a compare immediate, so 16011 op1 may be a small constant. */ 16012 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode))) 16013 return false; 16014 if (GET_MODE (true_cond) != result_mode) 16015 return false; 16016 if (GET_MODE (false_cond) != result_mode) 16017 return false; 16018 16019 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask 16020 instructions. */ 16021 if (have_compare_and_set_mask (compare_mode) 16022 && have_compare_and_set_mask (result_mode)) 16023 { 16024 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond)) 16025 return true; 16026 16027 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond)) 16028 return true; 16029 } 16030 16031 /* Don't allow using floating point comparisons for integer results for 16032 now. */ 16033 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode)) 16034 return false; 16035 16036 /* First, work out if the hardware can do this at all, or 16037 if it's too slow.... 
*/ 16038 if (!FLOAT_MODE_P (compare_mode)) 16039 { 16040 if (TARGET_ISEL) 16041 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond); 16042 return false; 16043 } 16044 16045 is_against_zero = op1 == CONST0_RTX (compare_mode); 16046 16047 /* A floating-point subtract might overflow, underflow, or produce 16048 an inexact result, thus changing the floating-point flags, so it 16049 can't be generated if we care about that. It's safe if one side 16050 of the construct is zero, since then no subtract will be 16051 generated. */ 16052 if (SCALAR_FLOAT_MODE_P (compare_mode) 16053 && flag_trapping_math && ! is_against_zero) 16054 return false; 16055 16056 /* Eliminate half of the comparisons by switching operands, this 16057 makes the remaining code simpler. */ 16058 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE 16059 || code == LTGT || code == LT || code == UNLE) 16060 { 16061 code = reverse_condition_maybe_unordered (code); 16062 temp = true_cond; 16063 true_cond = false_cond; 16064 false_cond = temp; 16065 } 16066 16067 /* UNEQ and LTGT take four instructions for a comparison with zero, 16068 it'll probably be faster to use a branch here too. */ 16069 if (code == UNEQ && HONOR_NANS (compare_mode)) 16070 return false; 16071 16072 /* We're going to try to implement comparisons by performing 16073 a subtract, then comparing against zero. Unfortunately, 16074 Inf - Inf is NaN which is not zero, and so if we don't 16075 know that the operand is finite and the comparison 16076 would treat EQ different to UNORDERED, we can't do it. */ 16077 if (HONOR_INFINITIES (compare_mode) 16078 && code != GT && code != UNGE 16079 && (!CONST_DOUBLE_P (op1) 16080 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1))) 16081 /* Constructs of the form (a OP b ? a : b) are safe. */ 16082 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond)) 16083 || (! rtx_equal_p (op0, true_cond) 16084 && ! rtx_equal_p (op1, true_cond)))) 16085 return false; 16086 16087 /* At this point we know we can use fsel. */ 16088 16089 /* Don't allow compare_mode other than SFmode or DFmode, for others there 16090 is no fsel instruction. */ 16091 if (compare_mode != SFmode && compare_mode != DFmode) 16092 return false; 16093 16094 /* Reduce the comparison to a comparison against zero. */ 16095 if (! is_against_zero) 16096 { 16097 temp = gen_reg_rtx (compare_mode); 16098 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1))); 16099 op0 = temp; 16100 op1 = CONST0_RTX (compare_mode); 16101 } 16102 16103 /* If we don't care about NaNs we can reduce some of the comparisons 16104 down to faster ones. */ 16105 if (! HONOR_NANS (compare_mode)) 16106 switch (code) 16107 { 16108 case GT: 16109 code = LE; 16110 temp = true_cond; 16111 true_cond = false_cond; 16112 false_cond = temp; 16113 break; 16114 case UNGE: 16115 code = GE; 16116 break; 16117 case UNEQ: 16118 code = EQ; 16119 break; 16120 default: 16121 break; 16122 } 16123 16124 /* Now, reduce everything down to a GE. 
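     The final pattern emitted below only tests op0 >= 0 (an fsel-style
     select), so e.g. (sketch of the transformations performed by the switch
     that follows) "a LE 0" is handled by negating a first, and "a EQ 0" by
     testing -abs(a) >= 0.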
*/ 16125 switch (code) 16126 { 16127 case GE: 16128 break; 16129 16130 case LE: 16131 temp = gen_reg_rtx (compare_mode); 16132 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 16133 op0 = temp; 16134 break; 16135 16136 case ORDERED: 16137 temp = gen_reg_rtx (compare_mode); 16138 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0))); 16139 op0 = temp; 16140 break; 16141 16142 case EQ: 16143 temp = gen_reg_rtx (compare_mode); 16144 emit_insn (gen_rtx_SET (temp, 16145 gen_rtx_NEG (compare_mode, 16146 gen_rtx_ABS (compare_mode, op0)))); 16147 op0 = temp; 16148 break; 16149 16150 case UNGE: 16151 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */ 16152 temp = gen_reg_rtx (result_mode); 16153 emit_insn (gen_rtx_SET (temp, 16154 gen_rtx_IF_THEN_ELSE (result_mode, 16155 gen_rtx_GE (VOIDmode, 16156 op0, op1), 16157 true_cond, false_cond))); 16158 false_cond = true_cond; 16159 true_cond = temp; 16160 16161 temp = gen_reg_rtx (compare_mode); 16162 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 16163 op0 = temp; 16164 break; 16165 16166 case GT: 16167 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */ 16168 temp = gen_reg_rtx (result_mode); 16169 emit_insn (gen_rtx_SET (temp, 16170 gen_rtx_IF_THEN_ELSE (result_mode, 16171 gen_rtx_GE (VOIDmode, 16172 op0, op1), 16173 true_cond, false_cond))); 16174 true_cond = false_cond; 16175 false_cond = temp; 16176 16177 temp = gen_reg_rtx (compare_mode); 16178 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 16179 op0 = temp; 16180 break; 16181 16182 default: 16183 gcc_unreachable (); 16184 } 16185 16186 emit_insn (gen_rtx_SET (dest, 16187 gen_rtx_IF_THEN_ELSE (result_mode, 16188 gen_rtx_GE (VOIDmode, 16189 op0, op1), 16190 true_cond, false_cond))); 16191 return true; 16192} 16193 16194/* Same as above, but for ints (isel). */ 16195 16196bool 16197rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 16198{ 16199 rtx condition_rtx, cr; 16200 machine_mode mode = GET_MODE (dest); 16201 enum rtx_code cond_code; 16202 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx); 16203 bool signedp; 16204 16205 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode)) 16206 return false; 16207 16208 /* PR104335: We now need to expect CC-mode "comparisons" 16209 coming from ifcvt. The following code expects proper 16210 comparisons so better abort here. */ 16211 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC) 16212 return false; 16213 16214 /* We still have to do the compare, because isel doesn't do a 16215 compare, it just looks at the CRx bits set by a previous compare 16216 instruction. */ 16217 condition_rtx = rs6000_generate_compare (op, mode); 16218 cond_code = GET_CODE (condition_rtx); 16219 cr = XEXP (condition_rtx, 0); 16220 signedp = GET_MODE (cr) == CCmode; 16221 16222 isel_func = (mode == SImode 16223 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si) 16224 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di)); 16225 16226 switch (cond_code) 16227 { 16228 case LT: case GT: case LTU: case GTU: case EQ: 16229 /* isel handles these directly. */ 16230 break; 16231 16232 default: 16233 /* We need to swap the sense of the comparison. 
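	 E.g. (sketch) a GE test is turned into LT with TRUE_COND and
	 FALSE_COND exchanged, since isel itself only handles the
	 LT/GT/LTU/GTU/EQ tests listed above.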
*/ 16234 { 16235 std::swap (false_cond, true_cond); 16236 PUT_CODE (condition_rtx, reverse_condition (cond_code)); 16237 } 16238 break; 16239 } 16240 16241 false_cond = force_reg (mode, false_cond); 16242 if (true_cond != const0_rtx) 16243 true_cond = force_reg (mode, true_cond); 16244 16245 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr)); 16246 16247 return true; 16248} 16249 16250void 16251rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) 16252{ 16253 machine_mode mode = GET_MODE (op0); 16254 enum rtx_code c; 16255 rtx target; 16256 16257 /* VSX/altivec have direct min/max insns. */ 16258 if ((code == SMAX || code == SMIN) 16259 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) 16260 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode)) 16261 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)))) 16262 { 16263 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1))); 16264 return; 16265 } 16266 16267 if (code == SMAX || code == SMIN) 16268 c = GE; 16269 else 16270 c = GEU; 16271 16272 if (code == SMAX || code == UMAX) 16273 target = emit_conditional_move (dest, { c, op0, op1, mode }, 16274 op0, op1, mode, 0); 16275 else 16276 target = emit_conditional_move (dest, { c, op0, op1, mode }, 16277 op1, op0, mode, 0); 16278 gcc_assert (target); 16279 if (target != dest) 16280 emit_move_insn (dest, target); 16281} 16282 16283/* A subroutine of the atomic operation splitters. Jump to LABEL if 16284 COND is true. Mark the jump as unlikely to be taken. */ 16285 16286static void 16287emit_unlikely_jump (rtx cond, rtx label) 16288{ 16289 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); 16290 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); 16291 add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); 16292} 16293 16294/* A subroutine of the atomic operation splitters. Emit a load-locked 16295 instruction in MODE. For QI/HImode, possibly use a pattern than includes 16296 the zero_extend operation. */ 16297 16298static void 16299emit_load_locked (machine_mode mode, rtx reg, rtx mem) 16300{ 16301 rtx (*fn) (rtx, rtx) = NULL; 16302 16303 switch (mode) 16304 { 16305 case E_QImode: 16306 fn = gen_load_lockedqi; 16307 break; 16308 case E_HImode: 16309 fn = gen_load_lockedhi; 16310 break; 16311 case E_SImode: 16312 if (GET_MODE (mem) == QImode) 16313 fn = gen_load_lockedqi_si; 16314 else if (GET_MODE (mem) == HImode) 16315 fn = gen_load_lockedhi_si; 16316 else 16317 fn = gen_load_lockedsi; 16318 break; 16319 case E_DImode: 16320 fn = gen_load_lockeddi; 16321 break; 16322 case E_TImode: 16323 fn = gen_load_lockedti; 16324 break; 16325 default: 16326 gcc_unreachable (); 16327 } 16328 emit_insn (fn (reg, mem)); 16329} 16330 16331/* A subroutine of the atomic operation splitters. Emit a store-conditional 16332 instruction in MODE. */ 16333 16334static void 16335emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val) 16336{ 16337 rtx (*fn) (rtx, rtx, rtx) = NULL; 16338 16339 switch (mode) 16340 { 16341 case E_QImode: 16342 fn = gen_store_conditionalqi; 16343 break; 16344 case E_HImode: 16345 fn = gen_store_conditionalhi; 16346 break; 16347 case E_SImode: 16348 fn = gen_store_conditionalsi; 16349 break; 16350 case E_DImode: 16351 fn = gen_store_conditionaldi; 16352 break; 16353 case E_TImode: 16354 fn = gen_store_conditionalti; 16355 break; 16356 default: 16357 gcc_unreachable (); 16358 } 16359 16360 /* Emit sync before stwcx. to address PPC405 Erratum. 
*/ 16361 if (PPC405_ERRATUM77) 16362 emit_insn (gen_hwsync ()); 16363 16364 emit_insn (fn (res, mem, val)); 16365} 16366 16367/* Expand barriers before and after a load_locked/store_cond sequence. */ 16368 16369static rtx 16370rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) 16371{ 16372 rtx addr = XEXP (mem, 0); 16373 16374 if (!legitimate_indirect_address_p (addr, reload_completed) 16375 && !legitimate_indexed_address_p (addr, reload_completed)) 16376 { 16377 addr = force_reg (Pmode, addr); 16378 mem = replace_equiv_address_nv (mem, addr); 16379 } 16380 16381 switch (model) 16382 { 16383 case MEMMODEL_RELAXED: 16384 case MEMMODEL_CONSUME: 16385 case MEMMODEL_ACQUIRE: 16386 break; 16387 case MEMMODEL_RELEASE: 16388 case MEMMODEL_ACQ_REL: 16389 emit_insn (gen_lwsync ()); 16390 break; 16391 case MEMMODEL_SEQ_CST: 16392 emit_insn (gen_hwsync ()); 16393 break; 16394 default: 16395 gcc_unreachable (); 16396 } 16397 return mem; 16398} 16399 16400static void 16401rs6000_post_atomic_barrier (enum memmodel model) 16402{ 16403 switch (model) 16404 { 16405 case MEMMODEL_RELAXED: 16406 case MEMMODEL_CONSUME: 16407 case MEMMODEL_RELEASE: 16408 break; 16409 case MEMMODEL_ACQUIRE: 16410 case MEMMODEL_ACQ_REL: 16411 case MEMMODEL_SEQ_CST: 16412 emit_insn (gen_isync ()); 16413 break; 16414 default: 16415 gcc_unreachable (); 16416 } 16417} 16418 16419/* A subroutine of the various atomic expanders. For sub-word operations, 16420 we must adjust things to operate on SImode. Given the original MEM, 16421 return a new aligned memory. Also build and return the quantities by 16422 which to shift and mask. */ 16423 16424static rtx 16425rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) 16426{ 16427 rtx addr, align, shift, mask, mem; 16428 HOST_WIDE_INT shift_mask; 16429 machine_mode mode = GET_MODE (orig_mem); 16430 16431 /* For smaller modes, we have to implement this via SImode. */ 16432 shift_mask = (mode == QImode ? 0x18 : 0x10); 16433 16434 addr = XEXP (orig_mem, 0); 16435 addr = force_reg (GET_MODE (addr), addr); 16436 16437 /* Aligned memory containing subword. Generate a new memory. We 16438 do not want any of the existing MEM_ATTR data, as we're now 16439 accessing memory outside the original object. */ 16440 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4), 16441 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16442 mem = gen_rtx_MEM (SImode, align); 16443 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 16444 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 16445 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 16446 16447 /* Shift amount for subword relative to aligned word. */ 16448 shift = gen_reg_rtx (SImode); 16449 addr = gen_lowpart (SImode, addr); 16450 rtx tmp = gen_reg_rtx (SImode); 16451 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3))); 16452 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask))); 16453 if (BYTES_BIG_ENDIAN) 16454 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), 16455 shift, 1, OPTAB_LIB_WIDEN); 16456 *pshift = shift; 16457 16458 /* Mask for insertion. */ 16459 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)), 16460 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN); 16461 *pmask = mask; 16462 16463 return mem; 16464} 16465 16466/* A subroutine of the various atomic expanders. For sub-word operands, 16467 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. 
*/ 16468 16469static rtx 16470rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask) 16471{ 16472 rtx x; 16473 16474 x = gen_reg_rtx (SImode); 16475 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode, 16476 gen_rtx_NOT (SImode, mask), 16477 oldval))); 16478 16479 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN); 16480 16481 return x; 16482} 16483 16484/* A subroutine of the various atomic expanders. For sub-word operands, 16485 extract WIDE to NARROW via SHIFT. */ 16486 16487static void 16488rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift) 16489{ 16490 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift, 16491 wide, 1, OPTAB_LIB_WIDEN); 16492 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide)); 16493} 16494 16495/* Expand an atomic compare and swap operation. */ 16496 16497void 16498rs6000_expand_atomic_compare_and_swap (rtx operands[]) 16499{ 16500 rtx boolval, retval, mem, oldval, newval, cond; 16501 rtx label1, label2, x, mask, shift; 16502 machine_mode mode, orig_mode; 16503 enum memmodel mod_s, mod_f; 16504 bool is_weak; 16505 16506 boolval = operands[0]; 16507 retval = operands[1]; 16508 mem = operands[2]; 16509 oldval = operands[3]; 16510 newval = operands[4]; 16511 is_weak = (INTVAL (operands[5]) != 0); 16512 mod_s = memmodel_base (INTVAL (operands[6])); 16513 mod_f = memmodel_base (INTVAL (operands[7])); 16514 orig_mode = mode = GET_MODE (mem); 16515 16516 mask = shift = NULL_RTX; 16517 if (mode == QImode || mode == HImode) 16518 { 16519 /* Before power8, we didn't have access to lbarx/lharx, so generate a 16520 lwarx and shift/mask operations. With power8, we need to do the 16521 comparison in SImode, but the store is still done in QI/HImode. */ 16522 oldval = convert_modes (SImode, mode, oldval, 1); 16523 16524 if (!TARGET_SYNC_HI_QI) 16525 { 16526 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); 16527 16528 /* Shift and mask OLDVAL into position with the word. */ 16529 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, 16530 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16531 16532 /* Shift and mask NEWVAL into position within the word. */ 16533 newval = convert_modes (SImode, mode, newval, 1); 16534 newval = expand_simple_binop (SImode, ASHIFT, newval, shift, 16535 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16536 } 16537 16538 /* Prepare to adjust the return value. */ 16539 retval = gen_reg_rtx (SImode); 16540 mode = SImode; 16541 } 16542 else if (reg_overlap_mentioned_p (retval, oldval)) 16543 oldval = copy_to_reg (oldval); 16544 16545 if (mode != TImode && !reg_or_short_operand (oldval, mode)) 16546 oldval = copy_to_mode_reg (mode, oldval); 16547 16548 if (reg_overlap_mentioned_p (retval, newval)) 16549 newval = copy_to_reg (newval); 16550 16551 mem = rs6000_pre_atomic_barrier (mem, mod_s); 16552 16553 label1 = NULL_RTX; 16554 if (!is_weak) 16555 { 16556 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 16557 emit_label (XEXP (label1, 0)); 16558 } 16559 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 16560 16561 emit_load_locked (mode, retval, mem); 16562 16563 x = retval; 16564 if (mask) 16565 x = expand_simple_binop (SImode, AND, retval, mask, 16566 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16567 16568 cond = gen_reg_rtx (CCmode); 16569 /* If we have TImode, synthesize a comparison. 
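There is no 128-bit integer compare, so equality is derived from the two DImode halves: the loaded value equals OLDVAL exactly when

      ((new_word0 ^ old_word0) | (new_word1 ^ old_word1)) == 0

   which is what the xordi3/iordi3 sequence below computes before the CCmode compare against const0_rtx.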
*/ 16570 if (mode != TImode) 16571 x = gen_rtx_COMPARE (CCmode, x, oldval); 16572 else 16573 { 16574 rtx xor1_result = gen_reg_rtx (DImode); 16575 rtx xor2_result = gen_reg_rtx (DImode); 16576 rtx or_result = gen_reg_rtx (DImode); 16577 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); 16578 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); 16579 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); 16580 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); 16581 16582 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); 16583 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); 16584 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); 16585 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); 16586 } 16587 16588 emit_insn (gen_rtx_SET (cond, x)); 16589 16590 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 16591 emit_unlikely_jump (x, label2); 16592 16593 x = newval; 16594 if (mask) 16595 x = rs6000_mask_atomic_subword (retval, newval, mask); 16596 16597 emit_store_conditional (orig_mode, cond, mem, x); 16598 16599 if (!is_weak) 16600 { 16601 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 16602 emit_unlikely_jump (x, label1); 16603 } 16604 16605 if (!is_mm_relaxed (mod_f)) 16606 emit_label (XEXP (label2, 0)); 16607 16608 rs6000_post_atomic_barrier (mod_s); 16609 16610 if (is_mm_relaxed (mod_f)) 16611 emit_label (XEXP (label2, 0)); 16612 16613 if (shift) 16614 rs6000_finish_atomic_subword (operands[1], retval, shift); 16615 else if (mode != GET_MODE (operands[1])) 16616 convert_move (operands[1], retval, 1); 16617 16618 /* In all cases, CR0 contains EQ on success, and NE on failure. */ 16619 x = gen_rtx_EQ (SImode, cond, const0_rtx); 16620 emit_insn (gen_rtx_SET (boolval, x)); 16621} 16622 16623/* Expand an atomic exchange operation. */ 16624 16625void 16626rs6000_expand_atomic_exchange (rtx operands[]) 16627{ 16628 rtx retval, mem, val, cond; 16629 machine_mode mode; 16630 enum memmodel model; 16631 rtx label, x, mask, shift; 16632 16633 retval = operands[0]; 16634 mem = operands[1]; 16635 val = operands[2]; 16636 model = memmodel_base (INTVAL (operands[3])); 16637 mode = GET_MODE (mem); 16638 16639 mask = shift = NULL_RTX; 16640 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) 16641 { 16642 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); 16643 16644 /* Shift and mask VAL into position with the word. */ 16645 val = convert_modes (SImode, mode, val, 1); 16646 val = expand_simple_binop (SImode, ASHIFT, val, shift, 16647 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16648 16649 /* Prepare to adjust the return value. */ 16650 retval = gen_reg_rtx (SImode); 16651 mode = SImode; 16652 } 16653 16654 mem = rs6000_pre_atomic_barrier (mem, model); 16655 16656 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 16657 emit_label (XEXP (label, 0)); 16658 16659 emit_load_locked (mode, retval, mem); 16660 16661 x = val; 16662 if (mask) 16663 x = rs6000_mask_atomic_subword (retval, val, mask); 16664 16665 cond = gen_reg_rtx (CCmode); 16666 emit_store_conditional (mode, cond, mem, x); 16667 16668 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 16669 emit_unlikely_jump (x, label); 16670 16671 rs6000_post_atomic_barrier (model); 16672 16673 if (shift) 16674 rs6000_finish_atomic_subword (operands[0], retval, shift); 16675} 16676 16677/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation 16678 to perform. MEM is the memory on which to operate. VAL is the second 16679 operand of the binary operator. 
BEFORE and AFTER are optional locations to 16680 return the value of MEM either before of after the operation. MODEL_RTX 16681 is a CONST_INT containing the memory model to use. */ 16682 16683void 16684rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, 16685 rtx orig_before, rtx orig_after, rtx model_rtx) 16686{ 16687 enum memmodel model = memmodel_base (INTVAL (model_rtx)); 16688 machine_mode mode = GET_MODE (mem); 16689 machine_mode store_mode = mode; 16690 rtx label, x, cond, mask, shift; 16691 rtx before = orig_before, after = orig_after; 16692 16693 mask = shift = NULL_RTX; 16694 /* On power8, we want to use SImode for the operation. On previous systems, 16695 use the operation in a subword and shift/mask to get the proper byte or 16696 halfword. */ 16697 if (mode == QImode || mode == HImode) 16698 { 16699 if (TARGET_SYNC_HI_QI) 16700 { 16701 val = convert_modes (SImode, mode, val, 1); 16702 16703 /* Prepare to adjust the return value. */ 16704 before = gen_reg_rtx (SImode); 16705 if (after) 16706 after = gen_reg_rtx (SImode); 16707 mode = SImode; 16708 } 16709 else 16710 { 16711 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); 16712 16713 /* Shift and mask VAL into position with the word. */ 16714 val = convert_modes (SImode, mode, val, 1); 16715 val = expand_simple_binop (SImode, ASHIFT, val, shift, 16716 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16717 16718 switch (code) 16719 { 16720 case IOR: 16721 case XOR: 16722 /* We've already zero-extended VAL. That is sufficient to 16723 make certain that it does not affect other bits. */ 16724 mask = NULL; 16725 break; 16726 16727 case AND: 16728 /* If we make certain that all of the other bits in VAL are 16729 set, that will be sufficient to not affect other bits. */ 16730 x = gen_rtx_NOT (SImode, mask); 16731 x = gen_rtx_IOR (SImode, x, val); 16732 emit_insn (gen_rtx_SET (val, x)); 16733 mask = NULL; 16734 break; 16735 16736 case NOT: 16737 case PLUS: 16738 case MINUS: 16739 /* These will all affect bits outside the field and need 16740 adjustment via MASK within the loop. */ 16741 break; 16742 16743 default: 16744 gcc_unreachable (); 16745 } 16746 16747 /* Prepare to adjust the return value. 
*/ 16748 before = gen_reg_rtx (SImode); 16749 if (after) 16750 after = gen_reg_rtx (SImode); 16751 store_mode = mode = SImode; 16752 } 16753 } 16754 16755 mem = rs6000_pre_atomic_barrier (mem, model); 16756 16757 label = gen_label_rtx (); 16758 emit_label (label); 16759 label = gen_rtx_LABEL_REF (VOIDmode, label); 16760 16761 if (before == NULL_RTX) 16762 before = gen_reg_rtx (mode); 16763 16764 emit_load_locked (mode, before, mem); 16765 16766 if (code == NOT) 16767 { 16768 x = expand_simple_binop (mode, AND, before, val, 16769 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16770 after = expand_simple_unop (mode, NOT, x, after, 1); 16771 } 16772 else 16773 { 16774 after = expand_simple_binop (mode, code, before, val, 16775 after, 1, OPTAB_LIB_WIDEN); 16776 } 16777 16778 x = after; 16779 if (mask) 16780 { 16781 x = expand_simple_binop (SImode, AND, after, mask, 16782 NULL_RTX, 1, OPTAB_LIB_WIDEN); 16783 x = rs6000_mask_atomic_subword (before, x, mask); 16784 } 16785 else if (store_mode != mode) 16786 x = convert_modes (store_mode, mode, x, 1); 16787 16788 cond = gen_reg_rtx (CCmode); 16789 emit_store_conditional (store_mode, cond, mem, x); 16790 16791 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 16792 emit_unlikely_jump (x, label); 16793 16794 rs6000_post_atomic_barrier (model); 16795 16796 if (shift) 16797 { 16798 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and 16799 then do the calcuations in a SImode register. */ 16800 if (orig_before) 16801 rs6000_finish_atomic_subword (orig_before, before, shift); 16802 if (orig_after) 16803 rs6000_finish_atomic_subword (orig_after, after, shift); 16804 } 16805 else if (store_mode != mode) 16806 { 16807 /* QImode/HImode on machines with lbarx/lharx where we do the native 16808 operation and then do the calcuations in a SImode register. */ 16809 if (orig_before) 16810 convert_move (orig_before, before, 1); 16811 if (orig_after) 16812 convert_move (orig_after, after, 1); 16813 } 16814 else if (orig_after && after != orig_after) 16815 emit_move_insn (orig_after, after); 16816} 16817 16818static GTY(()) alias_set_type TOC_alias_set = -1; 16819 16820alias_set_type 16821get_TOC_alias_set (void) 16822{ 16823 if (TOC_alias_set == -1) 16824 TOC_alias_set = new_alias_set (); 16825 return TOC_alias_set; 16826} 16827 16828/* The mode the ABI uses for a word. This is not the same as word_mode 16829 for -m32 -mpowerpc64. This is used to implement various target hooks. */ 16830 16831static scalar_int_mode 16832rs6000_abi_word_mode (void) 16833{ 16834 return TARGET_32BIT ? SImode : DImode; 16835} 16836 16837/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ 16838static char * 16839rs6000_offload_options (void) 16840{ 16841 if (TARGET_64BIT) 16842 return xstrdup ("-foffload-abi=lp64"); 16843 else 16844 return xstrdup ("-foffload-abi=ilp32"); 16845} 16846 16847 16848/* A quick summary of the various types of 'constant-pool tables' 16849 under PowerPC: 16850 16851 Target Flags Name One table per 16852 AIX (none) AIX TOC object file 16853 AIX -mfull-toc AIX TOC object file 16854 AIX -mminimal-toc AIX minimal TOC translation unit 16855 SVR4/EABI (none) SVR4 SDATA object file 16856 SVR4/EABI -fpic SVR4 pic object file 16857 SVR4/EABI -fPIC SVR4 PIC translation unit 16858 SVR4/EABI -mrelocatable EABI TOC function 16859 SVR4/EABI -maix AIX TOC object file 16860 SVR4/EABI -maix -mminimal-toc 16861 AIX minimal TOC translation unit 16862 16863 Name Reg. Set by entries contains: 16864 made by addrs? fp? sum? 
16865 16866 AIX TOC 2 crt0 as Y option option 16867 AIX minimal TOC 30 prolog gcc Y Y option 16868 SVR4 SDATA 13 crt0 gcc N Y N 16869 SVR4 pic 30 prolog ld Y not yet N 16870 SVR4 PIC 30 prolog gcc Y option option 16871 EABI TOC 30 prolog gcc Y option option 16872 16873*/ 16874 16875/* Hash functions for the hash table. */ 16876 16877static unsigned 16878rs6000_hash_constant (rtx k) 16879{ 16880 enum rtx_code code = GET_CODE (k); 16881 machine_mode mode = GET_MODE (k); 16882 unsigned result = (code << 3) ^ mode; 16883 const char *format; 16884 int flen, fidx; 16885 16886 format = GET_RTX_FORMAT (code); 16887 flen = strlen (format); 16888 fidx = 0; 16889 16890 switch (code) 16891 { 16892 case LABEL_REF: 16893 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); 16894 16895 case CONST_WIDE_INT: 16896 { 16897 int i; 16898 flen = CONST_WIDE_INT_NUNITS (k); 16899 for (i = 0; i < flen; i++) 16900 result = result * 613 + CONST_WIDE_INT_ELT (k, i); 16901 return result; 16902 } 16903 16904 case CONST_DOUBLE: 16905 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; 16906 16907 case CODE_LABEL: 16908 fidx = 3; 16909 break; 16910 16911 default: 16912 break; 16913 } 16914 16915 for (; fidx < flen; fidx++) 16916 switch (format[fidx]) 16917 { 16918 case 's': 16919 { 16920 unsigned i, len; 16921 const char *str = XSTR (k, fidx); 16922 len = strlen (str); 16923 result = result * 613 + len; 16924 for (i = 0; i < len; i++) 16925 result = result * 613 + (unsigned) str[i]; 16926 break; 16927 } 16928 case 'u': 16929 case 'e': 16930 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); 16931 break; 16932 case 'i': 16933 case 'n': 16934 result = result * 613 + (unsigned) XINT (k, fidx); 16935 break; 16936 case 'w': 16937 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) 16938 result = result * 613 + (unsigned) XWINT (k, fidx); 16939 else 16940 { 16941 size_t i; 16942 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) 16943 result = result * 613 + (unsigned) (XWINT (k, fidx) 16944 >> CHAR_BIT * i); 16945 } 16946 break; 16947 case '0': 16948 break; 16949 default: 16950 gcc_unreachable (); 16951 } 16952 16953 return result; 16954} 16955 16956hashval_t 16957toc_hasher::hash (toc_hash_struct *thc) 16958{ 16959 return rs6000_hash_constant (thc->key) ^ thc->key_mode; 16960} 16961 16962/* Compare H1 and H2 for equivalence. */ 16963 16964bool 16965toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) 16966{ 16967 rtx r1 = h1->key; 16968 rtx r2 = h2->key; 16969 16970 if (h1->key_mode != h2->key_mode) 16971 return 0; 16972 16973 return rtx_equal_p (r1, r2); 16974} 16975 16976/* These are the names given by the C++ front-end to vtables, and 16977 vtable-like objects. Ideally, this logic should not be here; 16978 instead, there should be some programmatic way of inquiring as 16979 to whether or not an object is a vtable. */ 16980 16981#define VTABLE_NAME_P(NAME) \ 16982 (startswith (name, "_vt.") \ 16983 || startswith (name, "_ZTV") \ 16984 || startswith (name, "_ZTT") \ 16985 || startswith (name, "_ZTI") \ 16986 || startswith (name, "_ZTC")) 16987 16988#ifdef NO_DOLLAR_IN_LABEL 16989/* Return a GGC-allocated character string translating dollar signs in 16990 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. 
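For example (a hypothetical name), "pic$entry$0" becomes "pic_entry_0"; a name whose first character is '$', or which contains no '$' at all, is returned unchanged.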
*/ 16991 16992const char * 16993rs6000_xcoff_strip_dollar (const char *name) 16994{ 16995 char *strip, *p; 16996 const char *q; 16997 size_t len; 16998 16999 q = (const char *) strchr (name, '$'); 17000 17001 if (q == 0 || q == name) 17002 return name; 17003 17004 len = strlen (name); 17005 strip = XALLOCAVEC (char, len + 1); 17006 strcpy (strip, name); 17007 p = strip + (q - name); 17008 while (p) 17009 { 17010 *p = '_'; 17011 p = strchr (p + 1, '$'); 17012 } 17013 17014 return ggc_alloc_string (strip, len); 17015} 17016#endif 17017 17018void 17019rs6000_output_symbol_ref (FILE *file, rtx x) 17020{ 17021 const char *name = XSTR (x, 0); 17022 17023 /* Currently C++ toc references to vtables can be emitted before it 17024 is decided whether the vtable is public or private. If this is 17025 the case, then the linker will eventually complain that there is 17026 a reference to an unknown section. Thus, for vtables only, 17027 we emit the TOC reference to reference the identifier and not the 17028 symbol. */ 17029 if (VTABLE_NAME_P (name)) 17030 { 17031 RS6000_OUTPUT_BASENAME (file, name); 17032 } 17033 else 17034 assemble_name (file, name); 17035} 17036 17037/* Output a TOC entry. We derive the entry name from what is being 17038 written. */ 17039 17040void 17041output_toc (FILE *file, rtx x, int labelno, machine_mode mode) 17042{ 17043 char buf[256]; 17044 const char *name = buf; 17045 rtx base = x; 17046 HOST_WIDE_INT offset = 0; 17047 17048 gcc_assert (!TARGET_NO_TOC_OR_PCREL); 17049 17050 /* When the linker won't eliminate them, don't output duplicate 17051 TOC entries (this happens on AIX if there is any kind of TOC, 17052 and on SVR4 under -fPIC or -mrelocatable). Don't do this for 17053 CODE_LABELs. */ 17054 if (TARGET_TOC && GET_CODE (x) != LABEL_REF) 17055 { 17056 struct toc_hash_struct *h; 17057 17058 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE 17059 time because GGC is not initialized at that point. */ 17060 if (toc_hash_table == NULL) 17061 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021); 17062 17063 h = ggc_alloc<toc_hash_struct> (); 17064 h->key = x; 17065 h->key_mode = mode; 17066 h->labelno = labelno; 17067 17068 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT); 17069 if (*found == NULL) 17070 *found = h; 17071 else /* This is indeed a duplicate. 17072 Set this label equal to that label. */ 17073 { 17074 fputs ("\t.set ", file); 17075 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); 17076 fprintf (file, "%d,", labelno); 17077 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); 17078 fprintf (file, "%d\n", ((*found)->labelno)); 17079 17080#ifdef HAVE_AS_TLS 17081 if (TARGET_XCOFF && SYMBOL_REF_P (x) 17082 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC 17083 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)) 17084 { 17085 fputs ("\t.set ", file); 17086 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); 17087 fprintf (file, "%d,", labelno); 17088 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); 17089 fprintf (file, "%d\n", ((*found)->labelno)); 17090 } 17091#endif 17092 return; 17093 } 17094 } 17095 17096 /* If we're going to put a double constant in the TOC, make sure it's 17097 aligned properly when strict alignment is on. */ 17098 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x)) 17099 && STRICT_ALIGNMENT 17100 && GET_MODE_BITSIZE (mode) >= 64 17101 && ! (TARGET_NO_FP_IN_TOC && ! 
TARGET_MINIMAL_TOC)) { 17102 ASM_OUTPUT_ALIGN (file, 3); 17103 } 17104 17105 (*targetm.asm_out.internal_label) (file, "LC", labelno); 17106 17107 /* Handle FP constants specially. Note that if we have a minimal 17108 TOC, things we put here aren't actually in the TOC, so we can allow 17109 FP constants. */ 17110 if (CONST_DOUBLE_P (x) 17111 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode 17112 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode)) 17113 { 17114 long k[4]; 17115 17116 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 17117 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k); 17118 else 17119 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); 17120 17121 if (TARGET_64BIT) 17122 { 17123 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17124 fputs (DOUBLE_INT_ASM_OP, file); 17125 else 17126 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", 17127 k[0] & 0xffffffff, k[1] & 0xffffffff, 17128 k[2] & 0xffffffff, k[3] & 0xffffffff); 17129 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n", 17130 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, 17131 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff, 17132 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff, 17133 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff); 17134 return; 17135 } 17136 else 17137 { 17138 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17139 fputs ("\t.long ", file); 17140 else 17141 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", 17142 k[0] & 0xffffffff, k[1] & 0xffffffff, 17143 k[2] & 0xffffffff, k[3] & 0xffffffff); 17144 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n", 17145 k[0] & 0xffffffff, k[1] & 0xffffffff, 17146 k[2] & 0xffffffff, k[3] & 0xffffffff); 17147 return; 17148 } 17149 } 17150 else if (CONST_DOUBLE_P (x) 17151 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode)) 17152 { 17153 long k[2]; 17154 17155 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 17156 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k); 17157 else 17158 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); 17159 17160 if (TARGET_64BIT) 17161 { 17162 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17163 fputs (DOUBLE_INT_ASM_OP, file); 17164 else 17165 fprintf (file, "\t.tc FD_%lx_%lx[TC],", 17166 k[0] & 0xffffffff, k[1] & 0xffffffff); 17167 fprintf (file, "0x%lx%08lx\n", 17168 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, 17169 k[WORDS_BIG_ENDIAN ? 
1 : 0] & 0xffffffff); 17170 return; 17171 } 17172 else 17173 { 17174 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17175 fputs ("\t.long ", file); 17176 else 17177 fprintf (file, "\t.tc FD_%lx_%lx[TC],", 17178 k[0] & 0xffffffff, k[1] & 0xffffffff); 17179 fprintf (file, "0x%lx,0x%lx\n", 17180 k[0] & 0xffffffff, k[1] & 0xffffffff); 17181 return; 17182 } 17183 } 17184 else if (CONST_DOUBLE_P (x) 17185 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode)) 17186 { 17187 long l; 17188 17189 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 17190 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l); 17191 else 17192 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); 17193 17194 if (TARGET_64BIT) 17195 { 17196 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17197 fputs (DOUBLE_INT_ASM_OP, file); 17198 else 17199 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); 17200 if (WORDS_BIG_ENDIAN) 17201 fprintf (file, "0x%lx00000000\n", l & 0xffffffff); 17202 else 17203 fprintf (file, "0x%lx\n", l & 0xffffffff); 17204 return; 17205 } 17206 else 17207 { 17208 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17209 fputs ("\t.long ", file); 17210 else 17211 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); 17212 fprintf (file, "0x%lx\n", l & 0xffffffff); 17213 return; 17214 } 17215 } 17216 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x)) 17217 { 17218 unsigned HOST_WIDE_INT low; 17219 HOST_WIDE_INT high; 17220 17221 low = INTVAL (x) & 0xffffffff; 17222 high = (HOST_WIDE_INT) INTVAL (x) >> 32; 17223 17224 /* TOC entries are always Pmode-sized, so when big-endian 17225 smaller integer constants in the TOC need to be padded. 17226 (This is still a win over putting the constants in 17227 a separate constant pool, because then we'd have 17228 to have both a TOC entry _and_ the actual constant.) 17229 17230 For a 32-bit target, CONST_INT values are loaded and shifted 17231 entirely within `low' and can be stored in one TOC entry. */ 17232 17233 /* It would be easy to make this work, but it doesn't now. 
*/ 17234 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode)); 17235 17236 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode)) 17237 { 17238 low |= high << 32; 17239 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode); 17240 high = (HOST_WIDE_INT) low >> 32; 17241 low &= 0xffffffff; 17242 } 17243 17244 if (TARGET_64BIT) 17245 { 17246 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17247 fputs (DOUBLE_INT_ASM_OP, file); 17248 else 17249 fprintf (file, "\t.tc ID_%lx_%lx[TC],", 17250 (long) high & 0xffffffff, (long) low & 0xffffffff); 17251 fprintf (file, "0x%lx%08lx\n", 17252 (long) high & 0xffffffff, (long) low & 0xffffffff); 17253 return; 17254 } 17255 else 17256 { 17257 if (POINTER_SIZE < GET_MODE_BITSIZE (mode)) 17258 { 17259 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17260 fputs ("\t.long ", file); 17261 else 17262 fprintf (file, "\t.tc ID_%lx_%lx[TC],", 17263 (long) high & 0xffffffff, (long) low & 0xffffffff); 17264 fprintf (file, "0x%lx,0x%lx\n", 17265 (long) high & 0xffffffff, (long) low & 0xffffffff); 17266 } 17267 else 17268 { 17269 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17270 fputs ("\t.long ", file); 17271 else 17272 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff); 17273 fprintf (file, "0x%lx\n", (long) low & 0xffffffff); 17274 } 17275 return; 17276 } 17277 } 17278 17279 if (GET_CODE (x) == CONST) 17280 { 17281 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS 17282 && CONST_INT_P (XEXP (XEXP (x, 0), 1))); 17283 17284 base = XEXP (XEXP (x, 0), 0); 17285 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 17286 } 17287 17288 switch (GET_CODE (base)) 17289 { 17290 case SYMBOL_REF: 17291 name = XSTR (base, 0); 17292 break; 17293 17294 case LABEL_REF: 17295 ASM_GENERATE_INTERNAL_LABEL (buf, "L", 17296 CODE_LABEL_NUMBER (XEXP (base, 0))); 17297 break; 17298 17299 case CODE_LABEL: 17300 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base)); 17301 break; 17302 17303 default: 17304 gcc_unreachable (); 17305 } 17306 17307 if (TARGET_ELF || TARGET_MINIMAL_TOC) 17308 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file); 17309 else 17310 { 17311 fputs ("\t.tc ", file); 17312 RS6000_OUTPUT_BASENAME (file, name); 17313 17314 if (offset < 0) 17315 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset); 17316 else if (offset) 17317 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset); 17318 17319 /* Mark large TOC symbols on AIX with [TE] so they are mapped 17320 after other TOC symbols, reducing overflow of small TOC access 17321 to [TC] symbols. */ 17322 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL 17323 ? "[TE]," : "[TC],", file); 17324 } 17325 17326 /* Currently C++ toc references to vtables can be emitted before it 17327 is decided whether the vtable is public or private. If this is 17328 the case, then the linker will eventually complain that there is 17329 a TOC reference to an unknown section. Thus, for vtables only, 17330 we emit the TOC reference to reference the symbol and not the 17331 section. 
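Thus for a vtable symbol such as _ZTV3Foo (a hypothetical name) the AIX entry comes out roughly as

      .tc _ZTV3Foo[TC],_ZTV3Foo

   with the exact spelling depending on RS6000_OUTPUT_BASENAME and on whether the medium/large code model selects the [TE] form; ordinary symbols instead go through output_addr_const below.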
*/ 17332 if (VTABLE_NAME_P (name)) 17333 { 17334 RS6000_OUTPUT_BASENAME (file, name); 17335 if (offset < 0) 17336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset); 17337 else if (offset > 0) 17338 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); 17339 } 17340 else 17341 output_addr_const (file, x); 17342 17343#if HAVE_AS_TLS 17344 if (TARGET_XCOFF && SYMBOL_REF_P (base)) 17345 { 17346 switch (SYMBOL_REF_TLS_MODEL (base)) 17347 { 17348 case 0: 17349 break; 17350 case TLS_MODEL_LOCAL_EXEC: 17351 fputs ("@le", file); 17352 break; 17353 case TLS_MODEL_INITIAL_EXEC: 17354 fputs ("@ie", file); 17355 break; 17356 /* Use global-dynamic for local-dynamic. */ 17357 case TLS_MODEL_GLOBAL_DYNAMIC: 17358 case TLS_MODEL_LOCAL_DYNAMIC: 17359 putc ('\n', file); 17360 (*targetm.asm_out.internal_label) (file, "LCM", labelno); 17361 fputs ("\t.tc .", file); 17362 RS6000_OUTPUT_BASENAME (file, name); 17363 fputs ("[TC],", file); 17364 output_addr_const (file, x); 17365 fputs ("@m", file); 17366 break; 17367 default: 17368 gcc_unreachable (); 17369 } 17370 } 17371#endif 17372 17373 putc ('\n', file); 17374} 17375 17376/* Output an assembler pseudo-op to write an ASCII string of N characters 17377 starting at P to FILE. 17378 17379 On the RS/6000, we have to do this using the .byte operation and 17380 write out special characters outside the quoted string. 17381 Also, the assembler is broken; very long strings are truncated, 17382 so we must artificially break them up early. */ 17383 17384void 17385output_ascii (FILE *file, const char *p, int n) 17386{ 17387 char c; 17388 int i, count_string; 17389 const char *for_string = "\t.byte \""; 17390 const char *for_decimal = "\t.byte "; 17391 const char *to_close = NULL; 17392 17393 count_string = 0; 17394 for (i = 0; i < n; i++) 17395 { 17396 c = *p++; 17397 if (c >= ' ' && c < 0177) 17398 { 17399 if (for_string) 17400 fputs (for_string, file); 17401 putc (c, file); 17402 17403 /* Write two quotes to get one. */ 17404 if (c == '"') 17405 { 17406 putc (c, file); 17407 ++count_string; 17408 } 17409 17410 for_string = NULL; 17411 for_decimal = "\"\n\t.byte "; 17412 to_close = "\"\n"; 17413 ++count_string; 17414 17415 if (count_string >= 512) 17416 { 17417 fputs (to_close, file); 17418 17419 for_string = "\t.byte \""; 17420 for_decimal = "\t.byte "; 17421 to_close = NULL; 17422 count_string = 0; 17423 } 17424 } 17425 else 17426 { 17427 if (for_decimal) 17428 fputs (for_decimal, file); 17429 fprintf (file, "%d", c); 17430 17431 for_string = "\n\t.byte \""; 17432 for_decimal = ", "; 17433 to_close = "\n"; 17434 count_string = 0; 17435 } 17436 } 17437 17438 /* Now close the string if we have written one. Then end the line. */ 17439 if (to_close) 17440 fputs (to_close, file); 17441} 17442 17443/* Generate a unique section name for FILENAME for a section type 17444 represented by SECTION_DESC. Output goes into BUF. 17445 17446 SECTION_DESC can be any string, as long as it is different for each 17447 possible section type. 17448 17449 We name the section in the same manner as xlc. The name begins with an 17450 underscore followed by the filename (after stripping any leading directory 17451 names) with the last period replaced by the string SECTION_DESC. If 17452 FILENAME does not contain a period, SECTION_DESC is appended to the end of 17453 the name. 
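For example, FILENAME "src/foo.c" with SECTION_DESC "bss_" (hypothetical arguments) produces "_foobss_"; characters that are neither letters nor digits are dropped, so "my-file.c" would contribute "myfile".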
*/ 17454 17455void 17456rs6000_gen_section_name (char **buf, const char *filename, 17457 const char *section_desc) 17458{ 17459 const char *q, *after_last_slash, *last_period = 0; 17460 char *p; 17461 int len; 17462 17463 after_last_slash = filename; 17464 for (q = filename; *q; q++) 17465 { 17466 if (*q == '/') 17467 after_last_slash = q + 1; 17468 else if (*q == '.') 17469 last_period = q; 17470 } 17471 17472 len = strlen (after_last_slash) + strlen (section_desc) + 2; 17473 *buf = (char *) xmalloc (len); 17474 17475 p = *buf; 17476 *p++ = '_'; 17477 17478 for (q = after_last_slash; *q; q++) 17479 { 17480 if (q == last_period) 17481 { 17482 strcpy (p, section_desc); 17483 p += strlen (section_desc); 17484 break; 17485 } 17486 17487 else if (ISALNUM (*q)) 17488 *p++ = *q; 17489 } 17490 17491 if (last_period == 0) 17492 strcpy (p, section_desc); 17493 else 17494 *p = '\0'; 17495} 17496 17497/* Emit profile function. */ 17498 17499void 17500output_profile_hook (int labelno ATTRIBUTE_UNUSED) 17501{ 17502 /* Non-standard profiling for kernels, which just saves LR then calls 17503 _mcount without worrying about arg saves. The idea is to change 17504 the function prologue as little as possible as it isn't easy to 17505 account for arg save/restore code added just for _mcount. */ 17506 if (TARGET_PROFILE_KERNEL) 17507 return; 17508 17509 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 17510 { 17511#ifndef NO_PROFILE_COUNTERS 17512# define NO_PROFILE_COUNTERS 0 17513#endif 17514 if (NO_PROFILE_COUNTERS) 17515 emit_library_call (init_one_libfunc (RS6000_MCOUNT), 17516 LCT_NORMAL, VOIDmode); 17517 else 17518 { 17519 char buf[30]; 17520 const char *label_name; 17521 rtx fun; 17522 17523 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 17524 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); 17525 fun = gen_rtx_SYMBOL_REF (Pmode, label_name); 17526 17527 emit_library_call (init_one_libfunc (RS6000_MCOUNT), 17528 LCT_NORMAL, VOIDmode, fun, Pmode); 17529 } 17530 } 17531 else if (DEFAULT_ABI == ABI_DARWIN) 17532 { 17533 const char *mcount_name = RS6000_MCOUNT; 17534 int caller_addr_regno = LR_REGNO; 17535 17536 /* Be conservative and always set this, at least for now. */ 17537 crtl->uses_pic_offset_table = 1; 17538 17539#if TARGET_MACHO 17540 /* For PIC code, set up a stub and collect the caller's address 17541 from r0, which is where the prologue puts it. */ 17542 if (MACHOPIC_INDIRECT 17543 && crtl->uses_pic_offset_table) 17544 caller_addr_regno = 0; 17545#endif 17546 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name), 17547 LCT_NORMAL, VOIDmode, 17548 gen_rtx_REG (Pmode, caller_addr_regno), Pmode); 17549 } 17550} 17551 17552/* Write function profiler code. 
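For the 32-bit SVR4 ABI without PIC, for instance, the sequence emitted below is roughly

      mflr 0
      lis 12,LPn@ha
      stw 0,4(1)
      la 0,LPn@l(12)
      bl _mcount

   where LPn stands for the per-call-site label created with ASM_GENERATE_INTERNAL_LABEL; this sketches only the non-PIC path, and the -fpic, -fPIC and -msecure-plt variants handled below differ.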
*/ 17553 17554void 17555output_function_profiler (FILE *file, int labelno) 17556{ 17557 char buf[100]; 17558 17559 switch (DEFAULT_ABI) 17560 { 17561 default: 17562 gcc_unreachable (); 17563 17564 case ABI_V4: 17565 if (!TARGET_32BIT) 17566 { 17567 warning (0, "no profiling of 64-bit code for this ABI"); 17568 return; 17569 } 17570 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 17571 fprintf (file, "\tmflr %s\n", reg_names[0]); 17572 if (NO_PROFILE_COUNTERS) 17573 { 17574 asm_fprintf (file, "\tstw %s,4(%s)\n", 17575 reg_names[0], reg_names[1]); 17576 } 17577 else if (TARGET_SECURE_PLT && flag_pic) 17578 { 17579 if (TARGET_LINK_STACK) 17580 { 17581 char name[32]; 17582 get_ppc476_thunk_name (name); 17583 asm_fprintf (file, "\tbl %s\n", name); 17584 } 17585 else 17586 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n"); 17587 asm_fprintf (file, "\tstw %s,4(%s)\n", 17588 reg_names[0], reg_names[1]); 17589 asm_fprintf (file, "\tmflr %s\n", reg_names[12]); 17590 asm_fprintf (file, "\taddis %s,%s,", 17591 reg_names[12], reg_names[12]); 17592 assemble_name (file, buf); 17593 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]); 17594 assemble_name (file, buf); 17595 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]); 17596 } 17597 else if (flag_pic == 1) 17598 { 17599 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file); 17600 asm_fprintf (file, "\tstw %s,4(%s)\n", 17601 reg_names[0], reg_names[1]); 17602 asm_fprintf (file, "\tmflr %s\n", reg_names[12]); 17603 asm_fprintf (file, "\tlwz %s,", reg_names[0]); 17604 assemble_name (file, buf); 17605 asm_fprintf (file, "@got(%s)\n", reg_names[12]); 17606 } 17607 else if (flag_pic > 1) 17608 { 17609 asm_fprintf (file, "\tstw %s,4(%s)\n", 17610 reg_names[0], reg_names[1]); 17611 /* Now, we need to get the address of the label. */ 17612 if (TARGET_LINK_STACK) 17613 { 17614 char name[32]; 17615 get_ppc476_thunk_name (name); 17616 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name); 17617 assemble_name (file, buf); 17618 fputs ("-.\n1:", file); 17619 asm_fprintf (file, "\tmflr %s\n", reg_names[11]); 17620 asm_fprintf (file, "\taddi %s,%s,4\n", 17621 reg_names[11], reg_names[11]); 17622 } 17623 else 17624 { 17625 fputs ("\tbcl 20,31,1f\n\t.long ", file); 17626 assemble_name (file, buf); 17627 fputs ("-.\n1:", file); 17628 asm_fprintf (file, "\tmflr %s\n", reg_names[11]); 17629 } 17630 asm_fprintf (file, "\tlwz %s,0(%s)\n", 17631 reg_names[0], reg_names[11]); 17632 asm_fprintf (file, "\tadd %s,%s,%s\n", 17633 reg_names[0], reg_names[0], reg_names[11]); 17634 } 17635 else 17636 { 17637 asm_fprintf (file, "\tlis %s,", reg_names[12]); 17638 assemble_name (file, buf); 17639 fputs ("@ha\n", file); 17640 asm_fprintf (file, "\tstw %s,4(%s)\n", 17641 reg_names[0], reg_names[1]); 17642 asm_fprintf (file, "\tla %s,", reg_names[0]); 17643 assemble_name (file, buf); 17644 asm_fprintf (file, "@l(%s)\n", reg_names[12]); 17645 } 17646 17647 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */ 17648 fprintf (file, "\tbl %s%s\n", 17649 RS6000_MCOUNT, flag_pic ? "@plt" : ""); 17650 break; 17651 17652 case ABI_AIX: 17653 case ABI_ELFv2: 17654 case ABI_DARWIN: 17655 /* Don't do anything, done in output_profile_hook (). */ 17656 break; 17657 } 17658} 17659 17660 17661 17662/* The following variable value is the last issued insn. 
*/ 17663 17664static rtx_insn *last_scheduled_insn; 17665 17666/* The following variable helps to balance issuing of load and 17667 store instructions */ 17668 17669static int load_store_pendulum; 17670 17671/* The following variable helps pair divide insns during scheduling. */ 17672static int divide_cnt; 17673/* The following variable helps pair and alternate vector and vector load 17674 insns during scheduling. */ 17675static int vec_pairing; 17676 17677 17678/* Power4 load update and store update instructions are cracked into a 17679 load or store and an integer insn which are executed in the same cycle. 17680 Branches have their own dispatch slot which does not count against the 17681 GCC issue rate, but it changes the program flow so there are no other 17682 instructions to issue in this cycle. */ 17683 17684static int 17685rs6000_variable_issue_1 (rtx_insn *insn, int more) 17686{ 17687 last_scheduled_insn = insn; 17688 if (GET_CODE (PATTERN (insn)) == USE 17689 || GET_CODE (PATTERN (insn)) == CLOBBER) 17690 { 17691 cached_can_issue_more = more; 17692 return cached_can_issue_more; 17693 } 17694 17695 if (insn_terminates_group_p (insn, current_group)) 17696 { 17697 cached_can_issue_more = 0; 17698 return cached_can_issue_more; 17699 } 17700 17701 /* If no reservation, but reach here */ 17702 if (recog_memoized (insn) < 0) 17703 return more; 17704 17705 if (rs6000_sched_groups) 17706 { 17707 if (is_microcoded_insn (insn)) 17708 cached_can_issue_more = 0; 17709 else if (is_cracked_insn (insn)) 17710 cached_can_issue_more = more > 2 ? more - 2 : 0; 17711 else 17712 cached_can_issue_more = more - 1; 17713 17714 return cached_can_issue_more; 17715 } 17716 17717 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn)) 17718 return 0; 17719 17720 cached_can_issue_more = more - 1; 17721 return cached_can_issue_more; 17722} 17723 17724static int 17725rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more) 17726{ 17727 int r = rs6000_variable_issue_1 (insn, more); 17728 if (verbose) 17729 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r); 17730 return r; 17731} 17732 17733/* Adjust the cost of a scheduling dependency. Return the new cost of 17734 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 17735 17736static int 17737rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, 17738 unsigned int) 17739{ 17740 enum attr_type attr_type; 17741 17742 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 17743 return cost; 17744 17745 switch (dep_type) 17746 { 17747 case REG_DEP_TRUE: 17748 { 17749 /* Data dependency; DEP_INSN writes a register that INSN reads 17750 some cycles later. */ 17751 17752 /* Separate a load from a narrower, dependent store. */ 17753 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9 17754 || rs6000_tune == PROCESSOR_POWER10) 17755 && GET_CODE (PATTERN (insn)) == SET 17756 && GET_CODE (PATTERN (dep_insn)) == SET 17757 && MEM_P (XEXP (PATTERN (insn), 1)) 17758 && MEM_P (XEXP (PATTERN (dep_insn), 0)) 17759 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) 17760 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) 17761 return cost + 14; 17762 17763 attr_type = get_attr_type (insn); 17764 17765 switch (attr_type) 17766 { 17767 case TYPE_JMPREG: 17768 /* Tell the first scheduling pass about the latency between 17769 a mtctr and bctr (and mtlr and br/blr). 
The first 17770 scheduling pass will not know about this latency since 17771 the mtctr instruction, which has the latency associated 17772 to it, will be generated by reload. */ 17773 return 4; 17774 case TYPE_BRANCH: 17775 /* Leave some extra cycles between a compare and its 17776 dependent branch, to inhibit expensive mispredicts. */ 17777 if ((rs6000_tune == PROCESSOR_PPC603 17778 || rs6000_tune == PROCESSOR_PPC604 17779 || rs6000_tune == PROCESSOR_PPC604e 17780 || rs6000_tune == PROCESSOR_PPC620 17781 || rs6000_tune == PROCESSOR_PPC630 17782 || rs6000_tune == PROCESSOR_PPC750 17783 || rs6000_tune == PROCESSOR_PPC7400 17784 || rs6000_tune == PROCESSOR_PPC7450 17785 || rs6000_tune == PROCESSOR_PPCE5500 17786 || rs6000_tune == PROCESSOR_PPCE6500 17787 || rs6000_tune == PROCESSOR_POWER4 17788 || rs6000_tune == PROCESSOR_POWER5 17789 || rs6000_tune == PROCESSOR_POWER7 17790 || rs6000_tune == PROCESSOR_POWER8 17791 || rs6000_tune == PROCESSOR_POWER9 17792 || rs6000_tune == PROCESSOR_POWER10 17793 || rs6000_tune == PROCESSOR_CELL) 17794 && recog_memoized (dep_insn) 17795 && (INSN_CODE (dep_insn) >= 0)) 17796 17797 switch (get_attr_type (dep_insn)) 17798 { 17799 case TYPE_CMP: 17800 case TYPE_FPCOMPARE: 17801 case TYPE_CR_LOGICAL: 17802 return cost + 2; 17803 case TYPE_EXTS: 17804 case TYPE_MUL: 17805 if (get_attr_dot (dep_insn) == DOT_YES) 17806 return cost + 2; 17807 else 17808 break; 17809 case TYPE_SHIFT: 17810 if (get_attr_dot (dep_insn) == DOT_YES 17811 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO) 17812 return cost + 2; 17813 else 17814 break; 17815 default: 17816 break; 17817 } 17818 break; 17819 17820 case TYPE_STORE: 17821 case TYPE_FPSTORE: 17822 if ((rs6000_tune == PROCESSOR_POWER6) 17823 && recog_memoized (dep_insn) 17824 && (INSN_CODE (dep_insn) >= 0)) 17825 { 17826 17827 if (GET_CODE (PATTERN (insn)) != SET) 17828 /* If this happens, we have to extend this to schedule 17829 optimally. Return default for now. */ 17830 return cost; 17831 17832 /* Adjust the cost for the case where the value written 17833 by a fixed point operation is used as the address 17834 gen value on a store. */ 17835 switch (get_attr_type (dep_insn)) 17836 { 17837 case TYPE_LOAD: 17838 case TYPE_CNTLZ: 17839 { 17840 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17841 return get_attr_sign_extend (dep_insn) 17842 == SIGN_EXTEND_YES ? 6 : 4; 17843 break; 17844 } 17845 case TYPE_SHIFT: 17846 { 17847 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17848 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? 17849 6 : 3; 17850 break; 17851 } 17852 case TYPE_INTEGER: 17853 case TYPE_ADD: 17854 case TYPE_LOGICAL: 17855 case TYPE_EXTS: 17856 case TYPE_INSERT: 17857 { 17858 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17859 return 3; 17860 break; 17861 } 17862 case TYPE_STORE: 17863 case TYPE_FPLOAD: 17864 case TYPE_FPSTORE: 17865 { 17866 if (get_attr_update (dep_insn) == UPDATE_YES 17867 && ! rs6000_store_data_bypass_p (dep_insn, insn)) 17868 return 3; 17869 break; 17870 } 17871 case TYPE_MUL: 17872 { 17873 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17874 return 17; 17875 break; 17876 } 17877 case TYPE_DIV: 17878 { 17879 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17880 return get_attr_size (dep_insn) == SIZE_32 ? 
45 : 57; 17881 break; 17882 } 17883 default: 17884 break; 17885 } 17886 } 17887 break; 17888 17889 case TYPE_LOAD: 17890 if ((rs6000_tune == PROCESSOR_POWER6) 17891 && recog_memoized (dep_insn) 17892 && (INSN_CODE (dep_insn) >= 0)) 17893 { 17894 17895 /* Adjust the cost for the case where the value written 17896 by a fixed point instruction is used within the address 17897 gen portion of a subsequent load(u)(x) */ 17898 switch (get_attr_type (dep_insn)) 17899 { 17900 case TYPE_LOAD: 17901 case TYPE_CNTLZ: 17902 { 17903 if (set_to_load_agen (dep_insn, insn)) 17904 return get_attr_sign_extend (dep_insn) 17905 == SIGN_EXTEND_YES ? 6 : 4; 17906 break; 17907 } 17908 case TYPE_SHIFT: 17909 { 17910 if (set_to_load_agen (dep_insn, insn)) 17911 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? 17912 6 : 3; 17913 break; 17914 } 17915 case TYPE_INTEGER: 17916 case TYPE_ADD: 17917 case TYPE_LOGICAL: 17918 case TYPE_EXTS: 17919 case TYPE_INSERT: 17920 { 17921 if (set_to_load_agen (dep_insn, insn)) 17922 return 3; 17923 break; 17924 } 17925 case TYPE_STORE: 17926 case TYPE_FPLOAD: 17927 case TYPE_FPSTORE: 17928 { 17929 if (get_attr_update (dep_insn) == UPDATE_YES 17930 && set_to_load_agen (dep_insn, insn)) 17931 return 3; 17932 break; 17933 } 17934 case TYPE_MUL: 17935 { 17936 if (set_to_load_agen (dep_insn, insn)) 17937 return 17; 17938 break; 17939 } 17940 case TYPE_DIV: 17941 { 17942 if (set_to_load_agen (dep_insn, insn)) 17943 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; 17944 break; 17945 } 17946 default: 17947 break; 17948 } 17949 } 17950 break; 17951 17952 default: 17953 break; 17954 } 17955 17956 /* Fall out to return default cost. */ 17957 } 17958 break; 17959 17960 case REG_DEP_OUTPUT: 17961 /* Output dependency; DEP_INSN writes a register that INSN writes some 17962 cycles later. */ 17963 if ((rs6000_tune == PROCESSOR_POWER6) 17964 && recog_memoized (dep_insn) 17965 && (INSN_CODE (dep_insn) >= 0)) 17966 { 17967 attr_type = get_attr_type (insn); 17968 17969 switch (attr_type) 17970 { 17971 case TYPE_FP: 17972 case TYPE_FPSIMPLE: 17973 if (get_attr_type (dep_insn) == TYPE_FP 17974 || get_attr_type (dep_insn) == TYPE_FPSIMPLE) 17975 return 1; 17976 break; 17977 default: 17978 break; 17979 } 17980 } 17981 /* Fall through, no cost for output dependency. */ 17982 /* FALLTHRU */ 17983 17984 case REG_DEP_ANTI: 17985 /* Anti dependency; DEP_INSN reads a register that INSN writes some 17986 cycles later. */ 17987 return 0; 17988 17989 default: 17990 gcc_unreachable (); 17991 } 17992 17993 return cost; 17994} 17995 17996/* Debug version of rs6000_adjust_cost. */ 17997 17998static int 17999rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, 18000 int cost, unsigned int dw) 18001{ 18002 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw); 18003 18004 if (ret != cost) 18005 { 18006 const char *dep; 18007 18008 switch (dep_type) 18009 { 18010 default: dep = "unknown depencency"; break; 18011 case REG_DEP_TRUE: dep = "data dependency"; break; 18012 case REG_DEP_OUTPUT: dep = "output dependency"; break; 18013 case REG_DEP_ANTI: dep = "anti depencency"; break; 18014 } 18015 18016 fprintf (stderr, 18017 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, " 18018 "%s, insn:\n", ret, cost, dep); 18019 18020 debug_rtx (insn); 18021 } 18022 18023 return ret; 18024} 18025 18026/* The function returns a true if INSN is microcoded. 18027 Return false otherwise. 
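On Power4/Power5 the attribute tests below cover, for example, update-form sign-extending loads such as lwaux, update-and-indexed loads and stores such as lwzux or stwux, and mfcr; the mnemonics are only illustrative, since the decision is made from insn attributes rather than from specific opcodes.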
*/ 18028 18029static bool 18030is_microcoded_insn (rtx_insn *insn) 18031{ 18032 if (!insn || !NONDEBUG_INSN_P (insn) 18033 || GET_CODE (PATTERN (insn)) == USE 18034 || GET_CODE (PATTERN (insn)) == CLOBBER) 18035 return false; 18036 18037 if (rs6000_tune == PROCESSOR_CELL) 18038 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; 18039 18040 if (rs6000_sched_groups 18041 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5)) 18042 { 18043 enum attr_type type = get_attr_type (insn); 18044 if ((type == TYPE_LOAD 18045 && get_attr_update (insn) == UPDATE_YES 18046 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) 18047 || ((type == TYPE_LOAD || type == TYPE_STORE) 18048 && get_attr_update (insn) == UPDATE_YES 18049 && get_attr_indexed (insn) == INDEXED_YES) 18050 || type == TYPE_MFCR) 18051 return true; 18052 } 18053 18054 return false; 18055} 18056 18057/* The function returns true if INSN is cracked into 2 instructions 18058 by the processor (and therefore occupies 2 issue slots). */ 18059 18060static bool 18061is_cracked_insn (rtx_insn *insn) 18062{ 18063 if (!insn || !NONDEBUG_INSN_P (insn) 18064 || GET_CODE (PATTERN (insn)) == USE 18065 || GET_CODE (PATTERN (insn)) == CLOBBER) 18066 return false; 18067 18068 if (rs6000_sched_groups 18069 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5)) 18070 { 18071 enum attr_type type = get_attr_type (insn); 18072 if ((type == TYPE_LOAD 18073 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES 18074 && get_attr_update (insn) == UPDATE_NO) 18075 || (type == TYPE_LOAD 18076 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO 18077 && get_attr_update (insn) == UPDATE_YES 18078 && get_attr_indexed (insn) == INDEXED_NO) 18079 || (type == TYPE_STORE 18080 && get_attr_update (insn) == UPDATE_YES 18081 && get_attr_indexed (insn) == INDEXED_NO) 18082 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE) 18083 && get_attr_update (insn) == UPDATE_YES) 18084 || (type == TYPE_CR_LOGICAL 18085 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES) 18086 || (type == TYPE_EXTS 18087 && get_attr_dot (insn) == DOT_YES) 18088 || (type == TYPE_SHIFT 18089 && get_attr_dot (insn) == DOT_YES 18090 && get_attr_var_shift (insn) == VAR_SHIFT_NO) 18091 || (type == TYPE_MUL 18092 && get_attr_dot (insn) == DOT_YES) 18093 || type == TYPE_DIV 18094 || (type == TYPE_INSERT 18095 && get_attr_size (insn) == SIZE_32)) 18096 return true; 18097 } 18098 18099 return false; 18100} 18101 18102/* The function returns true if INSN can be issued only from 18103 the branch slot. */ 18104 18105static bool 18106is_branch_slot_insn (rtx_insn *insn) 18107{ 18108 if (!insn || !NONDEBUG_INSN_P (insn) 18109 || GET_CODE (PATTERN (insn)) == USE 18110 || GET_CODE (PATTERN (insn)) == CLOBBER) 18111 return false; 18112 18113 if (rs6000_sched_groups) 18114 { 18115 enum attr_type type = get_attr_type (insn); 18116 if (type == TYPE_BRANCH || type == TYPE_JMPREG) 18117 return true; 18118 return false; 18119 } 18120 18121 return false; 18122} 18123 18124/* The function returns true if out_inst sets a value that is 18125 used in the address generation computation of in_insn */ 18126static bool 18127set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn) 18128{ 18129 rtx out_set, in_set; 18130 18131 /* For performance reasons, only handle the simple case where 18132 both loads are a single_set. 
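A typical pair on Power6 would be, say, addi 9,3,64 followed by lwz 10,0(9): the addi sets r9 and the load then uses r9 to form its address, so rs6000_adjust_cost charges the larger address-generation latency for that dependence (register numbers purely illustrative).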
*/ 18133 out_set = single_set (out_insn); 18134 if (out_set) 18135 { 18136 in_set = single_set (in_insn); 18137 if (in_set) 18138 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); 18139 } 18140 18141 return false; 18142} 18143 18144/* Try to determine base/offset/size parts of the given MEM. 18145 Return true if successful, false if all the values couldn't 18146 be determined. 18147 18148 This function only looks for REG or REG+CONST address forms. 18149 REG+REG address form will return false. */ 18150 18151static bool 18152get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, 18153 HOST_WIDE_INT *size) 18154{ 18155 rtx addr_rtx; 18156 if MEM_SIZE_KNOWN_P (mem) 18157 *size = MEM_SIZE (mem); 18158 else 18159 return false; 18160 18161 addr_rtx = (XEXP (mem, 0)); 18162 if (GET_CODE (addr_rtx) == PRE_MODIFY) 18163 addr_rtx = XEXP (addr_rtx, 1); 18164 18165 *offset = 0; 18166 while (GET_CODE (addr_rtx) == PLUS 18167 && CONST_INT_P (XEXP (addr_rtx, 1))) 18168 { 18169 *offset += INTVAL (XEXP (addr_rtx, 1)); 18170 addr_rtx = XEXP (addr_rtx, 0); 18171 } 18172 if (!REG_P (addr_rtx)) 18173 return false; 18174 18175 *base = addr_rtx; 18176 return true; 18177} 18178 18179/* If the target storage locations of arguments MEM1 and MEM2 are 18180 adjacent, then return the argument that has the lower address. 18181 Otherwise, return NULL_RTX. */ 18182 18183static rtx 18184adjacent_mem_locations (rtx mem1, rtx mem2) 18185{ 18186 rtx reg1, reg2; 18187 HOST_WIDE_INT off1, size1, off2, size2; 18188 18189 if (MEM_P (mem1) 18190 && MEM_P (mem2) 18191 && get_memref_parts (mem1, ®1, &off1, &size1) 18192 && get_memref_parts (mem2, ®2, &off2, &size2) 18193 && REGNO (reg1) == REGNO (reg2)) 18194 { 18195 if (off1 + size1 == off2) 18196 return mem1; 18197 else if (off2 + size2 == off1) 18198 return mem2; 18199 } 18200 18201 return NULL_RTX; 18202} 18203 18204/* This function returns true if it can be determined that the two MEM 18205 locations overlap by at least 1 byte based on base reg/offset/size. */ 18206 18207static bool 18208mem_locations_overlap (rtx mem1, rtx mem2) 18209{ 18210 rtx reg1, reg2; 18211 HOST_WIDE_INT off1, size1, off2, size2; 18212 18213 if (get_memref_parts (mem1, ®1, &off1, &size1) 18214 && get_memref_parts (mem2, ®2, &off2, &size2)) 18215 return ((REGNO (reg1) == REGNO (reg2)) 18216 && (((off1 <= off2) && (off1 + size1 > off2)) 18217 || ((off2 <= off1) && (off2 + size2 > off1)))); 18218 18219 return false; 18220} 18221 18222/* A C statement (sans semicolon) to update the integer scheduling 18223 priority INSN_PRIORITY (INSN). Increase the priority to execute the 18224 INSN earlier, reduce the priority to execute INSN later. Do not 18225 define this macro if you do not need to adjust the scheduling 18226 priorities of insns. */ 18227 18228static int 18229rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority) 18230{ 18231 rtx load_mem, str_mem; 18232 /* On machines (like the 750) which have asymmetric integer units, 18233 where one integer unit can do multiply and divides and the other 18234 can't, reduce the priority of multiply/divide so it is scheduled 18235 before other integer operations. */ 18236 18237#if 0 18238 if (! 
INSN_P (insn)) 18239 return priority; 18240 18241 if (GET_CODE (PATTERN (insn)) == USE) 18242 return priority; 18243 18244 switch (rs6000_tune) { 18245 case PROCESSOR_PPC750: 18246 switch (get_attr_type (insn)) 18247 { 18248 default: 18249 break; 18250 18251 case TYPE_MUL: 18252 case TYPE_DIV: 18253 fprintf (stderr, "priority was %#x (%d) before adjustment\n", 18254 priority, priority); 18255 if (priority >= 0 && priority < 0x01000000) 18256 priority >>= 3; 18257 break; 18258 } 18259 } 18260#endif 18261 18262 if (insn_must_be_first_in_group (insn) 18263 && reload_completed 18264 && current_sched_info->sched_max_insns_priority 18265 && rs6000_sched_restricted_insns_priority) 18266 { 18267 18268 /* Prioritize insns that can be dispatched only in the first 18269 dispatch slot. */ 18270 if (rs6000_sched_restricted_insns_priority == 1) 18271 /* Attach highest priority to insn. This means that in 18272 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations 18273 precede 'priority' (critical path) considerations. */ 18274 return current_sched_info->sched_max_insns_priority; 18275 else if (rs6000_sched_restricted_insns_priority == 2) 18276 /* Increase priority of insn by a minimal amount. This means that in 18277 haifa-sched.cc:ready_sort(), only 'priority' (critical path) 18278 considerations precede dispatch-slot restriction considerations. */ 18279 return (priority + 1); 18280 } 18281 18282 if (rs6000_tune == PROCESSOR_POWER6 18283 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem)) 18284 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem)))) 18285 /* Attach highest priority to insn if the scheduler has just issued two 18286 stores and this instruction is a load, or two loads and this instruction 18287 is a store. Power6 wants loads and stores scheduled alternately 18288 when possible */ 18289 return current_sched_info->sched_max_insns_priority; 18290 18291 return priority; 18292} 18293 18294/* Return true if the instruction is nonpipelined on the Cell. */ 18295static bool 18296is_nonpipeline_insn (rtx_insn *insn) 18297{ 18298 enum attr_type type; 18299 if (!insn || !NONDEBUG_INSN_P (insn) 18300 || GET_CODE (PATTERN (insn)) == USE 18301 || GET_CODE (PATTERN (insn)) == CLOBBER) 18302 return false; 18303 18304 type = get_attr_type (insn); 18305 if (type == TYPE_MUL 18306 || type == TYPE_DIV 18307 || type == TYPE_SDIV 18308 || type == TYPE_DDIV 18309 || type == TYPE_SSQRT 18310 || type == TYPE_DSQRT 18311 || type == TYPE_MFCR 18312 || type == TYPE_MFCRF 18313 || type == TYPE_MFJMPR) 18314 { 18315 return true; 18316 } 18317 return false; 18318} 18319 18320 18321/* Return how many instructions the machine can issue per cycle. */ 18322 18323static int 18324rs6000_issue_rate (void) 18325{ 18326 /* Unless scheduling for register pressure, use issue rate of 1 for 18327 first scheduling pass to decrease degradation. */ 18328 if (!reload_completed && !flag_sched_pressure) 18329 return 1; 18330 18331 switch (rs6000_tune) { 18332 case PROCESSOR_RS64A: 18333 case PROCESSOR_PPC601: /* ? 
*/ 18334 case PROCESSOR_PPC7450: 18335 return 3; 18336 case PROCESSOR_PPC440: 18337 case PROCESSOR_PPC603: 18338 case PROCESSOR_PPC750: 18339 case PROCESSOR_PPC7400: 18340 case PROCESSOR_PPC8540: 18341 case PROCESSOR_PPC8548: 18342 case PROCESSOR_CELL: 18343 case PROCESSOR_PPCE300C2: 18344 case PROCESSOR_PPCE300C3: 18345 case PROCESSOR_PPCE500MC: 18346 case PROCESSOR_PPCE500MC64: 18347 case PROCESSOR_PPCE5500: 18348 case PROCESSOR_PPCE6500: 18349 case PROCESSOR_TITAN: 18350 return 2; 18351 case PROCESSOR_PPC476: 18352 case PROCESSOR_PPC604: 18353 case PROCESSOR_PPC604e: 18354 case PROCESSOR_PPC620: 18355 case PROCESSOR_PPC630: 18356 return 4; 18357 case PROCESSOR_POWER4: 18358 case PROCESSOR_POWER5: 18359 case PROCESSOR_POWER6: 18360 case PROCESSOR_POWER7: 18361 return 5; 18362 case PROCESSOR_POWER8: 18363 return 7; 18364 case PROCESSOR_POWER9: 18365 return 6; 18366 case PROCESSOR_POWER10: 18367 return 8; 18368 default: 18369 return 1; 18370 } 18371} 18372 18373/* Return how many instructions to look ahead for better insn 18374 scheduling. */ 18375 18376static int 18377rs6000_use_sched_lookahead (void) 18378{ 18379 switch (rs6000_tune) 18380 { 18381 case PROCESSOR_PPC8540: 18382 case PROCESSOR_PPC8548: 18383 return 4; 18384 18385 case PROCESSOR_CELL: 18386 return (reload_completed ? 8 : 0); 18387 18388 default: 18389 return 0; 18390 } 18391} 18392 18393/* We are choosing insn from the ready queue. Return zero if INSN can be 18394 chosen. */ 18395static int 18396rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index) 18397{ 18398 if (ready_index == 0) 18399 return 0; 18400 18401 if (rs6000_tune != PROCESSOR_CELL) 18402 return 0; 18403 18404 gcc_assert (insn != NULL_RTX && INSN_P (insn)); 18405 18406 if (!reload_completed 18407 || is_nonpipeline_insn (insn) 18408 || is_microcoded_insn (insn)) 18409 return 1; 18410 18411 return 0; 18412} 18413 18414/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx 18415 and return true. */ 18416 18417static bool 18418find_mem_ref (rtx pat, rtx *mem_ref) 18419{ 18420 const char * fmt; 18421 int i, j; 18422 18423 /* stack_tie does not produce any real memory traffic. */ 18424 if (tie_operand (pat, VOIDmode)) 18425 return false; 18426 18427 if (MEM_P (pat)) 18428 { 18429 *mem_ref = pat; 18430 return true; 18431 } 18432 18433 /* Recursively process the pattern. */ 18434 fmt = GET_RTX_FORMAT (GET_CODE (pat)); 18435 18436 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--) 18437 { 18438 if (fmt[i] == 'e') 18439 { 18440 if (find_mem_ref (XEXP (pat, i), mem_ref)) 18441 return true; 18442 } 18443 else if (fmt[i] == 'E') 18444 for (j = XVECLEN (pat, i) - 1; j >= 0; j--) 18445 { 18446 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref)) 18447 return true; 18448 } 18449 } 18450 18451 return false; 18452} 18453 18454/* Determine if PAT is a PATTERN of a load insn. */ 18455 18456static bool 18457is_load_insn1 (rtx pat, rtx *load_mem) 18458{ 18459 if (!pat || pat == NULL_RTX) 18460 return false; 18461 18462 if (GET_CODE (pat) == SET) 18463 { 18464 if (REG_P (SET_DEST (pat))) 18465 return find_mem_ref (SET_SRC (pat), load_mem); 18466 else 18467 return false; 18468 } 18469 18470 if (GET_CODE (pat) == PARALLEL) 18471 { 18472 int i; 18473 18474 for (i = 0; i < XVECLEN (pat, 0); i++) 18475 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem)) 18476 return true; 18477 } 18478 18479 return false; 18480} 18481 18482/* Determine if INSN loads from memory. 
*/ 18483 18484static bool 18485is_load_insn (rtx insn, rtx *load_mem) 18486{ 18487 if (!insn || !INSN_P (insn)) 18488 return false; 18489 18490 if (CALL_P (insn)) 18491 return false; 18492 18493 return is_load_insn1 (PATTERN (insn), load_mem); 18494} 18495 18496/* Determine if PAT is a PATTERN of a store insn. */ 18497 18498static bool 18499is_store_insn1 (rtx pat, rtx *str_mem) 18500{ 18501 if (!pat || pat == NULL_RTX) 18502 return false; 18503 18504 if (GET_CODE (pat) == SET) 18505 { 18506 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat))) 18507 return find_mem_ref (SET_DEST (pat), str_mem); 18508 else 18509 return false; 18510 } 18511 18512 if (GET_CODE (pat) == PARALLEL) 18513 { 18514 int i; 18515 18516 for (i = 0; i < XVECLEN (pat, 0); i++) 18517 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem)) 18518 return true; 18519 } 18520 18521 return false; 18522} 18523 18524/* Determine if INSN stores to memory. */ 18525 18526static bool 18527is_store_insn (rtx insn, rtx *str_mem) 18528{ 18529 if (!insn || !INSN_P (insn)) 18530 return false; 18531 18532 return is_store_insn1 (PATTERN (insn), str_mem); 18533} 18534 18535/* Return whether TYPE is a Power9 pairable vector instruction type. */ 18536 18537static bool 18538is_power9_pairable_vec_type (enum attr_type type) 18539{ 18540 switch (type) 18541 { 18542 case TYPE_VECSIMPLE: 18543 case TYPE_VECCOMPLEX: 18544 case TYPE_VECDIV: 18545 case TYPE_VECCMP: 18546 case TYPE_VECPERM: 18547 case TYPE_VECFLOAT: 18548 case TYPE_VECFDIV: 18549 case TYPE_VECDOUBLE: 18550 return true; 18551 default: 18552 break; 18553 } 18554 return false; 18555} 18556 18557/* Returns whether the dependence between INSN and NEXT is considered 18558 costly by the given target. */ 18559 18560static bool 18561rs6000_is_costly_dependence (dep_t dep, int cost, int distance) 18562{ 18563 rtx insn; 18564 rtx next; 18565 rtx load_mem, str_mem; 18566 18567 /* If the flag is not enabled - no dependence is considered costly; 18568 allow all dependent insns in the same group. 18569 This is the most aggressive option. */ 18570 if (rs6000_sched_costly_dep == no_dep_costly) 18571 return false; 18572 18573 /* If the flag is set to 1 - a dependence is always considered costly; 18574 do not allow dependent instructions in the same group. 18575 This is the most conservative option. */ 18576 if (rs6000_sched_costly_dep == all_deps_costly) 18577 return true; 18578 18579 insn = DEP_PRO (dep); 18580 next = DEP_CON (dep); 18581 18582 if (rs6000_sched_costly_dep == store_to_load_dep_costly 18583 && is_load_insn (next, &load_mem) 18584 && is_store_insn (insn, &str_mem)) 18585 /* Prevent load after store in the same group. */ 18586 return true; 18587 18588 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly 18589 && is_load_insn (next, &load_mem) 18590 && is_store_insn (insn, &str_mem) 18591 && DEP_TYPE (dep) == REG_DEP_TRUE 18592 && mem_locations_overlap(str_mem, load_mem)) 18593 /* Prevent load after store in the same group if it is a true 18594 dependence. */ 18595 return true; 18596 18597 /* The flag is set to X; dependences with latency >= X are considered costly, 18598 and will not be scheduled in the same group. */ 18599 if (rs6000_sched_costly_dep <= max_dep_latency 18600 && ((cost - distance) >= (int)rs6000_sched_costly_dep)) 18601 return true; 18602 18603 return false; 18604} 18605 18606/* Return the next insn after INSN that is found before TAIL is reached, 18607 skipping any "non-active" insns - insns that will not actually occupy 18608 an issue slot. 
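(For example, notes, USEs, CLOBBERs and stack_tie insns are skipped.)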
Return NULL_RTX if such an insn is not found. */ 18609 18610static rtx_insn * 18611get_next_active_insn (rtx_insn *insn, rtx_insn *tail) 18612{ 18613 if (insn == NULL_RTX || insn == tail) 18614 return NULL; 18615 18616 while (1) 18617 { 18618 insn = NEXT_INSN (insn); 18619 if (insn == NULL_RTX || insn == tail) 18620 return NULL; 18621 18622 if (CALL_P (insn) 18623 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) 18624 || (NONJUMP_INSN_P (insn) 18625 && GET_CODE (PATTERN (insn)) != USE 18626 && GET_CODE (PATTERN (insn)) != CLOBBER 18627 && INSN_CODE (insn) != CODE_FOR_stack_tie)) 18628 break; 18629 } 18630 return insn; 18631} 18632 18633/* Move instruction at POS to the end of the READY list. */ 18634 18635static void 18636move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos) 18637{ 18638 rtx_insn *tmp; 18639 int i; 18640 18641 tmp = ready[pos]; 18642 for (i = pos; i < lastpos; i++) 18643 ready[i] = ready[i + 1]; 18644 ready[lastpos] = tmp; 18645} 18646 18647/* Do Power6 specific sched_reorder2 reordering of ready list. */ 18648 18649static int 18650power6_sched_reorder2 (rtx_insn **ready, int lastpos) 18651{ 18652 /* For Power6, we need to handle some special cases to try and keep the 18653 store queue from overflowing and triggering expensive flushes. 18654 18655 This code monitors how load and store instructions are being issued 18656 and skews the ready list one way or the other to increase the likelihood 18657 that a desired instruction is issued at the proper time. 18658 18659 A couple of things are done. First, we maintain a "load_store_pendulum" 18660 to track the current state of load/store issue. 18661 18662 - If the pendulum is at zero, then no loads or stores have been 18663 issued in the current cycle so we do nothing. 18664 18665 - If the pendulum is 1, then a single load has been issued in this 18666 cycle and we attempt to locate another load in the ready list to 18667 issue with it. 18668 18669 - If the pendulum is -2, then two stores have already been 18670 issued in this cycle, so we increase the priority of the first load 18671 in the ready list to increase it's likelihood of being chosen first 18672 in the next cycle. 18673 18674 - If the pendulum is -1, then a single store has been issued in this 18675 cycle and we attempt to locate another store in the ready list to 18676 issue with it, preferring a store to an adjacent memory location to 18677 facilitate store pairing in the store queue. 18678 18679 - If the pendulum is 2, then two loads have already been 18680 issued in this cycle, so we increase the priority of the first store 18681 in the ready list to increase it's likelihood of being chosen first 18682 in the next cycle. 18683 18684 - If the pendulum < -2 or > 2, then do nothing. 18685 18686 Note: This code covers the most common scenarios. There exist non 18687 load/store instructions which make use of the LSU and which 18688 would need to be accounted for to strictly model the behavior 18689 of the machine. Those instructions are currently unaccounted 18690 for to help minimize compile time overhead of this code. 
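As an illustration of the scheme above: after two loads have issued in a cycle the pendulum reads 2, so the first store found on the ready list gets a priority bump and is more likely to be chosen in the next cycle.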
18691 */ 18692 int pos; 18693 rtx load_mem, str_mem; 18694 18695 if (is_store_insn (last_scheduled_insn, &str_mem)) 18696 /* Issuing a store, swing the load_store_pendulum to the left */ 18697 load_store_pendulum--; 18698 else if (is_load_insn (last_scheduled_insn, &load_mem)) 18699 /* Issuing a load, swing the load_store_pendulum to the right */ 18700 load_store_pendulum++; 18701 else 18702 return cached_can_issue_more; 18703 18704 /* If the pendulum is balanced, or there is only one instruction on 18705 the ready list, then all is well, so return. */ 18706 if ((load_store_pendulum == 0) || (lastpos <= 0)) 18707 return cached_can_issue_more; 18708 18709 if (load_store_pendulum == 1) 18710 { 18711 /* A load has been issued in this cycle. Scan the ready list 18712 for another load to issue with it */ 18713 pos = lastpos; 18714 18715 while (pos >= 0) 18716 { 18717 if (is_load_insn (ready[pos], &load_mem)) 18718 { 18719 /* Found a load. Move it to the head of the ready list, 18720 and adjust it's priority so that it is more likely to 18721 stay there */ 18722 move_to_end_of_ready (ready, pos, lastpos); 18723 18724 if (!sel_sched_p () 18725 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18726 INSN_PRIORITY (ready[lastpos])++; 18727 break; 18728 } 18729 pos--; 18730 } 18731 } 18732 else if (load_store_pendulum == -2) 18733 { 18734 /* Two stores have been issued in this cycle. Increase the 18735 priority of the first load in the ready list to favor it for 18736 issuing in the next cycle. */ 18737 pos = lastpos; 18738 18739 while (pos >= 0) 18740 { 18741 if (is_load_insn (ready[pos], &load_mem) 18742 && !sel_sched_p () 18743 && INSN_PRIORITY_KNOWN (ready[pos])) 18744 { 18745 INSN_PRIORITY (ready[pos])++; 18746 18747 /* Adjust the pendulum to account for the fact that a load 18748 was found and increased in priority. This is to prevent 18749 increasing the priority of multiple loads */ 18750 load_store_pendulum--; 18751 18752 break; 18753 } 18754 pos--; 18755 } 18756 } 18757 else if (load_store_pendulum == -1) 18758 { 18759 /* A store has been issued in this cycle. Scan the ready list for 18760 another store to issue with it, preferring a store to an adjacent 18761 memory location */ 18762 int first_store_pos = -1; 18763 18764 pos = lastpos; 18765 18766 while (pos >= 0) 18767 { 18768 if (is_store_insn (ready[pos], &str_mem)) 18769 { 18770 rtx str_mem2; 18771 /* Maintain the index of the first store found on the 18772 list */ 18773 if (first_store_pos == -1) 18774 first_store_pos = pos; 18775 18776 if (is_store_insn (last_scheduled_insn, &str_mem2) 18777 && adjacent_mem_locations (str_mem, str_mem2)) 18778 { 18779 /* Found an adjacent store. Move it to the head of the 18780 ready list, and adjust it's priority so that it is 18781 more likely to stay there */ 18782 move_to_end_of_ready (ready, pos, lastpos); 18783 18784 if (!sel_sched_p () 18785 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18786 INSN_PRIORITY (ready[lastpos])++; 18787 18788 first_store_pos = -1; 18789 18790 break; 18791 }; 18792 } 18793 pos--; 18794 } 18795 18796 if (first_store_pos >= 0) 18797 { 18798 /* An adjacent store wasn't found, but a non-adjacent store was, 18799 so move the non-adjacent store to the front of the ready 18800 list, and adjust its priority so that it is more likely to 18801 stay there. 
*/ 18802 move_to_end_of_ready (ready, first_store_pos, lastpos); 18803 if (!sel_sched_p () 18804 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18805 INSN_PRIORITY (ready[lastpos])++; 18806 } 18807 } 18808 else if (load_store_pendulum == 2) 18809 { 18810 /* Two loads have been issued in this cycle. Increase the priority 18811 of the first store in the ready list to favor it for issuing in 18812 the next cycle. */ 18813 pos = lastpos; 18814 18815 while (pos >= 0) 18816 { 18817 if (is_store_insn (ready[pos], &str_mem) 18818 && !sel_sched_p () 18819 && INSN_PRIORITY_KNOWN (ready[pos])) 18820 { 18821 INSN_PRIORITY (ready[pos])++; 18822 18823 /* Adjust the pendulum to account for the fact that a store 18824 was found and increased in priority. This is to prevent 18825 increasing the priority of multiple stores */ 18826 load_store_pendulum++; 18827 18828 break; 18829 } 18830 pos--; 18831 } 18832 } 18833 18834 return cached_can_issue_more; 18835} 18836 18837/* Do Power9 specific sched_reorder2 reordering of ready list. */ 18838 18839static int 18840power9_sched_reorder2 (rtx_insn **ready, int lastpos) 18841{ 18842 int pos; 18843 enum attr_type type, type2; 18844 18845 type = get_attr_type (last_scheduled_insn); 18846 18847 /* Try to issue fixed point divides back-to-back in pairs so they will be 18848 routed to separate execution units and execute in parallel. */ 18849 if (type == TYPE_DIV && divide_cnt == 0) 18850 { 18851 /* First divide has been scheduled. */ 18852 divide_cnt = 1; 18853 18854 /* Scan the ready list looking for another divide, if found move it 18855 to the end of the list so it is chosen next. */ 18856 pos = lastpos; 18857 while (pos >= 0) 18858 { 18859 if (recog_memoized (ready[pos]) >= 0 18860 && get_attr_type (ready[pos]) == TYPE_DIV) 18861 { 18862 move_to_end_of_ready (ready, pos, lastpos); 18863 break; 18864 } 18865 pos--; 18866 } 18867 } 18868 else 18869 { 18870 /* Last insn was the 2nd divide or not a divide, reset the counter. */ 18871 divide_cnt = 0; 18872 18873 /* The best dispatch throughput for vector and vector load insns can be 18874 achieved by interleaving a vector and vector load such that they'll 18875 dispatch to the same superslice. If this pairing cannot be achieved 18876 then it is best to pair vector insns together and vector load insns 18877 together. 18878 18879 To aid in this pairing, vec_pairing maintains the current state with 18880 the following values: 18881 18882 0 : Initial state, no vecload/vector pairing has been started. 18883 18884 1 : A vecload or vector insn has been issued and a candidate for 18885 pairing has been found and moved to the end of the ready 18886 list. */ 18887 if (type == TYPE_VECLOAD) 18888 { 18889 /* Issued a vecload. */ 18890 if (vec_pairing == 0) 18891 { 18892 int vecload_pos = -1; 18893 /* We issued a single vecload, look for a vector insn to pair it 18894 with. If one isn't found, try to pair another vecload. */ 18895 pos = lastpos; 18896 while (pos >= 0) 18897 { 18898 if (recog_memoized (ready[pos]) >= 0) 18899 { 18900 type2 = get_attr_type (ready[pos]); 18901 if (is_power9_pairable_vec_type (type2)) 18902 { 18903 /* Found a vector insn to pair with, move it to the 18904 end of the ready list so it is scheduled next. */ 18905 move_to_end_of_ready (ready, pos, lastpos); 18906 vec_pairing = 1; 18907 return cached_can_issue_more; 18908 } 18909 else if (type2 == TYPE_VECLOAD && vecload_pos == -1) 18910 /* Remember position of first vecload seen. 
*/ 18911 vecload_pos = pos; 18912 } 18913 pos--; 18914 } 18915 if (vecload_pos >= 0) 18916 { 18917 /* Didn't find a vector to pair with but did find a vecload, 18918 move it to the end of the ready list. */ 18919 move_to_end_of_ready (ready, vecload_pos, lastpos); 18920 vec_pairing = 1; 18921 return cached_can_issue_more; 18922 } 18923 } 18924 } 18925 else if (is_power9_pairable_vec_type (type)) 18926 { 18927 /* Issued a vector operation. */ 18928 if (vec_pairing == 0) 18929 { 18930 int vec_pos = -1; 18931 /* We issued a single vector insn, look for a vecload to pair it 18932 with. If one isn't found, try to pair another vector. */ 18933 pos = lastpos; 18934 while (pos >= 0) 18935 { 18936 if (recog_memoized (ready[pos]) >= 0) 18937 { 18938 type2 = get_attr_type (ready[pos]); 18939 if (type2 == TYPE_VECLOAD) 18940 { 18941 /* Found a vecload insn to pair with, move it to the 18942 end of the ready list so it is scheduled next. */ 18943 move_to_end_of_ready (ready, pos, lastpos); 18944 vec_pairing = 1; 18945 return cached_can_issue_more; 18946 } 18947 else if (is_power9_pairable_vec_type (type2) 18948 && vec_pos == -1) 18949 /* Remember position of first vector insn seen. */ 18950 vec_pos = pos; 18951 } 18952 pos--; 18953 } 18954 if (vec_pos >= 0) 18955 { 18956 /* Didn't find a vecload to pair with but did find a vector 18957 insn, move it to the end of the ready list. */ 18958 move_to_end_of_ready (ready, vec_pos, lastpos); 18959 vec_pairing = 1; 18960 return cached_can_issue_more; 18961 } 18962 } 18963 } 18964 18965 /* We've either finished a vec/vecload pair, couldn't find an insn to 18966 continue the current pair, or the last insn had nothing to do with 18967 with pairing. In any case, reset the state. */ 18968 vec_pairing = 0; 18969 } 18970 18971 return cached_can_issue_more; 18972} 18973 18974/* Determine if INSN is a store to memory that can be fused with a similar 18975 adjacent store. */ 18976 18977static bool 18978is_fusable_store (rtx_insn *insn, rtx *str_mem) 18979{ 18980 /* Insn must be a non-prefixed base+disp form store. */ 18981 if (is_store_insn (insn, str_mem) 18982 && get_attr_prefixed (insn) == PREFIXED_NO 18983 && get_attr_update (insn) == UPDATE_NO 18984 && get_attr_indexed (insn) == INDEXED_NO) 18985 { 18986 /* Further restrictions by mode and size. */ 18987 if (!MEM_SIZE_KNOWN_P (*str_mem)) 18988 return false; 18989 18990 machine_mode mode = GET_MODE (*str_mem); 18991 HOST_WIDE_INT size = MEM_SIZE (*str_mem); 18992 18993 if (INTEGRAL_MODE_P (mode)) 18994 /* Must be word or dword size. */ 18995 return (size == 4 || size == 8); 18996 else if (FLOAT_MODE_P (mode)) 18997 /* Must be dword size. */ 18998 return (size == 8); 18999 } 19000 19001 return false; 19002} 19003 19004/* Do Power10 specific reordering of the ready list. */ 19005 19006static int 19007power10_sched_reorder (rtx_insn **ready, int lastpos) 19008{ 19009 rtx mem1; 19010 19011 /* Do store fusion during sched2 only. */ 19012 if (!reload_completed) 19013 return cached_can_issue_more; 19014 19015 /* If the prior insn finished off a store fusion pair then simply 19016 reset the counter and return, nothing more to do. */ 19017 if (load_store_pendulum != 0) 19018 { 19019 load_store_pendulum = 0; 19020 return cached_can_issue_more; 19021 } 19022 19023 /* Try to pair certain store insns to adjacent memory locations 19024 so that the hardware will fuse them to a single operation. 
*/ 19025 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1)) 19026 { 19027 19028 /* A fusable store was just scheduled. Scan the ready list for another 19029 store that it can fuse with. */ 19030 int pos = lastpos; 19031 while (pos >= 0) 19032 { 19033 rtx mem2; 19034 /* GPR stores can be ascending or descending offsets, FPR/VSR stores 19035 must be ascending only. */ 19036 if (is_fusable_store (ready[pos], &mem2) 19037 && ((INTEGRAL_MODE_P (GET_MODE (mem1)) 19038 && adjacent_mem_locations (mem1, mem2)) 19039 || (FLOAT_MODE_P (GET_MODE (mem1)) 19040 && (adjacent_mem_locations (mem1, mem2) == mem1)))) 19041 { 19042 /* Found a fusable store. Move it to the end of the ready list 19043 so it is scheduled next. */ 19044 move_to_end_of_ready (ready, pos, lastpos); 19045 19046 load_store_pendulum = -1; 19047 break; 19048 } 19049 pos--; 19050 } 19051 } 19052 19053 return cached_can_issue_more; 19054} 19055 19056/* We are about to begin issuing insns for this clock cycle. */ 19057 19058static int 19059rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, 19060 rtx_insn **ready ATTRIBUTE_UNUSED, 19061 int *pn_ready ATTRIBUTE_UNUSED, 19062 int clock_var ATTRIBUTE_UNUSED) 19063{ 19064 int n_ready = *pn_ready; 19065 19066 if (sched_verbose) 19067 fprintf (dump, "// rs6000_sched_reorder :\n"); 19068 19069 /* Reorder the ready list, if the second to last ready insn 19070 is a nonepipeline insn. */ 19071 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1) 19072 { 19073 if (is_nonpipeline_insn (ready[n_ready - 1]) 19074 && (recog_memoized (ready[n_ready - 2]) > 0)) 19075 /* Simply swap first two insns. */ 19076 std::swap (ready[n_ready - 1], ready[n_ready - 2]); 19077 } 19078 19079 if (rs6000_tune == PROCESSOR_POWER6) 19080 load_store_pendulum = 0; 19081 19082 /* Do Power10 dependent reordering. */ 19083 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn) 19084 power10_sched_reorder (ready, n_ready - 1); 19085 19086 return rs6000_issue_rate (); 19087} 19088 19089/* Like rs6000_sched_reorder, but called after issuing each insn. */ 19090 19091static int 19092rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready, 19093 int *pn_ready, int clock_var ATTRIBUTE_UNUSED) 19094{ 19095 if (sched_verbose) 19096 fprintf (dump, "// rs6000_sched_reorder2 :\n"); 19097 19098 /* Do Power6 dependent reordering if necessary. */ 19099 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn) 19100 return power6_sched_reorder2 (ready, *pn_ready - 1); 19101 19102 /* Do Power9 dependent reordering if necessary. */ 19103 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn 19104 && recog_memoized (last_scheduled_insn) >= 0) 19105 return power9_sched_reorder2 (ready, *pn_ready - 1); 19106 19107 /* Do Power10 dependent reordering. */ 19108 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn) 19109 return power10_sched_reorder (ready, *pn_ready - 1); 19110 19111 return cached_can_issue_more; 19112} 19113 19114/* Return whether the presence of INSN causes a dispatch group termination 19115 of group WHICH_GROUP. 19116 19117 If WHICH_GROUP == current_group, this function will return true if INSN 19118 causes the termination of the current group (i.e, the dispatch group to 19119 which INSN belongs). This means that INSN will be the last insn in the 19120 group it belongs to. 
19121 19122 If WHICH_GROUP == previous_group, this function will return true if INSN 19123 causes the termination of the previous group (i.e, the dispatch group that 19124 precedes the group to which INSN belongs). This means that INSN will be 19125 the first insn in the group it belongs to). */ 19126 19127static bool 19128insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group) 19129{ 19130 bool first, last; 19131 19132 if (! insn) 19133 return false; 19134 19135 first = insn_must_be_first_in_group (insn); 19136 last = insn_must_be_last_in_group (insn); 19137 19138 if (first && last) 19139 return true; 19140 19141 if (which_group == current_group) 19142 return last; 19143 else if (which_group == previous_group) 19144 return first; 19145 19146 return false; 19147} 19148 19149 19150static bool 19151insn_must_be_first_in_group (rtx_insn *insn) 19152{ 19153 enum attr_type type; 19154 19155 if (!insn 19156 || NOTE_P (insn) 19157 || DEBUG_INSN_P (insn) 19158 || GET_CODE (PATTERN (insn)) == USE 19159 || GET_CODE (PATTERN (insn)) == CLOBBER) 19160 return false; 19161 19162 switch (rs6000_tune) 19163 { 19164 case PROCESSOR_POWER5: 19165 if (is_cracked_insn (insn)) 19166 return true; 19167 /* FALLTHRU */ 19168 case PROCESSOR_POWER4: 19169 if (is_microcoded_insn (insn)) 19170 return true; 19171 19172 if (!rs6000_sched_groups) 19173 return false; 19174 19175 type = get_attr_type (insn); 19176 19177 switch (type) 19178 { 19179 case TYPE_MFCR: 19180 case TYPE_MFCRF: 19181 case TYPE_MTCR: 19182 case TYPE_CR_LOGICAL: 19183 case TYPE_MTJMPR: 19184 case TYPE_MFJMPR: 19185 case TYPE_DIV: 19186 case TYPE_LOAD_L: 19187 case TYPE_STORE_C: 19188 case TYPE_ISYNC: 19189 case TYPE_SYNC: 19190 return true; 19191 default: 19192 break; 19193 } 19194 break; 19195 case PROCESSOR_POWER6: 19196 type = get_attr_type (insn); 19197 19198 switch (type) 19199 { 19200 case TYPE_EXTS: 19201 case TYPE_CNTLZ: 19202 case TYPE_TRAP: 19203 case TYPE_MUL: 19204 case TYPE_INSERT: 19205 case TYPE_FPCOMPARE: 19206 case TYPE_MFCR: 19207 case TYPE_MTCR: 19208 case TYPE_MFJMPR: 19209 case TYPE_MTJMPR: 19210 case TYPE_ISYNC: 19211 case TYPE_SYNC: 19212 case TYPE_LOAD_L: 19213 case TYPE_STORE_C: 19214 return true; 19215 case TYPE_SHIFT: 19216 if (get_attr_dot (insn) == DOT_NO 19217 || get_attr_var_shift (insn) == VAR_SHIFT_NO) 19218 return true; 19219 else 19220 break; 19221 case TYPE_DIV: 19222 if (get_attr_size (insn) == SIZE_32) 19223 return true; 19224 else 19225 break; 19226 case TYPE_LOAD: 19227 case TYPE_STORE: 19228 case TYPE_FPLOAD: 19229 case TYPE_FPSTORE: 19230 if (get_attr_update (insn) == UPDATE_YES) 19231 return true; 19232 else 19233 break; 19234 default: 19235 break; 19236 } 19237 break; 19238 case PROCESSOR_POWER7: 19239 type = get_attr_type (insn); 19240 19241 switch (type) 19242 { 19243 case TYPE_CR_LOGICAL: 19244 case TYPE_MFCR: 19245 case TYPE_MFCRF: 19246 case TYPE_MTCR: 19247 case TYPE_DIV: 19248 case TYPE_ISYNC: 19249 case TYPE_LOAD_L: 19250 case TYPE_STORE_C: 19251 case TYPE_MFJMPR: 19252 case TYPE_MTJMPR: 19253 return true; 19254 case TYPE_MUL: 19255 case TYPE_SHIFT: 19256 case TYPE_EXTS: 19257 if (get_attr_dot (insn) == DOT_YES) 19258 return true; 19259 else 19260 break; 19261 case TYPE_LOAD: 19262 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 19263 || get_attr_update (insn) == UPDATE_YES) 19264 return true; 19265 else 19266 break; 19267 case TYPE_STORE: 19268 case TYPE_FPLOAD: 19269 case TYPE_FPSTORE: 19270 if (get_attr_update (insn) == UPDATE_YES) 19271 return true; 19272 else 19273 
break; 19274 default: 19275 break; 19276 } 19277 break; 19278 case PROCESSOR_POWER8: 19279 type = get_attr_type (insn); 19280 19281 switch (type) 19282 { 19283 case TYPE_CR_LOGICAL: 19284 case TYPE_MFCR: 19285 case TYPE_MFCRF: 19286 case TYPE_MTCR: 19287 case TYPE_SYNC: 19288 case TYPE_ISYNC: 19289 case TYPE_LOAD_L: 19290 case TYPE_STORE_C: 19291 case TYPE_VECSTORE: 19292 case TYPE_MFJMPR: 19293 case TYPE_MTJMPR: 19294 return true; 19295 case TYPE_SHIFT: 19296 case TYPE_EXTS: 19297 case TYPE_MUL: 19298 if (get_attr_dot (insn) == DOT_YES) 19299 return true; 19300 else 19301 break; 19302 case TYPE_LOAD: 19303 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 19304 || get_attr_update (insn) == UPDATE_YES) 19305 return true; 19306 else 19307 break; 19308 case TYPE_STORE: 19309 if (get_attr_update (insn) == UPDATE_YES 19310 && get_attr_indexed (insn) == INDEXED_YES) 19311 return true; 19312 else 19313 break; 19314 default: 19315 break; 19316 } 19317 break; 19318 default: 19319 break; 19320 } 19321 19322 return false; 19323} 19324 19325static bool 19326insn_must_be_last_in_group (rtx_insn *insn) 19327{ 19328 enum attr_type type; 19329 19330 if (!insn 19331 || NOTE_P (insn) 19332 || DEBUG_INSN_P (insn) 19333 || GET_CODE (PATTERN (insn)) == USE 19334 || GET_CODE (PATTERN (insn)) == CLOBBER) 19335 return false; 19336 19337 switch (rs6000_tune) { 19338 case PROCESSOR_POWER4: 19339 case PROCESSOR_POWER5: 19340 if (is_microcoded_insn (insn)) 19341 return true; 19342 19343 if (is_branch_slot_insn (insn)) 19344 return true; 19345 19346 break; 19347 case PROCESSOR_POWER6: 19348 type = get_attr_type (insn); 19349 19350 switch (type) 19351 { 19352 case TYPE_EXTS: 19353 case TYPE_CNTLZ: 19354 case TYPE_TRAP: 19355 case TYPE_MUL: 19356 case TYPE_FPCOMPARE: 19357 case TYPE_MFCR: 19358 case TYPE_MTCR: 19359 case TYPE_MFJMPR: 19360 case TYPE_MTJMPR: 19361 case TYPE_ISYNC: 19362 case TYPE_SYNC: 19363 case TYPE_LOAD_L: 19364 case TYPE_STORE_C: 19365 return true; 19366 case TYPE_SHIFT: 19367 if (get_attr_dot (insn) == DOT_NO 19368 || get_attr_var_shift (insn) == VAR_SHIFT_NO) 19369 return true; 19370 else 19371 break; 19372 case TYPE_DIV: 19373 if (get_attr_size (insn) == SIZE_32) 19374 return true; 19375 else 19376 break; 19377 default: 19378 break; 19379 } 19380 break; 19381 case PROCESSOR_POWER7: 19382 type = get_attr_type (insn); 19383 19384 switch (type) 19385 { 19386 case TYPE_ISYNC: 19387 case TYPE_SYNC: 19388 case TYPE_LOAD_L: 19389 case TYPE_STORE_C: 19390 return true; 19391 case TYPE_LOAD: 19392 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 19393 && get_attr_update (insn) == UPDATE_YES) 19394 return true; 19395 else 19396 break; 19397 case TYPE_STORE: 19398 if (get_attr_update (insn) == UPDATE_YES 19399 && get_attr_indexed (insn) == INDEXED_YES) 19400 return true; 19401 else 19402 break; 19403 default: 19404 break; 19405 } 19406 break; 19407 case PROCESSOR_POWER8: 19408 type = get_attr_type (insn); 19409 19410 switch (type) 19411 { 19412 case TYPE_MFCR: 19413 case TYPE_MTCR: 19414 case TYPE_ISYNC: 19415 case TYPE_SYNC: 19416 case TYPE_LOAD_L: 19417 case TYPE_STORE_C: 19418 return true; 19419 case TYPE_LOAD: 19420 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 19421 && get_attr_update (insn) == UPDATE_YES) 19422 return true; 19423 else 19424 break; 19425 case TYPE_STORE: 19426 if (get_attr_update (insn) == UPDATE_YES 19427 && get_attr_indexed (insn) == INDEXED_YES) 19428 return true; 19429 else 19430 break; 19431 default: 19432 break; 19433 } 19434 break; 19435 default: 19436 break; 19437 } 
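/* No processor/insn-type combination above requires INSN to end its dispatch group. */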
19438 19439 return false; 19440} 19441 19442/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate 19443 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ 19444 19445static bool 19446is_costly_group (rtx *group_insns, rtx next_insn) 19447{ 19448 int i; 19449 int issue_rate = rs6000_issue_rate (); 19450 19451 for (i = 0; i < issue_rate; i++) 19452 { 19453 sd_iterator_def sd_it; 19454 dep_t dep; 19455 rtx insn = group_insns[i]; 19456 19457 if (!insn) 19458 continue; 19459 19460 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep) 19461 { 19462 rtx next = DEP_CON (dep); 19463 19464 if (next == next_insn 19465 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0)) 19466 return true; 19467 } 19468 } 19469 19470 return false; 19471} 19472 19473/* Utility of the function redefine_groups. 19474 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS 19475 in the same dispatch group. If so, insert nops before NEXT_INSN, in order 19476 to keep it "far" (in a separate group) from GROUP_INSNS, following 19477 one of the following schemes, depending on the value of the flag 19478 -minsert_sched_nops = X: 19479 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed 19480 in order to force NEXT_INSN into a separate group. 19481 (2) X < sched_finish_regroup_exact: insert exactly X nops. 19482 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop 19483 insertion (has a group just ended, how many vacant issue slots remain in the 19484 last group, and how many dispatch groups were encountered so far). */ 19485 19486static int 19487force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, 19488 rtx_insn *next_insn, bool *group_end, int can_issue_more, 19489 int *group_count) 19490{ 19491 rtx nop; 19492 bool force; 19493 int issue_rate = rs6000_issue_rate (); 19494 bool end = *group_end; 19495 int i; 19496 19497 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn)) 19498 return can_issue_more; 19499 19500 if (rs6000_sched_insert_nops > sched_finish_regroup_exact) 19501 return can_issue_more; 19502 19503 force = is_costly_group (group_insns, next_insn); 19504 if (!force) 19505 return can_issue_more; 19506 19507 if (sched_verbose > 6) 19508 fprintf (dump,"force: group count = %d, can_issue_more = %d\n", 19509 *group_count ,can_issue_more); 19510 19511 if (rs6000_sched_insert_nops == sched_finish_regroup_exact) 19512 { 19513 if (*group_end) 19514 can_issue_more = 0; 19515 19516 /* Since only a branch can be issued in the last issue_slot, it is 19517 sufficient to insert 'can_issue_more - 1' nops if next_insn is not 19518 a branch. If next_insn is a branch, we insert 'can_issue_more' nops; 19519 in this case the last nop will start a new group and the branch 19520 will be forced to the new group. */ 19521 if (can_issue_more && !is_branch_slot_insn (next_insn)) 19522 can_issue_more--; 19523 19524 /* Do we have a special group ending nop? 
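POWER6/7/8 provide one; for other tunings we pad with ordinary nops below.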
*/ 19525 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7 19526 || rs6000_tune == PROCESSOR_POWER8) 19527 { 19528 nop = gen_group_ending_nop (); 19529 emit_insn_before (nop, next_insn); 19530 can_issue_more = 0; 19531 } 19532 else 19533 while (can_issue_more > 0) 19534 { 19535 nop = gen_nop (); 19536 emit_insn_before (nop, next_insn); 19537 can_issue_more--; 19538 } 19539 19540 *group_end = true; 19541 return 0; 19542 } 19543 19544 if (rs6000_sched_insert_nops < sched_finish_regroup_exact) 19545 { 19546 int n_nops = rs6000_sched_insert_nops; 19547 19548 /* Nops can't be issued from the branch slot, so the effective 19549 issue_rate for nops is 'issue_rate - 1'. */ 19550 if (can_issue_more == 0) 19551 can_issue_more = issue_rate; 19552 can_issue_more--; 19553 if (can_issue_more == 0) 19554 { 19555 can_issue_more = issue_rate - 1; 19556 (*group_count)++; 19557 end = true; 19558 for (i = 0; i < issue_rate; i++) 19559 { 19560 group_insns[i] = 0; 19561 } 19562 } 19563 19564 while (n_nops > 0) 19565 { 19566 nop = gen_nop (); 19567 emit_insn_before (nop, next_insn); 19568 if (can_issue_more == issue_rate - 1) /* new group begins */ 19569 end = false; 19570 can_issue_more--; 19571 if (can_issue_more == 0) 19572 { 19573 can_issue_more = issue_rate - 1; 19574 (*group_count)++; 19575 end = true; 19576 for (i = 0; i < issue_rate; i++) 19577 { 19578 group_insns[i] = 0; 19579 } 19580 } 19581 n_nops--; 19582 } 19583 19584 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */ 19585 can_issue_more++; 19586 19587 /* Is next_insn going to start a new group? */ 19588 *group_end 19589 = (end 19590 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) 19591 || (can_issue_more <= 2 && is_cracked_insn (next_insn)) 19592 || (can_issue_more < issue_rate && 19593 insn_terminates_group_p (next_insn, previous_group))); 19594 if (*group_end && end) 19595 (*group_count)--; 19596 19597 if (sched_verbose > 6) 19598 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n", 19599 *group_count, can_issue_more); 19600 return can_issue_more; 19601 } 19602 19603 return can_issue_more; 19604} 19605 19606/* This function tries to synch the dispatch groups that the compiler "sees" 19607 with the dispatch groups that the processor dispatcher is expected to 19608 form in practice. It tries to achieve this synchronization by forcing the 19609 estimated processor grouping on the compiler (as opposed to the function 19610 'pad_goups' which tries to force the scheduler's grouping on the processor). 19611 19612 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and 19613 examines the (estimated) dispatch groups that will be formed by the processor 19614 dispatcher. It marks these group boundaries to reflect the estimated 19615 processor grouping, overriding the grouping that the scheduler had marked. 19616 Depending on the value of the flag '-minsert-sched-nops' this function can 19617 force certain insns into separate groups or force a certain distance between 19618 them by inserting nops, for example, if there exists a "costly dependence" 19619 between the insns. 19620 19621 The function estimates the group boundaries that the processor will form as 19622 follows: It keeps track of how many vacant issue slots are available after 19623 each insn. A subsequent insn will start a new group if one of the following 19624 4 cases applies: 19625 - no more vacant issue slots remain in the current dispatch group. 
19626 - only the last issue slot, which is the branch slot, is vacant, but the next 19627 insn is not a branch. 19628 - only the last 2 or less issue slots, including the branch slot, are vacant, 19629 which means that a cracked insn (which occupies two issue slots) can't be 19630 issued in this group. 19631 - less than 'issue_rate' slots are vacant, and the next insn always needs to 19632 start a new group. */ 19633 19634static int 19635redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, 19636 rtx_insn *tail) 19637{ 19638 rtx_insn *insn, *next_insn; 19639 int issue_rate; 19640 int can_issue_more; 19641 int slot, i; 19642 bool group_end; 19643 int group_count = 0; 19644 rtx *group_insns; 19645 19646 /* Initialize. */ 19647 issue_rate = rs6000_issue_rate (); 19648 group_insns = XALLOCAVEC (rtx, issue_rate); 19649 for (i = 0; i < issue_rate; i++) 19650 { 19651 group_insns[i] = 0; 19652 } 19653 can_issue_more = issue_rate; 19654 slot = 0; 19655 insn = get_next_active_insn (prev_head_insn, tail); 19656 group_end = false; 19657 19658 while (insn != NULL_RTX) 19659 { 19660 slot = (issue_rate - can_issue_more); 19661 group_insns[slot] = insn; 19662 can_issue_more = 19663 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); 19664 if (insn_terminates_group_p (insn, current_group)) 19665 can_issue_more = 0; 19666 19667 next_insn = get_next_active_insn (insn, tail); 19668 if (next_insn == NULL_RTX) 19669 return group_count + 1; 19670 19671 /* Is next_insn going to start a new group? */ 19672 group_end 19673 = (can_issue_more == 0 19674 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) 19675 || (can_issue_more <= 2 && is_cracked_insn (next_insn)) 19676 || (can_issue_more < issue_rate && 19677 insn_terminates_group_p (next_insn, previous_group))); 19678 19679 can_issue_more = force_new_group (sched_verbose, dump, group_insns, 19680 next_insn, &group_end, can_issue_more, 19681 &group_count); 19682 19683 if (group_end) 19684 { 19685 group_count++; 19686 can_issue_more = 0; 19687 for (i = 0; i < issue_rate; i++) 19688 { 19689 group_insns[i] = 0; 19690 } 19691 } 19692 19693 if (GET_MODE (next_insn) == TImode && can_issue_more) 19694 PUT_MODE (next_insn, VOIDmode); 19695 else if (!can_issue_more && GET_MODE (next_insn) != TImode) 19696 PUT_MODE (next_insn, TImode); 19697 19698 insn = next_insn; 19699 if (can_issue_more == 0) 19700 can_issue_more = issue_rate; 19701 } /* while */ 19702 19703 return group_count; 19704} 19705 19706/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the 19707 dispatch group boundaries that the scheduler had marked. Pad with nops 19708 any dispatch groups which have vacant issue slots, in order to force the 19709 scheduler's grouping on the processor dispatcher. The function 19710 returns the number of dispatch groups found. */ 19711 19712static int 19713pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, 19714 rtx_insn *tail) 19715{ 19716 rtx_insn *insn, *next_insn; 19717 rtx nop; 19718 int issue_rate; 19719 int can_issue_more; 19720 int group_end; 19721 int group_count = 0; 19722 19723 /* Initialize issue_rate. 
*/ 19724 issue_rate = rs6000_issue_rate (); 19725 can_issue_more = issue_rate; 19726 19727 insn = get_next_active_insn (prev_head_insn, tail); 19728 next_insn = get_next_active_insn (insn, tail); 19729 19730 while (insn != NULL_RTX) 19731 { 19732 can_issue_more = 19733 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); 19734 19735 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode); 19736 19737 if (next_insn == NULL_RTX) 19738 break; 19739 19740 if (group_end) 19741 { 19742 /* If the scheduler had marked group termination at this location 19743 (between insn and next_insn), and neither insn nor next_insn will 19744 force group termination, pad the group with nops to force group 19745 termination. */ 19746 if (can_issue_more 19747 && (rs6000_sched_insert_nops == sched_finish_pad_groups) 19748 && !insn_terminates_group_p (insn, current_group) 19749 && !insn_terminates_group_p (next_insn, previous_group)) 19750 { 19751 if (!is_branch_slot_insn (next_insn)) 19752 can_issue_more--; 19753 19754 while (can_issue_more) 19755 { 19756 nop = gen_nop (); 19757 emit_insn_before (nop, next_insn); 19758 can_issue_more--; 19759 } 19760 } 19761 19762 can_issue_more = issue_rate; 19763 group_count++; 19764 } 19765 19766 insn = next_insn; 19767 next_insn = get_next_active_insn (insn, tail); 19768 } 19769 19770 return group_count; 19771} 19772 19773/* We're beginning a new block. Initialize data structures as necessary. */ 19774 19775static void 19776rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, 19777 int sched_verbose ATTRIBUTE_UNUSED, 19778 int max_ready ATTRIBUTE_UNUSED) 19779{ 19780 last_scheduled_insn = NULL; 19781 load_store_pendulum = 0; 19782 divide_cnt = 0; 19783 vec_pairing = 0; 19784} 19785 19786/* The following function is called at the end of scheduling BB. 19787 After reload, it inserts nops at insn group bundling. */ 19788 19789static void 19790rs6000_sched_finish (FILE *dump, int sched_verbose) 19791{ 19792 int n_groups; 19793 19794 if (sched_verbose) 19795 fprintf (dump, "=== Finishing schedule.\n"); 19796 19797 if (reload_completed && rs6000_sched_groups) 19798 { 19799 /* Do not run sched_finish hook when selective scheduling enabled. */ 19800 if (sel_sched_p ()) 19801 return; 19802 19803 if (rs6000_sched_insert_nops == sched_finish_none) 19804 return; 19805 19806 if (rs6000_sched_insert_nops == sched_finish_pad_groups) 19807 n_groups = pad_groups (dump, sched_verbose, 19808 current_sched_info->prev_head, 19809 current_sched_info->next_tail); 19810 else 19811 n_groups = redefine_groups (dump, sched_verbose, 19812 current_sched_info->prev_head, 19813 current_sched_info->next_tail); 19814 19815 if (sched_verbose >= 6) 19816 { 19817 fprintf (dump, "ngroups = %d\n", n_groups); 19818 print_rtl (dump, current_sched_info->prev_head); 19819 fprintf (dump, "Done finish_sched\n"); 19820 } 19821 } 19822} 19823 19824struct rs6000_sched_context 19825{ 19826 short cached_can_issue_more; 19827 rtx_insn *last_scheduled_insn; 19828 int load_store_pendulum; 19829 int divide_cnt; 19830 int vec_pairing; 19831}; 19832 19833typedef struct rs6000_sched_context rs6000_sched_context_def; 19834typedef rs6000_sched_context_def *rs6000_sched_context_t; 19835 19836/* Allocate store for new scheduling context. */ 19837static void * 19838rs6000_alloc_sched_context (void) 19839{ 19840 return xmalloc (sizeof (rs6000_sched_context_def)); 19841} 19842 19843/* If CLEAN_P is true then initializes _SC with clean data, 19844 and from the global context otherwise. 
*/ 19845static void 19846rs6000_init_sched_context (void *_sc, bool clean_p) 19847{ 19848 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; 19849 19850 if (clean_p) 19851 { 19852 sc->cached_can_issue_more = 0; 19853 sc->last_scheduled_insn = NULL; 19854 sc->load_store_pendulum = 0; 19855 sc->divide_cnt = 0; 19856 sc->vec_pairing = 0; 19857 } 19858 else 19859 { 19860 sc->cached_can_issue_more = cached_can_issue_more; 19861 sc->last_scheduled_insn = last_scheduled_insn; 19862 sc->load_store_pendulum = load_store_pendulum; 19863 sc->divide_cnt = divide_cnt; 19864 sc->vec_pairing = vec_pairing; 19865 } 19866} 19867 19868/* Sets the global scheduling context to the one pointed to by _SC. */ 19869static void 19870rs6000_set_sched_context (void *_sc) 19871{ 19872 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; 19873 19874 gcc_assert (sc != NULL); 19875 19876 cached_can_issue_more = sc->cached_can_issue_more; 19877 last_scheduled_insn = sc->last_scheduled_insn; 19878 load_store_pendulum = sc->load_store_pendulum; 19879 divide_cnt = sc->divide_cnt; 19880 vec_pairing = sc->vec_pairing; 19881} 19882 19883/* Free _SC. */ 19884static void 19885rs6000_free_sched_context (void *_sc) 19886{ 19887 gcc_assert (_sc != NULL); 19888 19889 free (_sc); 19890} 19891 19892static bool 19893rs6000_sched_can_speculate_insn (rtx_insn *insn) 19894{ 19895 switch (get_attr_type (insn)) 19896 { 19897 case TYPE_DIV: 19898 case TYPE_SDIV: 19899 case TYPE_DDIV: 19900 case TYPE_VECDIV: 19901 case TYPE_SSQRT: 19902 case TYPE_DSQRT: 19903 return false; 19904 19905 default: 19906 return true; 19907 } 19908} 19909 19910/* Length in units of the trampoline for entering a nested function. */ 19911 19912int 19913rs6000_trampoline_size (void) 19914{ 19915 int ret = 0; 19916 19917 switch (DEFAULT_ABI) 19918 { 19919 default: 19920 gcc_unreachable (); 19921 19922 case ABI_AIX: 19923 ret = (TARGET_32BIT) ? 12 : 24; 19924 break; 19925 19926 case ABI_ELFv2: 19927 gcc_assert (!TARGET_32BIT); 19928 ret = 32; 19929 break; 19930 19931 case ABI_DARWIN: 19932 case ABI_V4: 19933 ret = (TARGET_32BIT) ? 40 : 48; 19934 break; 19935 } 19936 19937 return ret; 19938} 19939 19940/* Emit RTL insns to initialize the variable parts of a trampoline. 19941 FNADDR is an RTX for the address of the function's pure code. 19942 CXT is an RTX for the static chain value for the function. */ 19943 19944static void 19945rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 19946{ 19947 int regsize = (TARGET_32BIT) ? 4 : 8; 19948 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 19949 rtx ctx_reg = force_reg (Pmode, cxt); 19950 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0)); 19951 19952 switch (DEFAULT_ABI) 19953 { 19954 default: 19955 gcc_unreachable (); 19956 19957 /* Under AIX, just build the 3 word function descriptor */ 19958 case ABI_AIX: 19959 { 19960 rtx fnmem, fn_reg, toc_reg; 19961 19962 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) 19963 error ("you cannot take the address of a nested function if you use " 19964 "the %qs option", "-mno-pointers-to-nested-functions"); 19965 19966 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); 19967 fn_reg = gen_reg_rtx (Pmode); 19968 toc_reg = gen_reg_rtx (Pmode); 19969 19970 /* Macro to shorten the code expansions below. 
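MEM_PLUS (MEM, OFFSET) denotes the Pmode memory reference OFFSET bytes past MEM.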
*/ 19971# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET) 19972 19973 m_tramp = replace_equiv_address (m_tramp, addr); 19974 19975 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0)); 19976 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize)); 19977 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg); 19978 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg); 19979 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg); 19980 19981# undef MEM_PLUS 19982 } 19983 break; 19984 19985 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */ 19986 case ABI_ELFv2: 19987 case ABI_DARWIN: 19988 case ABI_V4: 19989 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), 19990 LCT_NORMAL, VOIDmode, 19991 addr, Pmode, 19992 GEN_INT (rs6000_trampoline_size ()), SImode, 19993 fnaddr, Pmode, 19994 ctx_reg, Pmode); 19995 break; 19996 } 19997} 19998 19999 20000/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain 20001 identifier as an argument, so the front end shouldn't look it up. */ 20002 20003static bool 20004rs6000_attribute_takes_identifier_p (const_tree attr_id) 20005{ 20006 return is_attribute_p ("altivec", attr_id); 20007} 20008 20009/* Handle the "altivec" attribute. The attribute may have 20010 arguments as follows: 20011 20012 __attribute__((altivec(vector__))) 20013 __attribute__((altivec(pixel__))) (always followed by 'unsigned short') 20014 __attribute__((altivec(bool__))) (always followed by 'unsigned') 20015 20016 and may appear more than once (e.g., 'vector bool char') in a 20017 given declaration. */ 20018 20019static tree 20020rs6000_handle_altivec_attribute (tree *node, 20021 tree name ATTRIBUTE_UNUSED, 20022 tree args, 20023 int flags ATTRIBUTE_UNUSED, 20024 bool *no_add_attrs) 20025{ 20026 tree type = *node, result = NULL_TREE; 20027 machine_mode mode; 20028 int unsigned_p; 20029 char altivec_type 20030 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) 20031 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE) 20032 ? *IDENTIFIER_POINTER (TREE_VALUE (args)) 20033 : '?'); 20034 20035 while (POINTER_TYPE_P (type) 20036 || TREE_CODE (type) == FUNCTION_TYPE 20037 || TREE_CODE (type) == METHOD_TYPE 20038 || TREE_CODE (type) == ARRAY_TYPE) 20039 type = TREE_TYPE (type); 20040 20041 mode = TYPE_MODE (type); 20042 20043 /* Check for invalid AltiVec type qualifiers. 
*/ 20044 if (type == long_double_type_node) 20045 error ("use of %<long double%> in AltiVec types is invalid"); 20046 else if (type == boolean_type_node) 20047 error ("use of boolean types in AltiVec types is invalid"); 20048 else if (TREE_CODE (type) == COMPLEX_TYPE) 20049 error ("use of %<complex%> in AltiVec types is invalid"); 20050 else if (DECIMAL_FLOAT_MODE_P (mode)) 20051 error ("use of decimal floating-point types in AltiVec types is invalid"); 20052 else if (!TARGET_VSX) 20053 { 20054 if (type == long_unsigned_type_node || type == long_integer_type_node) 20055 { 20056 if (TARGET_64BIT) 20057 error ("use of %<long%> in AltiVec types is invalid for " 20058 "64-bit code without %qs", "-mvsx"); 20059 else if (rs6000_warn_altivec_long) 20060 warning (0, "use of %<long%> in AltiVec types is deprecated; " 20061 "use %<int%>"); 20062 } 20063 else if (type == long_long_unsigned_type_node 20064 || type == long_long_integer_type_node) 20065 error ("use of %<long long%> in AltiVec types is invalid without %qs", 20066 "-mvsx"); 20067 else if (type == double_type_node) 20068 error ("use of %<double%> in AltiVec types is invalid without %qs", 20069 "-mvsx"); 20070 } 20071 20072 switch (altivec_type) 20073 { 20074 case 'v': 20075 unsigned_p = TYPE_UNSIGNED (type); 20076 switch (mode) 20077 { 20078 case E_TImode: 20079 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); 20080 break; 20081 case E_DImode: 20082 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); 20083 break; 20084 case E_SImode: 20085 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); 20086 break; 20087 case E_HImode: 20088 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); 20089 break; 20090 case E_QImode: 20091 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); 20092 break; 20093 case E_SFmode: result = V4SF_type_node; break; 20094 case E_DFmode: result = V2DF_type_node; break; 20095 /* If the user says 'vector int bool', we may be handed the 'bool' 20096 attribute _before_ the 'vector' attribute, and so select the 20097 proper type in the 'b' case below. */ 20098 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode: 20099 case E_V2DImode: case E_V2DFmode: 20100 result = type; 20101 default: break; 20102 } 20103 break; 20104 case 'b': 20105 switch (mode) 20106 { 20107 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break; 20108 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break; 20109 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break; 20110 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break; 20111 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; 20112 default: break; 20113 } 20114 break; 20115 case 'p': 20116 switch (mode) 20117 { 20118 case E_V8HImode: result = pixel_V8HI_type_node; 20119 default: break; 20120 } 20121 default: break; 20122 } 20123 20124 /* Propagate qualifiers attached to the element type 20125 onto the vector type. */ 20126 if (result && result != type && TYPE_QUALS (type)) 20127 result = build_qualified_type (result, TYPE_QUALS (type)); 20128 20129 *no_add_attrs = true; /* No need to hang on to the attribute. */ 20130 20131 if (result) 20132 *node = lang_hooks.types.reconstruct_complex_type (*node, result); 20133 20134 return NULL_TREE; 20135} 20136 20137/* AltiVec defines five built-in scalar types that serve as vector 20138 elements; we must teach the compiler how to mangle them. 
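For example, the '__bool int' element type mangles as "U6__booli" and '__pixel' as "u7__pixel", per the mappings below.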
The 128-bit 20139 floating point mangling is target-specific as well. MMA defines 20140 two built-in types to be used as opaque vector types. */ 20141 20142static const char * 20143rs6000_mangle_type (const_tree type) 20144{ 20145 type = TYPE_MAIN_VARIANT (type); 20146 20147 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 20148 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE 20149 && TREE_CODE (type) != OPAQUE_TYPE) 20150 return NULL; 20151 20152 if (type == bool_char_type_node) return "U6__boolc"; 20153 if (type == bool_short_type_node) return "U6__bools"; 20154 if (type == pixel_type_node) return "u7__pixel"; 20155 if (type == bool_int_type_node) return "U6__booli"; 20156 if (type == bool_long_long_type_node) return "U6__boolx"; 20157 20158 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type))) 20159 return "g"; 20160 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type))) 20161 return "u9__ieee128"; 20162 20163 if (type == vector_pair_type_node) 20164 return "u13__vector_pair"; 20165 if (type == vector_quad_type_node) 20166 return "u13__vector_quad"; 20167 20168 /* For all other types, use the default mangling. */ 20169 return NULL; 20170} 20171 20172/* Handle a "longcall" or "shortcall" attribute; arguments as in 20173 struct attribute_spec.handler. */ 20174 20175static tree 20176rs6000_handle_longcall_attribute (tree *node, tree name, 20177 tree args ATTRIBUTE_UNUSED, 20178 int flags ATTRIBUTE_UNUSED, 20179 bool *no_add_attrs) 20180{ 20181 if (TREE_CODE (*node) != FUNCTION_TYPE 20182 && TREE_CODE (*node) != FIELD_DECL 20183 && TREE_CODE (*node) != TYPE_DECL) 20184 { 20185 warning (OPT_Wattributes, "%qE attribute only applies to functions", 20186 name); 20187 *no_add_attrs = true; 20188 } 20189 20190 return NULL_TREE; 20191} 20192 20193/* Set longcall attributes on all functions declared when 20194 rs6000_default_long_calls is true. */ 20195static void 20196rs6000_set_default_type_attributes (tree type) 20197{ 20198 if (rs6000_default_long_calls 20199 && (TREE_CODE (type) == FUNCTION_TYPE 20200 || TREE_CODE (type) == METHOD_TYPE)) 20201 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"), 20202 NULL_TREE, 20203 TYPE_ATTRIBUTES (type)); 20204 20205#if TARGET_MACHO 20206 darwin_set_default_type_attributes (type); 20207#endif 20208} 20209 20210/* Return a reference suitable for calling a function with the 20211 longcall attribute. */ 20212 20213static rtx 20214rs6000_longcall_ref (rtx call_ref, rtx arg) 20215{ 20216 /* System V adds '.' to the internal name, so skip them. */ 20217 const char *call_name = XSTR (call_ref, 0); 20218 if (*call_name == '.') 20219 { 20220 while (*call_name == '.') 20221 call_name++; 20222 20223 tree node = get_identifier (call_name); 20224 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node)); 20225 } 20226 20227 if (TARGET_PLTSEQ) 20228 { 20229 rtx base = const0_rtx; 20230 int regno = 12; 20231 if (rs6000_pcrel_p ()) 20232 { 20233 rtx reg = gen_rtx_REG (Pmode, regno); 20234 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode, 20235 gen_rtvec (3, base, call_ref, arg), 20236 UNSPECV_PLT_PCREL); 20237 emit_insn (gen_rtx_SET (reg, u)); 20238 return reg; 20239 } 20240 20241 if (DEFAULT_ABI == ABI_ELFv2) 20242 base = gen_rtx_REG (Pmode, TOC_REGISTER); 20243 else 20244 { 20245 if (flag_pic) 20246 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 20247 regno = 11; 20248 } 20249 /* Reg must match that used by linker PLT stubs. 
For ELFv2, r12 20250 may be used by a function global entry point. For SysV4, r11 20251 is used by __glink_PLTresolve lazy resolver entry. */ 20252 rtx reg = gen_rtx_REG (Pmode, regno); 20253 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg), 20254 UNSPEC_PLT16_HA); 20255 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode, 20256 gen_rtvec (3, reg, call_ref, arg), 20257 UNSPECV_PLT16_LO); 20258 emit_insn (gen_rtx_SET (reg, hi)); 20259 emit_insn (gen_rtx_SET (reg, lo)); 20260 return reg; 20261 } 20262 20263 return force_reg (Pmode, call_ref); 20264} 20265 20266#ifndef TARGET_USE_MS_BITFIELD_LAYOUT 20267#define TARGET_USE_MS_BITFIELD_LAYOUT 0 20268#endif 20269 20270/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 20271 struct attribute_spec.handler. */ 20272static tree 20273rs6000_handle_struct_attribute (tree *node, tree name, 20274 tree args ATTRIBUTE_UNUSED, 20275 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 20276{ 20277 tree *type = NULL; 20278 if (DECL_P (*node)) 20279 { 20280 if (TREE_CODE (*node) == TYPE_DECL) 20281 type = &TREE_TYPE (*node); 20282 } 20283 else 20284 type = node; 20285 20286 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 20287 || TREE_CODE (*type) == UNION_TYPE))) 20288 { 20289 warning (OPT_Wattributes, "%qE attribute ignored", name); 20290 *no_add_attrs = true; 20291 } 20292 20293 else if ((is_attribute_p ("ms_struct", name) 20294 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 20295 || ((is_attribute_p ("gcc_struct", name) 20296 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 20297 { 20298 warning (OPT_Wattributes, "%qE incompatible attribute ignored", 20299 name); 20300 *no_add_attrs = true; 20301 } 20302 20303 return NULL_TREE; 20304} 20305 20306static bool 20307rs6000_ms_bitfield_layout_p (const_tree record_type) 20308{ 20309 return (TARGET_USE_MS_BITFIELD_LAYOUT && 20310 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 20311 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 20312} 20313 20314#ifdef USING_ELFOS_H 20315 20316/* A get_unnamed_section callback, used for switching to toc_section. */ 20317 20318static void 20319rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED) 20320{ 20321 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 20322 && TARGET_MINIMAL_TOC) 20323 { 20324 if (!toc_initialized) 20325 { 20326 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); 20327 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 20328 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0); 20329 fprintf (asm_out_file, "\t.tc "); 20330 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],"); 20331 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 20332 fprintf (asm_out_file, "\n"); 20333 20334 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 20335 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 20336 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 20337 fprintf (asm_out_file, " = .+32768\n"); 20338 toc_initialized = 1; 20339 } 20340 else 20341 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 20342 } 20343 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 20344 { 20345 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); 20346 if (!toc_initialized) 20347 { 20348 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 
3 : 2); 20349 toc_initialized = 1; 20350 } 20351 } 20352 else 20353 { 20354 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 20355 if (!toc_initialized) 20356 { 20357 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 20358 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 20359 fprintf (asm_out_file, " = .+32768\n"); 20360 toc_initialized = 1; 20361 } 20362 } 20363} 20364 20365/* Implement TARGET_ASM_INIT_SECTIONS. */ 20366 20367static void 20368rs6000_elf_asm_init_sections (void) 20369{ 20370 toc_section 20371 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL); 20372 20373 sdata2_section 20374 = get_unnamed_section (SECTION_WRITE, output_section_asm_op, 20375 SDATA2_SECTION_ASM_OP); 20376} 20377 20378/* Implement TARGET_SELECT_RTX_SECTION. */ 20379 20380static section * 20381rs6000_elf_select_rtx_section (machine_mode mode, rtx x, 20382 unsigned HOST_WIDE_INT align) 20383{ 20384 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) 20385 return toc_section; 20386 else 20387 return default_elf_select_rtx_section (mode, x, align); 20388} 20389 20390/* For a SYMBOL_REF, set generic flags and then perform some 20391 target-specific processing. 20392 20393 When the AIX ABI is requested on a non-AIX system, replace the 20394 function name with the real name (with a leading .) rather than the 20395 function descriptor name. This saves a lot of overriding code to 20396 read the prefixes. */ 20397 20398static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 20399static void 20400rs6000_elf_encode_section_info (tree decl, rtx rtl, int first) 20401{ 20402 default_encode_section_info (decl, rtl, first); 20403 20404 if (first 20405 && TREE_CODE (decl) == FUNCTION_DECL 20406 && !TARGET_AIX 20407 && DEFAULT_ABI == ABI_AIX) 20408 { 20409 rtx sym_ref = XEXP (rtl, 0); 20410 size_t len = strlen (XSTR (sym_ref, 0)); 20411 char *str = XALLOCAVEC (char, len + 2); 20412 str[0] = '.'; 20413 memcpy (str + 1, XSTR (sym_ref, 0), len + 1); 20414 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1); 20415 } 20416} 20417 20418static inline bool 20419compare_section_name (const char *section, const char *templ) 20420{ 20421 int len; 20422 20423 len = strlen (templ); 20424 return (strncmp (section, templ, len) == 0 20425 && (section[len] == 0 || section[len] == '.')); 20426} 20427 20428bool 20429rs6000_elf_in_small_data_p (const_tree decl) 20430{ 20431 if (rs6000_sdata == SDATA_NONE) 20432 return false; 20433 20434 /* We want to merge strings, so we never consider them small data. */ 20435 if (TREE_CODE (decl) == STRING_CST) 20436 return false; 20437 20438 /* Functions are never in the small data area. */ 20439 if (TREE_CODE (decl) == FUNCTION_DECL) 20440 return false; 20441 20442 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl)) 20443 { 20444 const char *section = DECL_SECTION_NAME (decl); 20445 if (compare_section_name (section, ".sdata") 20446 || compare_section_name (section, ".sdata2") 20447 || compare_section_name (section, ".gnu.linkonce.s") 20448 || compare_section_name (section, ".sbss") 20449 || compare_section_name (section, ".sbss2") 20450 || compare_section_name (section, ".gnu.linkonce.sb") 20451 || strcmp (section, ".PPC.EMB.sdata0") == 0 20452 || strcmp (section, ".PPC.EMB.sbss0") == 0) 20453 return true; 20454 } 20455 else 20456 { 20457 /* If we are told not to put readonly data in sdata, then don't. 
*/ 20458 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI 20459 && !rs6000_readonly_in_sdata) 20460 return false; 20461 20462 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); 20463 20464 if (size > 0 20465 && size <= g_switch_value 20466 /* If it's not public, and we're not going to reference it there, 20467 there's no need to put it in the small data section. */ 20468 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl))) 20469 return true; 20470 } 20471 20472 return false; 20473} 20474 20475#endif /* USING_ELFOS_H */ 20476 20477/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */ 20478 20479static bool 20480rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x) 20481{ 20482 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode); 20483} 20484 20485/* Do not place thread-local symbols refs in the object blocks. */ 20486 20487static bool 20488rs6000_use_blocks_for_decl_p (const_tree decl) 20489{ 20490 return !DECL_THREAD_LOCAL_P (decl); 20491} 20492 20493/* Return a REG that occurs in ADDR with coefficient 1. 20494 ADDR can be effectively incremented by incrementing REG. 20495 20496 r0 is special and we must not select it as an address 20497 register by this routine since our caller will try to 20498 increment the returned register via an "la" instruction. */ 20499 20500rtx 20501find_addr_reg (rtx addr) 20502{ 20503 while (GET_CODE (addr) == PLUS) 20504 { 20505 if (REG_P (XEXP (addr, 0)) 20506 && REGNO (XEXP (addr, 0)) != 0) 20507 addr = XEXP (addr, 0); 20508 else if (REG_P (XEXP (addr, 1)) 20509 && REGNO (XEXP (addr, 1)) != 0) 20510 addr = XEXP (addr, 1); 20511 else if (CONSTANT_P (XEXP (addr, 0))) 20512 addr = XEXP (addr, 1); 20513 else if (CONSTANT_P (XEXP (addr, 1))) 20514 addr = XEXP (addr, 0); 20515 else 20516 gcc_unreachable (); 20517 } 20518 gcc_assert (REG_P (addr) && REGNO (addr) != 0); 20519 return addr; 20520} 20521 20522void 20523rs6000_fatal_bad_address (rtx op) 20524{ 20525 fatal_insn ("bad address", op); 20526} 20527 20528#if TARGET_MACHO 20529 20530vec<branch_island, va_gc> *branch_islands; 20531 20532/* Remember to generate a branch island for far calls to the given 20533 function. */ 20534 20535static void 20536add_compiler_branch_island (tree label_name, tree function_name, 20537 int line_number) 20538{ 20539 branch_island bi = {function_name, label_name, line_number}; 20540 vec_safe_push (branch_islands, bi); 20541} 20542 20543/* NO_PREVIOUS_DEF checks in the link list whether the function name is 20544 already there or not. */ 20545 20546static int 20547no_previous_def (tree function_name) 20548{ 20549 branch_island *bi; 20550 unsigned ix; 20551 20552 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) 20553 if (function_name == bi->function_name) 20554 return 0; 20555 return 1; 20556} 20557 20558/* GET_PREV_LABEL gets the label name from the previous definition of 20559 the function. */ 20560 20561static tree 20562get_prev_label (tree function_name) 20563{ 20564 branch_island *bi; 20565 unsigned ix; 20566 20567 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) 20568 if (function_name == bi->function_name) 20569 return bi->label_name; 20570 return NULL_TREE; 20571} 20572 20573/* Generate external symbol indirection stubs (PIC and non-PIC). */ 20574 20575void 20576machopic_output_stub (FILE *file, const char *symb, const char *stub) 20577{ 20578 unsigned int length; 20579 char *symbol_name, *lazy_ptr_name; 20580 char *local_label_0; 20581 static unsigned label = 0; 20582 20583 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
*/ 20584 symb = (*targetm.strip_name_encoding) (symb); 20585 20586 length = strlen (symb); 20587 symbol_name = XALLOCAVEC (char, length + 32); 20588 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 20589 20590 lazy_ptr_name = XALLOCAVEC (char, length + 32); 20591 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length); 20592 20593 if (MACHOPIC_PURE) 20594 { 20595 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]); 20596 fprintf (file, "\t.align 5\n"); 20597 20598 fprintf (file, "%s:\n", stub); 20599 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20600 20601 label++; 20602 local_label_0 = XALLOCAVEC (char, 16); 20603 sprintf (local_label_0, "L%u$spb", label); 20604 20605 fprintf (file, "\tmflr r0\n"); 20606 fprintf (file, "\tbcl 20,31,%s\n", local_label_0); 20607 fprintf (file, "%s:\n\tmflr r11\n", local_label_0); 20608 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n", 20609 lazy_ptr_name, local_label_0); 20610 fprintf (file, "\tmtlr r0\n"); 20611 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n", 20612 (TARGET_64BIT ? "ldu" : "lwzu"), 20613 lazy_ptr_name, local_label_0); 20614 fprintf (file, "\tmtctr r12\n"); 20615 fprintf (file, "\tbctr\n"); 20616 } 20617 else /* mdynamic-no-pic or mkernel. */ 20618 { 20619 switch_to_section (darwin_sections[machopic_symbol_stub1_section]); 20620 fprintf (file, "\t.align 4\n"); 20621 20622 fprintf (file, "%s:\n", stub); 20623 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20624 20625 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name); 20626 fprintf (file, "\t%s r12,lo16(%s)(r11)\n", 20627 (TARGET_64BIT ? "ldu" : "lwzu"), 20628 lazy_ptr_name); 20629 fprintf (file, "\tmtctr r12\n"); 20630 fprintf (file, "\tbctr\n"); 20631 } 20632 20633 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 20634 fprintf (file, "%s:\n", lazy_ptr_name); 20635 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20636 fprintf (file, "%sdyld_stub_binding_helper\n", 20637 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t")); 20638} 20639 20640/* Legitimize PIC addresses. If the address is already 20641 position-independent, we return ORIG. Newly generated 20642 position-independent addresses go into a reg. This is REG if non 20643 zero, otherwise we allocate register(s) as necessary. */ 20644 20645#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000) 20646 20647rtx 20648rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, 20649 rtx reg) 20650{ 20651 rtx base, offset; 20652 20653 if (reg == NULL && !reload_completed) 20654 reg = gen_reg_rtx (Pmode); 20655 20656 if (GET_CODE (orig) == CONST) 20657 { 20658 rtx reg_temp; 20659 20660 if (GET_CODE (XEXP (orig, 0)) == PLUS 20661 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) 20662 return orig; 20663 20664 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 20665 20666 /* Use a different reg for the intermediate value, as 20667 it will be marked UNCHANGING. */ 20668 reg_temp = !can_create_pseudo_p () ? 
reg : gen_reg_rtx (Pmode); 20669 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), 20670 Pmode, reg_temp); 20671 offset = 20672 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), 20673 Pmode, reg); 20674 20675 if (CONST_INT_P (offset)) 20676 { 20677 if (SMALL_INT (offset)) 20678 return plus_constant (Pmode, base, INTVAL (offset)); 20679 else if (!reload_completed) 20680 offset = force_reg (Pmode, offset); 20681 else 20682 { 20683 rtx mem = force_const_mem (Pmode, orig); 20684 return machopic_legitimize_pic_address (mem, Pmode, reg); 20685 } 20686 } 20687 return gen_rtx_PLUS (Pmode, base, offset); 20688 } 20689 20690 /* Fall back on generic machopic code. */ 20691 return machopic_legitimize_pic_address (orig, mode, reg); 20692} 20693 20694/* Output a .machine directive for the Darwin assembler, and call 20695 the generic start_file routine. */ 20696 20697static void 20698rs6000_darwin_file_start (void) 20699{ 20700 static const struct 20701 { 20702 const char *arg; 20703 const char *name; 20704 HOST_WIDE_INT if_set; 20705 } mapping[] = { 20706 { "ppc64", "ppc64", MASK_64BIT }, 20707 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 }, 20708 { "power4", "ppc970", 0 }, 20709 { "G5", "ppc970", 0 }, 20710 { "7450", "ppc7450", 0 }, 20711 { "7400", "ppc7400", MASK_ALTIVEC }, 20712 { "G4", "ppc7400", 0 }, 20713 { "750", "ppc750", 0 }, 20714 { "740", "ppc750", 0 }, 20715 { "G3", "ppc750", 0 }, 20716 { "604e", "ppc604e", 0 }, 20717 { "604", "ppc604", 0 }, 20718 { "603e", "ppc603", 0 }, 20719 { "603", "ppc603", 0 }, 20720 { "601", "ppc601", 0 }, 20721 { NULL, "ppc", 0 } }; 20722 const char *cpu_id = ""; 20723 size_t i; 20724 20725 rs6000_file_start (); 20726 darwin_file_start (); 20727 20728 /* Determine the argument to -mcpu=. Default to G3 if not specified. */ 20729 20730 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') 20731 cpu_id = rs6000_default_cpu; 20732 20733 if (OPTION_SET_P (rs6000_cpu_index)) 20734 cpu_id = processor_target_table[rs6000_cpu_index].name; 20735 20736 /* Look through the mapping array. Pick the first name that either 20737 matches the argument, has a bit set in IF_SET that is also set 20738 in the target flags, or has a NULL name. */ 20739 20740 i = 0; 20741 while (mapping[i].arg != NULL 20742 && strcmp (mapping[i].arg, cpu_id) != 0 20743 && (mapping[i].if_set & rs6000_isa_flags) == 0) 20744 i++; 20745 20746 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name); 20747} 20748 20749#endif /* TARGET_MACHO */ 20750 20751#if TARGET_ELF 20752static int 20753rs6000_elf_reloc_rw_mask (void) 20754{ 20755 if (flag_pic) 20756 return 3; 20757 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 20758#if defined (POWERPC_NETBSD) 20759 return 3; 20760#else 20761 return 2; 20762#endif 20763 else 20764 return 0; 20765} 20766 20767/* Record an element in the table of global constructors. SYMBOL is 20768 a SYMBOL_REF of the function to be called; PRIORITY is a number 20769 between 0 and MAX_INIT_PRIORITY. 20770 20771 This differs from default_named_section_asm_out_constructor in 20772 that we have special handling for -mrelocatable. 
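   For such code the constructor address is emitted as a word marked with @fixup, so that it can be adjusted at run time if the image is loaded at a different address.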
*/ 20773 20774static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED; 20775static void 20776rs6000_elf_asm_out_constructor (rtx symbol, int priority) 20777{ 20778 const char *section = ".ctors"; 20779 char buf[18]; 20780 20781 if (priority != DEFAULT_INIT_PRIORITY) 20782 { 20783 sprintf (buf, ".ctors.%.5u", 20784 /* Invert the numbering so the linker puts us in the proper 20785 order; constructors are run from right to left, and the 20786 linker sorts in increasing order. */ 20787 MAX_INIT_PRIORITY - priority); 20788 section = buf; 20789 } 20790 20791 switch_to_section (get_section (section, SECTION_WRITE, NULL)); 20792 assemble_align (POINTER_SIZE); 20793 20794 if (DEFAULT_ABI == ABI_V4 20795 && (TARGET_RELOCATABLE || flag_pic > 1)) 20796 { 20797 fputs ("\t.long (", asm_out_file); 20798 output_addr_const (asm_out_file, symbol); 20799 fputs (")@fixup\n", asm_out_file); 20800 } 20801 else 20802 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); 20803} 20804 20805static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED; 20806static void 20807rs6000_elf_asm_out_destructor (rtx symbol, int priority) 20808{ 20809 const char *section = ".dtors"; 20810 char buf[18]; 20811 20812 if (priority != DEFAULT_INIT_PRIORITY) 20813 { 20814 sprintf (buf, ".dtors.%.5u", 20815 /* Invert the numbering so the linker puts us in the proper 20816 order; constructors are run from right to left, and the 20817 linker sorts in increasing order. */ 20818 MAX_INIT_PRIORITY - priority); 20819 section = buf; 20820 } 20821 20822 switch_to_section (get_section (section, SECTION_WRITE, NULL)); 20823 assemble_align (POINTER_SIZE); 20824 20825 if (DEFAULT_ABI == ABI_V4 20826 && (TARGET_RELOCATABLE || flag_pic > 1)) 20827 { 20828 fputs ("\t.long (", asm_out_file); 20829 output_addr_const (asm_out_file, symbol); 20830 fputs (")@fixup\n", asm_out_file); 20831 } 20832 else 20833 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); 20834} 20835 20836void 20837rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) 20838{ 20839 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) 20840 { 20841 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file); 20842 ASM_OUTPUT_LABEL (file, name); 20843 fputs (DOUBLE_INT_ASM_OP, file); 20844 rs6000_output_function_entry (file, name); 20845 fputs (",.TOC.@tocbase,0\n\t.previous\n", file); 20846 if (DOT_SYMBOLS) 20847 { 20848 fputs ("\t.size\t", file); 20849 assemble_name (file, name); 20850 fputs (",24\n\t.type\t.", file); 20851 assemble_name (file, name); 20852 fputs (",@function\n", file); 20853 if (TREE_PUBLIC (decl) && ! 
DECL_WEAK (decl)) 20854 { 20855 fputs ("\t.globl\t.", file); 20856 assemble_name (file, name); 20857 putc ('\n', file); 20858 } 20859 } 20860 else 20861 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 20862 ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); 20863 rs6000_output_function_entry (file, name); 20864 fputs (":\n", file); 20865 return; 20866 } 20867 20868 int uses_toc; 20869 if (DEFAULT_ABI == ABI_V4 20870 && (TARGET_RELOCATABLE || flag_pic > 1) 20871 && !TARGET_SECURE_PLT 20872 && (!constant_pool_empty_p () || crtl->profile) 20873 && (uses_toc = uses_TOC ())) 20874 { 20875 char buf[256]; 20876 20877 if (uses_toc == 2) 20878 switch_to_other_text_partition (); 20879 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); 20880 20881 fprintf (file, "\t.long "); 20882 assemble_name (file, toc_label_name); 20883 need_toc_init = 1; 20884 putc ('-', file); 20885 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); 20886 assemble_name (file, buf); 20887 putc ('\n', file); 20888 if (uses_toc == 2) 20889 switch_to_other_text_partition (); 20890 } 20891 20892 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 20893 ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); 20894 20895 if (TARGET_CMODEL == CMODEL_LARGE 20896 && rs6000_global_entry_point_prologue_needed_p ()) 20897 { 20898 char buf[256]; 20899 20900 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); 20901 20902 fprintf (file, "\t.quad .TOC.-"); 20903 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); 20904 assemble_name (file, buf); 20905 putc ('\n', file); 20906 } 20907 20908 if (DEFAULT_ABI == ABI_AIX) 20909 { 20910 const char *desc_name, *orig_name; 20911 20912 orig_name = (*targetm.strip_name_encoding) (name); 20913 desc_name = orig_name; 20914 while (*desc_name == '.') 20915 desc_name++; 20916 20917 if (TREE_PUBLIC (decl)) 20918 fprintf (file, "\t.globl %s\n", desc_name); 20919 20920 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 20921 fprintf (file, "%s:\n", desc_name); 20922 fprintf (file, "\t.long %s\n", orig_name); 20923 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file); 20924 fputs ("\t.long 0\n", file); 20925 fprintf (file, "\t.previous\n"); 20926 } 20927 ASM_OUTPUT_LABEL (file, name); 20928} 20929 20930static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; 20931static void 20932rs6000_elf_file_end (void) 20933{ 20934#ifdef HAVE_AS_GNU_ATTRIBUTE 20935 /* ??? The value emitted depends on options active at file end. 20936 Assume anyone using #pragma or attributes that might change 20937 options knows what they are doing. */ 20938 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4) 20939 && rs6000_passes_float) 20940 { 20941 int fp; 20942 20943 if (TARGET_HARD_FLOAT) 20944 fp = 1; 20945 else 20946 fp = 2; 20947 if (rs6000_passes_long_double) 20948 { 20949 if (!TARGET_LONG_DOUBLE_128) 20950 fp |= 2 * 4; 20951 else if (TARGET_IEEEQUAD) 20952 fp |= 3 * 4; 20953 else 20954 fp |= 1 * 4; 20955 } 20956 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp); 20957 } 20958 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4) 20959 { 20960 if (rs6000_passes_vector) 20961 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", 20962 (TARGET_ALTIVEC_ABI ? 2 : 1)); 20963 if (rs6000_returns_struct) 20964 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n", 20965 aix_struct_return ? 
2 : 1); 20966 } 20967#endif 20968#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) 20969 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2) 20970 file_end_indicate_exec_stack (); 20971#endif 20972 20973 if (flag_split_stack) 20974 file_end_indicate_split_stack (); 20975 20976 if (cpu_builtin_p) 20977 { 20978 /* We have expanded a CPU builtin, so we need to emit a reference to 20979 the special symbol that LIBC uses to declare it supports the 20980 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */ 20981 switch_to_section (data_section); 20982 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3); 20983 fprintf (asm_out_file, "\t%s %s\n", 20984 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol); 20985 } 20986} 20987#endif 20988 20989#if TARGET_XCOFF 20990 20991#ifndef HAVE_XCOFF_DWARF_EXTRAS 20992#define HAVE_XCOFF_DWARF_EXTRAS 0 20993#endif 20994 20995static enum unwind_info_type 20996rs6000_xcoff_debug_unwind_info (void) 20997{ 20998 return UI_NONE; 20999} 21000 21001static void 21002rs6000_xcoff_asm_output_anchor (rtx symbol) 21003{ 21004 char buffer[100]; 21005 21006 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC, 21007 SYMBOL_REF_BLOCK_OFFSET (symbol)); 21008 fprintf (asm_out_file, "%s", SET_ASM_OP); 21009 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0)); 21010 fprintf (asm_out_file, ","); 21011 RS6000_OUTPUT_BASENAME (asm_out_file, buffer); 21012 fprintf (asm_out_file, "\n"); 21013} 21014 21015static void 21016rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) 21017{ 21018 fputs (GLOBAL_ASM_OP, stream); 21019 RS6000_OUTPUT_BASENAME (stream, name); 21020 putc ('\n', stream); 21021} 21022 21023/* A get_unnamed_decl callback, used for read-only sections. PTR 21024 points to the section string variable. */ 21025 21026static void 21027rs6000_xcoff_output_readonly_section_asm_op (const char *directive) 21028{ 21029 fprintf (asm_out_file, "\t.csect %s[RO],%s\n", 21030 directive 21031 ? xcoff_private_rodata_section_name 21032 : xcoff_read_only_section_name, 21033 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 21034} 21035 21036/* Likewise for read-write sections. */ 21037 21038static void 21039rs6000_xcoff_output_readwrite_section_asm_op (const char *) 21040{ 21041 fprintf (asm_out_file, "\t.csect %s[RW],%s\n", 21042 xcoff_private_data_section_name, 21043 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 21044} 21045 21046static void 21047rs6000_xcoff_output_tls_section_asm_op (const char *directive) 21048{ 21049 fprintf (asm_out_file, "\t.csect %s[TL],%s\n", 21050 directive 21051 ? xcoff_private_data_section_name 21052 : xcoff_tls_data_section_name, 21053 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 21054} 21055 21056/* A get_unnamed_section callback, used for switching to toc_section. */ 21057 21058static void 21059rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED) 21060{ 21061 if (TARGET_MINIMAL_TOC) 21062 { 21063 /* toc_section is always selected at least once from 21064 rs6000_xcoff_file_start, so this is guaranteed to 21065 always be defined once and only once in each file. */ 21066 if (!toc_initialized) 21067 { 21068 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file); 21069 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file); 21070 toc_initialized = 1; 21071 } 21072 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n", 21073 (TARGET_32BIT ? "" : ",3")); 21074 } 21075 else 21076 fputs ("\t.toc\n", asm_out_file); 21077} 21078 21079/* Implement TARGET_ASM_INIT_SECTIONS. 
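   Create the XCOFF read-only, read-write, TLS and TOC sections that the section-selection hooks below hand out.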
*/ 21080 21081static void 21082rs6000_xcoff_asm_init_sections (void) 21083{ 21084 read_only_data_section 21085 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, 21086 NULL); 21087 21088 private_data_section 21089 = get_unnamed_section (SECTION_WRITE, 21090 rs6000_xcoff_output_readwrite_section_asm_op, 21091 NULL); 21092 21093 read_only_private_data_section 21094 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, 21095 ""); 21096 21097 tls_data_section 21098 = get_unnamed_section (SECTION_TLS, 21099 rs6000_xcoff_output_tls_section_asm_op, 21100 NULL); 21101 21102 tls_private_data_section 21103 = get_unnamed_section (SECTION_TLS, 21104 rs6000_xcoff_output_tls_section_asm_op, 21105 ""); 21106 21107 toc_section 21108 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL); 21109 21110 readonly_data_section = read_only_data_section; 21111} 21112 21113static int 21114rs6000_xcoff_reloc_rw_mask (void) 21115{ 21116 return 3; 21117} 21118 21119static void 21120rs6000_xcoff_asm_named_section (const char *name, unsigned int flags, 21121 tree decl ATTRIBUTE_UNUSED) 21122{ 21123 int smclass; 21124 static const char * const suffix[7] 21125 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" }; 21126 21127 if (flags & SECTION_EXCLUDE) 21128 smclass = 6; 21129 else if (flags & SECTION_DEBUG) 21130 { 21131 fprintf (asm_out_file, "\t.dwsect %s\n", name); 21132 return; 21133 } 21134 else if (flags & SECTION_CODE) 21135 smclass = 0; 21136 else if (flags & SECTION_TLS) 21137 { 21138 if (flags & SECTION_BSS) 21139 smclass = 5; 21140 else 21141 smclass = 4; 21142 } 21143 else if (flags & SECTION_WRITE) 21144 { 21145 if (flags & SECTION_BSS) 21146 smclass = 3; 21147 else 21148 smclass = 2; 21149 } 21150 else 21151 smclass = 1; 21152 21153 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n", 21154 (flags & SECTION_CODE) ? "." : "", 21155 name, suffix[smclass], flags & SECTION_ENTSIZE); 21156} 21157 21158#define IN_NAMED_SECTION(DECL) \ 21159 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ 21160 && DECL_SECTION_NAME (DECL) != NULL) 21161 21162static section * 21163rs6000_xcoff_select_section (tree decl, int reloc, 21164 unsigned HOST_WIDE_INT align) 21165{ 21166 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into 21167 named section. 
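   Such variables get their own csect so that the stricter alignment can actually be honoured.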
*/ 21168 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl)) 21169 { 21170 resolve_unique_section (decl, reloc, true); 21171 if (IN_NAMED_SECTION (decl)) 21172 return get_named_section (decl, NULL, reloc); 21173 } 21174 21175 if (decl_readonly_section (decl, reloc)) 21176 { 21177 if (TREE_PUBLIC (decl)) 21178 return read_only_data_section; 21179 else 21180 return read_only_private_data_section; 21181 } 21182 else 21183 { 21184#if HAVE_AS_TLS 21185 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) 21186 { 21187 if (bss_initializer_p (decl)) 21188 return tls_comm_section; 21189 else if (TREE_PUBLIC (decl)) 21190 return tls_data_section; 21191 else 21192 return tls_private_data_section; 21193 } 21194 else 21195#endif 21196 if (TREE_PUBLIC (decl)) 21197 return data_section; 21198 else 21199 return private_data_section; 21200 } 21201} 21202 21203static void 21204rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) 21205{ 21206 const char *name; 21207 21208 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 21209 name = (*targetm.strip_name_encoding) (name); 21210 set_decl_section_name (decl, name); 21211} 21212 21213/* Select section for constant in constant pool. 21214 21215 On RS/6000, all constants are in the private read-only data area. 21216 However, if this is being placed in the TOC it must be output as a 21217 toc entry. */ 21218 21219static section * 21220rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x, 21221 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) 21222{ 21223 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) 21224 return toc_section; 21225 else 21226 return read_only_private_data_section; 21227} 21228 21229/* Remove any trailing [DS] or the like from the symbol name. */ 21230 21231static const char * 21232rs6000_xcoff_strip_name_encoding (const char *name) 21233{ 21234 size_t len; 21235 if (*name == '*') 21236 name++; 21237 len = strlen (name); 21238 if (name[len - 1] == ']') 21239 return ggc_alloc_string (name, len - 4); 21240 else 21241 return name; 21242} 21243 21244/* Section attributes. AIX is always PIC. */ 21245 21246static unsigned int 21247rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc) 21248{ 21249 unsigned int align; 21250 unsigned int flags = default_section_type_flags (decl, name, reloc); 21251 21252 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl)) 21253 flags |= SECTION_BSS; 21254 21255 /* Align to at least UNIT size. */ 21256 if (!decl || !DECL_P (decl)) 21257 align = MIN_UNITS_PER_WORD; 21258 /* Align code CSECT to at least 32 bytes. */ 21259 else if ((flags & SECTION_CODE) != 0) 21260 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32); 21261 else 21262 /* Increase alignment of large objects if not already stricter. */ 21263 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 21264 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD 21265 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD); 21266 21267 return flags | (exact_log2 (align) & SECTION_ENTSIZE); 21268} 21269 21270/* Output at beginning of assembler file. 21271 21272 Initialize the section names for the RS/6000 at this point. 21273 21274 Specify filename, including full path, to assembler. 21275 21276 We want to go into the TOC section so at least one .toc will be emitted. 21277 Also, in order to output proper .bs/.es pairs, we need at least one static 21278 [RW] section emitted. 21279 21280 Finally, declare mcount when profiling to make the assembler happy. 
*/ 21281 21282static void 21283rs6000_xcoff_file_start (void) 21284{ 21285 rs6000_gen_section_name (&xcoff_bss_section_name, 21286 main_input_filename, ".bss_"); 21287 rs6000_gen_section_name (&xcoff_private_data_section_name, 21288 main_input_filename, ".rw_"); 21289 rs6000_gen_section_name (&xcoff_private_rodata_section_name, 21290 main_input_filename, ".rop_"); 21291 rs6000_gen_section_name (&xcoff_read_only_section_name, 21292 main_input_filename, ".ro_"); 21293 rs6000_gen_section_name (&xcoff_tls_data_section_name, 21294 main_input_filename, ".tls_"); 21295 21296 fputs ("\t.file\t", asm_out_file); 21297 output_quoted_string (asm_out_file, main_input_filename); 21298 fputc ('\n', asm_out_file); 21299 if (write_symbols != NO_DEBUG) 21300 switch_to_section (private_data_section); 21301 switch_to_section (toc_section); 21302 switch_to_section (text_section); 21303 if (profile_flag) 21304 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT); 21305 rs6000_file_start (); 21306} 21307 21308/* Output at end of assembler file. 21309 On the RS/6000, referencing data should automatically pull in text. */ 21310 21311static void 21312rs6000_xcoff_file_end (void) 21313{ 21314 switch_to_section (text_section); 21315 if (xcoff_tls_exec_model_detected) 21316 { 21317 /* Add a .ref to __tls_get_addr to force libpthread dependency. */ 21318 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file); 21319 } 21320 fputs ("_section_.text:\n", asm_out_file); 21321 switch_to_section (data_section); 21322 fputs (TARGET_32BIT 21323 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n", 21324 asm_out_file); 21325 21326} 21327 21328struct declare_alias_data 21329{ 21330 FILE *file; 21331 bool function_descriptor; 21332}; 21333 21334/* Declare alias N. A helper function for for_node_and_aliases. */ 21335 21336static bool 21337rs6000_declare_alias (struct symtab_node *n, void *d) 21338{ 21339 struct declare_alias_data *data = (struct declare_alias_data *)d; 21340 /* Main symbol is output specially, because varasm machinery does part of 21341 the job for us - we do not need to declare .globl/lglobs and such. */ 21342 if (!n->alias || n->weakref) 21343 return false; 21344 21345 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl))) 21346 return false; 21347 21348 /* Prevent assemble_alias from trying to use .set pseudo operation 21349 that does not behave as expected by the middle-end. 
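   Marking the decl TREE_ASM_WRITTEN below makes varasm skip it; the alias is instead emitted here as an extra label on its target.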
*/ 21350 TREE_ASM_WRITTEN (n->decl) = true; 21351 21352 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)); 21353 char *buffer = (char *) alloca (strlen (name) + 2); 21354 char *p; 21355 int dollar_inside = 0; 21356 21357 strcpy (buffer, name); 21358 p = strchr (buffer, '$'); 21359 while (p) { 21360 *p = '_'; 21361 dollar_inside++; 21362 p = strchr (p + 1, '$'); 21363 } 21364 if (TREE_PUBLIC (n->decl)) 21365 { 21366 if (!RS6000_WEAK || !DECL_WEAK (n->decl)) 21367 { 21368 if (dollar_inside) { 21369 if (data->function_descriptor) 21370 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); 21371 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); 21372 } 21373 if (data->function_descriptor) 21374 { 21375 fputs ("\t.globl .", data->file); 21376 RS6000_OUTPUT_BASENAME (data->file, buffer); 21377 putc ('\n', data->file); 21378 } 21379 fputs ("\t.globl ", data->file); 21380 assemble_name (data->file, buffer); 21381 putc ('\n', data->file); 21382 } 21383#ifdef ASM_WEAKEN_DECL 21384 else if (DECL_WEAK (n->decl) && !data->function_descriptor) 21385 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL); 21386#endif 21387 } 21388 else 21389 { 21390 if (dollar_inside) 21391 { 21392 if (data->function_descriptor) 21393 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); 21394 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); 21395 } 21396 if (data->function_descriptor) 21397 { 21398 fputs ("\t.lglobl .", data->file); 21399 RS6000_OUTPUT_BASENAME (data->file, buffer); 21400 putc ('\n', data->file); 21401 } 21402 fputs ("\t.lglobl ", data->file); 21403 assemble_name (data->file, buffer); 21404 putc ('\n', data->file); 21405 } 21406 if (data->function_descriptor) 21407 putc ('.', data->file); 21408 ASM_OUTPUT_LABEL (data->file, buffer); 21409 return false; 21410} 21411 21412 21413#ifdef HAVE_GAS_HIDDEN 21414/* Helper function to calculate visibility of a DECL 21415 and return the value as a const string. */ 21416 21417static const char * 21418rs6000_xcoff_visibility (tree decl) 21419{ 21420 static const char * const visibility_types[] = { 21421 "", ",protected", ",hidden", ",internal" 21422 }; 21423 21424 enum symbol_visibility vis = DECL_VISIBILITY (decl); 21425 return visibility_types[vis]; 21426} 21427#endif 21428 21429 21430/* This macro produces the initial definition of a function name. 21431 On the RS/6000, we need to place an extra '.' in the function name and 21432 output the function descriptor. 21433 Dollar signs are converted to underscores. 21434 21435 The csect for the function will have already been created when 21436 text_section was selected. We do have to go back to that csect, however. 21437 21438 The third and fourth parameters to the .function pseudo-op (16 and 044) 21439 are placeholders which no longer have any use. 21440 21441 Because AIX assembler's .set command has unexpected semantics, we output 21442 all aliases as alternative labels in front of the definition. 
*/ 21443 21444void 21445rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl) 21446{ 21447 char *buffer = (char *) alloca (strlen (name) + 1); 21448 char *p; 21449 int dollar_inside = 0; 21450 struct declare_alias_data data = {file, false}; 21451 21452 strcpy (buffer, name); 21453 p = strchr (buffer, '$'); 21454 while (p) { 21455 *p = '_'; 21456 dollar_inside++; 21457 p = strchr (p + 1, '$'); 21458 } 21459 if (TREE_PUBLIC (decl)) 21460 { 21461 if (!RS6000_WEAK || !DECL_WEAK (decl)) 21462 { 21463 if (dollar_inside) { 21464 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); 21465 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); 21466 } 21467 fputs ("\t.globl .", file); 21468 RS6000_OUTPUT_BASENAME (file, buffer); 21469#ifdef HAVE_GAS_HIDDEN 21470 fputs (rs6000_xcoff_visibility (decl), file); 21471#endif 21472 putc ('\n', file); 21473 } 21474 } 21475 else 21476 { 21477 if (dollar_inside) { 21478 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); 21479 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); 21480 } 21481 fputs ("\t.lglobl .", file); 21482 RS6000_OUTPUT_BASENAME (file, buffer); 21483 putc ('\n', file); 21484 } 21485 21486 fputs ("\t.csect ", file); 21487 assemble_name (file, buffer); 21488 fputs (TARGET_32BIT ? "\n" : ",3\n", file); 21489 21490 ASM_OUTPUT_LABEL (file, buffer); 21491 21492 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, 21493 &data, true); 21494 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file); 21495 RS6000_OUTPUT_BASENAME (file, buffer); 21496 fputs (", TOC[tc0], 0\n", file); 21497 21498 in_section = NULL; 21499 switch_to_section (function_section (decl)); 21500 putc ('.', file); 21501 ASM_OUTPUT_LABEL (file, buffer); 21502 21503 data.function_descriptor = true; 21504 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, 21505 &data, true); 21506 if (!DECL_IGNORED_P (decl)) 21507 { 21508 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) 21509 xcoffout_declare_function (file, decl, buffer); 21510 else if (dwarf_debuginfo_p ()) 21511 { 21512 name = (*targetm.strip_name_encoding) (name); 21513 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name); 21514 } 21515 } 21516 return; 21517} 21518 21519 21520/* Output assembly language to globalize a symbol from a DECL, 21521 possibly with visibility. */ 21522 21523void 21524rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl) 21525{ 21526 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 21527 fputs (GLOBAL_ASM_OP, stream); 21528 assemble_name (stream, name); 21529#ifdef HAVE_GAS_HIDDEN 21530 fputs (rs6000_xcoff_visibility (decl), stream); 21531#endif 21532 putc ('\n', stream); 21533} 21534 21535/* Output assembly language to define a symbol as COMMON from a DECL, 21536 possibly with visibility. */ 21537 21538void 21539rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream, 21540 tree decl ATTRIBUTE_UNUSED, 21541 const char *name, 21542 unsigned HOST_WIDE_INT size, 21543 unsigned int align) 21544{ 21545 unsigned int align2 = 2; 21546 21547 if (align == 0) 21548 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl)); 21549 21550 if (align > 32) 21551 align2 = floor_log2 (align / BITS_PER_UNIT); 21552 else if (size > 4) 21553 align2 = 3; 21554 21555 if (! DECL_COMMON (decl)) 21556 { 21557 /* Forget section. */ 21558 in_section = NULL; 21559 21560 /* Globalize TLS BSS. 
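   A public thread-local variable handled here is not common, so it gets an explicit .globl before its csect is emitted.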
*/ 21561 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl)) 21562 { 21563 fputs (GLOBAL_ASM_OP, stream); 21564 assemble_name (stream, name); 21565 fputc ('\n', stream); 21566 } 21567 21568 /* Switch to section and skip space. */ 21569 fputs ("\t.csect ", stream); 21570 assemble_name (stream, name); 21571 fprintf (stream, ",%u\n", align2); 21572 ASM_DECLARE_OBJECT_NAME (stream, name, decl); 21573 ASM_OUTPUT_SKIP (stream, size ? size : 1); 21574 return; 21575 } 21576 21577 if (TREE_PUBLIC (decl)) 21578 { 21579 fprintf (stream, 21580 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" , 21581 name, size, align2); 21582 21583#ifdef HAVE_GAS_HIDDEN 21584 if (decl != NULL) 21585 fputs (rs6000_xcoff_visibility (decl), stream); 21586#endif 21587 putc ('\n', stream); 21588 } 21589 else 21590 fprintf (stream, 21591 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n", 21592 (*targetm.strip_name_encoding) (name), size, name, align2); 21593} 21594 21595/* This macro produces the initial definition of a object (variable) name. 21596 Because AIX assembler's .set command has unexpected semantics, we output 21597 all aliases as alternative labels in front of the definition. */ 21598 21599void 21600rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl) 21601{ 21602 struct declare_alias_data data = {file, false}; 21603 ASM_OUTPUT_LABEL (file, name); 21604 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, 21605 &data, true); 21606} 21607 21608/* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */ 21609 21610void 21611rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label) 21612{ 21613 fputs (integer_asm_op (size, FALSE), file); 21614 assemble_name (file, label); 21615 fputs ("-$", file); 21616} 21617 21618/* Output a symbol offset relative to the dbase for the current object. 21619 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume 21620 signed offsets. 21621 21622 __gcc_unwind_dbase is embedded in all executables/libraries through 21623 libgcc/config/rs6000/crtdbase.S. */ 21624 21625void 21626rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label) 21627{ 21628 fputs (integer_asm_op (size, FALSE), file); 21629 assemble_name (file, label); 21630 fputs("-__gcc_unwind_dbase", file); 21631} 21632 21633#ifdef HAVE_AS_TLS 21634static void 21635rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first) 21636{ 21637 rtx symbol; 21638 int flags; 21639 const char *symname; 21640 21641 default_encode_section_info (decl, rtl, first); 21642 21643 /* Careful not to prod global register variables. */ 21644 if (!MEM_P (rtl)) 21645 return; 21646 symbol = XEXP (rtl, 0); 21647 if (!SYMBOL_REF_P (symbol)) 21648 return; 21649 21650 flags = SYMBOL_REF_FLAGS (symbol); 21651 21652 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) 21653 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO; 21654 21655 SYMBOL_REF_FLAGS (symbol) = flags; 21656 21657 symname = XSTR (symbol, 0); 21658 21659 /* Append CSECT mapping class, unless the symbol already is qualified. 21660 Aliases are implemented as labels, so the symbol name should not add 21661 a mapping class. 
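   For example, a function symbol foo becomes foo[DS], and with -fdata-sections an ordinary initialized variable becomes var[RW].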
*/ 21662 if (decl 21663 && DECL_P (decl) 21664 && VAR_OR_FUNCTION_DECL_P (decl) 21665 && (symtab_node::get (decl) == NULL 21666 || symtab_node::get (decl)->alias == 0) 21667 && symname[strlen (symname) - 1] != ']') 21668 { 21669 const char *smclass = NULL; 21670 21671 if (TREE_CODE (decl) == FUNCTION_DECL) 21672 smclass = "[DS]"; 21673 else if (DECL_THREAD_LOCAL_P (decl)) 21674 { 21675 if (bss_initializer_p (decl)) 21676 smclass = "[UL]"; 21677 else if (flag_data_sections) 21678 smclass = "[TL]"; 21679 } 21680 else if (DECL_EXTERNAL (decl)) 21681 smclass = "[UA]"; 21682 else if (bss_initializer_p (decl)) 21683 smclass = "[BS]"; 21684 else if (flag_data_sections) 21685 { 21686 /* This must exactly match the logic of select section. */ 21687 if (decl_readonly_section (decl, compute_reloc_for_var (decl))) 21688 smclass = "[RO]"; 21689 else 21690 smclass = "[RW]"; 21691 } 21692 21693 if (smclass != NULL) 21694 { 21695 char *newname = XALLOCAVEC (char, strlen (symname) + 5); 21696 21697 strcpy (newname, symname); 21698 strcat (newname, smclass); 21699 XSTR (symbol, 0) = ggc_strdup (newname); 21700 } 21701 } 21702} 21703#endif /* HAVE_AS_TLS */ 21704#endif /* TARGET_XCOFF */ 21705 21706void 21707rs6000_asm_weaken_decl (FILE *stream, tree decl, 21708 const char *name, const char *val) 21709{ 21710 fputs ("\t.weak\t", stream); 21711 assemble_name (stream, name); 21712 if (decl && TREE_CODE (decl) == FUNCTION_DECL 21713 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) 21714 { 21715#if TARGET_XCOFF && HAVE_GAS_HIDDEN 21716 if (TARGET_XCOFF) 21717 fputs (rs6000_xcoff_visibility (decl), stream); 21718#endif 21719 fputs ("\n\t.weak\t.", stream); 21720 RS6000_OUTPUT_BASENAME (stream, name); 21721 } 21722#if TARGET_XCOFF && HAVE_GAS_HIDDEN 21723 if (TARGET_XCOFF) 21724 fputs (rs6000_xcoff_visibility (decl), stream); 21725#endif 21726 fputc ('\n', stream); 21727 21728 if (val) 21729 { 21730#ifdef ASM_OUTPUT_DEF 21731 ASM_OUTPUT_DEF (stream, name, val); 21732#endif 21733 if (decl && TREE_CODE (decl) == FUNCTION_DECL 21734 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) 21735 { 21736 fputs ("\t.set\t.", stream); 21737 RS6000_OUTPUT_BASENAME (stream, name); 21738 fputs (",.", stream); 21739 RS6000_OUTPUT_BASENAME (stream, val); 21740 fputc ('\n', stream); 21741 } 21742 } 21743} 21744 21745 21746/* Return true if INSN should not be copied. */ 21747 21748static bool 21749rs6000_cannot_copy_insn_p (rtx_insn *insn) 21750{ 21751 return recog_memoized (insn) >= 0 21752 && get_attr_cannot_copy (insn); 21753} 21754 21755/* Compute a (partial) cost for rtx X. Return true if the complete 21756 cost has been computed, and false if subexpressions should be 21757 scanned. In either case, *TOTAL contains the cost result. */ 21758 21759static bool 21760rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, 21761 int opno ATTRIBUTE_UNUSED, int *total, bool speed) 21762{ 21763 int code = GET_CODE (x); 21764 21765 switch (code) 21766 { 21767 /* On the RS/6000, if it is valid in the insn, it is free. */ 21768 case CONST_INT: 21769 if (((outer_code == SET 21770 || outer_code == PLUS 21771 || outer_code == MINUS) 21772 && (satisfies_constraint_I (x) 21773 || satisfies_constraint_L (x))) 21774 || (outer_code == AND 21775 && (satisfies_constraint_K (x) 21776 || (mode == SImode 21777 ? satisfies_constraint_L (x) 21778 : satisfies_constraint_J (x)))) 21779 || ((outer_code == IOR || outer_code == XOR) 21780 && (satisfies_constraint_K (x) 21781 || (mode == SImode 21782 ? 
satisfies_constraint_L (x) 21783 : satisfies_constraint_J (x)))) 21784 || outer_code == ASHIFT 21785 || outer_code == ASHIFTRT 21786 || outer_code == LSHIFTRT 21787 || outer_code == ROTATE 21788 || outer_code == ROTATERT 21789 || outer_code == ZERO_EXTRACT 21790 || (outer_code == MULT 21791 && satisfies_constraint_I (x)) 21792 || ((outer_code == DIV || outer_code == UDIV 21793 || outer_code == MOD || outer_code == UMOD) 21794 && exact_log2 (INTVAL (x)) >= 0) 21795 || (outer_code == COMPARE 21796 && (satisfies_constraint_I (x) 21797 || satisfies_constraint_K (x))) 21798 || ((outer_code == EQ || outer_code == NE) 21799 && (satisfies_constraint_I (x) 21800 || satisfies_constraint_K (x) 21801 || (mode == SImode 21802 ? satisfies_constraint_L (x) 21803 : satisfies_constraint_J (x)))) 21804 || (outer_code == GTU 21805 && satisfies_constraint_I (x)) 21806 || (outer_code == LTU 21807 && satisfies_constraint_P (x))) 21808 { 21809 *total = 0; 21810 return true; 21811 } 21812 else if ((outer_code == PLUS 21813 && reg_or_add_cint_operand (x, mode)) 21814 || (outer_code == MINUS 21815 && reg_or_sub_cint_operand (x, mode)) 21816 || ((outer_code == SET 21817 || outer_code == IOR 21818 || outer_code == XOR) 21819 && (INTVAL (x) 21820 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0)) 21821 { 21822 *total = COSTS_N_INSNS (1); 21823 return true; 21824 } 21825 /* FALLTHRU */ 21826 21827 case CONST_DOUBLE: 21828 case CONST_WIDE_INT: 21829 case CONST: 21830 case HIGH: 21831 case SYMBOL_REF: 21832 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); 21833 return true; 21834 21835 case MEM: 21836 /* When optimizing for size, MEM should be slightly more expensive 21837 than generating address, e.g., (plus (reg) (const)). 21838 L1 cache latency is about two instructions. */ 21839 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); 21840 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x))) 21841 *total += COSTS_N_INSNS (100); 21842 return true; 21843 21844 case LABEL_REF: 21845 *total = 0; 21846 return true; 21847 21848 case PLUS: 21849 case MINUS: 21850 if (FLOAT_MODE_P (mode)) 21851 *total = rs6000_cost->fp; 21852 else 21853 *total = COSTS_N_INSNS (1); 21854 return false; 21855 21856 case MULT: 21857 if (CONST_INT_P (XEXP (x, 1)) 21858 && satisfies_constraint_I (XEXP (x, 1))) 21859 { 21860 if (INTVAL (XEXP (x, 1)) >= -256 21861 && INTVAL (XEXP (x, 1)) <= 255) 21862 *total = rs6000_cost->mulsi_const9; 21863 else 21864 *total = rs6000_cost->mulsi_const; 21865 } 21866 else if (mode == SFmode) 21867 *total = rs6000_cost->fp; 21868 else if (FLOAT_MODE_P (mode)) 21869 *total = rs6000_cost->dmul; 21870 else if (mode == DImode) 21871 *total = rs6000_cost->muldi; 21872 else 21873 *total = rs6000_cost->mulsi; 21874 return false; 21875 21876 case FMA: 21877 if (mode == SFmode) 21878 *total = rs6000_cost->fp; 21879 else 21880 *total = rs6000_cost->dmul; 21881 break; 21882 21883 case DIV: 21884 case MOD: 21885 if (FLOAT_MODE_P (mode)) 21886 { 21887 *total = mode == DFmode ? 
rs6000_cost->ddiv 21888 : rs6000_cost->sdiv; 21889 return false; 21890 } 21891 /* FALLTHRU */ 21892 21893 case UDIV: 21894 case UMOD: 21895 if (CONST_INT_P (XEXP (x, 1)) 21896 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) 21897 { 21898 if (code == DIV || code == MOD) 21899 /* Shift, addze */ 21900 *total = COSTS_N_INSNS (2); 21901 else 21902 /* Shift */ 21903 *total = COSTS_N_INSNS (1); 21904 } 21905 else 21906 { 21907 if (GET_MODE (XEXP (x, 1)) == DImode) 21908 *total = rs6000_cost->divdi; 21909 else 21910 *total = rs6000_cost->divsi; 21911 } 21912 /* Add in shift and subtract for MOD unless we have a mod instruction. */ 21913 if (!TARGET_MODULO && (code == MOD || code == UMOD)) 21914 *total += COSTS_N_INSNS (2); 21915 return false; 21916 21917 case CTZ: 21918 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4); 21919 return false; 21920 21921 case FFS: 21922 *total = COSTS_N_INSNS (4); 21923 return false; 21924 21925 case POPCOUNT: 21926 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6); 21927 return false; 21928 21929 case PARITY: 21930 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6); 21931 return false; 21932 21933 case NOT: 21934 if (outer_code == AND || outer_code == IOR || outer_code == XOR) 21935 *total = 0; 21936 else 21937 *total = COSTS_N_INSNS (1); 21938 return false; 21939 21940 case AND: 21941 if (CONST_INT_P (XEXP (x, 1))) 21942 { 21943 rtx left = XEXP (x, 0); 21944 rtx_code left_code = GET_CODE (left); 21945 21946 /* rotate-and-mask: 1 insn. */ 21947 if ((left_code == ROTATE 21948 || left_code == ASHIFT 21949 || left_code == LSHIFTRT) 21950 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode)) 21951 { 21952 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed); 21953 if (!CONST_INT_P (XEXP (left, 1))) 21954 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed); 21955 *total += COSTS_N_INSNS (1); 21956 return true; 21957 } 21958 21959 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */ 21960 HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); 21961 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode) 21962 || (val & 0xffff) == val 21963 || (val & 0xffff0000) == val 21964 || ((val & 0xffff) == 0 && mode == SImode)) 21965 { 21966 *total = rtx_cost (left, mode, AND, 0, speed); 21967 *total += COSTS_N_INSNS (1); 21968 return true; 21969 } 21970 21971 /* 2 insns. */ 21972 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode)) 21973 { 21974 *total = rtx_cost (left, mode, AND, 0, speed); 21975 *total += COSTS_N_INSNS (2); 21976 return true; 21977 } 21978 } 21979 21980 *total = COSTS_N_INSNS (1); 21981 return false; 21982 21983 case IOR: 21984 /* FIXME */ 21985 *total = COSTS_N_INSNS (1); 21986 return true; 21987 21988 case CLZ: 21989 case XOR: 21990 case ZERO_EXTRACT: 21991 *total = COSTS_N_INSNS (1); 21992 return false; 21993 21994 case ASHIFT: 21995 /* The EXTSWSLI instruction is a combined instruction. Don't count both 21996 the sign extend and shift separately within the insn. */ 21997 if (TARGET_EXTSWSLI && mode == DImode 21998 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 21999 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) 22000 { 22001 *total = 0; 22002 return false; 22003 } 22004 /* fall through */ 22005 22006 case ASHIFTRT: 22007 case LSHIFTRT: 22008 case ROTATE: 22009 case ROTATERT: 22010 /* Handle mul_highpart. 
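   A shift of a multiply underneath a TRUNCATE is really a single high-part multiply, so charge just the multiply cost and do not scan the operands again.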
*/ 22011 if (outer_code == TRUNCATE 22012 && GET_CODE (XEXP (x, 0)) == MULT) 22013 { 22014 if (mode == DImode) 22015 *total = rs6000_cost->muldi; 22016 else 22017 *total = rs6000_cost->mulsi; 22018 return true; 22019 } 22020 else if (outer_code == AND) 22021 *total = 0; 22022 else 22023 *total = COSTS_N_INSNS (1); 22024 return false; 22025 22026 case SIGN_EXTEND: 22027 case ZERO_EXTEND: 22028 if (MEM_P (XEXP (x, 0))) 22029 *total = 0; 22030 else 22031 *total = COSTS_N_INSNS (1); 22032 return false; 22033 22034 case COMPARE: 22035 case NEG: 22036 case ABS: 22037 if (!FLOAT_MODE_P (mode)) 22038 { 22039 *total = COSTS_N_INSNS (1); 22040 return false; 22041 } 22042 /* FALLTHRU */ 22043 22044 case FLOAT: 22045 case UNSIGNED_FLOAT: 22046 case FIX: 22047 case UNSIGNED_FIX: 22048 case FLOAT_TRUNCATE: 22049 *total = rs6000_cost->fp; 22050 return false; 22051 22052 case FLOAT_EXTEND: 22053 if (mode == DFmode) 22054 *total = rs6000_cost->sfdf_convert; 22055 else 22056 *total = rs6000_cost->fp; 22057 return false; 22058 22059 case CALL: 22060 case IF_THEN_ELSE: 22061 if (!speed) 22062 { 22063 *total = COSTS_N_INSNS (1); 22064 return true; 22065 } 22066 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT) 22067 { 22068 *total = rs6000_cost->fp; 22069 return false; 22070 } 22071 break; 22072 22073 case NE: 22074 case EQ: 22075 case GTU: 22076 case LTU: 22077 /* Carry bit requires mode == Pmode. 22078 NEG or PLUS already counted so only add one. */ 22079 if (mode == Pmode 22080 && (outer_code == NEG || outer_code == PLUS)) 22081 { 22082 *total = COSTS_N_INSNS (1); 22083 return true; 22084 } 22085 /* FALLTHRU */ 22086 22087 case GT: 22088 case LT: 22089 case UNORDERED: 22090 if (outer_code == SET) 22091 { 22092 if (XEXP (x, 1) == const0_rtx) 22093 { 22094 *total = COSTS_N_INSNS (2); 22095 return true; 22096 } 22097 else 22098 { 22099 *total = COSTS_N_INSNS (3); 22100 return false; 22101 } 22102 } 22103 /* CC COMPARE. */ 22104 if (outer_code == COMPARE) 22105 { 22106 *total = 0; 22107 return true; 22108 } 22109 break; 22110 22111 case UNSPEC: 22112 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ) 22113 { 22114 *total = 0; 22115 return true; 22116 } 22117 break; 22118 22119 default: 22120 break; 22121 } 22122 22123 return false; 22124} 22125 22126/* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */ 22127 22128static bool 22129rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code, 22130 int opno, int *total, bool speed) 22131{ 22132 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed); 22133 22134 fprintf (stderr, 22135 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, " 22136 "opno = %d, total = %d, speed = %s, x:\n", 22137 ret ? "complete" : "scan inner", 22138 GET_MODE_NAME (mode), 22139 GET_RTX_NAME (outer_code), 22140 opno, 22141 *total, 22142 speed ? "true" : "false"); 22143 22144 debug_rtx (x); 22145 22146 return ret; 22147} 22148 22149static int 22150rs6000_insn_cost (rtx_insn *insn, bool speed) 22151{ 22152 if (recog_memoized (insn) < 0) 22153 return 0; 22154 22155 /* If we are optimizing for size, just use the length. */ 22156 if (!speed) 22157 return get_attr_length (insn); 22158 22159 /* Use the cost if provided. */ 22160 int cost = get_attr_cost (insn); 22161 if (cost > 0) 22162 return cost; 22163 22164 /* If the insn tells us how many insns there are, use that. Otherwise use 22165 the length/4. Adjust the insn length to remove the extra size that 22166 prefixed instructions take. 
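   The extra size is obtained by applying ADJUST_INSN_LENGTH to a zero length and is subtracted back out before dividing by 4.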
*/ 22167 int n = get_attr_num_insns (insn); 22168 if (n == 0) 22169 { 22170 int length = get_attr_length (insn); 22171 if (get_attr_prefixed (insn) == PREFIXED_YES) 22172 { 22173 int adjust = 0; 22174 ADJUST_INSN_LENGTH (insn, adjust); 22175 length -= adjust; 22176 } 22177 22178 n = length / 4; 22179 } 22180 22181 enum attr_type type = get_attr_type (insn); 22182 22183 switch (type) 22184 { 22185 case TYPE_LOAD: 22186 case TYPE_FPLOAD: 22187 case TYPE_VECLOAD: 22188 cost = COSTS_N_INSNS (n + 1); 22189 break; 22190 22191 case TYPE_MUL: 22192 switch (get_attr_size (insn)) 22193 { 22194 case SIZE_8: 22195 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9; 22196 break; 22197 case SIZE_16: 22198 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const; 22199 break; 22200 case SIZE_32: 22201 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi; 22202 break; 22203 case SIZE_64: 22204 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi; 22205 break; 22206 default: 22207 gcc_unreachable (); 22208 } 22209 break; 22210 case TYPE_DIV: 22211 switch (get_attr_size (insn)) 22212 { 22213 case SIZE_32: 22214 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi; 22215 break; 22216 case SIZE_64: 22217 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi; 22218 break; 22219 default: 22220 gcc_unreachable (); 22221 } 22222 break; 22223 22224 case TYPE_FP: 22225 cost = n * rs6000_cost->fp; 22226 break; 22227 case TYPE_DMUL: 22228 cost = n * rs6000_cost->dmul; 22229 break; 22230 case TYPE_SDIV: 22231 cost = n * rs6000_cost->sdiv; 22232 break; 22233 case TYPE_DDIV: 22234 cost = n * rs6000_cost->ddiv; 22235 break; 22236 22237 case TYPE_SYNC: 22238 case TYPE_LOAD_L: 22239 case TYPE_MFCR: 22240 case TYPE_MFCRF: 22241 cost = COSTS_N_INSNS (n + 2); 22242 break; 22243 22244 default: 22245 cost = COSTS_N_INSNS (n); 22246 } 22247 22248 return cost; 22249} 22250 22251/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */ 22252 22253static int 22254rs6000_debug_address_cost (rtx x, machine_mode mode, 22255 addr_space_t as, bool speed) 22256{ 22257 int ret = TARGET_ADDRESS_COST (x, mode, as, speed); 22258 22259 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n", 22260 ret, speed ? "true" : "false"); 22261 debug_rtx (x); 22262 22263 return ret; 22264} 22265 22266 22267/* A C expression returning the cost of moving data from a register of class 22268 CLASS1 to one of CLASS2. */ 22269 22270static int 22271rs6000_register_move_cost (machine_mode mode, 22272 reg_class_t from, reg_class_t to) 22273{ 22274 int ret; 22275 reg_class_t rclass; 22276 22277 if (TARGET_DEBUG_COST) 22278 dbg_cost_ctrl++; 22279 22280 /* If we have VSX, we can easily move between FPR or Altivec registers, 22281 otherwise we can only easily move within classes. 22282 Do this first so we give best-case answers for union classes 22283 containing both gprs and vsx regs. */ 22284 HARD_REG_SET to_vsx, from_vsx; 22285 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS]; 22286 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS]; 22287 if (!hard_reg_set_empty_p (to_vsx) 22288 && !hard_reg_set_empty_p (from_vsx) 22289 && (TARGET_VSX 22290 || hard_reg_set_intersect_p (to_vsx, from_vsx))) 22291 { 22292 int reg = FIRST_FPR_REGNO; 22293 if (TARGET_VSX 22294 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO) 22295 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO))) 22296 reg = FIRST_ALTIVEC_REGNO; 22297 ret = 2 * hard_regno_nregs (reg, mode); 22298 } 22299 22300 /* Moves from/to GENERAL_REGS. 
*/ 22301 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS)) 22302 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS))) 22303 { 22304 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS) 22305 { 22306 if (TARGET_DIRECT_MOVE) 22307 { 22308 /* Keep the cost for direct moves above that for within 22309 a register class even if the actual processor cost is 22310 comparable. We do this because a direct move insn 22311 can't be a nop, whereas with ideal register 22312 allocation a move within the same class might turn 22313 out to be a nop. */ 22314 if (rs6000_tune == PROCESSOR_POWER9 22315 || rs6000_tune == PROCESSOR_POWER10) 22316 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode); 22317 else 22318 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode); 22319 /* SFmode requires a conversion when moving between gprs 22320 and vsx. */ 22321 if (mode == SFmode) 22322 ret += 2; 22323 } 22324 else 22325 ret = (rs6000_memory_move_cost (mode, rclass, false) 22326 + rs6000_memory_move_cost (mode, GENERAL_REGS, false)); 22327 } 22328 22329 /* It's more expensive to move CR_REGS than CR0_REGS because of the 22330 shift. */ 22331 else if (rclass == CR_REGS) 22332 ret = 4; 22333 22334 /* For those processors that have slow LR/CTR moves, make them more 22335 expensive than memory in order to bias spills to memory .*/ 22336 else if ((rs6000_tune == PROCESSOR_POWER6 22337 || rs6000_tune == PROCESSOR_POWER7 22338 || rs6000_tune == PROCESSOR_POWER8 22339 || rs6000_tune == PROCESSOR_POWER9) 22340 && reg_class_subset_p (rclass, SPECIAL_REGS)) 22341 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode); 22342 22343 else 22344 /* A move will cost one instruction per GPR moved. */ 22345 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode); 22346 } 22347 22348 /* Everything else has to go through GENERAL_REGS. */ 22349 else 22350 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to) 22351 + rs6000_register_move_cost (mode, from, GENERAL_REGS)); 22352 22353 if (TARGET_DEBUG_COST) 22354 { 22355 if (dbg_cost_ctrl == 1) 22356 fprintf (stderr, 22357 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n", 22358 ret, GET_MODE_NAME (mode), reg_class_names[from], 22359 reg_class_names[to]); 22360 dbg_cost_ctrl--; 22361 } 22362 22363 return ret; 22364} 22365 22366/* A C expressions returning the cost of moving data of MODE from a register to 22367 or from memory. */ 22368 22369static int 22370rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass, 22371 bool in ATTRIBUTE_UNUSED) 22372{ 22373 int ret; 22374 22375 if (TARGET_DEBUG_COST) 22376 dbg_cost_ctrl++; 22377 22378 if (reg_classes_intersect_p (rclass, GENERAL_REGS)) 22379 ret = 4 * hard_regno_nregs (0, mode); 22380 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS) 22381 || reg_classes_intersect_p (rclass, VSX_REGS))) 22382 ret = 4 * hard_regno_nregs (32, mode); 22383 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS)) 22384 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode); 22385 else 22386 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS); 22387 22388 if (TARGET_DEBUG_COST) 22389 { 22390 if (dbg_cost_ctrl == 1) 22391 fprintf (stderr, 22392 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n", 22393 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in); 22394 dbg_cost_ctrl--; 22395 } 22396 22397 return ret; 22398} 22399 22400/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. 
22401 22402 The register allocator chooses GEN_OR_VSX_REGS for the allocno 22403 class if GENERAL_REGS and VSX_REGS cost is lower than the memory 22404 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register 22405 move cost between GENERAL_REGS and VSX_REGS low. 22406 22407 It might seem reasonable to use a union class. After all, if usage 22408 of vsr is low and gpr high, it might make sense to spill gpr to vsr 22409 rather than memory. However, in cases where register pressure of 22410 both is high, like the cactus_adm spec test, allowing 22411 GEN_OR_VSX_REGS as the allocno class results in bad decisions in 22412 the first scheduling pass. This is partly due to an allocno of 22413 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure 22414 class, which gives too high a pressure for GENERAL_REGS and too low 22415 for VSX_REGS. So, force a choice of the subclass here. 22416 22417 The best class is also the union if GENERAL_REGS and VSX_REGS have 22418 the same cost. In that case we do use GEN_OR_VSX_REGS as the 22419 allocno class, since trying to narrow down the class by regno mode 22420 is prone to error. For example, SImode is allowed in VSX regs and 22421 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect) 22422 it would be wrong to choose an allocno of GENERAL_REGS based on 22423 SImode. */ 22424 22425static reg_class_t 22426rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, 22427 reg_class_t allocno_class, 22428 reg_class_t best_class) 22429{ 22430 switch (allocno_class) 22431 { 22432 case GEN_OR_VSX_REGS: 22433 /* best_class must be a subset of allocno_class. */ 22434 gcc_checking_assert (best_class == GEN_OR_VSX_REGS 22435 || best_class == GEN_OR_FLOAT_REGS 22436 || best_class == VSX_REGS 22437 || best_class == ALTIVEC_REGS 22438 || best_class == FLOAT_REGS 22439 || best_class == GENERAL_REGS 22440 || best_class == BASE_REGS); 22441 /* Use best_class but choose wider classes when copying from the 22442 wider class to best_class is cheap. This mimics IRA choice 22443 of allocno class. */ 22444 if (best_class == BASE_REGS) 22445 return GENERAL_REGS; 22446 if (TARGET_VSX && best_class == FLOAT_REGS) 22447 return VSX_REGS; 22448 return best_class; 22449 22450 case VSX_REGS: 22451 if (best_class == ALTIVEC_REGS) 22452 return ALTIVEC_REGS; 22453 22454 default: 22455 break; 22456 } 22457 22458 return allocno_class; 22459} 22460 22461/* Load up a constant. If the mode is a vector mode, splat the value across 22462 all of the vector elements. */ 22463 22464static rtx 22465rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) 22466{ 22467 rtx reg; 22468 22469 if (mode == SFmode || mode == DFmode) 22470 { 22471 rtx d = const_double_from_real_value (dconst, mode); 22472 reg = force_reg (mode, d); 22473 } 22474 else if (mode == V4SFmode) 22475 { 22476 rtx d = const_double_from_real_value (dconst, SFmode); 22477 rtvec v = gen_rtvec (4, d, d, d, d); 22478 reg = gen_reg_rtx (mode); 22479 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); 22480 } 22481 else if (mode == V2DFmode) 22482 { 22483 rtx d = const_double_from_real_value (dconst, DFmode); 22484 rtvec v = gen_rtvec (2, d, d); 22485 reg = gen_reg_rtx (mode); 22486 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); 22487 } 22488 else 22489 gcc_unreachable (); 22490 22491 return reg; 22492} 22493 22494/* Generate an FMA instruction. 
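   Compute TARGET = M1 * M2 + A via the fma optab; if the expander hands
   back a different register, copy the result into TARGET.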
*/ 22495 22496static void 22497rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) 22498{ 22499 machine_mode mode = GET_MODE (target); 22500 rtx dst; 22501 22502 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); 22503 gcc_assert (dst != NULL); 22504 22505 if (dst != target) 22506 emit_move_insn (target, dst); 22507} 22508 22509/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ 22510 22511static void 22512rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) 22513{ 22514 machine_mode mode = GET_MODE (dst); 22515 rtx r; 22516 22517 /* This is a tad more complicated, since the fnma_optab is for 22518 a different expression: fma(-m1, m2, a), which is the same 22519 thing except in the case of signed zeros. 22520 22521 Fortunately we know that if FMA is supported that FNMSUB is 22522 also supported in the ISA. Just expand it directly. */ 22523 22524 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); 22525 22526 r = gen_rtx_NEG (mode, a); 22527 r = gen_rtx_FMA (mode, m1, m2, r); 22528 r = gen_rtx_NEG (mode, r); 22529 emit_insn (gen_rtx_SET (dst, r)); 22530} 22531 22532/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, 22533 add a reg_note saying that this was a division. Support both scalar and 22534 vector divide. Assumes no trapping math and finite arguments. */ 22535 22536void 22537rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) 22538{ 22539 machine_mode mode = GET_MODE (dst); 22540 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v; 22541 int i; 22542 22543 /* Low precision estimates guarantee 5 bits of accuracy. High 22544 precision estimates guarantee 14 bits of accuracy. SFmode 22545 requires 23 bits of accuracy. DFmode requires 52 bits of 22546 accuracy. Each pass at least doubles the accuracy, leading 22547 to the following. */ 22548 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; 22549 if (mode == DFmode || mode == V2DFmode) 22550 passes++; 22551 22552 enum insn_code code = optab_handler (smul_optab, mode); 22553 insn_gen_fn gen_mul = GEN_FCN (code); 22554 22555 gcc_assert (code != CODE_FOR_nothing); 22556 22557 one = rs6000_load_constant_and_splat (mode, dconst1); 22558 22559 /* x0 = 1./d estimate */ 22560 x0 = gen_reg_rtx (mode); 22561 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), 22562 UNSPEC_FRES))); 22563 22564 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ 22565 if (passes > 1) { 22566 22567 /* e0 = 1. - d * x0 */ 22568 e0 = gen_reg_rtx (mode); 22569 rs6000_emit_nmsub (e0, d, x0, one); 22570 22571 /* x1 = x0 + e0 * x0 */ 22572 x1 = gen_reg_rtx (mode); 22573 rs6000_emit_madd (x1, e0, x0, x0); 22574 22575 for (i = 0, xprev = x1, eprev = e0; i < passes - 2; 22576 ++i, xprev = xnext, eprev = enext) { 22577 22578 /* enext = eprev * eprev */ 22579 enext = gen_reg_rtx (mode); 22580 emit_insn (gen_mul (enext, eprev, eprev)); 22581 22582 /* xnext = xprev + enext * xprev */ 22583 xnext = gen_reg_rtx (mode); 22584 rs6000_emit_madd (xnext, enext, xprev, xprev); 22585 } 22586 22587 } else 22588 xprev = x0; 22589 22590 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). 
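   Below this is expanded as u = n * x_i, v = n - d * u (fnmsub) and
   dst = u + v * x_i (fmadd), which is the same expression algebraically:
   u + (n - d*u)*x_i = n*x_i + n*x_i - d*n*x_i*x_i = n*x_i*(2 - d*x_i).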
*/ 22591 22592 /* u = n * xprev */ 22593 u = gen_reg_rtx (mode); 22594 emit_insn (gen_mul (u, n, xprev)); 22595 22596 /* v = n - (d * u) */ 22597 v = gen_reg_rtx (mode); 22598 rs6000_emit_nmsub (v, d, u, n); 22599 22600 /* dst = (v * xprev) + u */ 22601 rs6000_emit_madd (dst, v, xprev, u); 22602 22603 if (note_p) 22604 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d)); 22605} 22606 22607/* Goldschmidt's Algorithm for single/double-precision floating point 22608 sqrt and rsqrt. Assumes no trapping math and finite arguments. */ 22609 22610void 22611rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) 22612{ 22613 machine_mode mode = GET_MODE (src); 22614 rtx e = gen_reg_rtx (mode); 22615 rtx g = gen_reg_rtx (mode); 22616 rtx h = gen_reg_rtx (mode); 22617 22618 /* Low precision estimates guarantee 5 bits of accuracy. High 22619 precision estimates guarantee 14 bits of accuracy. SFmode 22620 requires 23 bits of accuracy. DFmode requires 52 bits of 22621 accuracy. Each pass at least doubles the accuracy, leading 22622 to the following. */ 22623 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; 22624 if (mode == DFmode || mode == V2DFmode) 22625 passes++; 22626 22627 int i; 22628 rtx mhalf; 22629 enum insn_code code = optab_handler (smul_optab, mode); 22630 insn_gen_fn gen_mul = GEN_FCN (code); 22631 22632 gcc_assert (code != CODE_FOR_nothing); 22633 22634 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf); 22635 22636 /* e = rsqrt estimate */ 22637 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src), 22638 UNSPEC_RSQRT))); 22639 22640 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */ 22641 if (!recip) 22642 { 22643 rtx zero = force_reg (mode, CONST0_RTX (mode)); 22644 22645 if (mode == SFmode) 22646 { 22647 rtx target = emit_conditional_move (e, { GT, src, zero, mode }, 22648 e, zero, mode, 0); 22649 if (target != e) 22650 emit_move_insn (e, target); 22651 } 22652 else 22653 { 22654 rtx cond = gen_rtx_GT (VOIDmode, e, zero); 22655 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero); 22656 } 22657 } 22658 22659 /* g = sqrt estimate. */ 22660 emit_insn (gen_mul (g, e, src)); 22661 /* h = 1/(2*sqrt) estimate. */ 22662 emit_insn (gen_mul (h, e, mhalf)); 22663 22664 if (recip) 22665 { 22666 if (passes == 1) 22667 { 22668 rtx t = gen_reg_rtx (mode); 22669 rs6000_emit_nmsub (t, g, h, mhalf); 22670 /* Apply correction directly to 1/rsqrt estimate. */ 22671 rs6000_emit_madd (dst, e, t, e); 22672 } 22673 else 22674 { 22675 for (i = 0; i < passes; i++) 22676 { 22677 rtx t1 = gen_reg_rtx (mode); 22678 rtx g1 = gen_reg_rtx (mode); 22679 rtx h1 = gen_reg_rtx (mode); 22680 22681 rs6000_emit_nmsub (t1, g, h, mhalf); 22682 rs6000_emit_madd (g1, g, t1, g); 22683 rs6000_emit_madd (h1, h, t1, h); 22684 22685 g = g1; 22686 h = h1; 22687 } 22688 /* Multiply by 2 for 1/rsqrt. */ 22689 emit_insn (gen_add3_insn (dst, h, h)); 22690 } 22691 } 22692 else 22693 { 22694 rtx t = gen_reg_rtx (mode); 22695 rs6000_emit_nmsub (t, g, h, mhalf); 22696 rs6000_emit_madd (dst, g, t, g); 22697 } 22698 22699 return; 22700} 22701 22702/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD 22703 (Power7) targets. DST is the target, and SRC is the argument operand. */ 22704 22705void 22706rs6000_emit_popcount (rtx dst, rtx src) 22707{ 22708 machine_mode mode = GET_MODE (dst); 22709 rtx tmp1, tmp2; 22710 22711 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. 
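   Otherwise fall back to popcntb, which gives a population count per
   byte; multiplying by 0x01010101 (0x0101010101010101 for DImode) sums
   the byte counts into the most significant byte, which the final shift
   right extracts.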
*/ 22712 if (TARGET_POPCNTD) 22713 { 22714 if (mode == SImode) 22715 emit_insn (gen_popcntdsi2 (dst, src)); 22716 else 22717 emit_insn (gen_popcntddi2 (dst, src)); 22718 return; 22719 } 22720 22721 tmp1 = gen_reg_rtx (mode); 22722 22723 if (mode == SImode) 22724 { 22725 emit_insn (gen_popcntbsi2 (tmp1, src)); 22726 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101), 22727 NULL_RTX, 0); 22728 tmp2 = force_reg (SImode, tmp2); 22729 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24))); 22730 } 22731 else 22732 { 22733 emit_insn (gen_popcntbdi2 (tmp1, src)); 22734 tmp2 = expand_mult (DImode, tmp1, 22735 GEN_INT ((HOST_WIDE_INT) 22736 0x01010101 << 32 | 0x01010101), 22737 NULL_RTX, 0); 22738 tmp2 = force_reg (DImode, tmp2); 22739 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56))); 22740 } 22741} 22742 22743 22744/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the 22745 target, and SRC is the argument operand. */ 22746 22747void 22748rs6000_emit_parity (rtx dst, rtx src) 22749{ 22750 machine_mode mode = GET_MODE (dst); 22751 rtx tmp; 22752 22753 tmp = gen_reg_rtx (mode); 22754 22755 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */ 22756 if (TARGET_CMPB) 22757 { 22758 if (mode == SImode) 22759 { 22760 emit_insn (gen_popcntbsi2 (tmp, src)); 22761 emit_insn (gen_paritysi2_cmpb (dst, tmp)); 22762 } 22763 else 22764 { 22765 emit_insn (gen_popcntbdi2 (tmp, src)); 22766 emit_insn (gen_paritydi2_cmpb (dst, tmp)); 22767 } 22768 return; 22769 } 22770 22771 if (mode == SImode) 22772 { 22773 /* Is mult+shift >= shift+xor+shift+xor? */ 22774 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) 22775 { 22776 rtx tmp1, tmp2, tmp3, tmp4; 22777 22778 tmp1 = gen_reg_rtx (SImode); 22779 emit_insn (gen_popcntbsi2 (tmp1, src)); 22780 22781 tmp2 = gen_reg_rtx (SImode); 22782 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16))); 22783 tmp3 = gen_reg_rtx (SImode); 22784 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2)); 22785 22786 tmp4 = gen_reg_rtx (SImode); 22787 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8))); 22788 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4)); 22789 } 22790 else 22791 rs6000_emit_popcount (tmp, src); 22792 emit_insn (gen_andsi3 (dst, tmp, const1_rtx)); 22793 } 22794 else 22795 { 22796 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */ 22797 if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) 22798 { 22799 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 22800 22801 tmp1 = gen_reg_rtx (DImode); 22802 emit_insn (gen_popcntbdi2 (tmp1, src)); 22803 22804 tmp2 = gen_reg_rtx (DImode); 22805 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32))); 22806 tmp3 = gen_reg_rtx (DImode); 22807 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2)); 22808 22809 tmp4 = gen_reg_rtx (DImode); 22810 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16))); 22811 tmp5 = gen_reg_rtx (DImode); 22812 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4)); 22813 22814 tmp6 = gen_reg_rtx (DImode); 22815 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8))); 22816 emit_insn (gen_xordi3 (tmp, tmp5, tmp6)); 22817 } 22818 else 22819 rs6000_emit_popcount (tmp, src); 22820 emit_insn (gen_anddi3 (dst, tmp, const1_rtx)); 22821 } 22822} 22823 22824/* Expand an Altivec constant permutation for little endian mode. 22825 OP0 and OP1 are the input vectors and TARGET is the output vector. 22826 SEL specifies the constant permutation vector. 22827 22828 There are two issues: First, the two input operands must be 22829 swapped so that together they form a double-wide array in LE 22830 order. 
Second, the vperm instruction has surprising behavior 22831 in LE mode: it interprets the elements of the source vectors 22832 in BE mode ("left to right") and interprets the elements of 22833 the destination vector in LE mode ("right to left"). To 22834 correct for this, we must subtract each element of the permute 22835 control vector from 31. 22836 22837 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} 22838 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. 22839 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to 22840 serve as the permute control vector. Then, in BE mode, 22841 22842 vperm 9,10,11,12 22843 22844 places the desired result in vr9. However, in LE mode the 22845 vector contents will be 22846 22847 vr10 = 00000003 00000002 00000001 00000000 22848 vr11 = 00000007 00000006 00000005 00000004 22849 22850 The result of the vperm using the same permute control vector is 22851 22852 vr9 = 05000000 07000000 01000000 03000000 22853 22854 That is, the leftmost 4 bytes of vr10 are interpreted as the 22855 source for the rightmost 4 bytes of vr9, and so on. 22856 22857 If we change the permute control vector to 22858 22859 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4} 22860 22861 and issue 22862 22863 vperm 9,11,10,12 22864 22865 we get the desired 22866 22867 vr9 = 00000006 00000004 00000002 00000000. */ 22868 22869static void 22870altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, 22871 const vec_perm_indices &sel) 22872{ 22873 unsigned int i; 22874 rtx perm[16]; 22875 rtx constv, unspec; 22876 22877 /* Unpack and adjust the constant selector. */ 22878 for (i = 0; i < 16; ++i) 22879 { 22880 unsigned int elt = 31 - (sel[i] & 31); 22881 perm[i] = GEN_INT (elt); 22882 } 22883 22884 /* Expand to a permute, swapping the inputs and using the 22885 adjusted selector. */ 22886 if (!REG_P (op0)) 22887 op0 = force_reg (V16QImode, op0); 22888 if (!REG_P (op1)) 22889 op1 = force_reg (V16QImode, op1); 22890 22891 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); 22892 constv = force_reg (V16QImode, constv); 22893 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), 22894 UNSPEC_VPERM); 22895 if (!REG_P (target)) 22896 { 22897 rtx tmp = gen_reg_rtx (V16QImode); 22898 emit_move_insn (tmp, unspec); 22899 unspec = tmp; 22900 } 22901 22902 emit_move_insn (target, unspec); 22903} 22904 22905/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the 22906 permute control vector. But here it's not a constant, so we must 22907 generate a vector NAND or NOR to do the adjustment. */ 22908 22909void 22910altivec_expand_vec_perm_le (rtx operands[4]) 22911{ 22912 rtx notx, iorx, unspec; 22913 rtx target = operands[0]; 22914 rtx op0 = operands[1]; 22915 rtx op1 = operands[2]; 22916 rtx sel = operands[3]; 22917 rtx tmp = target; 22918 rtx norreg = gen_reg_rtx (V16QImode); 22919 machine_mode mode = GET_MODE (target); 22920 22921 /* Get everything in regs so the pattern matches. */ 22922 if (!REG_P (op0)) 22923 op0 = force_reg (mode, op0); 22924 if (!REG_P (op1)) 22925 op1 = force_reg (mode, op1); 22926 if (!REG_P (sel)) 22927 sel = force_reg (V16QImode, sel); 22928 if (!REG_P (target)) 22929 tmp = gen_reg_rtx (mode); 22930 22931 if (TARGET_P9_VECTOR) 22932 { 22933 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel), 22934 UNSPEC_VPERMR); 22935 } 22936 else 22937 { 22938 /* Invert the selector with a VNAND if available, else a VNOR. 22939 The VNAND is preferred for future fusion opportunities. 
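   Either form computes ~SEL.  Since vperm only consults the low five
   bits of each selector byte, complementing a byte maps index i to
   31 - (i & 31), the same adjustment the constant case makes by
   subtracting each element from 31.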
*/ 22940 notx = gen_rtx_NOT (V16QImode, sel); 22941 iorx = (TARGET_P8_VECTOR 22942 ? gen_rtx_IOR (V16QImode, notx, notx) 22943 : gen_rtx_AND (V16QImode, notx, notx)); 22944 emit_insn (gen_rtx_SET (norreg, iorx)); 22945 22946 /* Permute with operands reversed and adjusted selector. */ 22947 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), 22948 UNSPEC_VPERM); 22949 } 22950 22951 /* Copy into target, possibly by way of a register. */ 22952 if (!REG_P (target)) 22953 { 22954 emit_move_insn (tmp, unspec); 22955 unspec = tmp; 22956 } 22957 22958 emit_move_insn (target, unspec); 22959} 22960 22961/* Expand an Altivec constant permutation. Return true if we match 22962 an efficient implementation; false to fall back to VPERM. 22963 22964 OP0 and OP1 are the input vectors and TARGET is the output vector. 22965 SEL specifies the constant permutation vector. */ 22966 22967static bool 22968altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, 22969 const vec_perm_indices &sel) 22970{ 22971 struct altivec_perm_insn { 22972 HOST_WIDE_INT mask; 22973 enum insn_code impl; 22974 unsigned char perm[16]; 22975 }; 22976 static const struct altivec_perm_insn patterns[] = { 22977 {OPTION_MASK_ALTIVEC, 22978 CODE_FOR_altivec_vpkuhum_direct, 22979 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}}, 22980 {OPTION_MASK_ALTIVEC, 22981 CODE_FOR_altivec_vpkuwum_direct, 22982 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}}, 22983 {OPTION_MASK_ALTIVEC, 22984 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct 22985 : CODE_FOR_altivec_vmrglb_direct, 22986 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}, 22987 {OPTION_MASK_ALTIVEC, 22988 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct 22989 : CODE_FOR_altivec_vmrglh_direct, 22990 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, 22991 {OPTION_MASK_ALTIVEC, 22992 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si 22993 : CODE_FOR_altivec_vmrglw_direct_v4si, 22994 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, 22995 {OPTION_MASK_ALTIVEC, 22996 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct 22997 : CODE_FOR_altivec_vmrghb_direct, 22998 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}, 22999 {OPTION_MASK_ALTIVEC, 23000 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct 23001 : CODE_FOR_altivec_vmrghh_direct, 23002 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, 23003 {OPTION_MASK_ALTIVEC, 23004 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si 23005 : CODE_FOR_altivec_vmrghw_direct_v4si, 23006 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, 23007 {OPTION_MASK_P8_VECTOR, 23008 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct 23009 : CODE_FOR_p8_vmrgow_v4sf_direct, 23010 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}}, 23011 {OPTION_MASK_P8_VECTOR, 23012 BYTES_BIG_ENDIAN ? 
CODE_FOR_p8_vmrgow_v4sf_direct 23013 : CODE_FOR_p8_vmrgew_v4sf_direct, 23014 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}}, 23015 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, 23016 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}}, 23017 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, 23018 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}, 23019 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, 23020 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}}, 23021 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi, 23022 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}}; 23023 23024 unsigned int i, j, elt, which; 23025 unsigned char perm[16]; 23026 rtx x; 23027 bool one_vec; 23028 23029 /* Unpack the constant selector. */ 23030 for (i = which = 0; i < 16; ++i) 23031 { 23032 elt = sel[i] & 31; 23033 which |= (elt < 16 ? 1 : 2); 23034 perm[i] = elt; 23035 } 23036 23037 /* Simplify the constant selector based on operands. */ 23038 switch (which) 23039 { 23040 default: 23041 gcc_unreachable (); 23042 23043 case 3: 23044 one_vec = false; 23045 if (!rtx_equal_p (op0, op1)) 23046 break; 23047 /* FALLTHRU */ 23048 23049 case 2: 23050 for (i = 0; i < 16; ++i) 23051 perm[i] &= 15; 23052 op0 = op1; 23053 one_vec = true; 23054 break; 23055 23056 case 1: 23057 op1 = op0; 23058 one_vec = true; 23059 break; 23060 } 23061 23062 /* Look for splat patterns. */ 23063 if (one_vec) 23064 { 23065 elt = perm[0]; 23066 23067 for (i = 0; i < 16; ++i) 23068 if (perm[i] != elt) 23069 break; 23070 if (i == 16) 23071 { 23072 if (!BYTES_BIG_ENDIAN) 23073 elt = 15 - elt; 23074 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt))); 23075 return true; 23076 } 23077 23078 if (elt % 2 == 0) 23079 { 23080 for (i = 0; i < 16; i += 2) 23081 if (perm[i] != elt || perm[i + 1] != elt + 1) 23082 break; 23083 if (i == 16) 23084 { 23085 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2; 23086 x = gen_reg_rtx (V8HImode); 23087 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0), 23088 GEN_INT (field))); 23089 emit_move_insn (target, gen_lowpart (V16QImode, x)); 23090 return true; 23091 } 23092 } 23093 23094 if (elt % 4 == 0) 23095 { 23096 for (i = 0; i < 16; i += 4) 23097 if (perm[i] != elt 23098 || perm[i + 1] != elt + 1 23099 || perm[i + 2] != elt + 2 23100 || perm[i + 3] != elt + 3) 23101 break; 23102 if (i == 16) 23103 { 23104 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4; 23105 x = gen_reg_rtx (V4SImode); 23106 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0), 23107 GEN_INT (field))); 23108 emit_move_insn (target, gen_lowpart (V16QImode, x)); 23109 return true; 23110 } 23111 } 23112 } 23113 23114 /* Look for merge and pack patterns. */ 23115 for (j = 0; j < ARRAY_SIZE (patterns); ++j) 23116 { 23117 bool swapped; 23118 23119 if ((patterns[j].mask & rs6000_isa_flags) == 0) 23120 continue; 23121 23122 elt = patterns[j].perm[0]; 23123 if (perm[0] == elt) 23124 swapped = false; 23125 else if (perm[0] == elt + 16) 23126 swapped = true; 23127 else 23128 continue; 23129 for (i = 1; i < 16; ++i) 23130 { 23131 elt = patterns[j].perm[i]; 23132 if (swapped) 23133 elt = (elt >= 16 ? 
elt - 16 : elt + 16); 23134 else if (one_vec && elt >= 16) 23135 elt -= 16; 23136 if (perm[i] != elt) 23137 break; 23138 } 23139 if (i == 16) 23140 { 23141 enum insn_code icode = patterns[j].impl; 23142 machine_mode omode = insn_data[icode].operand[0].mode; 23143 machine_mode imode = insn_data[icode].operand[1].mode; 23144 23145 rtx perm_idx = GEN_INT (0); 23146 if (icode == CODE_FOR_vsx_xxpermdi_v16qi) 23147 { 23148 int perm_val = 0; 23149 if (one_vec) 23150 { 23151 if (perm[0] == 8) 23152 perm_val |= 2; 23153 if (perm[8] == 8) 23154 perm_val |= 1; 23155 } 23156 else 23157 { 23158 if (perm[0] != 0) 23159 perm_val |= 2; 23160 if (perm[8] != 16) 23161 perm_val |= 1; 23162 } 23163 perm_idx = GEN_INT (perm_val); 23164 } 23165 23166 /* For little-endian, don't use vpkuwum and vpkuhum if the 23167 underlying vector type is not V4SI and V8HI, respectively. 23168 For example, using vpkuwum with a V8HI picks up the even 23169 halfwords (BE numbering) when the even halfwords (LE 23170 numbering) are what we need. */ 23171 if (!BYTES_BIG_ENDIAN 23172 && icode == CODE_FOR_altivec_vpkuwum_direct 23173 && ((REG_P (op0) 23174 && GET_MODE (op0) != V4SImode) 23175 || (SUBREG_P (op0) 23176 && GET_MODE (XEXP (op0, 0)) != V4SImode))) 23177 continue; 23178 if (!BYTES_BIG_ENDIAN 23179 && icode == CODE_FOR_altivec_vpkuhum_direct 23180 && ((REG_P (op0) 23181 && GET_MODE (op0) != V8HImode) 23182 || (SUBREG_P (op0) 23183 && GET_MODE (XEXP (op0, 0)) != V8HImode))) 23184 continue; 23185 23186 /* For little-endian, the two input operands must be swapped 23187 (or swapped back) to ensure proper right-to-left numbering 23188 from 0 to 2N-1. */ 23189 if (swapped ^ !BYTES_BIG_ENDIAN 23190 && icode != CODE_FOR_vsx_xxpermdi_v16qi) 23191 std::swap (op0, op1); 23192 if (imode != V16QImode) 23193 { 23194 op0 = gen_lowpart (imode, op0); 23195 op1 = gen_lowpart (imode, op1); 23196 } 23197 if (omode == V16QImode) 23198 x = target; 23199 else 23200 x = gen_reg_rtx (omode); 23201 if (icode == CODE_FOR_vsx_xxpermdi_v16qi) 23202 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx)); 23203 else 23204 emit_insn (GEN_FCN (icode) (x, op0, op1)); 23205 if (omode != V16QImode) 23206 emit_move_insn (target, gen_lowpart (V16QImode, x)); 23207 return true; 23208 } 23209 } 23210 23211 if (!BYTES_BIG_ENDIAN) 23212 { 23213 altivec_expand_vec_perm_const_le (target, op0, op1, sel); 23214 return true; 23215 } 23216 23217 return false; 23218} 23219 23220/* Expand a VSX Permute Doubleword constant permutation. 23221 Return true if we match an efficient implementation. */ 23222 23223static bool 23224rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, 23225 unsigned char perm0, unsigned char perm1) 23226{ 23227 rtx x; 23228 23229 /* If both selectors come from the same operand, fold to single op. */ 23230 if ((perm0 & 2) == (perm1 & 2)) 23231 { 23232 if (perm0 & 2) 23233 op0 = op1; 23234 else 23235 op1 = op0; 23236 } 23237 /* If both operands are equal, fold to simpler permutation. */ 23238 if (rtx_equal_p (op0, op1)) 23239 { 23240 perm0 = perm0 & 1; 23241 perm1 = (perm1 & 1) + 2; 23242 } 23243 /* If the first selector comes from the second operand, swap. */ 23244 else if (perm0 & 2) 23245 { 23246 if (perm1 & 2) 23247 return false; 23248 perm0 -= 2; 23249 perm1 += 2; 23250 std::swap (op0, op1); 23251 } 23252 /* If the second selector does not come from the second operand, fail. */ 23253 else if ((perm1 & 2) == 0) 23254 return false; 23255 23256 /* Success! 
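   TARGET may be null here when the caller is only testing whether the
   permutation can be done; in that case there is nothing to emit.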
*/ 23257 if (target != NULL) 23258 { 23259 machine_mode vmode, dmode; 23260 rtvec v; 23261 23262 vmode = GET_MODE (target); 23263 gcc_assert (GET_MODE_NUNITS (vmode) == 2); 23264 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require (); 23265 x = gen_rtx_VEC_CONCAT (dmode, op0, op1); 23266 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1)); 23267 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v)); 23268 emit_insn (gen_rtx_SET (target, x)); 23269 } 23270 return true; 23271} 23272 23273/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ 23274 23275static bool 23276rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, 23277 rtx op1, const vec_perm_indices &sel) 23278{ 23279 bool testing_p = !target; 23280 23281 /* AltiVec (and thus VSX) can handle arbitrary permutations. */ 23282 if (TARGET_ALTIVEC && testing_p) 23283 return true; 23284 23285 if (op0) 23286 { 23287 rtx nop0 = force_reg (vmode, op0); 23288 if (op0 == op1) 23289 op1 = nop0; 23290 op0 = nop0; 23291 } 23292 if (op1) 23293 op1 = force_reg (vmode, op1); 23294 23295 /* Check for ps_merge* or xxpermdi insns. */ 23296 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode)) 23297 { 23298 if (testing_p) 23299 { 23300 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); 23301 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); 23302 } 23303 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) 23304 return true; 23305 } 23306 23307 if (TARGET_ALTIVEC) 23308 { 23309 /* Force the target-independent code to lower to V16QImode. */ 23310 if (vmode != V16QImode) 23311 return false; 23312 if (altivec_expand_vec_perm_const (target, op0, op1, sel)) 23313 return true; 23314 } 23315 23316 return false; 23317} 23318 23319/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. 23320 OP0 and OP1 are the input vectors and TARGET is the output vector. 23321 PERM specifies the constant permutation vector. */ 23322 23323static void 23324rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, 23325 machine_mode vmode, const vec_perm_builder &perm) 23326{ 23327 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); 23328 if (x != target) 23329 emit_move_insn (target, x); 23330} 23331 23332/* Expand an extract even operation. */ 23333 23334void 23335rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) 23336{ 23337 machine_mode vmode = GET_MODE (target); 23338 unsigned i, nelt = GET_MODE_NUNITS (vmode); 23339 vec_perm_builder perm (nelt, nelt, 1); 23340 23341 for (i = 0; i < nelt; i++) 23342 perm.quick_push (i * 2); 23343 23344 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); 23345} 23346 23347/* Expand a vector interleave operation. */ 23348 23349void 23350rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) 23351{ 23352 machine_mode vmode = GET_MODE (target); 23353 unsigned i, high, nelt = GET_MODE_NUNITS (vmode); 23354 vec_perm_builder perm (nelt, nelt, 1); 23355 23356 high = (highp ? 0 : nelt / 2); 23357 for (i = 0; i < nelt / 2; i++) 23358 { 23359 perm.quick_push (i + high); 23360 perm.quick_push (i + nelt + high); 23361 } 23362 23363 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); 23364} 23365 23366/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
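   That is, TGT = SRC * 2**SCALE, implemented by splatting the constant
   2**SCALE into a V2DF register and doing a vector multiply.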
*/ 23367void 23368rs6000_scale_v2df (rtx tgt, rtx src, int scale) 23369{ 23370 HOST_WIDE_INT hwi_scale (scale); 23371 REAL_VALUE_TYPE r_pow; 23372 rtvec v = rtvec_alloc (2); 23373 rtx elt; 23374 rtx scale_vec = gen_reg_rtx (V2DFmode); 23375 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); 23376 elt = const_double_from_real_value (r_pow, DFmode); 23377 RTVEC_ELT (v, 0) = elt; 23378 RTVEC_ELT (v, 1) = elt; 23379 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); 23380 emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); 23381} 23382 23383/* Return an RTX representing where to find the function value of a 23384 function returning MODE. */ 23385static rtx 23386rs6000_complex_function_value (machine_mode mode) 23387{ 23388 unsigned int regno; 23389 rtx r1, r2; 23390 machine_mode inner = GET_MODE_INNER (mode); 23391 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode); 23392 23393 if (TARGET_FLOAT128_TYPE 23394 && (mode == KCmode 23395 || (mode == TCmode && TARGET_IEEEQUAD))) 23396 regno = ALTIVEC_ARG_RETURN; 23397 23398 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 23399 regno = FP_ARG_RETURN; 23400 23401 else 23402 { 23403 regno = GP_ARG_RETURN; 23404 23405 /* 32-bit is OK since it'll go in r3/r4. */ 23406 if (TARGET_32BIT && inner_bytes >= 4) 23407 return gen_rtx_REG (mode, regno); 23408 } 23409 23410 if (inner_bytes >= 8) 23411 return gen_rtx_REG (mode, regno); 23412 23413 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno), 23414 const0_rtx); 23415 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1), 23416 GEN_INT (inner_bytes)); 23417 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); 23418} 23419 23420/* Return an rtx describing a return value of MODE as a PARALLEL 23421 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO, 23422 stride REG_STRIDE. */ 23423 23424static rtx 23425rs6000_parallel_return (machine_mode mode, 23426 int n_elts, machine_mode elt_mode, 23427 unsigned int regno, unsigned int reg_stride) 23428{ 23429 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); 23430 23431 int i; 23432 for (i = 0; i < n_elts; i++) 23433 { 23434 rtx r = gen_rtx_REG (elt_mode, regno); 23435 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); 23436 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); 23437 regno += reg_stride; 23438 } 23439 23440 return par; 23441} 23442 23443/* Target hook for TARGET_FUNCTION_VALUE. 23444 23445 An integer value is in r3 and a floating-point value is in fp1, 23446 unless -msoft-float. */ 23447 23448static rtx 23449rs6000_function_value (const_tree valtype, 23450 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 23451 bool outgoing ATTRIBUTE_UNUSED) 23452{ 23453 machine_mode mode; 23454 unsigned int regno; 23455 machine_mode elt_mode; 23456 int n_elts; 23457 23458 /* Special handling for structs in darwin64. */ 23459 if (TARGET_MACHO 23460 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype)) 23461 { 23462 CUMULATIVE_ARGS valcum; 23463 rtx valret; 23464 23465 valcum.words = 0; 23466 valcum.fregno = FP_ARG_MIN_REG; 23467 valcum.vregno = ALTIVEC_ARG_MIN_REG; 23468 /* Do a trial code generation as if this were going to be passed as 23469 an argument; if any part goes in memory, we return NULL. */ 23470 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true); 23471 if (valret) 23472 return valret; 23473 /* Otherwise fall through to standard ABI rules. 
*/ 23474 } 23475 23476 mode = TYPE_MODE (valtype); 23477 23478 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */ 23479 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts)) 23480 { 23481 int first_reg, n_regs; 23482 23483 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode)) 23484 { 23485 /* _Decimal128 must use even/odd register pairs. */ 23486 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 23487 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; 23488 } 23489 else 23490 { 23491 first_reg = ALTIVEC_ARG_RETURN; 23492 n_regs = 1; 23493 } 23494 23495 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs); 23496 } 23497 23498 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */ 23499 if (TARGET_32BIT && TARGET_POWERPC64) 23500 switch (mode) 23501 { 23502 default: 23503 break; 23504 case E_DImode: 23505 case E_SCmode: 23506 case E_DCmode: 23507 case E_TCmode: 23508 int count = GET_MODE_SIZE (mode) / 4; 23509 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1); 23510 } 23511 23512 if ((INTEGRAL_TYPE_P (valtype) 23513 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64)) 23514 || POINTER_TYPE_P (valtype)) 23515 mode = TARGET_32BIT ? SImode : DImode; 23516 23517 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 23518 /* _Decimal128 must use an even/odd register pair. */ 23519 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 23520 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT 23521 && !FLOAT128_VECTOR_P (mode)) 23522 regno = FP_ARG_RETURN; 23523 else if (TREE_CODE (valtype) == COMPLEX_TYPE 23524 && targetm.calls.split_complex_arg) 23525 return rs6000_complex_function_value (mode); 23526 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same 23527 return register is used in both cases, and we won't see V2DImode/V2DFmode 23528 for pure altivec, combine the two cases. */ 23529 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode)) 23530 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI 23531 && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) 23532 regno = ALTIVEC_ARG_RETURN; 23533 else 23534 regno = GP_ARG_RETURN; 23535 23536 return gen_rtx_REG (mode, regno); 23537} 23538 23539/* Define how to find the value returned by a library function 23540 assuming the value has mode MODE. */ 23541rtx 23542rs6000_libcall_value (machine_mode mode) 23543{ 23544 unsigned int regno; 23545 23546 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */ 23547 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode) 23548 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1); 23549 23550 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 23551 /* _Decimal128 must use an even/odd register pair. */ 23552 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 23553 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT) 23554 regno = FP_ARG_RETURN; 23555 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same 23556 return register is used in both cases, and we won't see V2DImode/V2DFmode 23557 for pure altivec, combine the two cases. 
*/ 23558 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) 23559 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) 23560 regno = ALTIVEC_ARG_RETURN; 23561 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg) 23562 return rs6000_complex_function_value (mode); 23563 else 23564 regno = GP_ARG_RETURN; 23565 23566 return gen_rtx_REG (mode, regno); 23567} 23568 23569/* Compute register pressure classes. We implement the target hook to avoid 23570 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can 23571 lead to incorrect estimates of number of available registers and therefor 23572 increased register pressure/spill. */ 23573static int 23574rs6000_compute_pressure_classes (enum reg_class *pressure_classes) 23575{ 23576 int n; 23577 23578 n = 0; 23579 pressure_classes[n++] = GENERAL_REGS; 23580 if (TARGET_ALTIVEC) 23581 pressure_classes[n++] = ALTIVEC_REGS; 23582 if (TARGET_VSX) 23583 pressure_classes[n++] = VSX_REGS; 23584 else 23585 { 23586 if (TARGET_HARD_FLOAT) 23587 pressure_classes[n++] = FLOAT_REGS; 23588 } 23589 pressure_classes[n++] = CR_REGS; 23590 pressure_classes[n++] = SPECIAL_REGS; 23591 23592 return n; 23593} 23594 23595/* Given FROM and TO register numbers, say whether this elimination is allowed. 23596 Frame pointer elimination is automatically handled. 23597 23598 For the RS/6000, if frame pointer elimination is being done, we would like 23599 to convert ap into fp, not sp. 23600 23601 We need r30 if -mminimal-toc was specified, and there are constant pool 23602 references. */ 23603 23604static bool 23605rs6000_can_eliminate (const int from, const int to) 23606{ 23607 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM 23608 ? ! frame_pointer_needed 23609 : from == RS6000_PIC_OFFSET_TABLE_REGNUM 23610 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL 23611 || constant_pool_empty_p () 23612 : true); 23613} 23614 23615/* Define the offset between two registers, FROM to be eliminated and its 23616 replacement TO, at the start of a routine. */ 23617HOST_WIDE_INT 23618rs6000_initial_elimination_offset (int from, int to) 23619{ 23620 rs6000_stack_t *info = rs6000_stack_info (); 23621 HOST_WIDE_INT offset; 23622 23623 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 23624 offset = info->push_p ? 0 : -info->total_size; 23625 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 23626 { 23627 offset = info->push_p ? 0 : -info->total_size; 23628 if (FRAME_GROWS_DOWNWARD) 23629 offset += info->fixed_size + info->vars_size + info->parm_size; 23630 } 23631 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 23632 offset = FRAME_GROWS_DOWNWARD 23633 ? info->fixed_size + info->vars_size + info->parm_size 23634 : 0; 23635 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 23636 offset = info->total_size; 23637 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 23638 offset = info->push_p ? info->total_size : 0; 23639 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM) 23640 offset = 0; 23641 else 23642 gcc_unreachable (); 23643 23644 return offset; 23645} 23646 23647/* Fill in sizes of registers used by unwinder. */ 23648 23649static void 23650rs6000_init_dwarf_reg_sizes_extra (tree address) 23651{ 23652 if (TARGET_MACHO && ! 
TARGET_ALTIVEC) 23653 { 23654 int i; 23655 machine_mode mode = TYPE_MODE (char_type_node); 23656 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); 23657 rtx mem = gen_rtx_MEM (BLKmode, addr); 23658 rtx value = gen_int_mode (16, mode); 23659 23660 /* On Darwin, libgcc may be built to run on both G3 and G4/5. 23661 The unwinder still needs to know the size of Altivec registers. */ 23662 23663 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) 23664 { 23665 int column = DWARF_REG_TO_UNWIND_COLUMN 23666 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); 23667 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); 23668 23669 emit_move_insn (adjust_address (mem, mode, offset), value); 23670 } 23671 } 23672} 23673 23674/* Map internal gcc register numbers to debug format register numbers. 23675 FORMAT specifies the type of debug register number to use: 23676 0 -- debug information, except for frame-related sections 23677 1 -- DWARF .debug_frame section 23678 2 -- DWARF .eh_frame section */ 23679 23680unsigned int 23681rs6000_dbx_register_number (unsigned int regno, unsigned int format) 23682{ 23683 /* On some platforms, we use the standard DWARF register 23684 numbering for .debug_info and .debug_frame. */ 23685 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1) 23686 { 23687#ifdef RS6000_USE_DWARF_NUMBERING 23688 if (regno <= 31) 23689 return regno; 23690 if (FP_REGNO_P (regno)) 23691 return regno - FIRST_FPR_REGNO + 32; 23692 if (ALTIVEC_REGNO_P (regno)) 23693 return regno - FIRST_ALTIVEC_REGNO + 1124; 23694 if (regno == LR_REGNO) 23695 return 108; 23696 if (regno == CTR_REGNO) 23697 return 109; 23698 if (regno == CA_REGNO) 23699 return 101; /* XER */ 23700 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has 23701 translated any combination of CR2, CR3, CR4 saves to a save of CR2. 23702 The actual code emitted saves the whole of CR, so we map CR2_REGNO 23703 to the DWARF reg for CR. */ 23704 if (format == 1 && regno == CR2_REGNO) 23705 return 64; 23706 if (CR_REGNO_P (regno)) 23707 return regno - CR0_REGNO + 86; 23708 if (regno == VRSAVE_REGNO) 23709 return 356; 23710 if (regno == VSCR_REGNO) 23711 return 67; 23712 23713 /* These do not make much sense. */ 23714 if (regno == FRAME_POINTER_REGNUM) 23715 return 111; 23716 if (regno == ARG_POINTER_REGNUM) 23717 return 67; 23718 if (regno == 64) 23719 return 100; 23720 23721 gcc_unreachable (); 23722#endif 23723 } 23724 23725 /* We use the GCC 7 (and before) internal number for non-DWARF debug 23726 information, and also for .eh_frame. */ 23727 /* Translate the regnos to their numbers in GCC 7 (and before). */ 23728 if (regno <= 31) 23729 return regno; 23730 if (FP_REGNO_P (regno)) 23731 return regno - FIRST_FPR_REGNO + 32; 23732 if (ALTIVEC_REGNO_P (regno)) 23733 return regno - FIRST_ALTIVEC_REGNO + 77; 23734 if (regno == LR_REGNO) 23735 return 65; 23736 if (regno == CTR_REGNO) 23737 return 66; 23738 if (regno == CA_REGNO) 23739 return 76; /* XER */ 23740 if (CR_REGNO_P (regno)) 23741 return regno - CR0_REGNO + 68; 23742 if (regno == VRSAVE_REGNO) 23743 return 109; 23744 if (regno == VSCR_REGNO) 23745 return 110; 23746 23747 if (regno == FRAME_POINTER_REGNUM) 23748 return 111; 23749 if (regno == ARG_POINTER_REGNUM) 23750 return 67; 23751 if (regno == 64) 23752 return 64; 23753 23754 gcc_unreachable (); 23755} 23756 23757/* target hook eh_return_filter_mode */ 23758static scalar_int_mode 23759rs6000_eh_return_filter_mode (void) 23760{ 23761 return TARGET_32BIT ? 
SImode : word_mode; 23762} 23763 23764/* Target hook for translate_mode_attribute. */ 23765static machine_mode 23766rs6000_translate_mode_attribute (machine_mode mode) 23767{ 23768 if ((FLOAT128_IEEE_P (mode) 23769 && ieee128_float_type_node == long_double_type_node) 23770 || (FLOAT128_IBM_P (mode) 23771 && ibm128_float_type_node == long_double_type_node)) 23772 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode; 23773 return mode; 23774} 23775 23776/* Target hook for scalar_mode_supported_p. */ 23777static bool 23778rs6000_scalar_mode_supported_p (scalar_mode mode) 23779{ 23780 /* -m32 does not support TImode. This is the default, from 23781 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the 23782 same ABI as for -m32. But default_scalar_mode_supported_p allows 23783 integer modes of precision 2 * BITS_PER_WORD, which matches TImode 23784 for -mpowerpc64. */ 23785 if (TARGET_32BIT && mode == TImode) 23786 return false; 23787 23788 if (DECIMAL_FLOAT_MODE_P (mode)) 23789 return default_decimal_float_supported_p (); 23790 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) 23791 return true; 23792 else 23793 return default_scalar_mode_supported_p (mode); 23794} 23795 23796/* Target hook for libgcc_floating_mode_supported_p. */ 23797 23798static bool 23799rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode) 23800{ 23801 switch (mode) 23802 { 23803 case E_SFmode: 23804 case E_DFmode: 23805 case E_TFmode: 23806 return true; 23807 23808 /* We only return true for KFmode if IEEE 128-bit types are supported, and 23809 if long double does not use the IEEE 128-bit format. If long double 23810 uses the IEEE 128-bit format, it will use TFmode and not KFmode. 23811 Because the code will not use KFmode in that case, there will be aborts 23812 because it can't find KFmode in the Floatn types. */ 23813 case E_KFmode: 23814 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD; 23815 23816 default: 23817 return false; 23818 } 23819} 23820 23821/* Target hook for vector_mode_supported_p. */ 23822static bool 23823rs6000_vector_mode_supported_p (machine_mode mode) 23824{ 23825 /* There is no vector form for IEEE 128-bit. If we return true for IEEE 23826 128-bit, the compiler might try to widen IEEE 128-bit to IBM 23827 double-double. */ 23828 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode)) 23829 return true; 23830 23831 else 23832 return false; 23833} 23834 23835/* Target hook for floatn_mode. */ 23836static opt_scalar_float_mode 23837rs6000_floatn_mode (int n, bool extended) 23838{ 23839 if (extended) 23840 { 23841 switch (n) 23842 { 23843 case 32: 23844 return DFmode; 23845 23846 case 64: 23847 if (TARGET_FLOAT128_TYPE) 23848 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; 23849 else 23850 return opt_scalar_float_mode (); 23851 23852 case 128: 23853 return opt_scalar_float_mode (); 23854 23855 default: 23856 /* Those are the only valid _FloatNx types. */ 23857 gcc_unreachable (); 23858 } 23859 } 23860 else 23861 { 23862 switch (n) 23863 { 23864 case 32: 23865 return SFmode; 23866 23867 case 64: 23868 return DFmode; 23869 23870 case 128: 23871 if (TARGET_FLOAT128_TYPE) 23872 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; 23873 else 23874 return opt_scalar_float_mode (); 23875 23876 default: 23877 return opt_scalar_float_mode (); 23878 } 23879 } 23880 23881} 23882 23883/* Target hook for c_mode_for_suffix. 
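   Map the 'q'/'Q' constant suffix to the IEEE 128-bit floating point
   mode when the __float128 keyword is available.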
*/ 23884static machine_mode 23885rs6000_c_mode_for_suffix (char suffix) 23886{ 23887 if (TARGET_FLOAT128_TYPE) 23888 { 23889 if (suffix == 'q' || suffix == 'Q') 23890 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; 23891 23892 /* At the moment, we are not defining a suffix for IBM extended double. 23893 If/when the default for -mabi=ieeelongdouble is changed, and we want 23894 to support __ibm128 constants in legacy library code, we may need to 23895 re-evalaute this decision. Currently, c-lex.cc only supports 'w' and 23896 'q' as machine dependent suffixes. The x86_64 port uses 'w' for 23897 __float80 constants. */ 23898 } 23899 23900 return VOIDmode; 23901} 23902 23903/* Target hook for invalid_arg_for_unprototyped_fn. */ 23904static const char * 23905invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) 23906{ 23907 return (!rs6000_darwin64_abi 23908 && typelist == 0 23909 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE 23910 && (funcdecl == NULL_TREE 23911 || (TREE_CODE (funcdecl) == FUNCTION_DECL 23912 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) 23913 ? N_("AltiVec argument passed to unprototyped function") 23914 : NULL; 23915} 23916 23917/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register 23918 setup by using __stack_chk_fail_local hidden function instead of 23919 calling __stack_chk_fail directly. Otherwise it is better to call 23920 __stack_chk_fail directly. */ 23921 23922static tree ATTRIBUTE_UNUSED 23923rs6000_stack_protect_fail (void) 23924{ 23925 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) 23926 ? default_hidden_stack_protect_fail () 23927 : default_external_stack_protect_fail (); 23928} 23929 23930/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ 23931 23932#if TARGET_ELF 23933static unsigned HOST_WIDE_INT 23934rs6000_asan_shadow_offset (void) 23935{ 23936 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29); 23937} 23938#endif 23939 23940/* Mask options that we want to support inside of attribute((target)) and 23941 #pragma GCC target operations. Note, we do not include things like 23942 64/32-bit, endianness, hard/soft floating point, etc. that would have 23943 different calling sequences. 
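   Each entry below gives the option name, the rs6000_isa_flags bit it
   controls, whether the sense of the bit is inverted, and whether the
   name may actually be used; names with VALID_TARGET clear are rejected
   with an "is not allowed" diagnostic if they appear in an attribute or
   pragma.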
*/ 23944 23945struct rs6000_opt_mask { 23946 const char *name; /* option name */ 23947 HOST_WIDE_INT mask; /* mask to set */ 23948 bool invert; /* invert sense of mask */ 23949 bool valid_target; /* option is a target option */ 23950}; 23951 23952static struct rs6000_opt_mask const rs6000_opt_masks[] = 23953{ 23954 { "altivec", OPTION_MASK_ALTIVEC, false, true }, 23955 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX, 23956 false, true }, 23957 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR, 23958 false, true }, 23959 { "cmpb", OPTION_MASK_CMPB, false, true }, 23960 { "crypto", OPTION_MASK_CRYPTO, false, true }, 23961 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, 23962 { "dlmzb", OPTION_MASK_DLMZB, false, true }, 23963 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, 23964 false, true }, 23965 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true }, 23966 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true }, 23967 { "fprnd", OPTION_MASK_FPRND, false, true }, 23968 { "power10", OPTION_MASK_POWER10, false, true }, 23969 { "hard-dfp", OPTION_MASK_DFP, false, true }, 23970 { "htm", OPTION_MASK_HTM, false, true }, 23971 { "isel", OPTION_MASK_ISEL, false, true }, 23972 { "mfcrf", OPTION_MASK_MFCRF, false, true }, 23973 { "mfpgpr", 0, false, true }, 23974 { "mma", OPTION_MASK_MMA, false, true }, 23975 { "modulo", OPTION_MASK_MODULO, false, true }, 23976 { "mulhw", OPTION_MASK_MULHW, false, true }, 23977 { "multiple", OPTION_MASK_MULTIPLE, false, true }, 23978 { "pcrel", OPTION_MASK_PCREL, false, true }, 23979 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true }, 23980 { "popcntb", OPTION_MASK_POPCNTB, false, true }, 23981 { "popcntd", OPTION_MASK_POPCNTD, false, true }, 23982 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, 23983 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, 23984 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, 23985 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, 23986 { "power9-misc", OPTION_MASK_P9_MISC, false, true }, 23987 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, 23988 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true }, 23989 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, 23990 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, 23991 { "prefixed", OPTION_MASK_PREFIXED, false, true }, 23992 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, 23993 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, 23994 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, 23995 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, 23996 { "string", 0, false, true }, 23997 { "update", OPTION_MASK_NO_UPDATE, true , true }, 23998 { "vsx", OPTION_MASK_VSX, false, true }, 23999#ifdef OPTION_MASK_64BIT 24000#if TARGET_AIX_OS 24001 { "aix64", OPTION_MASK_64BIT, false, false }, 24002 { "aix32", OPTION_MASK_64BIT, true, false }, 24003#else 24004 { "64", OPTION_MASK_64BIT, false, false }, 24005 { "32", OPTION_MASK_64BIT, true, false }, 24006#endif 24007#endif 24008#ifdef OPTION_MASK_EABI 24009 { "eabi", OPTION_MASK_EABI, false, false }, 24010#endif 24011#ifdef OPTION_MASK_LITTLE_ENDIAN 24012 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false }, 24013 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false }, 24014#endif 24015#ifdef OPTION_MASK_RELOCATABLE 24016 { "relocatable", OPTION_MASK_RELOCATABLE, false, false }, 24017#endif 24018#ifdef OPTION_MASK_STRICT_ALIGN 24019 { "strict-align", 
OPTION_MASK_STRICT_ALIGN, false, false }, 24020#endif 24021 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false }, 24022 { "string", 0, false, false }, 24023}; 24024 24025/* Builtin mask mapping for printing the flags. */ 24026static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = 24027{ 24028 { "altivec", RS6000_BTM_ALTIVEC, false, false }, 24029 { "vsx", RS6000_BTM_VSX, false, false }, 24030 { "fre", RS6000_BTM_FRE, false, false }, 24031 { "fres", RS6000_BTM_FRES, false, false }, 24032 { "frsqrte", RS6000_BTM_FRSQRTE, false, false }, 24033 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, 24034 { "popcntd", RS6000_BTM_POPCNTD, false, false }, 24035 { "cell", RS6000_BTM_CELL, false, false }, 24036 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, 24037 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false }, 24038 { "power9-misc", RS6000_BTM_P9_MISC, false, false }, 24039 { "crypto", RS6000_BTM_CRYPTO, false, false }, 24040 { "htm", RS6000_BTM_HTM, false, false }, 24041 { "hard-dfp", RS6000_BTM_DFP, false, false }, 24042 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, 24043 { "long-double-128", RS6000_BTM_LDBL128, false, false }, 24044 { "powerpc64", RS6000_BTM_POWERPC64, false, false }, 24045 { "float128", RS6000_BTM_FLOAT128, false, false }, 24046 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false }, 24047 { "mma", RS6000_BTM_MMA, false, false }, 24048 { "power10", RS6000_BTM_P10, false, false }, 24049}; 24050 24051/* Option variables that we want to support inside attribute((target)) and 24052 #pragma GCC target operations. */ 24053 24054struct rs6000_opt_var { 24055 const char *name; /* option name */ 24056 size_t global_offset; /* offset of the option in global_options. */ 24057 size_t target_offset; /* offset of the option in target options. 
*/ 24058}; 24059 24060static struct rs6000_opt_var const rs6000_opt_vars[] = 24061{ 24062 { "friz", 24063 offsetof (struct gcc_options, x_TARGET_FRIZ), 24064 offsetof (struct cl_target_option, x_TARGET_FRIZ), }, 24065 { "avoid-indexed-addresses", 24066 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM), 24067 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) }, 24068 { "longcall", 24069 offsetof (struct gcc_options, x_rs6000_default_long_calls), 24070 offsetof (struct cl_target_option, x_rs6000_default_long_calls), }, 24071 { "optimize-swaps", 24072 offsetof (struct gcc_options, x_rs6000_optimize_swaps), 24073 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), }, 24074 { "allow-movmisalign", 24075 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), 24076 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, 24077 { "sched-groups", 24078 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), 24079 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, 24080 { "always-hint", 24081 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT), 24082 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), }, 24083 { "align-branch-targets", 24084 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS), 24085 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), }, 24086 { "sched-prolog", 24087 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), 24088 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, 24089 { "sched-epilog", 24090 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), 24091 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, 24092 { "speculate-indirect-jumps", 24093 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps), 24094 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), }, 24095}; 24096 24097/* Inner function to handle attribute((target("..."))) and #pragma GCC target 24098 parsing. Return true if there were no errors. 
*/ 24099 24100static bool 24101rs6000_inner_target_options (tree args, bool attr_p) 24102{ 24103 bool ret = true; 24104 24105 if (args == NULL_TREE) 24106 ; 24107 24108 else if (TREE_CODE (args) == STRING_CST) 24109 { 24110 char *p = ASTRDUP (TREE_STRING_POINTER (args)); 24111 char *q; 24112 24113 while ((q = strtok (p, ",")) != NULL) 24114 { 24115 bool error_p = false; 24116 bool not_valid_p = false; 24117 const char *cpu_opt = NULL; 24118 24119 p = NULL; 24120 if (startswith (q, "cpu=")) 24121 { 24122 int cpu_index = rs6000_cpu_name_lookup (q+4); 24123 if (cpu_index >= 0) 24124 rs6000_cpu_index = cpu_index; 24125 else 24126 { 24127 error_p = true; 24128 cpu_opt = q+4; 24129 } 24130 } 24131 else if (startswith (q, "tune=")) 24132 { 24133 int tune_index = rs6000_cpu_name_lookup (q+5); 24134 if (tune_index >= 0) 24135 rs6000_tune_index = tune_index; 24136 else 24137 { 24138 error_p = true; 24139 cpu_opt = q+5; 24140 } 24141 } 24142 else 24143 { 24144 size_t i; 24145 bool invert = false; 24146 char *r = q; 24147 24148 error_p = true; 24149 if (startswith (r, "no-")) 24150 { 24151 invert = true; 24152 r += 3; 24153 } 24154 24155 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++) 24156 if (strcmp (r, rs6000_opt_masks[i].name) == 0) 24157 { 24158 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask; 24159 24160 if (!rs6000_opt_masks[i].valid_target) 24161 not_valid_p = true; 24162 else 24163 { 24164 error_p = false; 24165 rs6000_isa_flags_explicit |= mask; 24166 24167 /* VSX needs altivec, so -mvsx automagically sets 24168 altivec and disables -mavoid-indexed-addresses. */ 24169 if (!invert) 24170 { 24171 if (mask == OPTION_MASK_VSX) 24172 { 24173 mask |= OPTION_MASK_ALTIVEC; 24174 TARGET_AVOID_XFORM = 0; 24175 } 24176 } 24177 24178 if (rs6000_opt_masks[i].invert) 24179 invert = !invert; 24180 24181 if (invert) 24182 rs6000_isa_flags &= ~mask; 24183 else 24184 rs6000_isa_flags |= mask; 24185 } 24186 break; 24187 } 24188 24189 if (error_p && !not_valid_p) 24190 { 24191 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++) 24192 if (strcmp (r, rs6000_opt_vars[i].name) == 0) 24193 { 24194 size_t j = rs6000_opt_vars[i].global_offset; 24195 *((int *) ((char *)&global_options + j)) = !invert; 24196 error_p = false; 24197 not_valid_p = false; 24198 break; 24199 } 24200 } 24201 } 24202 24203 if (error_p) 24204 { 24205 const char *eprefix, *esuffix; 24206 24207 ret = false; 24208 if (attr_p) 24209 { 24210 eprefix = "__attribute__((__target__("; 24211 esuffix = ")))"; 24212 } 24213 else 24214 { 24215 eprefix = "#pragma GCC target "; 24216 esuffix = ""; 24217 } 24218 24219 if (cpu_opt) 24220 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix, 24221 q, esuffix); 24222 else if (not_valid_p) 24223 error ("%s%qs%s is not allowed", eprefix, q, esuffix); 24224 else 24225 error ("%s%qs%s is invalid", eprefix, q, esuffix); 24226 } 24227 } 24228 } 24229 24230 else if (TREE_CODE (args) == TREE_LIST) 24231 { 24232 do 24233 { 24234 tree value = TREE_VALUE (args); 24235 if (value) 24236 { 24237 bool ret2 = rs6000_inner_target_options (value, attr_p); 24238 if (!ret2) 24239 ret = false; 24240 } 24241 args = TREE_CHAIN (args); 24242 } 24243 while (args != NULL_TREE); 24244 } 24245 24246 else 24247 { 24248 error ("attribute %<target%> argument not a string"); 24249 return false; 24250 } 24251 24252 return ret; 24253} 24254 24255/* Print out the target options as a list for -mdebug=target. 
*/ 24256 24257static void 24258rs6000_debug_target_options (tree args, const char *prefix) 24259{ 24260 if (args == NULL_TREE) 24261 fprintf (stderr, "%s<NULL>", prefix); 24262 24263 else if (TREE_CODE (args) == STRING_CST) 24264 { 24265 char *p = ASTRDUP (TREE_STRING_POINTER (args)); 24266 char *q; 24267 24268 while ((q = strtok (p, ",")) != NULL) 24269 { 24270 p = NULL; 24271 fprintf (stderr, "%s\"%s\"", prefix, q); 24272 prefix = ", "; 24273 } 24274 } 24275 24276 else if (TREE_CODE (args) == TREE_LIST) 24277 { 24278 do 24279 { 24280 tree value = TREE_VALUE (args); 24281 if (value) 24282 { 24283 rs6000_debug_target_options (value, prefix); 24284 prefix = ", "; 24285 } 24286 args = TREE_CHAIN (args); 24287 } 24288 while (args != NULL_TREE); 24289 } 24290 24291 else 24292 gcc_unreachable (); 24293 24294 return; 24295} 24296 24297 24298/* Hook to validate attribute((target("..."))). */ 24299 24300static bool 24301rs6000_valid_attribute_p (tree fndecl, 24302 tree ARG_UNUSED (name), 24303 tree args, 24304 int flags) 24305{ 24306 struct cl_target_option cur_target; 24307 bool ret; 24308 tree old_optimize; 24309 tree new_target, new_optimize; 24310 tree func_optimize; 24311 24312 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); 24313 24314 if (TARGET_DEBUG_TARGET) 24315 { 24316 tree tname = DECL_NAME (fndecl); 24317 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n"); 24318 if (tname) 24319 fprintf (stderr, "function: %.*s\n", 24320 (int) IDENTIFIER_LENGTH (tname), 24321 IDENTIFIER_POINTER (tname)); 24322 else 24323 fprintf (stderr, "function: unknown\n"); 24324 24325 fprintf (stderr, "args:"); 24326 rs6000_debug_target_options (args, " "); 24327 fprintf (stderr, "\n"); 24328 24329 if (flags) 24330 fprintf (stderr, "flags: 0x%x\n", flags); 24331 24332 fprintf (stderr, "--------------------\n"); 24333 } 24334 24335 /* attribute((target("default"))) does nothing, beyond 24336 affecting multi-versioning. */ 24337 if (TREE_VALUE (args) 24338 && TREE_CODE (TREE_VALUE (args)) == STRING_CST 24339 && TREE_CHAIN (args) == NULL_TREE 24340 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) 24341 return true; 24342 24343 old_optimize = build_optimization_node (&global_options, 24344 &global_options_set); 24345 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); 24346 24347 /* If the function changed the optimization levels as well as setting target 24348 options, start with the optimizations specified. */ 24349 if (func_optimize && func_optimize != old_optimize) 24350 cl_optimization_restore (&global_options, &global_options_set, 24351 TREE_OPTIMIZATION (func_optimize)); 24352 24353 /* The target attributes may also change some optimization flags, so update 24354 the optimization options if necessary. */ 24355 cl_target_option_save (&cur_target, &global_options, &global_options_set); 24356 rs6000_cpu_index = rs6000_tune_index = -1; 24357 ret = rs6000_inner_target_options (args, true); 24358 24359 /* Set up any additional state. 
*/ 24360 if (ret) 24361 { 24362 ret = rs6000_option_override_internal (false); 24363 new_target = build_target_option_node (&global_options, 24364 &global_options_set); 24365 } 24366 else 24367 new_target = NULL; 24368 24369 new_optimize = build_optimization_node (&global_options, 24370 &global_options_set); 24371 24372 if (!new_target) 24373 ret = false; 24374 24375 else if (fndecl) 24376 { 24377 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; 24378 24379 if (old_optimize != new_optimize) 24380 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; 24381 } 24382 24383 cl_target_option_restore (&global_options, &global_options_set, &cur_target); 24384 24385 if (old_optimize != new_optimize) 24386 cl_optimization_restore (&global_options, &global_options_set, 24387 TREE_OPTIMIZATION (old_optimize)); 24388 24389 return ret; 24390} 24391 24392 24393/* Hook to validate the current #pragma GCC target and set the state, and 24394 update the macros based on what was changed. If ARGS is NULL, then 24395 POP_TARGET is used to reset the options. */ 24396 24397bool 24398rs6000_pragma_target_parse (tree args, tree pop_target) 24399{ 24400 tree prev_tree = build_target_option_node (&global_options, 24401 &global_options_set); 24402 tree cur_tree; 24403 struct cl_target_option *prev_opt, *cur_opt; 24404 HOST_WIDE_INT prev_flags, cur_flags, diff_flags; 24405 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask; 24406 24407 if (TARGET_DEBUG_TARGET) 24408 { 24409 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n"); 24410 fprintf (stderr, "args:"); 24411 rs6000_debug_target_options (args, " "); 24412 fprintf (stderr, "\n"); 24413 24414 if (pop_target) 24415 { 24416 fprintf (stderr, "pop_target:\n"); 24417 debug_tree (pop_target); 24418 } 24419 else 24420 fprintf (stderr, "pop_target: <NULL>\n"); 24421 24422 fprintf (stderr, "--------------------\n"); 24423 } 24424 24425 if (! args) 24426 { 24427 cur_tree = ((pop_target) 24428 ? pop_target 24429 : target_option_default_node); 24430 cl_target_option_restore (&global_options, &global_options_set, 24431 TREE_TARGET_OPTION (cur_tree)); 24432 } 24433 else 24434 { 24435 rs6000_cpu_index = rs6000_tune_index = -1; 24436 if (!rs6000_inner_target_options (args, false) 24437 || !rs6000_option_override_internal (false) 24438 || (cur_tree = build_target_option_node (&global_options, 24439 &global_options_set)) 24440 == NULL_TREE) 24441 { 24442 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) 24443 fprintf (stderr, "invalid pragma\n"); 24444 24445 return false; 24446 } 24447 } 24448 24449 target_option_current_node = cur_tree; 24450 rs6000_activate_target_options (target_option_current_node); 24451 24452 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly 24453 change the macros that are defined. */ 24454 if (rs6000_target_modify_macros_ptr) 24455 { 24456 prev_opt = TREE_TARGET_OPTION (prev_tree); 24457 prev_bumask = prev_opt->x_rs6000_builtin_mask; 24458 prev_flags = prev_opt->x_rs6000_isa_flags; 24459 24460 cur_opt = TREE_TARGET_OPTION (cur_tree); 24461 cur_flags = cur_opt->x_rs6000_isa_flags; 24462 cur_bumask = cur_opt->x_rs6000_builtin_mask; 24463 24464 diff_bumask = (prev_bumask ^ cur_bumask); 24465 diff_flags = (prev_flags ^ cur_flags); 24466 24467 if ((diff_flags != 0) || (diff_bumask != 0)) 24468 { 24469 /* Delete old macros. */ 24470 rs6000_target_modify_macros_ptr (false, 24471 prev_flags & diff_flags, 24472 prev_bumask & diff_bumask); 24473 24474 /* Define new macros. 
*/ 24475 rs6000_target_modify_macros_ptr (true, 24476 cur_flags & diff_flags, 24477 cur_bumask & diff_bumask); 24478 } 24479 } 24480 24481 return true; 24482} 24483 24484 24485/* Remember the last target of rs6000_set_current_function. */ 24486static GTY(()) tree rs6000_previous_fndecl; 24487 24488/* Restore target's globals from NEW_TREE and invalidate the 24489 rs6000_previous_fndecl cache. */ 24490 24491void 24492rs6000_activate_target_options (tree new_tree) 24493{ 24494 cl_target_option_restore (&global_options, &global_options_set, 24495 TREE_TARGET_OPTION (new_tree)); 24496 if (TREE_TARGET_GLOBALS (new_tree)) 24497 restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); 24498 else if (new_tree == target_option_default_node) 24499 restore_target_globals (&default_target_globals); 24500 else 24501 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); 24502 rs6000_previous_fndecl = NULL_TREE; 24503} 24504 24505/* Establish appropriate back-end context for processing the function 24506 FNDECL. The argument might be NULL to indicate processing at top 24507 level, outside of any function scope. */ 24508static void 24509rs6000_set_current_function (tree fndecl) 24510{ 24511 if (TARGET_DEBUG_TARGET) 24512 { 24513 fprintf (stderr, "\n==================== rs6000_set_current_function"); 24514 24515 if (fndecl) 24516 fprintf (stderr, ", fndecl %s (%p)", 24517 (DECL_NAME (fndecl) 24518 ? IDENTIFIER_POINTER (DECL_NAME (fndecl)) 24519 : "<unknown>"), (void *)fndecl); 24520 24521 if (rs6000_previous_fndecl) 24522 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl); 24523 24524 fprintf (stderr, "\n"); 24525 } 24526 24527 /* Only change the context if the function changes. This hook is called 24528 several times in the course of compiling a function, and we don't want to 24529 slow things down too much or call target_reinit when it isn't safe. 
*/ 24530 if (fndecl == rs6000_previous_fndecl) 24531 return; 24532 24533 tree old_tree; 24534 if (rs6000_previous_fndecl == NULL_TREE) 24535 old_tree = target_option_current_node; 24536 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)) 24537 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl); 24538 else 24539 old_tree = target_option_default_node; 24540 24541 tree new_tree; 24542 if (fndecl == NULL_TREE) 24543 { 24544 if (old_tree != target_option_current_node) 24545 new_tree = target_option_current_node; 24546 else 24547 new_tree = NULL_TREE; 24548 } 24549 else 24550 { 24551 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); 24552 if (new_tree == NULL_TREE) 24553 new_tree = target_option_default_node; 24554 } 24555 24556 if (TARGET_DEBUG_TARGET) 24557 { 24558 if (new_tree) 24559 { 24560 fprintf (stderr, "\nnew fndecl target specific options:\n"); 24561 debug_tree (new_tree); 24562 } 24563 24564 if (old_tree) 24565 { 24566 fprintf (stderr, "\nold fndecl target specific options:\n"); 24567 debug_tree (old_tree); 24568 } 24569 24570 if (old_tree != NULL_TREE || new_tree != NULL_TREE) 24571 fprintf (stderr, "--------------------\n"); 24572 } 24573 24574 if (new_tree && old_tree != new_tree) 24575 rs6000_activate_target_options (new_tree); 24576 24577 if (fndecl) 24578 rs6000_previous_fndecl = fndecl; 24579} 24580 24581 24582/* Save the current options */ 24583 24584static void 24585rs6000_function_specific_save (struct cl_target_option *ptr, 24586 struct gcc_options *opts, 24587 struct gcc_options */* opts_set */) 24588{ 24589 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; 24590 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; 24591} 24592 24593/* Restore the current options */ 24594 24595static void 24596rs6000_function_specific_restore (struct gcc_options *opts, 24597 struct gcc_options */* opts_set */, 24598 struct cl_target_option *ptr) 24599 24600{ 24601 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; 24602 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; 24603 (void) rs6000_option_override_internal (false); 24604} 24605 24606/* Print the current options */ 24607 24608static void 24609rs6000_function_specific_print (FILE *file, int indent, 24610 struct cl_target_option *ptr) 24611{ 24612 rs6000_print_isa_options (file, indent, "Isa options set", 24613 ptr->x_rs6000_isa_flags); 24614 24615 rs6000_print_isa_options (file, indent, "Isa options explicit", 24616 ptr->x_rs6000_isa_flags_explicit); 24617} 24618 24619/* Helper function to print the current isa or misc options on a line. */ 24620 24621static void 24622rs6000_print_options_internal (FILE *file, 24623 int indent, 24624 const char *string, 24625 HOST_WIDE_INT flags, 24626 const char *prefix, 24627 const struct rs6000_opt_mask *opts, 24628 size_t num_elements) 24629{ 24630 size_t i; 24631 size_t start_column = 0; 24632 size_t cur_column; 24633 size_t max_column = 120; 24634 size_t prefix_len = strlen (prefix); 24635 size_t comma_len = 0; 24636 const char *comma = ""; 24637 24638 if (indent) 24639 start_column += fprintf (file, "%*s", indent, ""); 24640 24641 if (!flags) 24642 { 24643 fprintf (stderr, DEBUG_FMT_S, string, "<none>"); 24644 return; 24645 } 24646 24647 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags); 24648 24649 /* Print the various mask options. 
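   Each entry is printed as PREFIX, then "no-" when the bit is clear (or
   set, for inverted masks), then the option name, for example
   "-mvsx, -mno-altivec"; output past max_column is continued on a new line
   with a trailing backslash.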
*/ 24650 cur_column = start_column; 24651 for (i = 0; i < num_elements; i++) 24652 { 24653 bool invert = opts[i].invert; 24654 const char *name = opts[i].name; 24655 const char *no_str = ""; 24656 HOST_WIDE_INT mask = opts[i].mask; 24657 size_t len = comma_len + prefix_len + strlen (name); 24658 24659 if (!invert) 24660 { 24661 if ((flags & mask) == 0) 24662 { 24663 no_str = "no-"; 24664 len += strlen ("no-"); 24665 } 24666 24667 flags &= ~mask; 24668 } 24669 24670 else 24671 { 24672 if ((flags & mask) != 0) 24673 { 24674 no_str = "no-"; 24675 len += strlen ("no-"); 24676 } 24677 24678 flags |= mask; 24679 } 24680 24681 cur_column += len; 24682 if (cur_column > max_column) 24683 { 24684 fprintf (stderr, ", \\\n%*s", (int)start_column, ""); 24685 cur_column = start_column + len; 24686 comma = ""; 24687 } 24688 24689 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name); 24690 comma = ", "; 24691 comma_len = strlen (", "); 24692 } 24693 24694 fputs ("\n", file); 24695} 24696 24697/* Helper function to print the current isa options on a line. */ 24698 24699static void 24700rs6000_print_isa_options (FILE *file, int indent, const char *string, 24701 HOST_WIDE_INT flags) 24702{ 24703 rs6000_print_options_internal (file, indent, string, flags, "-m", 24704 &rs6000_opt_masks[0], 24705 ARRAY_SIZE (rs6000_opt_masks)); 24706} 24707 24708static void 24709rs6000_print_builtin_options (FILE *file, int indent, const char *string, 24710 HOST_WIDE_INT flags) 24711{ 24712 rs6000_print_options_internal (file, indent, string, flags, "", 24713 &rs6000_builtin_mask_names[0], 24714 ARRAY_SIZE (rs6000_builtin_mask_names)); 24715} 24716 24717/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06, 24718 2.07, and 3.0 options that relate to the vector unit (-mdirect-move, 24719 -mupper-regs-df, etc.). 24720 24721 If the user used -mno-power8-vector, we need to turn off all of the implicit 24722 ISA 2.07 and 3.0 options that relate to the vector unit. 24723 24724 If the user used -mno-power9-vector, we need to turn off all of the implicit 24725 ISA 3.0 options that relate to the vector unit. 24726 24727 This function does not handle explicit options such as the user specifying 24728 -mdirect-move. These are handled in rs6000_option_override_internal, and 24729 the appropriate error is given if needed. 24730 24731 We return a mask of all of the implicit options that should not be enabled 24732 by default. */ 24733 24734static HOST_WIDE_INT 24735rs6000_disable_incompatible_switches (void) 24736{ 24737 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit; 24738 size_t i, j; 24739 24740 static const struct { 24741 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */ 24742 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */ 24743 const char *const name; /* name of the switch. 
*/ 24744 } flags[] = { 24745 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" }, 24746 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" }, 24747 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" }, 24748 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" }, 24749 }; 24750 24751 for (i = 0; i < ARRAY_SIZE (flags); i++) 24752 { 24753 HOST_WIDE_INT no_flag = flags[i].no_flag; 24754 24755 if ((rs6000_isa_flags & no_flag) == 0 24756 && (rs6000_isa_flags_explicit & no_flag) != 0) 24757 { 24758 HOST_WIDE_INT dep_flags = flags[i].dep_flags; 24759 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit 24760 & rs6000_isa_flags 24761 & dep_flags); 24762 24763 if (set_flags) 24764 { 24765 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++) 24766 if ((set_flags & rs6000_opt_masks[j].mask) != 0) 24767 { 24768 set_flags &= ~rs6000_opt_masks[j].mask; 24769 error ("%<-mno-%s%> turns off %<-m%s%>", 24770 flags[i].name, 24771 rs6000_opt_masks[j].name); 24772 } 24773 24774 gcc_assert (!set_flags); 24775 } 24776 24777 rs6000_isa_flags &= ~dep_flags; 24778 ignore_masks |= no_flag | dep_flags; 24779 } 24780 } 24781 24782 return ignore_masks; 24783} 24784 24785 24786/* Helper function for printing the function name when debugging. */ 24787 24788static const char * 24789get_decl_name (tree fn) 24790{ 24791 tree name; 24792 24793 if (!fn) 24794 return "<null>"; 24795 24796 name = DECL_NAME (fn); 24797 if (!name) 24798 return "<no-name>"; 24799 24800 return IDENTIFIER_POINTER (name); 24801} 24802 24803/* Return the clone id of the target we are compiling code for in a target 24804 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives 24805 the priority list for the target clones (ordered from lowest to 24806 highest). */ 24807 24808static int 24809rs6000_clone_priority (tree fndecl) 24810{ 24811 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); 24812 HOST_WIDE_INT isa_masks; 24813 int ret = CLONE_DEFAULT; 24814 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl)); 24815 const char *attrs_str = NULL; 24816 24817 attrs = TREE_VALUE (TREE_VALUE (attrs)); 24818 attrs_str = TREE_STRING_POINTER (attrs); 24819 24820 /* Return priority zero for default function. Return the ISA needed for the 24821 function if it is not the default. */ 24822 if (strcmp (attrs_str, "default") != 0) 24823 { 24824 if (fn_opts == NULL_TREE) 24825 fn_opts = target_option_default_node; 24826 24827 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts)) 24828 isa_masks = rs6000_isa_flags; 24829 else 24830 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags; 24831 24832 for (ret = CLONE_MAX - 1; ret != 0; ret--) 24833 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0) 24834 break; 24835 } 24836 24837 if (TARGET_DEBUG_TARGET) 24838 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n", 24839 get_decl_name (fndecl), ret); 24840 24841 return ret; 24842} 24843 24844/* This compares the priority of target features in function DECL1 and DECL2. 24845 It returns positive value if DECL1 is higher priority, negative value if 24846 DECL2 is higher priority and 0 if they are the same. Note, priorities are 24847 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). 
*/ 24848 24849static int 24850rs6000_compare_version_priority (tree decl1, tree decl2) 24851{ 24852 int priority1 = rs6000_clone_priority (decl1); 24853 int priority2 = rs6000_clone_priority (decl2); 24854 int ret = priority1 - priority2; 24855 24856 if (TARGET_DEBUG_TARGET) 24857 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n", 24858 get_decl_name (decl1), get_decl_name (decl2), ret); 24859 24860 return ret; 24861} 24862 24863/* Make a dispatcher declaration for the multi-versioned function DECL. 24864 Calls to DECL function will be replaced with calls to the dispatcher 24865 by the front-end. Returns the decl of the dispatcher function. */ 24866 24867static tree 24868rs6000_get_function_versions_dispatcher (void *decl) 24869{ 24870 tree fn = (tree) decl; 24871 struct cgraph_node *node = NULL; 24872 struct cgraph_node *default_node = NULL; 24873 struct cgraph_function_version_info *node_v = NULL; 24874 struct cgraph_function_version_info *first_v = NULL; 24875 24876 tree dispatch_decl = NULL; 24877 24878 struct cgraph_function_version_info *default_version_info = NULL; 24879 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); 24880 24881 if (TARGET_DEBUG_TARGET) 24882 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n", 24883 get_decl_name (fn)); 24884 24885 node = cgraph_node::get (fn); 24886 gcc_assert (node != NULL); 24887 24888 node_v = node->function_version (); 24889 gcc_assert (node_v != NULL); 24890 24891 if (node_v->dispatcher_resolver != NULL) 24892 return node_v->dispatcher_resolver; 24893 24894 /* Find the default version and make it the first node. */ 24895 first_v = node_v; 24896 /* Go to the beginning of the chain. */ 24897 while (first_v->prev != NULL) 24898 first_v = first_v->prev; 24899 24900 default_version_info = first_v; 24901 while (default_version_info != NULL) 24902 { 24903 const tree decl2 = default_version_info->this_node->decl; 24904 if (is_function_default_version (decl2)) 24905 break; 24906 default_version_info = default_version_info->next; 24907 } 24908 24909 /* If there is no default node, just return NULL. */ 24910 if (default_version_info == NULL) 24911 return NULL; 24912 24913 /* Make default info the first node. */ 24914 if (first_v != default_version_info) 24915 { 24916 default_version_info->prev->next = default_version_info->next; 24917 if (default_version_info->next) 24918 default_version_info->next->prev = default_version_info->prev; 24919 first_v->prev = default_version_info; 24920 default_version_info->next = first_v; 24921 default_version_info->prev = NULL; 24922 } 24923 24924 default_node = default_version_info->this_node; 24925 24926#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB 24927 error_at (DECL_SOURCE_LOCATION (default_node->decl), 24928 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that " 24929 "exports hardware capability bits"); 24930#else 24931 24932 if (targetm.has_ifunc_p ()) 24933 { 24934 struct cgraph_function_version_info *it_v = NULL; 24935 struct cgraph_node *dispatcher_node = NULL; 24936 struct cgraph_function_version_info *dispatcher_version_info = NULL; 24937 24938 /* Right now, the dispatching is done via ifunc. 
*/ 24939 dispatch_decl = make_dispatcher_decl (default_node->decl); 24940 24941 dispatcher_node = cgraph_node::get_create (dispatch_decl); 24942 gcc_assert (dispatcher_node != NULL); 24943 dispatcher_node->dispatcher_function = 1; 24944 dispatcher_version_info 24945 = dispatcher_node->insert_new_function_version (); 24946 dispatcher_version_info->next = default_version_info; 24947 dispatcher_node->definition = 1; 24948 24949 /* Set the dispatcher for all the versions. */ 24950 it_v = default_version_info; 24951 while (it_v != NULL) 24952 { 24953 it_v->dispatcher_resolver = dispatch_decl; 24954 it_v = it_v->next; 24955 } 24956 } 24957 else 24958 { 24959 error_at (DECL_SOURCE_LOCATION (default_node->decl), 24960 "multiversioning needs %<ifunc%> which is not supported " 24961 "on this target"); 24962 } 24963#endif 24964 24965 return dispatch_decl; 24966} 24967 24968/* Make the resolver function decl to dispatch the versions of a multi- 24969 versioned function, DEFAULT_DECL. Create an empty basic block in the 24970 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver 24971 function. */ 24972 24973static tree 24974make_resolver_func (const tree default_decl, 24975 const tree dispatch_decl, 24976 basic_block *empty_bb) 24977{ 24978 /* Make the resolver function static. The resolver function returns 24979 void *. */ 24980 tree decl_name = clone_function_name (default_decl, "resolver"); 24981 const char *resolver_name = IDENTIFIER_POINTER (decl_name); 24982 tree type = build_function_type_list (ptr_type_node, NULL_TREE); 24983 tree decl = build_fn_decl (resolver_name, type); 24984 SET_DECL_ASSEMBLER_NAME (decl, decl_name); 24985 24986 DECL_NAME (decl) = decl_name; 24987 TREE_USED (decl) = 1; 24988 DECL_ARTIFICIAL (decl) = 1; 24989 DECL_IGNORED_P (decl) = 0; 24990 TREE_PUBLIC (decl) = 0; 24991 DECL_UNINLINABLE (decl) = 1; 24992 24993 /* Resolver is not external, body is generated. */ 24994 DECL_EXTERNAL (decl) = 0; 24995 DECL_EXTERNAL (dispatch_decl) = 0; 24996 24997 DECL_CONTEXT (decl) = NULL_TREE; 24998 DECL_INITIAL (decl) = make_node (BLOCK); 24999 DECL_STATIC_CONSTRUCTOR (decl) = 0; 25000 25001 if (DECL_COMDAT_GROUP (default_decl) 25002 || TREE_PUBLIC (default_decl)) 25003 { 25004 /* In this case, each translation unit with a call to this 25005 versioned function will put out a resolver. Ensure it 25006 is comdat to keep just one copy. */ 25007 DECL_COMDAT (decl) = 1; 25008 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); 25009 } 25010 else 25011 TREE_PUBLIC (dispatch_decl) = 0; 25012 25013 /* Build result decl and add to function_decl. */ 25014 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); 25015 DECL_CONTEXT (t) = decl; 25016 DECL_ARTIFICIAL (t) = 1; 25017 DECL_IGNORED_P (t) = 1; 25018 DECL_RESULT (decl) = t; 25019 25020 gimplify_function_tree (decl); 25021 push_cfun (DECL_STRUCT_FUNCTION (decl)); 25022 *empty_bb = init_lowered_empty_function (decl, false, 25023 profile_count::uninitialized ()); 25024 25025 cgraph_node::add_new_function (decl, true); 25026 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); 25027 25028 pop_cfun (); 25029 25030 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. 
*/ 25031 DECL_ATTRIBUTES (dispatch_decl) 25032 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); 25033 25034 cgraph_node::create_same_body_alias (dispatch_decl, decl); 25035 25036 return decl; 25037} 25038 25039/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to 25040 return a pointer to VERSION_DECL if we are running on a machine that 25041 supports the index CLONE_ISA hardware architecture bits. This function will 25042 be called during version dispatch to decide which function version to 25043 execute. It returns the basic block at the end, to which more conditions 25044 can be added. */ 25045 25046static basic_block 25047add_condition_to_bb (tree function_decl, tree version_decl, 25048 int clone_isa, basic_block new_bb) 25049{ 25050 push_cfun (DECL_STRUCT_FUNCTION (function_decl)); 25051 25052 gcc_assert (new_bb != NULL); 25053 gimple_seq gseq = bb_seq (new_bb); 25054 25055 25056 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node, 25057 build_fold_addr_expr (version_decl)); 25058 tree result_var = create_tmp_var (ptr_type_node); 25059 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr); 25060 gimple *return_stmt = gimple_build_return (result_var); 25061 25062 if (clone_isa == CLONE_DEFAULT) 25063 { 25064 gimple_seq_add_stmt (&gseq, convert_stmt); 25065 gimple_seq_add_stmt (&gseq, return_stmt); 25066 set_bb_seq (new_bb, gseq); 25067 gimple_set_bb (convert_stmt, new_bb); 25068 gimple_set_bb (return_stmt, new_bb); 25069 pop_cfun (); 25070 return new_bb; 25071 } 25072 25073 tree bool_zero = build_int_cst (bool_int_type_node, 0); 25074 tree cond_var = create_tmp_var (bool_int_type_node); 25075 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS]; 25076 const char *arg_str = rs6000_clone_map[clone_isa].name; 25077 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); 25078 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); 25079 gimple_call_set_lhs (call_cond_stmt, cond_var); 25080 25081 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); 25082 gimple_set_bb (call_cond_stmt, new_bb); 25083 gimple_seq_add_stmt (&gseq, call_cond_stmt); 25084 25085 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero, 25086 NULL_TREE, NULL_TREE); 25087 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); 25088 gimple_set_bb (if_else_stmt, new_bb); 25089 gimple_seq_add_stmt (&gseq, if_else_stmt); 25090 25091 gimple_seq_add_stmt (&gseq, convert_stmt); 25092 gimple_seq_add_stmt (&gseq, return_stmt); 25093 set_bb_seq (new_bb, gseq); 25094 25095 basic_block bb1 = new_bb; 25096 edge e12 = split_block (bb1, if_else_stmt); 25097 basic_block bb2 = e12->dest; 25098 e12->flags &= ~EDGE_FALLTHRU; 25099 e12->flags |= EDGE_TRUE_VALUE; 25100 25101 edge e23 = split_block (bb2, return_stmt); 25102 gimple_set_bb (convert_stmt, bb2); 25103 gimple_set_bb (return_stmt, bb2); 25104 25105 basic_block bb3 = e23->dest; 25106 make_edge (bb1, bb3, EDGE_FALSE_VALUE); 25107 25108 remove_edge (e23); 25109 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); 25110 25111 pop_cfun (); 25112 return bb3; 25113} 25114 25115/* This function generates the dispatch function for multi-versioned functions. 25116 DISPATCH_DECL is the function which will contain the dispatch logic. 25117 FNDECLS are the function choices for dispatch, and is a tree chain. 25118 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch 25119 code is generated. 
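   The clones are considered from the highest priority down to the default;
   each non-default clone gets a runtime cpu-supports test added via
   add_condition_to_bb, so the resolver returns the first version whose ISA
   requirement is satisfied.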
*/ 25120 25121static int 25122dispatch_function_versions (tree dispatch_decl, 25123 void *fndecls_p, 25124 basic_block *empty_bb) 25125{ 25126 int ix; 25127 tree ele; 25128 vec<tree> *fndecls; 25129 tree clones[CLONE_MAX]; 25130 25131 if (TARGET_DEBUG_TARGET) 25132 fputs ("dispatch_function_versions, top\n", stderr); 25133 25134 gcc_assert (dispatch_decl != NULL 25135 && fndecls_p != NULL 25136 && empty_bb != NULL); 25137 25138 /* fndecls_p is actually a vector. */ 25139 fndecls = static_cast<vec<tree> *> (fndecls_p); 25140 25141 /* At least one more version other than the default. */ 25142 gcc_assert (fndecls->length () >= 2); 25143 25144 /* The first version in the vector is the default decl. */ 25145 memset ((void *) clones, '\0', sizeof (clones)); 25146 clones[CLONE_DEFAULT] = (*fndecls)[0]; 25147 25148 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP 25149 on the PowerPC (on the x86_64, it is not a NOP). The builtin function 25150 __builtin_cpu_support ensures that the TOC fields are setup by requiring a 25151 recent glibc. If we ever need to call __builtin_cpu_init, we would need 25152 to insert the code here to do the call. */ 25153 25154 for (ix = 1; fndecls->iterate (ix, &ele); ++ix) 25155 { 25156 int priority = rs6000_clone_priority (ele); 25157 if (!clones[priority]) 25158 clones[priority] = ele; 25159 } 25160 25161 for (ix = CLONE_MAX - 1; ix >= 0; ix--) 25162 if (clones[ix]) 25163 { 25164 if (TARGET_DEBUG_TARGET) 25165 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n", 25166 ix, get_decl_name (clones[ix])); 25167 25168 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix, 25169 *empty_bb); 25170 } 25171 25172 return 0; 25173} 25174 25175/* Generate the dispatching code body to dispatch multi-versioned function 25176 DECL. The target hook is called to process the "target" attributes and 25177 provide the code to dispatch the right function at run-time. NODE points 25178 to the dispatcher decl whose body will be created. */ 25179 25180static tree 25181rs6000_generate_version_dispatcher_body (void *node_p) 25182{ 25183 tree resolver; 25184 basic_block empty_bb; 25185 struct cgraph_node *node = (cgraph_node *) node_p; 25186 struct cgraph_function_version_info *ninfo = node->function_version (); 25187 25188 if (ninfo->dispatcher_resolver) 25189 return ninfo->dispatcher_resolver; 25190 25191 /* node is going to be an alias, so remove the finalized bit. */ 25192 node->definition = false; 25193 25194 /* The first version in the chain corresponds to the default version. */ 25195 ninfo->dispatcher_resolver = resolver 25196 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb); 25197 25198 if (TARGET_DEBUG_TARGET) 25199 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n", 25200 get_decl_name (resolver)); 25201 25202 push_cfun (DECL_STRUCT_FUNCTION (resolver)); 25203 auto_vec<tree, 2> fn_ver_vec; 25204 25205 for (struct cgraph_function_version_info *vinfo = ninfo->next; 25206 vinfo; 25207 vinfo = vinfo->next) 25208 { 25209 struct cgraph_node *version = vinfo->this_node; 25210 /* Check for virtual functions here again, as by this time it should 25211 have been determined if this function needs a vtable index or 25212 not. This happens for methods in derived classes that override 25213 virtual methods in base classes but are not explicitly marked as 25214 virtual. 
*/ 25215 if (DECL_VINDEX (version->decl)) 25216 sorry ("Virtual function multiversioning not supported"); 25217 25218 fn_ver_vec.safe_push (version->decl); 25219 } 25220 25221 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb); 25222 cgraph_edge::rebuild_edges (); 25223 pop_cfun (); 25224 return resolver; 25225} 25226 25227/* Hook to decide if we need to scan function gimple statements to 25228 collect target specific information for inlining, and update the 25229 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able 25230 to predict which ISA feature is used at this time. Return true 25231 if we need to scan, otherwise return false. */ 25232 25233static bool 25234rs6000_need_ipa_fn_target_info (const_tree decl, 25235 unsigned int &info ATTRIBUTE_UNUSED) 25236{ 25237 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl); 25238 if (!target) 25239 target = target_option_default_node; 25240 struct cl_target_option *opts = TREE_TARGET_OPTION (target); 25241 25242 /* See PR102059, we only handle HTM for now, so will only do 25243 the consequent scannings when HTM feature enabled. */ 25244 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM) 25245 return true; 25246 25247 return false; 25248} 25249 25250/* Hook to update target specific information INFO for inlining by 25251 checking the given STMT. Return false if we don't need to scan 25252 any more, otherwise return true. */ 25253 25254static bool 25255rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) 25256{ 25257 /* Assume inline asm can use any instruction features. */ 25258 if (gimple_code (stmt) == GIMPLE_ASM) 25259 { 25260 /* Should set any bits we concerned, for now OPTION_MASK_HTM is 25261 the only bit we care about. */ 25262 info |= RS6000_FN_TARGET_INFO_HTM; 25263 return false; 25264 } 25265 else if (gimple_code (stmt) == GIMPLE_CALL) 25266 { 25267 tree fndecl = gimple_call_fndecl (stmt); 25268 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)) 25269 { 25270 enum rs6000_gen_builtins fcode 25271 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); 25272 /* HTM bifs definitely exploit HTM insns. */ 25273 if (bif_is_htm (rs6000_builtin_info[fcode])) 25274 { 25275 info |= RS6000_FN_TARGET_INFO_HTM; 25276 return false; 25277 } 25278 } 25279 } 25280 25281 return true; 25282} 25283 25284/* Hook to determine if one function can safely inline another. */ 25285 25286static bool 25287rs6000_can_inline_p (tree caller, tree callee) 25288{ 25289 bool ret = false; 25290 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); 25291 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); 25292 25293 /* If the callee has no option attributes, then it is ok to inline. */ 25294 if (!callee_tree) 25295 ret = true; 25296 25297 else 25298 { 25299 HOST_WIDE_INT caller_isa; 25300 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); 25301 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags; 25302 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit; 25303 25304 /* If the caller has option attributes, then use them. 25305 Otherwise, use the command line options. 
*/ 25306 if (caller_tree) 25307 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags; 25308 else 25309 caller_isa = rs6000_isa_flags; 25310 25311 cgraph_node *callee_node = cgraph_node::get (callee); 25312 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL) 25313 { 25314 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info; 25315 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0) 25316 { 25317 callee_isa &= ~OPTION_MASK_HTM; 25318 explicit_isa &= ~OPTION_MASK_HTM; 25319 } 25320 } 25321 25322 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining 25323 purposes. */ 25324 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); 25325 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION); 25326 25327 /* The callee's options must be a subset of the caller's options, i.e. 25328 a vsx function may inline an altivec function, but a no-vsx function 25329 must not inline a vsx function. However, for those options that the 25330 callee has explicitly enabled or disabled, then we must enforce that 25331 the callee's and caller's options match exactly; see PR70010. */ 25332 if (((caller_isa & callee_isa) == callee_isa) 25333 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa)) 25334 ret = true; 25335 } 25336 25337 if (TARGET_DEBUG_TARGET) 25338 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n", 25339 get_decl_name (caller), get_decl_name (callee), 25340 (ret ? "can" : "cannot")); 25341 25342 return ret; 25343} 25344 25345/* Allocate a stack temp and fixup the address so it meets the particular 25346 memory requirements (either offetable or REG+REG addressing). */ 25347 25348rtx 25349rs6000_allocate_stack_temp (machine_mode mode, 25350 bool offsettable_p, 25351 bool reg_reg_p) 25352{ 25353 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 25354 rtx addr = XEXP (stack, 0); 25355 int strict_p = reload_completed; 25356 25357 if (!legitimate_indirect_address_p (addr, strict_p)) 25358 { 25359 if (offsettable_p 25360 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true)) 25361 stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); 25362 25363 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) 25364 stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); 25365 } 25366 25367 return stack; 25368} 25369 25370/* Given a memory reference, if it is not a reg or reg+reg addressing, 25371 convert to such a form to deal with memory reference instructions 25372 like STFIWX and LDBRX that only take reg+reg addressing. */ 25373 25374rtx 25375rs6000_force_indexed_or_indirect_mem (rtx x) 25376{ 25377 machine_mode mode = GET_MODE (x); 25378 25379 gcc_assert (MEM_P (x)); 25380 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode)) 25381 { 25382 rtx addr = XEXP (x, 0); 25383 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 25384 { 25385 rtx reg = XEXP (addr, 0); 25386 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); 25387 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? 
-size : size); 25388 gcc_assert (REG_P (reg)); 25389 emit_insn (gen_add3_insn (reg, reg, size_rtx)); 25390 addr = reg; 25391 } 25392 else if (GET_CODE (addr) == PRE_MODIFY) 25393 { 25394 rtx reg = XEXP (addr, 0); 25395 rtx expr = XEXP (addr, 1); 25396 gcc_assert (REG_P (reg)); 25397 gcc_assert (GET_CODE (expr) == PLUS); 25398 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); 25399 addr = reg; 25400 } 25401 25402 if (GET_CODE (addr) == PLUS) 25403 { 25404 rtx op0 = XEXP (addr, 0); 25405 rtx op1 = XEXP (addr, 1); 25406 op0 = force_reg (Pmode, op0); 25407 op1 = force_reg (Pmode, op1); 25408 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1)); 25409 } 25410 else 25411 x = replace_equiv_address (x, force_reg (Pmode, addr)); 25412 } 25413 25414 return x; 25415} 25416 25417/* Implement TARGET_LEGITIMATE_CONSTANT_P. 25418 25419 On the RS/6000, all integer constants are acceptable, most won't be valid 25420 for particular insns, though. Only easy FP constants are acceptable. */ 25421 25422static bool 25423rs6000_legitimate_constant_p (machine_mode mode, rtx x) 25424{ 25425 if (TARGET_ELF && tls_referenced_p (x)) 25426 return false; 25427 25428 if (CONST_DOUBLE_P (x)) 25429 return easy_fp_constant (x, mode); 25430 25431 if (GET_CODE (x) == CONST_VECTOR) 25432 return easy_vector_constant (x, mode); 25433 25434 return true; 25435} 25436 25437#if TARGET_AIX_OS 25438/* Implement TARGET_PRECOMPUTE_TLS_P. 25439 25440 On the AIX, TLS symbols are in the TOC, which is maintained in the 25441 constant pool. AIX TOC TLS symbols need to be pre-computed, but 25442 must be considered legitimate constants. */ 25443 25444static bool 25445rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 25446{ 25447 return tls_referenced_p (x); 25448} 25449#endif 25450 25451 25452/* Return TRUE iff the sequence ending in LAST sets the static chain. */ 25453 25454static bool 25455chain_already_loaded (rtx_insn *last) 25456{ 25457 for (; last != NULL; last = PREV_INSN (last)) 25458 { 25459 if (NONJUMP_INSN_P (last)) 25460 { 25461 rtx patt = PATTERN (last); 25462 25463 if (GET_CODE (patt) == SET) 25464 { 25465 rtx lhs = XEXP (patt, 0); 25466 25467 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM) 25468 return true; 25469 } 25470 } 25471 } 25472 return false; 25473} 25474 25475/* Expand code to perform a call under the AIX or ELFv2 ABI. */ 25476 25477void 25478rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 25479{ 25480 rtx func = func_desc; 25481 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); 25482 rtx toc_load = NULL_RTX; 25483 rtx toc_restore = NULL_RTX; 25484 rtx func_addr; 25485 rtx abi_reg = NULL_RTX; 25486 rtx call[5]; 25487 int n_call; 25488 rtx insn; 25489 bool is_pltseq_longcall; 25490 25491 if (global_tlsarg) 25492 tlsarg = global_tlsarg; 25493 25494 /* Handle longcall attributes. */ 25495 is_pltseq_longcall = false; 25496 if ((INTVAL (cookie) & CALL_LONG) != 0 25497 && GET_CODE (func_desc) == SYMBOL_REF) 25498 { 25499 func = rs6000_longcall_ref (func_desc, tlsarg); 25500 if (TARGET_PLTSEQ) 25501 is_pltseq_longcall = true; 25502 } 25503 25504 /* Handle indirect calls. */ 25505 if (!SYMBOL_REF_P (func) 25506 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func))) 25507 { 25508 if (!rs6000_pcrel_p ()) 25509 { 25510 /* Save the TOC into its reserved slot before the call, 25511 and prepare to restore it after the call. 
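   The reserved slot lives at offset RS6000_TOC_SAVE_SLOT from the stack
   pointer.  The save itself is either hoisted into the prologue (when
   TARGET_SAVE_TOC_INDIRECT is in effect and the function does not call
   alloca) or emitted here before each indirect call.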
*/ 25512 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT); 25513 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode, 25514 gen_rtvec (1, stack_toc_offset), 25515 UNSPEC_TOCSLOT); 25516 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec); 25517 25518 /* Can we optimize saving the TOC in the prologue or 25519 do we need to do it at every call? */ 25520 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) 25521 cfun->machine->save_toc_in_prologue = true; 25522 else 25523 { 25524 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 25525 rtx stack_toc_mem = gen_frame_mem (Pmode, 25526 gen_rtx_PLUS (Pmode, stack_ptr, 25527 stack_toc_offset)); 25528 MEM_VOLATILE_P (stack_toc_mem) = 1; 25529 if (is_pltseq_longcall) 25530 { 25531 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg); 25532 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25533 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg)); 25534 } 25535 else 25536 emit_move_insn (stack_toc_mem, toc_reg); 25537 } 25538 } 25539 25540 if (DEFAULT_ABI == ABI_ELFv2) 25541 { 25542 /* A function pointer in the ELFv2 ABI is just a plain address, but 25543 the ABI requires it to be loaded into r12 before the call. */ 25544 func_addr = gen_rtx_REG (Pmode, 12); 25545 emit_move_insn (func_addr, func); 25546 abi_reg = func_addr; 25547 /* Indirect calls via CTR are strongly preferred over indirect 25548 calls via LR, so move the address there. Needed to mark 25549 this insn for linker plt sequence editing too. */ 25550 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25551 if (is_pltseq_longcall) 25552 { 25553 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg); 25554 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25555 emit_insn (gen_rtx_SET (func_addr, mark_func)); 25556 v = gen_rtvec (2, func_addr, func_desc); 25557 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25558 } 25559 else 25560 emit_move_insn (func_addr, abi_reg); 25561 } 25562 else 25563 { 25564 /* A function pointer under AIX is a pointer to a data area whose 25565 first word contains the actual address of the function, whose 25566 second word contains a pointer to its TOC, and whose third word 25567 contains a value to place in the static chain register (r11). 25568 Note that if we load the static chain, our "trampoline" need 25569 not have any executable code. */ 25570 25571 /* Load up address of the actual function. */ 25572 func = force_reg (Pmode, func); 25573 func_addr = gen_reg_rtx (Pmode); 25574 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func)); 25575 25576 /* Indirect calls via CTR are strongly preferred over indirect 25577 calls via LR, so move the address there. */ 25578 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO); 25579 emit_move_insn (ctr_reg, func_addr); 25580 func_addr = ctr_reg; 25581 25582 /* Prepare to load the TOC of the called function. Note that the 25583 TOC load must happen immediately before the actual call so 25584 that unwinding the TOC registers works correctly. See the 25585 comment in frob_update_context. */ 25586 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode)); 25587 rtx func_toc_mem = gen_rtx_MEM (Pmode, 25588 gen_rtx_PLUS (Pmode, func, 25589 func_toc_offset)); 25590 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem); 25591 25592 /* If we have a static chain, load it up. But, if the call was 25593 originally direct, the 3rd word has not been written since no 25594 trampoline has been built, so we ought not to load it, lest we 25595 override a static chain value. 
*/ 25596 if (!(GET_CODE (func_desc) == SYMBOL_REF 25597 && SYMBOL_REF_FUNCTION_P (func_desc)) 25598 && TARGET_POINTERS_TO_NESTED_FUNCTIONS 25599 && !chain_already_loaded (get_current_sequence ()->next->last)) 25600 { 25601 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); 25602 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode)); 25603 rtx func_sc_mem = gen_rtx_MEM (Pmode, 25604 gen_rtx_PLUS (Pmode, func, 25605 func_sc_offset)); 25606 emit_move_insn (sc_reg, func_sc_mem); 25607 abi_reg = sc_reg; 25608 } 25609 } 25610 } 25611 else 25612 { 25613 /* No TOC register needed for calls from PC-relative callers. */ 25614 if (!rs6000_pcrel_p ()) 25615 /* Direct calls use the TOC: for local calls, the callee will 25616 assume the TOC register is set; for non-local calls, the 25617 PLT stub needs the TOC register. */ 25618 abi_reg = toc_reg; 25619 func_addr = func; 25620 } 25621 25622 /* Create the call. */ 25623 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25624 if (value != NULL_RTX) 25625 call[0] = gen_rtx_SET (value, call[0]); 25626 call[1] = gen_rtx_USE (VOIDmode, cookie); 25627 n_call = 2; 25628 25629 if (toc_load) 25630 call[n_call++] = toc_load; 25631 if (toc_restore) 25632 call[n_call++] = toc_restore; 25633 25634 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO); 25635 25636 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call)); 25637 insn = emit_call_insn (insn); 25638 25639 /* Mention all registers defined by the ABI to hold information 25640 as uses in CALL_INSN_FUNCTION_USAGE. */ 25641 if (abi_reg) 25642 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 25643} 25644 25645/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ 25646 25647void 25648rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 25649{ 25650 rtx call[2]; 25651 rtx insn; 25652 rtx r12 = NULL_RTX; 25653 rtx func_addr = func_desc; 25654 25655 if (global_tlsarg) 25656 tlsarg = global_tlsarg; 25657 25658 /* Handle longcall attributes. */ 25659 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc)) 25660 { 25661 /* PCREL can do a sibling call to a longcall function 25662 because we don't need to restore the TOC register. */ 25663 gcc_assert (rs6000_pcrel_p ()); 25664 func_desc = rs6000_longcall_ref (func_desc, tlsarg); 25665 } 25666 else 25667 gcc_assert (INTVAL (cookie) == 0); 25668 25669 /* For ELFv2, r12 and CTR need to hold the function address 25670 for an indirect call. */ 25671 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2) 25672 { 25673 r12 = gen_rtx_REG (Pmode, 12); 25674 emit_move_insn (r12, func_desc); 25675 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25676 emit_move_insn (func_addr, r12); 25677 } 25678 25679 /* Create the call. */ 25680 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25681 if (value != NULL_RTX) 25682 call[0] = gen_rtx_SET (value, call[0]); 25683 25684 call[1] = simple_return_rtx; 25685 25686 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); 25687 insn = emit_call_insn (insn); 25688 25689 /* Note use of the TOC register. */ 25690 if (!rs6000_pcrel_p ()) 25691 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), 25692 gen_rtx_REG (Pmode, TOC_REGNUM)); 25693 25694 /* Note use of r12. */ 25695 if (r12) 25696 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12); 25697} 25698 25699/* Expand code to perform a call under the SYSV4 ABI. 
*/ 25700 25701void 25702rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 25703{ 25704 rtx func = func_desc; 25705 rtx func_addr; 25706 rtx call[4]; 25707 rtx insn; 25708 rtx abi_reg = NULL_RTX; 25709 int n; 25710 25711 if (global_tlsarg) 25712 tlsarg = global_tlsarg; 25713 25714 /* Handle longcall attributes. */ 25715 if ((INTVAL (cookie) & CALL_LONG) != 0 25716 && GET_CODE (func_desc) == SYMBOL_REF) 25717 { 25718 func = rs6000_longcall_ref (func_desc, tlsarg); 25719 /* If the longcall was implemented as an inline PLT call using 25720 PLT unspecs then func will be REG:r11. If not, func will be 25721 a pseudo reg. The inline PLT call sequence supports lazy 25722 linking (and longcalls to functions in dlopen'd libraries). 25723 The other style of longcalls don't. The lazy linking entry 25724 to the dynamic symbol resolver requires r11 be the function 25725 address (as it is for linker generated PLT stubs). Ensure 25726 r11 stays valid to the bctrl by marking r11 used by the call. */ 25727 if (TARGET_PLTSEQ) 25728 abi_reg = func; 25729 } 25730 25731 /* Handle indirect calls. */ 25732 if (GET_CODE (func) != SYMBOL_REF) 25733 { 25734 func = force_reg (Pmode, func); 25735 25736 /* Indirect calls via CTR are strongly preferred over indirect 25737 calls via LR, so move the address there. That can't be left 25738 to reload because we want to mark every instruction in an 25739 inline PLT call sequence with a reloc, enabling the linker to 25740 edit the sequence back to a direct call when that makes sense. */ 25741 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25742 if (abi_reg) 25743 { 25744 rtvec v = gen_rtvec (3, func, func_desc, tlsarg); 25745 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25746 emit_insn (gen_rtx_SET (func_addr, mark_func)); 25747 v = gen_rtvec (2, func_addr, func_desc); 25748 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25749 } 25750 else 25751 emit_move_insn (func_addr, func); 25752 } 25753 else 25754 func_addr = func; 25755 25756 /* Create the call. */ 25757 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25758 if (value != NULL_RTX) 25759 call[0] = gen_rtx_SET (value, call[0]); 25760 25761 call[1] = gen_rtx_USE (VOIDmode, cookie); 25762 n = 2; 25763 if (TARGET_SECURE_PLT 25764 && flag_pic 25765 && GET_CODE (func_addr) == SYMBOL_REF 25766 && !SYMBOL_REF_LOCAL_P (func_addr)) 25767 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx); 25768 25769 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO); 25770 25771 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call)); 25772 insn = emit_call_insn (insn); 25773 if (abi_reg) 25774 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 25775} 25776 25777/* Expand code to perform a sibling call under the SysV4 ABI. */ 25778 25779void 25780rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 25781{ 25782 rtx func = func_desc; 25783 rtx func_addr; 25784 rtx call[3]; 25785 rtx insn; 25786 rtx abi_reg = NULL_RTX; 25787 25788 if (global_tlsarg) 25789 tlsarg = global_tlsarg; 25790 25791 /* Handle longcall attributes. */ 25792 if ((INTVAL (cookie) & CALL_LONG) != 0 25793 && GET_CODE (func_desc) == SYMBOL_REF) 25794 { 25795 func = rs6000_longcall_ref (func_desc, tlsarg); 25796 /* If the longcall was implemented as an inline PLT call using 25797 PLT unspecs then func will be REG:r11. If not, func will be 25798 a pseudo reg. The inline PLT call sequence supports lazy 25799 linking (and longcalls to functions in dlopen'd libraries). 
25800 The other style of longcalls don't. The lazy linking entry 25801 to the dynamic symbol resolver requires r11 be the function 25802 address (as it is for linker generated PLT stubs). Ensure 25803 r11 stays valid to the bctr by marking r11 used by the call. */ 25804 if (TARGET_PLTSEQ) 25805 abi_reg = func; 25806 } 25807 25808 /* Handle indirect calls. */ 25809 if (GET_CODE (func) != SYMBOL_REF) 25810 { 25811 func = force_reg (Pmode, func); 25812 25813 /* Indirect sibcalls must go via CTR. That can't be left to 25814 reload because we want to mark every instruction in an inline 25815 PLT call sequence with a reloc, enabling the linker to edit 25816 the sequence back to a direct call when that makes sense. */ 25817 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25818 if (abi_reg) 25819 { 25820 rtvec v = gen_rtvec (3, func, func_desc, tlsarg); 25821 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25822 emit_insn (gen_rtx_SET (func_addr, mark_func)); 25823 v = gen_rtvec (2, func_addr, func_desc); 25824 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25825 } 25826 else 25827 emit_move_insn (func_addr, func); 25828 } 25829 else 25830 func_addr = func; 25831 25832 /* Create the call. */ 25833 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25834 if (value != NULL_RTX) 25835 call[0] = gen_rtx_SET (value, call[0]); 25836 25837 call[1] = gen_rtx_USE (VOIDmode, cookie); 25838 call[2] = simple_return_rtx; 25839 25840 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call)); 25841 insn = emit_call_insn (insn); 25842 if (abi_reg) 25843 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 25844} 25845 25846#if TARGET_MACHO 25847 25848/* Expand code to perform a call under the Darwin ABI. 25849 Modulo handling of mlongcall, this is much the same as sysv. 25850 if/when the longcall optimisation is removed, we could drop this 25851 code and use the sysv case (taking care to avoid the tls stuff). 25852 25853 We can use this for sibcalls too, if needed. */ 25854 25855void 25856rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg, 25857 rtx cookie, bool sibcall) 25858{ 25859 rtx func = func_desc; 25860 rtx func_addr; 25861 rtx call[3]; 25862 rtx insn; 25863 int cookie_val = INTVAL (cookie); 25864 bool make_island = false; 25865 25866 /* Handle longcall attributes, there are two cases for Darwin: 25867 1) Newer linkers are capable of synthesising any branch islands needed. 25868 2) We need a helper branch island synthesised by the compiler. 25869 The second case has mostly been retired and we don't use it for m64. 25870 In fact, it's is an optimisation, we could just indirect as sysv does.. 25871 ... however, backwards compatibility for now. 25872 If we're going to use this, then we need to keep the CALL_LONG bit set, 25873 so that we can pick up the special insn form later. */ 25874 if ((cookie_val & CALL_LONG) != 0 25875 && GET_CODE (func_desc) == SYMBOL_REF) 25876 { 25877 /* FIXME: the longcall opt should not hang off this flag, it is most 25878 likely incorrect for kernel-mode code-generation. */ 25879 if (darwin_symbol_stubs && TARGET_32BIT) 25880 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */ 25881 else 25882 { 25883 /* The linker is capable of doing this, but the user explicitly 25884 asked for -mlongcall, so we'll do the 'normal' version. */ 25885 func = rs6000_longcall_ref (func_desc, NULL_RTX); 25886 cookie_val &= ~CALL_LONG; /* Handled, zap it. */ 25887 } 25888 } 25889 25890 /* Handle indirect calls. 
*/ 25891 if (GET_CODE (func) != SYMBOL_REF) 25892 { 25893 func = force_reg (Pmode, func); 25894 25895 /* Indirect calls via CTR are strongly preferred over indirect 25896 calls via LR, and are required for indirect sibcalls, so move 25897 the address there. */ 25898 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25899 emit_move_insn (func_addr, func); 25900 } 25901 else 25902 func_addr = func; 25903 25904 /* Create the call. */ 25905 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25906 if (value != NULL_RTX) 25907 call[0] = gen_rtx_SET (value, call[0]); 25908 25909 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val)); 25910 25911 if (sibcall) 25912 call[2] = simple_return_rtx; 25913 else 25914 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO); 25915 25916 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call)); 25917 insn = emit_call_insn (insn); 25918 /* Now we have the debug info in the insn, we can set up the branch island 25919 if we're using one. */ 25920 if (make_island) 25921 { 25922 tree funname = get_identifier (XSTR (func_desc, 0)); 25923 25924 if (no_previous_def (funname)) 25925 { 25926 rtx label_rtx = gen_label_rtx (); 25927 char *label_buf, temp_buf[256]; 25928 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L", 25929 CODE_LABEL_NUMBER (label_rtx)); 25930 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf; 25931 tree labelname = get_identifier (label_buf); 25932 add_compiler_branch_island (labelname, funname, 25933 insn_line ((const rtx_insn*)insn)); 25934 } 25935 } 25936} 25937#endif 25938 25939void 25940rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED, 25941 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED) 25942{ 25943#if TARGET_MACHO 25944 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false); 25945#else 25946 gcc_unreachable(); 25947#endif 25948} 25949 25950 25951void 25952rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED, 25953 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED) 25954{ 25955#if TARGET_MACHO 25956 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true); 25957#else 25958 gcc_unreachable(); 25959#endif 25960} 25961 25962/* Return whether we should generate PC-relative code for FNDECL. */ 25963bool 25964rs6000_fndecl_pcrel_p (const_tree fndecl) 25965{ 25966 if (DEFAULT_ABI != ABI_ELFv2) 25967 return false; 25968 25969 struct cl_target_option *opts = target_opts_for_fn (fndecl); 25970 25971 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0 25972 && TARGET_CMODEL == CMODEL_MEDIUM); 25973} 25974 25975/* Return whether we should generate PC-relative code for *FN. */ 25976bool 25977rs6000_function_pcrel_p (struct function *fn) 25978{ 25979 if (DEFAULT_ABI != ABI_ELFv2) 25980 return false; 25981 25982 /* Optimize usual case. */ 25983 if (fn == cfun) 25984 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0 25985 && TARGET_CMODEL == CMODEL_MEDIUM); 25986 25987 return rs6000_fndecl_pcrel_p (fn->decl); 25988} 25989 25990/* Return whether we should generate PC-relative code for the current 25991 function. */ 25992bool 25993rs6000_pcrel_p () 25994{ 25995 return (DEFAULT_ABI == ABI_ELFv2 25996 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0 25997 && TARGET_CMODEL == CMODEL_MEDIUM); 25998} 25999 26000 26001/* Given an address (ADDR), a mode (MODE), and what the format of the 26002 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format 26003 for the address. 
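   Roughly: a bare register yields INSN_FORM_BASE_REG, reg+reg yields
   INSN_FORM_X, auto-increment addresses yield INSN_FORM_UPDATE, and
   reg+constant is classified as D, DS, DQ or a prefixed form depending on
   the offset and on NON_PREFIXED_FORMAT; PC-relative symbols and labels map
   to the PCREL forms.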
*/ 26004 26005enum insn_form 26006address_to_insn_form (rtx addr, 26007 machine_mode mode, 26008 enum non_prefixed_form non_prefixed_format) 26009{ 26010 /* Single register is easy. */ 26011 if (REG_P (addr) || SUBREG_P (addr)) 26012 return INSN_FORM_BASE_REG; 26013 26014 /* If the non prefixed instruction format doesn't support offset addressing, 26015 make sure only indexed addressing is allowed. 26016 26017 We special case SDmode so that the register allocator does not try to move 26018 SDmode through GPR registers, but instead uses the 32-bit integer load and 26019 store instructions for the floating point registers. */ 26020 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP)) 26021 { 26022 if (GET_CODE (addr) != PLUS) 26023 return INSN_FORM_BAD; 26024 26025 rtx op0 = XEXP (addr, 0); 26026 rtx op1 = XEXP (addr, 1); 26027 if (!REG_P (op0) && !SUBREG_P (op0)) 26028 return INSN_FORM_BAD; 26029 26030 if (!REG_P (op1) && !SUBREG_P (op1)) 26031 return INSN_FORM_BAD; 26032 26033 return INSN_FORM_X; 26034 } 26035 26036 /* Deal with update forms. */ 26037 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) 26038 return INSN_FORM_UPDATE; 26039 26040 /* Handle PC-relative symbols and labels. Check for both local and 26041 external symbols. Assume labels are always local. TLS symbols 26042 are not PC-relative for rs6000. */ 26043 if (TARGET_PCREL) 26044 { 26045 if (LABEL_REF_P (addr)) 26046 return INSN_FORM_PCREL_LOCAL; 26047 26048 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr)) 26049 { 26050 if (!SYMBOL_REF_LOCAL_P (addr)) 26051 return INSN_FORM_PCREL_EXTERNAL; 26052 else 26053 return INSN_FORM_PCREL_LOCAL; 26054 } 26055 } 26056 26057 if (GET_CODE (addr) == CONST) 26058 addr = XEXP (addr, 0); 26059 26060 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */ 26061 if (GET_CODE (addr) == LO_SUM) 26062 return INSN_FORM_LO_SUM; 26063 26064 /* Everything below must be an offset address of some form. */ 26065 if (GET_CODE (addr) != PLUS) 26066 return INSN_FORM_BAD; 26067 26068 rtx op0 = XEXP (addr, 0); 26069 rtx op1 = XEXP (addr, 1); 26070 26071 /* Check for indexed addresses. */ 26072 if (REG_P (op1) || SUBREG_P (op1)) 26073 { 26074 if (REG_P (op0) || SUBREG_P (op0)) 26075 return INSN_FORM_X; 26076 26077 return INSN_FORM_BAD; 26078 } 26079 26080 if (!CONST_INT_P (op1)) 26081 return INSN_FORM_BAD; 26082 26083 HOST_WIDE_INT offset = INTVAL (op1); 26084 if (!SIGNED_INTEGER_34BIT_P (offset)) 26085 return INSN_FORM_BAD; 26086 26087 /* Check for local and external PC-relative addresses. Labels are always 26088 local. TLS symbols are not PC-relative for rs6000. */ 26089 if (TARGET_PCREL) 26090 { 26091 if (LABEL_REF_P (op0)) 26092 return INSN_FORM_PCREL_LOCAL; 26093 26094 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0)) 26095 { 26096 if (!SYMBOL_REF_LOCAL_P (op0)) 26097 return INSN_FORM_PCREL_EXTERNAL; 26098 else 26099 return INSN_FORM_PCREL_LOCAL; 26100 } 26101 } 26102 26103 /* If it isn't PC-relative, the address must use a base register. */ 26104 if (!REG_P (op0) && !SUBREG_P (op0)) 26105 return INSN_FORM_BAD; 26106 26107 /* Large offsets must be prefixed. */ 26108 if (!SIGNED_INTEGER_16BIT_P (offset)) 26109 { 26110 if (TARGET_PREFIXED) 26111 return INSN_FORM_PREFIXED_NUMERIC; 26112 26113 return INSN_FORM_BAD; 26114 } 26115 26116 /* We have a 16-bit offset, see what default instruction format to use. 
*/ 26117 if (non_prefixed_format == NON_PREFIXED_DEFAULT) 26118 { 26119 unsigned size = GET_MODE_SIZE (mode); 26120 26121 /* On 64-bit systems, assume 64-bit integers need to use DS form 26122 addresses (for LD/STD). VSX vectors need to use DQ form addresses 26123 (for LXV and STXV). TImode is problematical in that its normal usage 26124 is expected to be GPRs where it wants a DS instruction format, but if 26125 it goes into the vector registers, it wants a DQ instruction 26126 format. */ 26127 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT) 26128 non_prefixed_format = NON_PREFIXED_DS; 26129 26130 else if (TARGET_VSX && size >= 16 26131 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))) 26132 non_prefixed_format = NON_PREFIXED_DQ; 26133 26134 else 26135 non_prefixed_format = NON_PREFIXED_D; 26136 } 26137 26138 /* Classify the D/DS/DQ-form addresses. */ 26139 switch (non_prefixed_format) 26140 { 26141 /* Instruction format D, all 16 bits are valid. */ 26142 case NON_PREFIXED_D: 26143 return INSN_FORM_D; 26144 26145 /* Instruction format DS, bottom 2 bits must be 0. */ 26146 case NON_PREFIXED_DS: 26147 if ((offset & 3) == 0) 26148 return INSN_FORM_DS; 26149 26150 else if (TARGET_PREFIXED) 26151 return INSN_FORM_PREFIXED_NUMERIC; 26152 26153 else 26154 return INSN_FORM_BAD; 26155 26156 /* Instruction format DQ, bottom 4 bits must be 0. */ 26157 case NON_PREFIXED_DQ: 26158 if ((offset & 15) == 0) 26159 return INSN_FORM_DQ; 26160 26161 else if (TARGET_PREFIXED) 26162 return INSN_FORM_PREFIXED_NUMERIC; 26163 26164 else 26165 return INSN_FORM_BAD; 26166 26167 default: 26168 break; 26169 } 26170 26171 return INSN_FORM_BAD; 26172} 26173 26174/* Given address rtx ADDR for a load of MODE, is this legitimate for a 26175 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is 26176 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want 26177 a D-form or DS-form instruction. X-form and base_reg are always 26178 allowed. */ 26179bool 26180address_is_non_pfx_d_or_x (rtx addr, machine_mode mode, 26181 enum non_prefixed_form non_prefixed_format) 26182{ 26183 enum insn_form result_form; 26184 26185 result_form = address_to_insn_form (addr, mode, non_prefixed_format); 26186 26187 switch (non_prefixed_format) 26188 { 26189 case NON_PREFIXED_D: 26190 switch (result_form) 26191 { 26192 case INSN_FORM_X: 26193 case INSN_FORM_D: 26194 case INSN_FORM_DS: 26195 case INSN_FORM_BASE_REG: 26196 return true; 26197 default: 26198 return false; 26199 } 26200 break; 26201 case NON_PREFIXED_DS: 26202 switch (result_form) 26203 { 26204 case INSN_FORM_X: 26205 case INSN_FORM_DS: 26206 case INSN_FORM_BASE_REG: 26207 return true; 26208 default: 26209 return false; 26210 } 26211 break; 26212 default: 26213 break; 26214 } 26215 return false; 26216} 26217 26218/* Return true if an REG with a given MODE is loaded from or stored into a MEM 26219 location uses a non-prefixed D/DS/DQ-form address. This is used to validate 26220 the load or store with the PCREL_OPT optimization to make sure it is an 26221 instruction that can be optimized. 26222 26223 We need to specify the MODE separately from the REG to allow for loads that 26224 include zero/sign/float extension. */ 26225 26226bool 26227pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem) 26228{ 26229 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the 26230 PCREL_OPT optimization. 
*/ 26231 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode); 26232 if (non_prefixed == NON_PREFIXED_X) 26233 return false; 26234 26235 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */ 26236 rtx addr = XEXP (mem, 0); 26237 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed); 26238 return (iform == INSN_FORM_BASE_REG 26239 || iform == INSN_FORM_D 26240 || iform == INSN_FORM_DS 26241 || iform == INSN_FORM_DQ); 26242} 26243 26244/* Helper function to see if we're potentially looking at lfs/stfs. 26245 - PARALLEL containing a SET and a CLOBBER 26246 - stfs: 26247 - SET is from UNSPEC_SI_FROM_SF to MEM:SI 26248 - CLOBBER is a V4SF 26249 - lfs: 26250 - SET is from UNSPEC_SF_FROM_SI to REG:SF 26251 - CLOBBER is a DI 26252 */ 26253 26254static bool 26255is_lfs_stfs_insn (rtx_insn *insn) 26256{ 26257 rtx pattern = PATTERN (insn); 26258 if (GET_CODE (pattern) != PARALLEL) 26259 return false; 26260 26261 /* This should be a parallel with exactly one set and one clobber. */ 26262 if (XVECLEN (pattern, 0) != 2) 26263 return false; 26264 26265 rtx set = XVECEXP (pattern, 0, 0); 26266 if (GET_CODE (set) != SET) 26267 return false; 26268 26269 rtx clobber = XVECEXP (pattern, 0, 1); 26270 if (GET_CODE (clobber) != CLOBBER) 26271 return false; 26272 26273 /* All we care is that the destination of the SET is a mem:SI, 26274 the source should be an UNSPEC_SI_FROM_SF, and the clobber 26275 should be a scratch:V4SF. */ 26276 26277 rtx dest = SET_DEST (set); 26278 rtx src = SET_SRC (set); 26279 rtx scratch = SET_DEST (clobber); 26280 26281 if (GET_CODE (src) != UNSPEC) 26282 return false; 26283 26284 /* stfs case. */ 26285 if (XINT (src, 1) == UNSPEC_SI_FROM_SF 26286 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode 26287 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode) 26288 return true; 26289 26290 /* lfs case. */ 26291 if (XINT (src, 1) == UNSPEC_SF_FROM_SI 26292 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode 26293 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode) 26294 return true; 26295 26296 return false; 26297} 26298 26299/* Helper function to take a REG and a MODE and turn it into the non-prefixed 26300 instruction format (D/DS/DQ) used for offset memory. */ 26301 26302enum non_prefixed_form 26303reg_to_non_prefixed (rtx reg, machine_mode mode) 26304{ 26305 /* If it isn't a register, use the defaults. */ 26306 if (!REG_P (reg) && !SUBREG_P (reg)) 26307 return NON_PREFIXED_DEFAULT; 26308 26309 unsigned int r = reg_or_subregno (reg); 26310 26311 /* If we have a pseudo, use the default instruction format. */ 26312 if (!HARD_REGISTER_NUM_P (r)) 26313 return NON_PREFIXED_DEFAULT; 26314 26315 unsigned size = GET_MODE_SIZE (mode); 26316 26317 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE 26318 128-bit floating point, and 128-bit integers. Before power9, only indexed 26319 addressing was available for vectors. */ 26320 if (FP_REGNO_P (r)) 26321 { 26322 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode)) 26323 return NON_PREFIXED_D; 26324 26325 else if (size < 8) 26326 return NON_PREFIXED_X; 26327 26328 else if (TARGET_VSX && size >= 16 26329 && (VECTOR_MODE_P (mode) 26330 || VECTOR_ALIGNMENT_P (mode) 26331 || mode == TImode || mode == CTImode)) 26332 return (TARGET_P9_VECTOR) ? 
NON_PREFIXED_DQ : NON_PREFIXED_X; 26333 26334 else 26335 return NON_PREFIXED_DEFAULT; 26336 } 26337 26338 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE 26339 128-bit floating point, and 128-bit integers. Before power9, only indexed 26340 addressing was available. */ 26341 else if (ALTIVEC_REGNO_P (r)) 26342 { 26343 if (!TARGET_P9_VECTOR) 26344 return NON_PREFIXED_X; 26345 26346 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode)) 26347 return NON_PREFIXED_DS; 26348 26349 else if (size < 8) 26350 return NON_PREFIXED_X; 26351 26352 else if (TARGET_VSX && size >= 16 26353 && (VECTOR_MODE_P (mode) 26354 || VECTOR_ALIGNMENT_P (mode) 26355 || mode == TImode || mode == CTImode)) 26356 return NON_PREFIXED_DQ; 26357 26358 else 26359 return NON_PREFIXED_DEFAULT; 26360 } 26361 26362 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode 26363 otherwise. Assume that any other register, such as LR, CRs, etc. will go 26364 through the GPR registers for memory operations. */ 26365 else if (TARGET_POWERPC64 && size >= 8) 26366 return NON_PREFIXED_DS; 26367 26368 return NON_PREFIXED_D; 26369} 26370 26371 26372/* Whether a load instruction is a prefixed instruction. This is called from 26373 the prefixed attribute processing. */ 26374 26375bool 26376prefixed_load_p (rtx_insn *insn) 26377{ 26378 /* Validate the insn to make sure it is a normal load insn. */ 26379 extract_insn_cached (insn); 26380 if (recog_data.n_operands < 2) 26381 return false; 26382 26383 rtx reg = recog_data.operand[0]; 26384 rtx mem = recog_data.operand[1]; 26385 26386 if (!REG_P (reg) && !SUBREG_P (reg)) 26387 return false; 26388 26389 if (!MEM_P (mem)) 26390 return false; 26391 26392 /* Prefixed load instructions do not support update or indexed forms. */ 26393 if (get_attr_indexed (insn) == INDEXED_YES 26394 || get_attr_update (insn) == UPDATE_YES) 26395 return false; 26396 26397 /* LWA uses the DS format instead of the D format that LWZ uses. */ 26398 enum non_prefixed_form non_prefixed; 26399 machine_mode reg_mode = GET_MODE (reg); 26400 machine_mode mem_mode = GET_MODE (mem); 26401 26402 if (mem_mode == SImode && reg_mode == DImode 26403 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) 26404 non_prefixed = NON_PREFIXED_DS; 26405 26406 else 26407 non_prefixed = reg_to_non_prefixed (reg, mem_mode); 26408 26409 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) 26410 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT); 26411 else 26412 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); 26413} 26414 26415/* Whether a store instruction is a prefixed instruction. This is called from 26416 the prefixed attribute processing. */ 26417 26418bool 26419prefixed_store_p (rtx_insn *insn) 26420{ 26421 /* Validate the insn to make sure it is a normal store insn. */ 26422 extract_insn_cached (insn); 26423 if (recog_data.n_operands < 2) 26424 return false; 26425 26426 rtx mem = recog_data.operand[0]; 26427 rtx reg = recog_data.operand[1]; 26428 26429 if (!REG_P (reg) && !SUBREG_P (reg)) 26430 return false; 26431 26432 if (!MEM_P (mem)) 26433 return false; 26434 26435 /* Prefixed store instructions do not support update or indexed forms. 
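   (For example, PSTW exists, but there is no prefixed counterpart of STWU
   or STWX.)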
*/ 26436 if (get_attr_indexed (insn) == INDEXED_YES 26437 || get_attr_update (insn) == UPDATE_YES) 26438 return false; 26439 26440 machine_mode mem_mode = GET_MODE (mem); 26441 rtx addr = XEXP (mem, 0); 26442 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); 26443 26444 /* Need to make sure we aren't looking at a stfs which doesn't look 26445 like the other things reg_to_non_prefixed/address_is_prefixed 26446 looks for. */ 26447 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) 26448 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); 26449 else 26450 return address_is_prefixed (addr, mem_mode, non_prefixed); 26451} 26452 26453/* Whether a load immediate or add instruction is a prefixed instruction. This 26454 is called from the prefixed attribute processing. */ 26455 26456bool 26457prefixed_paddi_p (rtx_insn *insn) 26458{ 26459 rtx set = single_set (insn); 26460 if (!set) 26461 return false; 26462 26463 rtx dest = SET_DEST (set); 26464 rtx src = SET_SRC (set); 26465 26466 if (!REG_P (dest) && !SUBREG_P (dest)) 26467 return false; 26468 26469 /* Is this a load immediate that can't be done with a simple ADDI or 26470 ADDIS? */ 26471 if (CONST_INT_P (src)) 26472 return (satisfies_constraint_eI (src) 26473 && !satisfies_constraint_I (src) 26474 && !satisfies_constraint_L (src)); 26475 26476 /* Is this a PADDI instruction that can't be done with a simple ADDI or 26477 ADDIS? */ 26478 if (GET_CODE (src) == PLUS) 26479 { 26480 rtx op1 = XEXP (src, 1); 26481 26482 return (CONST_INT_P (op1) 26483 && satisfies_constraint_eI (op1) 26484 && !satisfies_constraint_I (op1) 26485 && !satisfies_constraint_L (op1)); 26486 } 26487 26488 /* If not, is it a load of a PC-relative address? */ 26489 if (!TARGET_PCREL || GET_MODE (dest) != Pmode) 26490 return false; 26491 26492 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) 26493 return false; 26494 26495 enum insn_form iform = address_to_insn_form (src, Pmode, 26496 NON_PREFIXED_DEFAULT); 26497 26498 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); 26499} 26500 26501/* Whether the next instruction needs a 'p' prefix issued before the 26502 instruction is printed out. */ 26503static bool prepend_p_to_next_insn; 26504 26505/* Define FINAL_PRESCAN_INSN if some processing needs to be done before 26506 outputting the assembler code. On the PowerPC, we remember if the current 26507 insn is a prefixed insn where we need to emit a 'p' before the insn. 26508 26509 In addition, if the insn is part of a PC-relative reference to an external 26510 label optimization, this is recorded also. */ 26511void 26512rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) 26513{ 26514 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn) 26515 == MAYBE_PREFIXED_YES 26516 && get_attr_prefixed (insn) == PREFIXED_YES); 26517 return; 26518} 26519 26520/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. 26521 We use it to emit a 'p' for prefixed insns that is set in 26522 FINAL_PRESCAN_INSN. */ 26523void 26524rs6000_asm_output_opcode (FILE *stream) 26525{ 26526 if (prepend_p_to_next_insn) 26527 { 26528 fprintf (stream, "p"); 26529 26530 /* Reset the flag in the case where there are separate insn lines in the 26531 sequence, so the 'p' is only emitted for the first line. This shows up 26532 when we are doing the PCREL_OPT optimization, in that the label created 26533 with %r<n> would have a leading 'p' printed. 
*/ 26534 prepend_p_to_next_insn = false; 26535 } 26536 26537 return; 26538} 26539 26540/* Emit the relocation to tie the next instruction to a previous instruction 26541 that loads up an external address. This is used to do the PCREL_OPT 26542 optimization. Note, the label is generated after the PLD of the GOT 26543 pc-relative address to allow the assembler to insert NOPs before the PLD 26544 instruction. The operand is a constant integer that is the label 26545 number. */ 26546 26547void 26548output_pcrel_opt_reloc (rtx label_num) 26549{ 26550 rtx operands[1] = { label_num }; 26551 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)", 26552 operands); 26553} 26554 26555/* Adjust the length of an INSN. LENGTH is the currently-computed length and 26556 should be adjusted to reflect any required changes. This macro is used when 26557 there is some systematic length adjustment required that would be difficult 26558 to express in the length attribute. 26559 26560 On the PowerPC, we use this to adjust the length of an instruction if one or 26561 more prefixed instructions are generated, using the attribute 26562 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the 26563 hardware requires that a prefixed instruction does not cross a 64-byte 26564 boundary. This means the compiler has to assume the length of the first 26565 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is 26566 already set for the non-prefixed instruction, we just need to update for the 26567 difference. */ 26568 26569int 26570rs6000_adjust_insn_length (rtx_insn *insn, int length) 26571{ 26572 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) 26573 { 26574 rtx pattern = PATTERN (insn); 26575 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER 26576 && get_attr_prefixed (insn) == PREFIXED_YES) 26577 { 26578 int num_prefixed = get_attr_max_prefixed_insns (insn); 26579 length += 4 * (num_prefixed + 1); 26580 } 26581 } 26582 26583 return length; 26584} 26585 26586 26587#ifdef HAVE_GAS_HIDDEN 26588# define USE_HIDDEN_LINKONCE 1 26589#else 26590# define USE_HIDDEN_LINKONCE 0 26591#endif 26592 26593/* Fills in the label name that should be used for a 476 link stack thunk. */ 26594 26595void 26596get_ppc476_thunk_name (char name[32]) 26597{ 26598 gcc_assert (TARGET_LINK_STACK); 26599 26600 if (USE_HIDDEN_LINKONCE) 26601 sprintf (name, "__ppc476.get_thunk"); 26602 else 26603 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); 26604} 26605 26606/* This function emits the simple thunk routine that is used to preserve 26607 the link stack on the 476 cpu.
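   The emitted routine is just a labelled return instruction, roughly:

       __ppc476.get_thunk:
               blr

   (the label comes from get_ppc476_thunk_name above).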
*/ 26608 26609static void rs6000_code_end (void) ATTRIBUTE_UNUSED; 26610static void 26611rs6000_code_end (void) 26612{ 26613 char name[32]; 26614 tree decl; 26615 26616 if (!TARGET_LINK_STACK) 26617 return; 26618 26619 get_ppc476_thunk_name (name); 26620 26621 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), 26622 build_function_type_list (void_type_node, NULL_TREE)); 26623 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, 26624 NULL_TREE, void_type_node); 26625 TREE_PUBLIC (decl) = 1; 26626 TREE_STATIC (decl) = 1; 26627 26628#if RS6000_WEAK 26629 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) 26630 { 26631 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); 26632 targetm.asm_out.unique_section (decl, 0); 26633 switch_to_section (get_named_section (decl, NULL, 0)); 26634 DECL_WEAK (decl) = 1; 26635 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); 26636 targetm.asm_out.globalize_label (asm_out_file, name); 26637 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); 26638 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 26639 } 26640 else 26641#endif 26642 { 26643 switch_to_section (text_section); 26644 ASM_OUTPUT_LABEL (asm_out_file, name); 26645 } 26646 26647 DECL_INITIAL (decl) = make_node (BLOCK); 26648 current_function_decl = decl; 26649 allocate_struct_function (decl, false); 26650 init_function_start (decl); 26651 first_function_block_is_cold = false; 26652 /* Make sure unwind info is emitted for the thunk if needed. */ 26653 final_start_function (emit_barrier (), asm_out_file, 1); 26654 26655 fputs ("\tblr\n", asm_out_file); 26656 26657 final_end_function (); 26658 init_insn_lengths (); 26659 free_after_compilation (cfun); 26660 set_cfun (NULL); 26661 current_function_decl = NULL; 26662} 26663 26664/* Add r30 to hard reg set if the prologue sets it up and it is not 26665 pic_offset_table_rtx. */ 26666 26667static void 26668rs6000_set_up_by_prologue (struct hard_reg_set_container *set) 26669{ 26670 if (!TARGET_SINGLE_PIC_BASE 26671 && TARGET_TOC 26672 && TARGET_MINIMAL_TOC 26673 && !constant_pool_empty_p ()) 26674 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 26675 if (cfun->machine->split_stack_argp_used) 26676 add_to_hard_reg_set (&set->set, Pmode, 12); 26677 26678 /* Make sure the hard reg set doesn't include r2, which was possibly added 26679 via PIC_OFFSET_TABLE_REGNUM. */ 26680 if (TARGET_TOC) 26681 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); 26682} 26683 26684 26685/* Helper function for rs6000_split_logical to emit a logical instruction after 26686 spliting the operation to single GPR registers. 26687 26688 DEST is the destination register. 26689 OP1 and OP2 are the input source registers. 26690 CODE is the base operation (AND, IOR, XOR, NOT). 26691 MODE is the machine mode. 26692 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 26693 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 26694 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ 26695 26696static void 26697rs6000_split_logical_inner (rtx dest, 26698 rtx op1, 26699 rtx op2, 26700 enum rtx_code code, 26701 machine_mode mode, 26702 bool complement_final_p, 26703 bool complement_op1_p, 26704 bool complement_op2_p) 26705{ 26706 rtx bool_rtx; 26707 26708 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. 
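   For example, (x & 0) becomes a load of zero, while (x & -1) and (x | 0)
   become plain register moves (or disappear entirely when the destination
   already equals the source).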
*/ 26709 if (op2 && CONST_INT_P (op2) 26710 && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) 26711 && !complement_final_p && !complement_op1_p && !complement_op2_p) 26712 { 26713 HOST_WIDE_INT mask = GET_MODE_MASK (mode); 26714 HOST_WIDE_INT value = INTVAL (op2) & mask; 26715 26716 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ 26717 if (code == AND) 26718 { 26719 if (value == 0) 26720 { 26721 emit_insn (gen_rtx_SET (dest, const0_rtx)); 26722 return; 26723 } 26724 26725 else if (value == mask) 26726 { 26727 if (!rtx_equal_p (dest, op1)) 26728 emit_insn (gen_rtx_SET (dest, op1)); 26729 return; 26730 } 26731 } 26732 26733 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations 26734 into separate ORI/ORIS or XORI/XORIS instructions. */ 26735 else if (code == IOR || code == XOR) 26736 { 26737 if (value == 0) 26738 { 26739 if (!rtx_equal_p (dest, op1)) 26740 emit_insn (gen_rtx_SET (dest, op1)); 26741 return; 26742 } 26743 } 26744 } 26745 26746 if (code == AND && mode == SImode 26747 && !complement_final_p && !complement_op1_p && !complement_op2_p) 26748 { 26749 emit_insn (gen_andsi3 (dest, op1, op2)); 26750 return; 26751 } 26752 26753 if (complement_op1_p) 26754 op1 = gen_rtx_NOT (mode, op1); 26755 26756 if (complement_op2_p) 26757 op2 = gen_rtx_NOT (mode, op2); 26758 26759 /* For canonical RTL, if only one arm is inverted it is the first. */ 26760 if (!complement_op1_p && complement_op2_p) 26761 std::swap (op1, op2); 26762 26763 bool_rtx = ((code == NOT) 26764 ? gen_rtx_NOT (mode, op1) 26765 : gen_rtx_fmt_ee (code, mode, op1, op2)); 26766 26767 if (complement_final_p) 26768 bool_rtx = gen_rtx_NOT (mode, bool_rtx); 26769 26770 emit_insn (gen_rtx_SET (dest, bool_rtx)); 26771} 26772 26773/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These 26774 operations are split immediately during RTL generation to allow for more 26775 optimizations of the AND/IOR/XOR. 26776 26777 OPERANDS is an array containing the destination and two input operands. 26778 CODE is the base operation (AND, IOR, XOR, NOT). 26779 The mode is always DImode; the operation is split into SImode halves. 26780 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 26781 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 26782 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. 26783 No separate clobber register is needed; the SImode pieces are emitted 26784 directly via rs6000_split_logical_inner.
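
   For example (illustrative), a DImode (x | 0x12345678) on a 32-bit target
   is split into SImode halves; the half whose constant is 0 becomes a plain
   move, and a half whose constant is not a valid ORI/XORI operand (here
   0x12345678) is further broken into an ORIS of the high 16 bits followed
   by an ORI of the low 16 bits.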
*/ 26785 26786static void 26787rs6000_split_logical_di (rtx operands[3], 26788 enum rtx_code code, 26789 bool complement_final_p, 26790 bool complement_op1_p, 26791 bool complement_op2_p) 26792{ 26793 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); 26794 const HOST_WIDE_INT upper_32bits = ~ lower_32bits; 26795 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); 26796 enum hi_lo { hi = 0, lo = 1 }; 26797 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; 26798 size_t i; 26799 26800 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); 26801 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); 26802 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); 26803 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); 26804 26805 if (code == NOT) 26806 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; 26807 else 26808 { 26809 if (!CONST_INT_P (operands[2])) 26810 { 26811 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); 26812 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); 26813 } 26814 else 26815 { 26816 HOST_WIDE_INT value = INTVAL (operands[2]); 26817 HOST_WIDE_INT value_hi_lo[2]; 26818 26819 gcc_assert (!complement_final_p); 26820 gcc_assert (!complement_op1_p); 26821 gcc_assert (!complement_op2_p); 26822 26823 value_hi_lo[hi] = value >> 32; 26824 value_hi_lo[lo] = value & lower_32bits; 26825 26826 for (i = 0; i < 2; i++) 26827 { 26828 HOST_WIDE_INT sub_value = value_hi_lo[i]; 26829 26830 if (sub_value & sign_bit) 26831 sub_value |= upper_32bits; 26832 26833 op2_hi_lo[i] = GEN_INT (sub_value); 26834 26835 /* If this is an AND instruction, check to see if we need to load 26836 the value in a register. */ 26837 if (code == AND && sub_value != -1 && sub_value != 0 26838 && !and_operand (op2_hi_lo[i], SImode)) 26839 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); 26840 } 26841 } 26842 } 26843 26844 for (i = 0; i < 2; i++) 26845 { 26846 /* Split large IOR/XOR operations. */ 26847 if ((code == IOR || code == XOR) 26848 && CONST_INT_P (op2_hi_lo[i]) 26849 && !complement_final_p 26850 && !complement_op1_p 26851 && !complement_op2_p 26852 && !logical_const_operand (op2_hi_lo[i], SImode)) 26853 { 26854 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); 26855 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); 26856 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); 26857 rtx tmp = gen_reg_rtx (SImode); 26858 26859 /* Make sure the constant is sign extended. */ 26860 if ((hi_16bits & sign_bit) != 0) 26861 hi_16bits |= upper_32bits; 26862 26863 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), 26864 code, SImode, false, false, false); 26865 26866 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), 26867 code, SImode, false, false, false); 26868 } 26869 else 26870 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], 26871 code, SImode, complement_final_p, 26872 complement_op1_p, complement_op2_p); 26873 } 26874 26875 return; 26876} 26877 26878/* Split the insns that make up boolean operations operating on multiple GPR 26879 registers. The boolean MD patterns ensure that the inputs either are 26880 exactly the same as the output registers, or there is no overlap. 26881 26882 OPERANDS is an array containing the destination and two input operands. 26883 CODE is the base operation (AND, IOR, XOR, NOT). 26884 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 26885 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 26886 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. 
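
   For example, after reload a TImode AND held in GPRs is emitted as two
   DImode ANDs on a 64-bit target (or four SImode ANDs on a 32-bit one),
   one per hard register.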
*/ 26887 26888void 26889rs6000_split_logical (rtx operands[3], 26890 enum rtx_code code, 26891 bool complement_final_p, 26892 bool complement_op1_p, 26893 bool complement_op2_p) 26894{ 26895 machine_mode mode = GET_MODE (operands[0]); 26896 machine_mode sub_mode; 26897 rtx op0, op1, op2; 26898 int sub_size, regno0, regno1, nregs, i; 26899 26900 /* If this is DImode, use the specialized version that can run before 26901 register allocation. */ 26902 if (mode == DImode && !TARGET_POWERPC64) 26903 { 26904 rs6000_split_logical_di (operands, code, complement_final_p, 26905 complement_op1_p, complement_op2_p); 26906 return; 26907 } 26908 26909 op0 = operands[0]; 26910 op1 = operands[1]; 26911 op2 = (code == NOT) ? NULL_RTX : operands[2]; 26912 sub_mode = (TARGET_POWERPC64) ? DImode : SImode; 26913 sub_size = GET_MODE_SIZE (sub_mode); 26914 regno0 = REGNO (op0); 26915 regno1 = REGNO (op1); 26916 26917 gcc_assert (reload_completed); 26918 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26919 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26920 26921 nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; 26922 gcc_assert (nregs > 1); 26923 26924 if (op2 && REG_P (op2)) 26925 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26926 26927 for (i = 0; i < nregs; i++) 26928 { 26929 int offset = i * sub_size; 26930 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); 26931 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); 26932 rtx sub_op2 = ((code == NOT) 26933 ? NULL_RTX 26934 : simplify_subreg (sub_mode, op2, mode, offset)); 26935 26936 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, 26937 complement_final_p, complement_op1_p, 26938 complement_op2_p); 26939 } 26940 26941 return; 26942} 26943 26944/* Emit instructions to move SRC to DST. Called by splitters for 26945 multi-register moves. It will emit at most one instruction for 26946 each register that is accessed; that is, it won't emit li/lis pairs 26947 (or equivalent for 64-bit code). One of SRC or DST must be a hard 26948 register. */ 26949 26950void 26951rs6000_split_multireg_move (rtx dst, rtx src) 26952{ 26953 /* The register number of the first register being moved. */ 26954 int reg; 26955 /* The mode that is to be moved. */ 26956 machine_mode mode; 26957 /* The mode that the move is being done in, and its size. */ 26958 machine_mode reg_mode; 26959 int reg_mode_size; 26960 /* The number of registers that will be moved. */ 26961 int nregs; 26962 26963 reg = REG_P (dst) ? REGNO (dst) : REGNO (src); 26964 mode = GET_MODE (dst); 26965 nregs = hard_regno_nregs (reg, mode); 26966 26967 /* If we have a vector quad register for MMA, and this is a load or store, 26968 see if we can use vector paired load/stores. */ 26969 if (mode == XOmode && TARGET_MMA 26970 && (MEM_P (dst) || MEM_P (src))) 26971 { 26972 reg_mode = OOmode; 26973 nregs /= 2; 26974 } 26975 /* If we have a vector pair/quad mode, split it into two/four separate 26976 vectors. */ 26977 else if (mode == OOmode || mode == XOmode) 26978 reg_mode = V1TImode; 26979 else if (FP_REGNO_P (reg)) 26980 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : 26981 (TARGET_HARD_FLOAT ? 
DFmode : SFmode); 26982 else if (ALTIVEC_REGNO_P (reg)) 26983 reg_mode = V16QImode; 26984 else 26985 reg_mode = word_mode; 26986 reg_mode_size = GET_MODE_SIZE (reg_mode); 26987 26988 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); 26989 26990 /* TDmode residing in FP registers is special, since the ISA requires that 26991 the lower-numbered word of a register pair is always the most significant 26992 word, even in little-endian mode. This does not match the usual subreg 26993 semantics, so we cannnot use simplify_gen_subreg in those cases. Access 26994 the appropriate constituent registers "by hand" in little-endian mode. 26995 26996 Note we do not need to check for destructive overlap here since TDmode 26997 can only reside in even/odd register pairs. */ 26998 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) 26999 { 27000 rtx p_src, p_dst; 27001 int i; 27002 27003 for (i = 0; i < nregs; i++) 27004 { 27005 if (REG_P (src) && FP_REGNO_P (REGNO (src))) 27006 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); 27007 else 27008 p_src = simplify_gen_subreg (reg_mode, src, mode, 27009 i * reg_mode_size); 27010 27011 if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) 27012 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); 27013 else 27014 p_dst = simplify_gen_subreg (reg_mode, dst, mode, 27015 i * reg_mode_size); 27016 27017 emit_insn (gen_rtx_SET (p_dst, p_src)); 27018 } 27019 27020 return; 27021 } 27022 27023 /* The __vector_pair and __vector_quad modes are multi-register 27024 modes, so if we have to load or store the registers, we have to be 27025 careful to properly swap them if we're in little endian mode 27026 below. This means the last register gets the first memory 27027 location. We also need to be careful of using the right register 27028 numbers if we are splitting XO to OO. */ 27029 if (mode == OOmode || mode == XOmode) 27030 { 27031 nregs = hard_regno_nregs (reg, mode); 27032 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); 27033 if (MEM_P (dst)) 27034 { 27035 unsigned offset = 0; 27036 unsigned size = GET_MODE_SIZE (reg_mode); 27037 27038 /* If we are reading an accumulator register, we have to 27039 deprime it before we can access it. */ 27040 if (TARGET_MMA 27041 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) 27042 emit_insn (gen_mma_xxmfacc (src, src)); 27043 27044 for (int i = 0; i < nregs; i += reg_mode_nregs) 27045 { 27046 unsigned subreg 27047 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i); 27048 rtx dst2 = adjust_address (dst, reg_mode, offset); 27049 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); 27050 offset += size; 27051 emit_insn (gen_rtx_SET (dst2, src2)); 27052 } 27053 27054 return; 27055 } 27056 27057 if (MEM_P (src)) 27058 { 27059 unsigned offset = 0; 27060 unsigned size = GET_MODE_SIZE (reg_mode); 27061 27062 for (int i = 0; i < nregs; i += reg_mode_nregs) 27063 { 27064 unsigned subreg 27065 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i); 27066 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); 27067 rtx src2 = adjust_address (src, reg_mode, offset); 27068 offset += size; 27069 emit_insn (gen_rtx_SET (dst2, src2)); 27070 } 27071 27072 /* If we are writing an accumulator register, we have to 27073 prime it after we've written it. 
*/ 27074 if (TARGET_MMA 27075 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) 27076 emit_insn (gen_mma_xxmtacc (dst, dst)); 27077 27078 return; 27079 } 27080 27081 if (GET_CODE (src) == UNSPEC 27082 || GET_CODE (src) == UNSPEC_VOLATILE) 27083 { 27084 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE 27085 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE); 27086 gcc_assert (REG_P (dst)); 27087 if (GET_MODE (src) == XOmode) 27088 gcc_assert (FP_REGNO_P (REGNO (dst))); 27089 if (GET_MODE (src) == OOmode) 27090 gcc_assert (VSX_REGNO_P (REGNO (dst))); 27091 27092 int nvecs = XVECLEN (src, 0); 27093 for (int i = 0; i < nvecs; i++) 27094 { 27095 rtx op; 27096 int regno = reg + i; 27097 27098 if (WORDS_BIG_ENDIAN) 27099 { 27100 op = XVECEXP (src, 0, i); 27101 27102 /* If we are loading an even VSX register and the memory location 27103 is adjacent to the next register's memory location (if any), 27104 then we can load them both with one LXVP instruction. */ 27105 if ((regno & 1) == 0) 27106 { 27107 rtx op2 = XVECEXP (src, 0, i + 1); 27108 if (adjacent_mem_locations (op, op2) == op) 27109 { 27110 op = adjust_address (op, OOmode, 0); 27111 /* Skip the next register, since we're going to 27112 load it together with this register. */ 27113 i++; 27114 } 27115 } 27116 } 27117 else 27118 { 27119 op = XVECEXP (src, 0, nvecs - i - 1); 27120 27121 /* If we are loading an even VSX register and the memory location 27122 is adjacent to the next register's memory location (if any), 27123 then we can load them both with one LXVP instruction. */ 27124 if ((regno & 1) == 0) 27125 { 27126 rtx op2 = XVECEXP (src, 0, nvecs - i - 2); 27127 if (adjacent_mem_locations (op2, op) == op2) 27128 { 27129 op = adjust_address (op2, OOmode, 0); 27130 /* Skip the next register, since we're going to 27131 load it together with this register. */ 27132 i++; 27133 } 27134 } 27135 } 27136 27137 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno); 27138 emit_insn (gen_rtx_SET (dst_i, op)); 27139 } 27140 27141 /* We are writing an accumulator register, so we have to 27142 prime it after we've written it. */ 27143 if (GET_MODE (src) == XOmode) 27144 emit_insn (gen_mma_xxmtacc (dst, dst)); 27145 27146 return; 27147 } 27148 27149 /* Register -> register moves can use common code. */ 27150 } 27151 27152 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) 27153 { 27154 /* If we are reading an accumulator register, we have to 27155 deprime it before we can access it. */ 27156 if (TARGET_MMA 27157 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) 27158 emit_insn (gen_mma_xxmfacc (src, src)); 27159 27160 /* Move register range backwards, if we might have destructive 27161 overlap. */ 27162 int i; 27163 /* XO/OO are opaque so cannot use subregs. */ 27164 if (mode == OOmode || mode == XOmode ) 27165 { 27166 for (i = nregs - 1; i >= 0; i--) 27167 { 27168 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); 27169 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); 27170 emit_insn (gen_rtx_SET (dst_i, src_i)); 27171 } 27172 } 27173 else 27174 { 27175 for (i = nregs - 1; i >= 0; i--) 27176 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, 27177 i * reg_mode_size), 27178 simplify_gen_subreg (reg_mode, src, mode, 27179 i * reg_mode_size))); 27180 } 27181 27182 /* If we are writing an accumulator register, we have to 27183 prime it after we've written it. 
*/ 27184 if (TARGET_MMA 27185 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) 27186 emit_insn (gen_mma_xxmtacc (dst, dst)); 27187 } 27188 else 27189 { 27190 int i; 27191 int j = -1; 27192 bool used_update = false; 27193 rtx restore_basereg = NULL_RTX; 27194 27195 if (MEM_P (src) && INT_REGNO_P (reg)) 27196 { 27197 rtx breg; 27198 27199 if (GET_CODE (XEXP (src, 0)) == PRE_INC 27200 || GET_CODE (XEXP (src, 0)) == PRE_DEC) 27201 { 27202 rtx delta_rtx; 27203 breg = XEXP (XEXP (src, 0), 0); 27204 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC 27205 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) 27206 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); 27207 emit_insn (gen_add3_insn (breg, breg, delta_rtx)); 27208 src = replace_equiv_address (src, breg); 27209 } 27210 else if (! rs6000_offsettable_memref_p (src, reg_mode, true)) 27211 { 27212 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) 27213 { 27214 rtx basereg = XEXP (XEXP (src, 0), 0); 27215 if (TARGET_UPDATE) 27216 { 27217 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); 27218 emit_insn (gen_rtx_SET (ndst, 27219 gen_rtx_MEM (reg_mode, 27220 XEXP (src, 0)))); 27221 used_update = true; 27222 } 27223 else 27224 emit_insn (gen_rtx_SET (basereg, 27225 XEXP (XEXP (src, 0), 1))); 27226 src = replace_equiv_address (src, basereg); 27227 } 27228 else 27229 { 27230 rtx basereg = gen_rtx_REG (Pmode, reg); 27231 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); 27232 src = replace_equiv_address (src, basereg); 27233 } 27234 } 27235 27236 breg = XEXP (src, 0); 27237 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) 27238 breg = XEXP (breg, 0); 27239 27240 /* If the base register we are using to address memory is 27241 also a destination reg, then change that register last. */ 27242 if (REG_P (breg) 27243 && REGNO (breg) >= REGNO (dst) 27244 && REGNO (breg) < REGNO (dst) + nregs) 27245 j = REGNO (breg) - REGNO (dst); 27246 } 27247 else if (MEM_P (dst) && INT_REGNO_P (reg)) 27248 { 27249 rtx breg; 27250 27251 if (GET_CODE (XEXP (dst, 0)) == PRE_INC 27252 || GET_CODE (XEXP (dst, 0)) == PRE_DEC) 27253 { 27254 rtx delta_rtx; 27255 breg = XEXP (XEXP (dst, 0), 0); 27256 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC 27257 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) 27258 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); 27259 27260 /* We have to update the breg before doing the store. 27261 Use store with update, if available. */ 27262 27263 if (TARGET_UPDATE) 27264 { 27265 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); 27266 emit_insn (TARGET_32BIT 27267 ? (TARGET_POWERPC64 27268 ? 
gen_movdi_si_update (breg, breg, delta_rtx, nsrc) 27269 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) 27270 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); 27271 used_update = true; 27272 } 27273 else 27274 emit_insn (gen_add3_insn (breg, breg, delta_rtx)); 27275 dst = replace_equiv_address (dst, breg); 27276 } 27277 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) 27278 && GET_CODE (XEXP (dst, 0)) != LO_SUM) 27279 { 27280 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) 27281 { 27282 rtx basereg = XEXP (XEXP (dst, 0), 0); 27283 if (TARGET_UPDATE) 27284 { 27285 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); 27286 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, 27287 XEXP (dst, 0)), 27288 nsrc)); 27289 used_update = true; 27290 } 27291 else 27292 emit_insn (gen_rtx_SET (basereg, 27293 XEXP (XEXP (dst, 0), 1))); 27294 dst = replace_equiv_address (dst, basereg); 27295 } 27296 else 27297 { 27298 rtx basereg = XEXP (XEXP (dst, 0), 0); 27299 rtx offsetreg = XEXP (XEXP (dst, 0), 1); 27300 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS 27301 && REG_P (basereg) 27302 && REG_P (offsetreg) 27303 && REGNO (basereg) != REGNO (offsetreg)); 27304 if (REGNO (basereg) == 0) 27305 { 27306 rtx tmp = offsetreg; 27307 offsetreg = basereg; 27308 basereg = tmp; 27309 } 27310 emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); 27311 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); 27312 dst = replace_equiv_address (dst, basereg); 27313 } 27314 } 27315 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) 27316 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); 27317 } 27318 27319 /* If we are reading an accumulator register, we have to 27320 deprime it before we can access it. */ 27321 if (TARGET_MMA && REG_P (src) 27322 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) 27323 emit_insn (gen_mma_xxmfacc (src, src)); 27324 27325 for (i = 0; i < nregs; i++) 27326 { 27327 /* Calculate index to next subword. */ 27328 ++j; 27329 if (j == nregs) 27330 j = 0; 27331 27332 /* If compiler already emitted move of first word by 27333 store with update, no need to do anything. */ 27334 if (j == 0 && used_update) 27335 continue; 27336 27337 /* XO/OO are opaque so cannot use subregs. */ 27338 if (mode == OOmode || mode == XOmode ) 27339 { 27340 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); 27341 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); 27342 emit_insn (gen_rtx_SET (dst_i, src_i)); 27343 } 27344 else 27345 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, 27346 j * reg_mode_size), 27347 simplify_gen_subreg (reg_mode, src, mode, 27348 j * reg_mode_size))); 27349 } 27350 27351 /* If we are writing an accumulator register, we have to 27352 prime it after we've written it. */ 27353 if (TARGET_MMA && REG_P (dst) 27354 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) 27355 emit_insn (gen_mma_xxmtacc (dst, dst)); 27356 27357 if (restore_basereg != NULL_RTX) 27358 emit_insn (restore_basereg); 27359 } 27360} 27361 27362/* Return true if the peephole2 can combine a load involving a combination of 27363 an addis instruction and a load with an offset that can be fused together on 27364 a power8. */ 27365 27366bool 27367fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ 27368 rtx addis_value, /* addis value. */ 27369 rtx target, /* target register that is loaded. */ 27370 rtx mem) /* bottom part of the memory addr. */ 27371{ 27372 rtx addr; 27373 rtx base_reg; 27374 27375 /* Validate arguments. 
*/ 27376 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) 27377 return false; 27378 27379 if (!base_reg_operand (target, GET_MODE (target))) 27380 return false; 27381 27382 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) 27383 return false; 27384 27385 /* Allow sign/zero extension. */ 27386 if (GET_CODE (mem) == ZERO_EXTEND 27387 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) 27388 mem = XEXP (mem, 0); 27389 27390 if (!MEM_P (mem)) 27391 return false; 27392 27393 if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) 27394 return false; 27395 27396 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ 27397 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) 27398 return false; 27399 27400 /* Validate that the register used to load the high value is either the 27401 register being loaded, or we can safely replace its use. 27402 27403 This function is only called from the peephole2 pass and we assume that 27404 there are 2 instructions in the peephole (addis and load), so we want to 27405 check if the target register was not used in the memory address and the 27406 register to hold the addis result is dead after the peephole. */ 27407 if (REGNO (addis_reg) != REGNO (target)) 27408 { 27409 if (reg_mentioned_p (target, mem)) 27410 return false; 27411 27412 if (!peep2_reg_dead_p (2, addis_reg)) 27413 return false; 27414 27415 /* If the target register being loaded is the stack pointer, we must 27416 avoid loading any other value into it, even temporarily. */ 27417 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM) 27418 return false; 27419 } 27420 27421 base_reg = XEXP (addr, 0); 27422 return REGNO (addis_reg) == REGNO (base_reg); 27423} 27424 27425/* During the peephole2 pass, adjust and expand the insns for a load fusion 27426 sequence. We adjust the addis register to use the target register. If the 27427 load sign extends, we adjust the code to do the zero extending load, and an 27428 explicit sign extension later since the fusion only covers zero extending 27429 loads. 27430 27431 The operands are: 27432 operands[0] register set with addis (to be replaced with target) 27433 operands[1] value set via addis 27434 operands[2] target register being loaded 27435 operands[3] D-form memory reference using operands[0]. 
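
   As a rough illustration, a TOC reference that originally looked like
       addis rT,r2,sym@toc@ha
       lwz   rD,sym@toc@l(rT)
   is rewritten so that the addis targets rD itself, giving the fusable pair
       addis rD,r2,sym@toc@ha
       lwz   rD,sym@toc@l(rD)
   (emit_fusion_addis and emit_fusion_load below print the actual templates).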
*/ 27436 27437void 27438expand_fusion_gpr_load (rtx *operands) 27439{ 27440 rtx addis_value = operands[1]; 27441 rtx target = operands[2]; 27442 rtx orig_mem = operands[3]; 27443 rtx new_addr, new_mem, orig_addr, offset; 27444 enum rtx_code plus_or_lo_sum; 27445 machine_mode target_mode = GET_MODE (target); 27446 machine_mode extend_mode = target_mode; 27447 machine_mode ptr_mode = Pmode; 27448 enum rtx_code extend = UNKNOWN; 27449 27450 if (GET_CODE (orig_mem) == ZERO_EXTEND 27451 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) 27452 { 27453 extend = GET_CODE (orig_mem); 27454 orig_mem = XEXP (orig_mem, 0); 27455 target_mode = GET_MODE (orig_mem); 27456 } 27457 27458 gcc_assert (MEM_P (orig_mem)); 27459 27460 orig_addr = XEXP (orig_mem, 0); 27461 plus_or_lo_sum = GET_CODE (orig_addr); 27462 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); 27463 27464 offset = XEXP (orig_addr, 1); 27465 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); 27466 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); 27467 27468 if (extend != UNKNOWN) 27469 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); 27470 27471 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), 27472 UNSPEC_FUSION_GPR); 27473 emit_insn (gen_rtx_SET (target, new_mem)); 27474 27475 if (extend == SIGN_EXTEND) 27476 { 27477 int sub_off = ((BYTES_BIG_ENDIAN) 27478 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) 27479 : 0); 27480 rtx sign_reg 27481 = simplify_subreg (target_mode, target, extend_mode, sub_off); 27482 27483 emit_insn (gen_rtx_SET (target, 27484 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); 27485 } 27486 27487 return; 27488} 27489 27490/* Emit the addis instruction that will be part of a fused instruction 27491 sequence. */ 27492 27493void 27494emit_fusion_addis (rtx target, rtx addis_value) 27495{ 27496 rtx fuse_ops[10]; 27497 const char *addis_str = NULL; 27498 27499 /* Emit the addis instruction. */ 27500 fuse_ops[0] = target; 27501 if (satisfies_constraint_L (addis_value)) 27502 { 27503 fuse_ops[1] = addis_value; 27504 addis_str = "lis %0,%v1"; 27505 } 27506 27507 else if (GET_CODE (addis_value) == PLUS) 27508 { 27509 rtx op0 = XEXP (addis_value, 0); 27510 rtx op1 = XEXP (addis_value, 1); 27511 27512 if (REG_P (op0) && CONST_INT_P (op1) 27513 && satisfies_constraint_L (op1)) 27514 { 27515 fuse_ops[1] = op0; 27516 fuse_ops[2] = op1; 27517 addis_str = "addis %0,%1,%v2"; 27518 } 27519 } 27520 27521 else if (GET_CODE (addis_value) == HIGH) 27522 { 27523 rtx value = XEXP (addis_value, 0); 27524 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) 27525 { 27526 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ 27527 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ 27528 if (TARGET_ELF) 27529 addis_str = "addis %0,%2,%1@toc@ha"; 27530 27531 else if (TARGET_XCOFF) 27532 addis_str = "addis %0,%1@u(%2)"; 27533 27534 else 27535 gcc_unreachable (); 27536 } 27537 27538 else if (GET_CODE (value) == PLUS) 27539 { 27540 rtx op0 = XEXP (value, 0); 27541 rtx op1 = XEXP (value, 1); 27542 27543 if (GET_CODE (op0) == UNSPEC 27544 && XINT (op0, 1) == UNSPEC_TOCREL 27545 && CONST_INT_P (op1)) 27546 { 27547 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ 27548 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. 
*/ 27549 fuse_ops[3] = op1; 27550 if (TARGET_ELF) 27551 addis_str = "addis %0,%2,%1+%3@toc@ha"; 27552 27553 else if (TARGET_XCOFF) 27554 addis_str = "addis %0,%1+%3@u(%2)"; 27555 27556 else 27557 gcc_unreachable (); 27558 } 27559 } 27560 27561 else if (satisfies_constraint_L (value)) 27562 { 27563 fuse_ops[1] = value; 27564 addis_str = "lis %0,%v1"; 27565 } 27566 27567 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) 27568 { 27569 fuse_ops[1] = value; 27570 addis_str = "lis %0,%1@ha"; 27571 } 27572 } 27573 27574 if (!addis_str) 27575 fatal_insn ("Could not generate addis value for fusion", addis_value); 27576 27577 output_asm_insn (addis_str, fuse_ops); 27578} 27579 27580/* Emit a D-form load or store instruction that is the second instruction 27581 of a fusion sequence. */ 27582 27583static void 27584emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str) 27585{ 27586 rtx fuse_ops[10]; 27587 char insn_template[80]; 27588 27589 fuse_ops[0] = load_reg; 27590 fuse_ops[1] = addis_reg; 27591 27592 if (CONST_INT_P (offset) && satisfies_constraint_I (offset)) 27593 { 27594 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str); 27595 fuse_ops[2] = offset; 27596 output_asm_insn (insn_template, fuse_ops); 27597 } 27598 27599 else if (GET_CODE (offset) == UNSPEC 27600 && XINT (offset, 1) == UNSPEC_TOCREL) 27601 { 27602 if (TARGET_ELF) 27603 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str); 27604 27605 else if (TARGET_XCOFF) 27606 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); 27607 27608 else 27609 gcc_unreachable (); 27610 27611 fuse_ops[2] = XVECEXP (offset, 0, 0); 27612 output_asm_insn (insn_template, fuse_ops); 27613 } 27614 27615 else if (GET_CODE (offset) == PLUS 27616 && GET_CODE (XEXP (offset, 0)) == UNSPEC 27617 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL 27618 && CONST_INT_P (XEXP (offset, 1))) 27619 { 27620 rtx tocrel_unspec = XEXP (offset, 0); 27621 if (TARGET_ELF) 27622 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str); 27623 27624 else if (TARGET_XCOFF) 27625 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str); 27626 27627 else 27628 gcc_unreachable (); 27629 27630 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0); 27631 fuse_ops[3] = XEXP (offset, 1); 27632 output_asm_insn (insn_template, fuse_ops); 27633 } 27634 27635 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset)) 27636 { 27637 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); 27638 27639 fuse_ops[2] = offset; 27640 output_asm_insn (insn_template, fuse_ops); 27641 } 27642 27643 else 27644 fatal_insn ("Unable to generate load/store offset for fusion", offset); 27645 27646 return; 27647} 27648 27649/* Given an address, convert it into the addis and load offset parts. Addresses 27650 created during the peephole2 process look like: 27651 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL)) 27652 (unspec [(...)] UNSPEC_TOCREL)) */ 27653 27654static void 27655fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo) 27656{ 27657 rtx hi, lo; 27658 27659 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) 27660 { 27661 hi = XEXP (addr, 0); 27662 lo = XEXP (addr, 1); 27663 } 27664 else 27665 gcc_unreachable (); 27666 27667 *p_hi = hi; 27668 *p_lo = lo; 27669} 27670 27671/* Return a string to fuse an addis instruction with a gpr load to the same 27672 register that we loaded up the addis instruction. 
The address that is used 27673 is the logical address that was formed during peephole2: 27674 (lo_sum (high) (low-part)) 27675 27676 The code is complicated, so we call output_asm_insn directly, and just 27677 return "". */ 27678 27679const char * 27680emit_fusion_gpr_load (rtx target, rtx mem) 27681{ 27682 rtx addis_value; 27683 rtx addr; 27684 rtx load_offset; 27685 const char *load_str = NULL; 27686 machine_mode mode; 27687 27688 if (GET_CODE (mem) == ZERO_EXTEND) 27689 mem = XEXP (mem, 0); 27690 27691 gcc_assert (REG_P (target) && MEM_P (mem)); 27692 27693 addr = XEXP (mem, 0); 27694 fusion_split_address (addr, &addis_value, &load_offset); 27695 27696 /* Now emit the load instruction to the same register. */ 27697 mode = GET_MODE (mem); 27698 switch (mode) 27699 { 27700 case E_QImode: 27701 load_str = "lbz"; 27702 break; 27703 27704 case E_HImode: 27705 load_str = "lhz"; 27706 break; 27707 27708 case E_SImode: 27709 case E_SFmode: 27710 load_str = "lwz"; 27711 break; 27712 27713 case E_DImode: 27714 case E_DFmode: 27715 gcc_assert (TARGET_POWERPC64); 27716 load_str = "ld"; 27717 break; 27718 27719 default: 27720 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem)); 27721 } 27722 27723 /* Emit the addis instruction. */ 27724 emit_fusion_addis (target, addis_value); 27725 27726 /* Emit the D-form load instruction. */ 27727 emit_fusion_load (target, target, load_offset, load_str); 27728 27729 return ""; 27730} 27731 27732/* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype 27733 ignores it then. */ 27734static GTY(()) tree atomic_hold_decl; 27735static GTY(()) tree atomic_clear_decl; 27736static GTY(()) tree atomic_update_decl; 27737 27738/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ 27739static void 27740rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 27741{ 27742 if (!TARGET_HARD_FLOAT) 27743 { 27744#ifdef RS6000_GLIBC_ATOMIC_FENV 27745 if (atomic_hold_decl == NULL_TREE) 27746 { 27747 atomic_hold_decl 27748 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 27749 get_identifier ("__atomic_feholdexcept"), 27750 build_function_type_list (void_type_node, 27751 double_ptr_type_node, 27752 NULL_TREE)); 27753 TREE_PUBLIC (atomic_hold_decl) = 1; 27754 DECL_EXTERNAL (atomic_hold_decl) = 1; 27755 } 27756 27757 if (atomic_clear_decl == NULL_TREE) 27758 { 27759 atomic_clear_decl 27760 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 27761 get_identifier ("__atomic_feclearexcept"), 27762 build_function_type_list (void_type_node, 27763 NULL_TREE)); 27764 TREE_PUBLIC (atomic_clear_decl) = 1; 27765 DECL_EXTERNAL (atomic_clear_decl) = 1; 27766 } 27767 27768 tree const_double = build_qualified_type (double_type_node, 27769 TYPE_QUAL_CONST); 27770 tree const_double_ptr = build_pointer_type (const_double); 27771 if (atomic_update_decl == NULL_TREE) 27772 { 27773 atomic_update_decl 27774 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 27775 get_identifier ("__atomic_feupdateenv"), 27776 build_function_type_list (void_type_node, 27777 const_double_ptr, 27778 NULL_TREE)); 27779 TREE_PUBLIC (atomic_update_decl) = 1; 27780 DECL_EXTERNAL (atomic_update_decl) = 1; 27781 } 27782 27783 tree fenv_var = create_tmp_var_raw (double_type_node); 27784 TREE_ADDRESSABLE (fenv_var) = 1; 27785 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, 27786 build4 (TARGET_EXPR, double_type_node, fenv_var, 27787 void_node, NULL_TREE, NULL_TREE)); 27788 27789 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr); 27790 *clear = build_call_expr (atomic_clear_decl, 0); 
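      /* Note: the update hook takes the address of the environment saved by
	 the hold hook, converted to const double * to match the declaration
	 built above.  */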
27791   *update = build_call_expr (atomic_update_decl, 1,
27792 				 fold_convert (const_double_ptr, fenv_addr));
27793#endif
27794       return;
27795     }
27796
27797   tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
27798   tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
27799   tree call_mffs = build_call_expr (mffs, 0);
27800
27801   /* Generates the equivalent of feholdexcept (&fenv_var)
27802
27803      fenv_var = __builtin_mffs ();
27804      double fenv_hold;
27805      *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
27806      __builtin_mtfsf (0xff, fenv_hold); */
27807
27808   /* Mask to clear everything except for the rounding modes and non-IEEE
27809      arithmetic flag. */
27810   const unsigned HOST_WIDE_INT hold_exception_mask
27811     = HOST_WIDE_INT_C (0xffffffff00000007);
27812
27813   tree fenv_var = create_tmp_var_raw (double_type_node);
27814
27815   tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
27816 			   NULL_TREE, NULL_TREE);
27817
27818   tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
27819   tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
27820 			      build_int_cst (uint64_type_node,
27821 					     hold_exception_mask));
27822
27823   tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
27824 				 fenv_llu_and);
27825
27826   tree hold_mtfsf = build_call_expr (mtfsf, 2,
27827 				     build_int_cst (unsigned_type_node, 0xff),
27828 				     fenv_hold_mtfsf);
27829
27830   *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
27831
27832   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
27833
27834      double fenv_clear = __builtin_mffs ();
27835      *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
27836      __builtin_mtfsf (0xff, fenv_clear); */
27837
27838   /* Mask that keeps only the upper 32 bits of the mffs image, clearing all
27839      of the exception, enable, rounding mode and non-IEEE bits in the FPSCR.
*/ 27840 const unsigned HOST_WIDE_INT clear_exception_mask 27841 = HOST_WIDE_INT_C (0xffffffff00000000); 27842 27843 tree fenv_clear = create_tmp_var_raw (double_type_node); 27844 27845 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear, 27846 call_mffs, NULL_TREE, NULL_TREE); 27847 27848 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear); 27849 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, 27850 fenv_clean_llu, 27851 build_int_cst (uint64_type_node, 27852 clear_exception_mask)); 27853 27854 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, 27855 fenv_clear_llu_and); 27856 27857 tree clear_mtfsf = build_call_expr (mtfsf, 2, 27858 build_int_cst (unsigned_type_node, 0xff), 27859 fenv_clear_mtfsf); 27860 27861 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); 27862 27863 /* Generates the equivalent of feupdateenv (&fenv_var) 27864 27865 double old_fenv = __builtin_mffs (); 27866 double fenv_update; 27867 *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) | 27868 (*(uint64_t*)fenv_var 0x1ff80fff); 27869 __builtin_mtfsf (0xff, fenv_update); */ 27870 27871 const unsigned HOST_WIDE_INT update_exception_mask 27872 = HOST_WIDE_INT_C (0xffffffff1fffff00); 27873 const unsigned HOST_WIDE_INT new_exception_mask 27874 = HOST_WIDE_INT_C (0x1ff80fff); 27875 27876 tree old_fenv = create_tmp_var_raw (double_type_node); 27877 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv, 27878 call_mffs, NULL_TREE, NULL_TREE); 27879 27880 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv); 27881 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu, 27882 build_int_cst (uint64_type_node, 27883 update_exception_mask)); 27884 27885 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, 27886 build_int_cst (uint64_type_node, 27887 new_exception_mask)); 27888 27889 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, 27890 old_llu_and, new_llu_and); 27891 27892 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, 27893 new_llu_mask); 27894 27895 tree update_mtfsf = build_call_expr (mtfsf, 2, 27896 build_int_cst (unsigned_type_node, 0xff), 27897 fenv_update_mtfsf); 27898 27899 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf); 27900} 27901 27902void 27903rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2) 27904{ 27905 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 27906 27907 rtx_tmp0 = gen_reg_rtx (V2DFmode); 27908 rtx_tmp1 = gen_reg_rtx (V2DFmode); 27909 27910 /* The destination of the vmrgew instruction layout is: 27911 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0]. 27912 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the 27913 vmrgew instruction will be correct. 
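   As a concrete sketch, with src1 = { a0, a1 } and src2 = { b0, b1 } the
   permutes below produce rtx_tmp0 = { a0, b0 } and rtx_tmp1 = { a1, b1 },
   so after the xvcvdpsp conversions the merge is intended to leave
   dst = { (float) a0, (float) a1, (float) b0, (float) b1 }.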
*/ 27914 if (BYTES_BIG_ENDIAN) 27915 { 27916 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2, 27917 GEN_INT (0))); 27918 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2, 27919 GEN_INT (3))); 27920 } 27921 else 27922 { 27923 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3))); 27924 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0))); 27925 } 27926 27927 rtx_tmp2 = gen_reg_rtx (V4SFmode); 27928 rtx_tmp3 = gen_reg_rtx (V4SFmode); 27929 27930 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0)); 27931 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1)); 27932 27933 if (BYTES_BIG_ENDIAN) 27934 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); 27935 else 27936 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); 27937} 27938 27939void 27940rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2) 27941{ 27942 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 27943 27944 rtx_tmp0 = gen_reg_rtx (V2DImode); 27945 rtx_tmp1 = gen_reg_rtx (V2DImode); 27946 27947 /* The destination of the vmrgew instruction layout is: 27948 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0]. 27949 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the 27950 vmrgew instruction will be correct. */ 27951 if (BYTES_BIG_ENDIAN) 27952 { 27953 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0))); 27954 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3))); 27955 } 27956 else 27957 { 27958 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3))); 27959 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0))); 27960 } 27961 27962 rtx_tmp2 = gen_reg_rtx (V4SFmode); 27963 rtx_tmp3 = gen_reg_rtx (V4SFmode); 27964 27965 if (signed_convert) 27966 { 27967 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0)); 27968 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1)); 27969 } 27970 else 27971 { 27972 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0)); 27973 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1)); 27974 } 27975 27976 if (BYTES_BIG_ENDIAN) 27977 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); 27978 else 27979 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); 27980} 27981 27982void 27983rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1, 27984 rtx src2) 27985{ 27986 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 27987 27988 rtx_tmp0 = gen_reg_rtx (V2DFmode); 27989 rtx_tmp1 = gen_reg_rtx (V2DFmode); 27990 27991 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0))); 27992 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3))); 27993 27994 rtx_tmp2 = gen_reg_rtx (V4SImode); 27995 rtx_tmp3 = gen_reg_rtx (V4SImode); 27996 27997 if (signed_convert) 27998 { 27999 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0)); 28000 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1)); 28001 } 28002 else 28003 { 28004 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0)); 28005 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1)); 28006 } 28007 28008 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3)); 28009} 28010 28011/* Implement the TARGET_OPTAB_SUPPORTED_P hook. 
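   Currently this only gates the reciprocal square root expansion: the
   rsqrt optab is reported as supported only when optimizing for speed and
   the mode has automatic rsqrt estimates enabled in rs6000_recip_bits
   (see the -mrecip handling); every other optab is left enabled.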
*/ 28012 28013static bool 28014rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode, 28015 optimization_type opt_type) 28016{ 28017 switch (op) 28018 { 28019 case rsqrt_optab: 28020 return (opt_type == OPTIMIZE_FOR_SPEED 28021 && RS6000_RECIP_AUTO_RSQRTE_P (mode1)); 28022 28023 default: 28024 return true; 28025 } 28026} 28027 28028/* Implement TARGET_CONSTANT_ALIGNMENT. */ 28029 28030static HOST_WIDE_INT 28031rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align) 28032{ 28033 if (TREE_CODE (exp) == STRING_CST 28034 && (STRICT_ALIGNMENT || !optimize_size)) 28035 return MAX (align, BITS_PER_WORD); 28036 return align; 28037} 28038 28039/* Implement TARGET_STARTING_FRAME_OFFSET. */ 28040 28041static HOST_WIDE_INT 28042rs6000_starting_frame_offset (void) 28043{ 28044 if (FRAME_GROWS_DOWNWARD) 28045 return 0; 28046 return RS6000_STARTING_FRAME_OFFSET; 28047} 28048 28049/* Internal function to return the built-in function id for the complex 28050 multiply operation for a given mode. */ 28051 28052static inline built_in_function 28053complex_multiply_builtin_code (machine_mode mode) 28054{ 28055 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT)); 28056 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT; 28057 return (built_in_function) func; 28058} 28059 28060/* Internal function to return the built-in function id for the complex divide 28061 operation for a given mode. */ 28062 28063static inline built_in_function 28064complex_divide_builtin_code (machine_mode mode) 28065{ 28066 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT)); 28067 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT; 28068 return (built_in_function) func; 28069} 28070 28071/* On 64-bit Linux and Freebsd systems, possibly switch the long double library 28072 function names from <foo>l to <foo>f128 if the default long double type is 28073 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h 28074 include file switches the names on systems that support long double as IEEE 28075 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly. 28076 In the future, glibc will export names like __ieee128_sinf128 and we can 28077 switch to using those instead of using sinf128, which pollutes the user's 28078 namespace. 28079 28080 This will switch the names for Fortran math functions as well (which doesn't 28081 use math.h). However, Fortran needs other changes to the compiler and 28082 library before you can switch the real*16 type at compile time. 28083 28084 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We 28085 only do this transformation if the __float128 type is enabled. This 28086 prevents us from doing the transformation on older 32-bit ports that might 28087 have enabled using IEEE 128-bit floating point as the default long double 28088 type. 28089 28090 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the 28091 function names used for complex multiply and divide to the appropriate 28092 names. */ 28093 28094static tree 28095rs6000_mangle_decl_assembler_name (tree decl, tree id) 28096{ 28097 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or 28098 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. 
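   For example, when long double is IEEE 128-bit, a _Complex long double
   multiplication that the middle end lowers to the TCmode libcall is
   renamed here so that it resolves to __mulkc3 rather than __multc3
   (an illustrative note; the mapping below is authoritative).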
*/ 28099 if (TARGET_FLOAT128_TYPE 28100 && TREE_CODE (decl) == FUNCTION_DECL 28101 && DECL_IS_UNDECLARED_BUILTIN (decl) 28102 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) 28103 { 28104 built_in_function id = DECL_FUNCTION_CODE (decl); 28105 const char *newname = NULL; 28106 28107 if (id == complex_multiply_builtin_code (KCmode)) 28108 newname = "__mulkc3"; 28109 28110 else if (id == complex_multiply_builtin_code (ICmode)) 28111 newname = "__multc3"; 28112 28113 else if (id == complex_multiply_builtin_code (TCmode)) 28114 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3"; 28115 28116 else if (id == complex_divide_builtin_code (KCmode)) 28117 newname = "__divkc3"; 28118 28119 else if (id == complex_divide_builtin_code (ICmode)) 28120 newname = "__divtc3"; 28121 28122 else if (id == complex_divide_builtin_code (TCmode)) 28123 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3"; 28124 28125 if (newname) 28126 { 28127 if (TARGET_DEBUG_BUILTIN) 28128 fprintf (stderr, "Map complex mul/div => %s\n", newname); 28129 28130 return get_identifier (newname); 28131 } 28132 } 28133 28134 /* Map long double built-in functions if long double is IEEE 128-bit. */ 28135 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 28136 && TREE_CODE (decl) == FUNCTION_DECL 28137 && DECL_IS_UNDECLARED_BUILTIN (decl) 28138 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL) 28139 { 28140 size_t len = IDENTIFIER_LENGTH (id); 28141 const char *name = IDENTIFIER_POINTER (id); 28142 char *newname = NULL; 28143 28144 /* See if it is one of the built-in functions with an unusual name. */ 28145 switch (DECL_FUNCTION_CODE (decl)) 28146 { 28147 case BUILT_IN_DREML: 28148 newname = xstrdup ("__remainderieee128"); 28149 break; 28150 28151 case BUILT_IN_GAMMAL: 28152 newname = xstrdup ("__lgammaieee128"); 28153 break; 28154 28155 case BUILT_IN_GAMMAL_R: 28156 case BUILT_IN_LGAMMAL_R: 28157 newname = xstrdup ("__lgammaieee128_r"); 28158 break; 28159 28160 case BUILT_IN_NEXTTOWARD: 28161 newname = xstrdup ("__nexttoward_to_ieee128"); 28162 break; 28163 28164 case BUILT_IN_NEXTTOWARDF: 28165 newname = xstrdup ("__nexttowardf_to_ieee128"); 28166 break; 28167 28168 case BUILT_IN_NEXTTOWARDL: 28169 newname = xstrdup ("__nexttowardieee128"); 28170 break; 28171 28172 case BUILT_IN_POW10L: 28173 newname = xstrdup ("__exp10ieee128"); 28174 break; 28175 28176 case BUILT_IN_SCALBL: 28177 newname = xstrdup ("__scalbieee128"); 28178 break; 28179 28180 case BUILT_IN_SIGNIFICANDL: 28181 newname = xstrdup ("__significandieee128"); 28182 break; 28183 28184 case BUILT_IN_SINCOSL: 28185 newname = xstrdup ("__sincosieee128"); 28186 break; 28187 28188 default: 28189 break; 28190 } 28191 28192 /* Update the __builtin_*printf and __builtin_*scanf functions. 
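     As a rough illustration (assuming the usual glibc IEEE-128 symbol
     names), __builtin_vsnprintf is redirected to __vsnprintfieee128 and
     __builtin_sscanf to __isoc99_sscanfieee128 when long double is IEEE
     128-bit.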
*/ 28193 if (!newname) 28194 { 28195 size_t printf_len = strlen ("printf"); 28196 size_t scanf_len = strlen ("scanf"); 28197 size_t printf_chk_len = strlen ("printf_chk"); 28198 28199 if (len >= printf_len 28200 && strcmp (name + len - printf_len, "printf") == 0) 28201 newname = xasprintf ("__%sieee128", name); 28202 28203 else if (len >= scanf_len 28204 && strcmp (name + len - scanf_len, "scanf") == 0) 28205 newname = xasprintf ("__isoc99_%sieee128", name); 28206 28207 else if (len >= printf_chk_len 28208 && strcmp (name + len - printf_chk_len, "printf_chk") == 0) 28209 newname = xasprintf ("%sieee128", name); 28210 28211 else if (name[len - 1] == 'l') 28212 { 28213 bool uses_ieee128_p = false; 28214 tree type = TREE_TYPE (decl); 28215 machine_mode ret_mode = TYPE_MODE (type); 28216 28217 /* See if the function returns a IEEE 128-bit floating point type or 28218 complex type. */ 28219 if (ret_mode == TFmode || ret_mode == TCmode) 28220 uses_ieee128_p = true; 28221 else 28222 { 28223 function_args_iterator args_iter; 28224 tree arg; 28225 28226 /* See if the function passes a IEEE 128-bit floating point type 28227 or complex type. */ 28228 FOREACH_FUNCTION_ARGS (type, arg, args_iter) 28229 { 28230 machine_mode arg_mode = TYPE_MODE (arg); 28231 if (arg_mode == TFmode || arg_mode == TCmode) 28232 { 28233 uses_ieee128_p = true; 28234 break; 28235 } 28236 } 28237 } 28238 28239 /* If we passed or returned an IEEE 128-bit floating point type, 28240 change the name. Use __<name>ieee128, instead of <name>l. */ 28241 if (uses_ieee128_p) 28242 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name); 28243 } 28244 } 28245 28246 if (newname) 28247 { 28248 if (TARGET_DEBUG_BUILTIN) 28249 fprintf (stderr, "Map %s => %s\n", name, newname); 28250 28251 id = get_identifier (newname); 28252 free (newname); 28253 } 28254 } 28255 28256 return id; 28257} 28258 28259/* Predict whether the given loop in gimple will be transformed in the RTL 28260 doloop_optimize pass. */ 28261 28262static bool 28263rs6000_predict_doloop_p (struct loop *loop) 28264{ 28265 gcc_assert (loop); 28266 28267 /* On rs6000, targetm.can_use_doloop_p is actually 28268 can_use_doloop_if_innermost. Just ensure the loop is innermost. */ 28269 if (loop->inner != NULL) 28270 { 28271 if (dump_file && (dump_flags & TDF_DETAILS)) 28272 fprintf (dump_file, "Predict doloop failure due to" 28273 " loop nesting.\n"); 28274 return false; 28275 } 28276 28277 return true; 28278} 28279 28280/* Implement TARGET_PREFERRED_DOLOOP_MODE. */ 28281 28282static machine_mode 28283rs6000_preferred_doloop_mode (machine_mode) 28284{ 28285 return word_mode; 28286} 28287 28288/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */ 28289 28290static bool 28291rs6000_cannot_substitute_mem_equiv_p (rtx mem) 28292{ 28293 gcc_assert (MEM_P (mem)); 28294 28295 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND: 28296 type addresses, so don't allow MEMs with those address types to be 28297 substituted as an equivalent expression. See PR93974 for details. */ 28298 if (GET_CODE (XEXP (mem, 0)) == AND) 28299 return true; 28300 28301 return false; 28302} 28303 28304/* Implement TARGET_INVALID_CONVERSION. */ 28305 28306static const char * 28307rs6000_invalid_conversion (const_tree fromtype, const_tree totype) 28308{ 28309 /* Make sure we're working with the canonical types. 
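     (As an illustrative example, converting a __vector_pair value to
     vector double is rejected here: OOmode does not match V2DFmode, so
     the %<__vector_pair%> message below is returned.)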
*/ 28310 if (TYPE_CANONICAL (fromtype) != NULL_TREE) 28311 fromtype = TYPE_CANONICAL (fromtype); 28312 if (TYPE_CANONICAL (totype) != NULL_TREE) 28313 totype = TYPE_CANONICAL (totype); 28314 28315 machine_mode frommode = TYPE_MODE (fromtype); 28316 machine_mode tomode = TYPE_MODE (totype); 28317 28318 if (frommode != tomode) 28319 { 28320 /* Do not allow conversions to/from XOmode and OOmode types. */ 28321 if (frommode == XOmode) 28322 return N_("invalid conversion from type %<__vector_quad%>"); 28323 if (tomode == XOmode) 28324 return N_("invalid conversion to type %<__vector_quad%>"); 28325 if (frommode == OOmode) 28326 return N_("invalid conversion from type %<__vector_pair%>"); 28327 if (tomode == OOmode) 28328 return N_("invalid conversion to type %<__vector_pair%>"); 28329 } 28330 28331 /* Conversion allowed. */ 28332 return NULL; 28333} 28334 28335/* Convert a SFmode constant to the integer bit pattern. */ 28336 28337long 28338rs6000_const_f32_to_i32 (rtx operand) 28339{ 28340 long value; 28341 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand); 28342 28343 gcc_assert (GET_MODE (operand) == SFmode); 28344 REAL_VALUE_TO_TARGET_SINGLE (*rv, value); 28345 return value; 28346} 28347 28348void 28349rs6000_emit_xxspltidp_v2df (rtx dst, long value) 28350{ 28351 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0)) 28352 inform (input_location, 28353 "the result for the xxspltidp instruction " 28354 "is undefined for subnormal input values"); 28355 emit_insn( gen_xxspltidp_v2df_inst (dst, GEN_INT (value))); 28356} 28357 28358/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */ 28359 28360static bool 28361rs6000_gen_pic_addr_diff_vec (void) 28362{ 28363 return rs6000_relative_jumptables; 28364} 28365 28366void 28367rs6000_output_addr_vec_elt (FILE *file, int value) 28368{ 28369 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"; 28370 char buf[100]; 28371 28372 fprintf (file, "%s", directive); 28373 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value); 28374 assemble_name (file, buf); 28375 fprintf (file, "\n"); 28376} 28377 28378 28379/* Copy an integer constant to the vector constant structure. */ 28380 28381static void 28382constant_int_to_128bit_vector (rtx op, 28383 machine_mode mode, 28384 size_t byte_num, 28385 vec_const_128bit_type *info) 28386{ 28387 unsigned HOST_WIDE_INT uvalue = UINTVAL (op); 28388 unsigned bitsize = GET_MODE_BITSIZE (mode); 28389 28390 for (int shift = bitsize - 8; shift >= 0; shift -= 8) 28391 info->bytes[byte_num++] = (uvalue >> shift) & 0xff; 28392} 28393 28394/* Copy a floating point constant to the vector constant structure. */ 28395 28396static void 28397constant_fp_to_128bit_vector (rtx op, 28398 machine_mode mode, 28399 size_t byte_num, 28400 vec_const_128bit_type *info) 28401{ 28402 unsigned bitsize = GET_MODE_BITSIZE (mode); 28403 unsigned num_words = bitsize / 32; 28404 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op); 28405 long real_words[VECTOR_128BIT_WORDS]; 28406 28407 /* Make sure we don't overflow the real_words array and that it is 28408 filled completely. */ 28409 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); 28410 28411 real_to_target (real_words, rtype, mode); 28412 28413 /* Iterate over each 32-bit word in the floating point constant. The 28414 real_to_target function puts out words in target endian fashion. We need 28415 to arrange the order so that the bytes are written in big endian order. 
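     As a worked example, the DFmode constant 1.0 has the image
     0x3ff0000000000000, so the eight bytes stored below are
     0x3f 0xf0 0x00 0x00 0x00 0x00 0x00 0x00 regardless of the target
     endianness.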
*/ 28416 for (unsigned num = 0; num < num_words; num++) 28417 { 28418 unsigned endian_num = (BYTES_BIG_ENDIAN 28419 ? num 28420 : num_words - 1 - num); 28421 28422 unsigned uvalue = real_words[endian_num]; 28423 for (int shift = 32 - 8; shift >= 0; shift -= 8) 28424 info->bytes[byte_num++] = (uvalue >> shift) & 0xff; 28425 } 28426 28427 /* Mark that this constant involves floating point. */ 28428 info->fp_constant_p = true; 28429} 28430 28431/* Convert a vector constant OP with mode MODE to a vector 128-bit constant 28432 structure INFO. 28433 28434 Break out the constant out to bytes, half words, words, and double words. 28435 Return true if we have successfully converted the constant. 28436 28437 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of 28438 constants. Integer and floating point scalar constants are splatted to fill 28439 out the vector. */ 28440 28441bool 28442vec_const_128bit_to_bytes (rtx op, 28443 machine_mode mode, 28444 vec_const_128bit_type *info) 28445{ 28446 /* Initialize the constant structure. */ 28447 memset ((void *)info, 0, sizeof (vec_const_128bit_type)); 28448 28449 /* Assume CONST_INTs are DImode. */ 28450 if (mode == VOIDmode) 28451 mode = CONST_INT_P (op) ? DImode : GET_MODE (op); 28452 28453 if (mode == VOIDmode) 28454 return false; 28455 28456 unsigned size = GET_MODE_SIZE (mode); 28457 bool splat_p = false; 28458 28459 if (size > VECTOR_128BIT_BYTES) 28460 return false; 28461 28462 /* Set up the bits. */ 28463 switch (GET_CODE (op)) 28464 { 28465 /* Integer constants, default to double word. */ 28466 case CONST_INT: 28467 { 28468 constant_int_to_128bit_vector (op, mode, 0, info); 28469 splat_p = true; 28470 break; 28471 } 28472 28473 /* Floating point constants. */ 28474 case CONST_DOUBLE: 28475 { 28476 /* Fail if the floating point constant is the wrong mode. */ 28477 if (GET_MODE (op) != mode) 28478 return false; 28479 28480 /* SFmode stored as scalars are stored in DFmode format. */ 28481 if (mode == SFmode) 28482 { 28483 mode = DFmode; 28484 size = GET_MODE_SIZE (DFmode); 28485 } 28486 28487 constant_fp_to_128bit_vector (op, mode, 0, info); 28488 splat_p = true; 28489 break; 28490 } 28491 28492 /* Vector constants, iterate over each element. On little endian 28493 systems, we have to reverse the element numbers. */ 28494 case CONST_VECTOR: 28495 { 28496 /* Fail if the vector constant is the wrong mode or size. */ 28497 if (GET_MODE (op) != mode 28498 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES) 28499 return false; 28500 28501 machine_mode ele_mode = GET_MODE_INNER (mode); 28502 size_t ele_size = GET_MODE_SIZE (ele_mode); 28503 size_t nunits = GET_MODE_NUNITS (mode); 28504 28505 for (size_t num = 0; num < nunits; num++) 28506 { 28507 rtx ele = CONST_VECTOR_ELT (op, num); 28508 size_t byte_num = (BYTES_BIG_ENDIAN 28509 ? num 28510 : nunits - 1 - num) * ele_size; 28511 28512 if (CONST_INT_P (ele)) 28513 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info); 28514 else if (CONST_DOUBLE_P (ele)) 28515 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info); 28516 else 28517 return false; 28518 } 28519 28520 break; 28521 } 28522 28523 /* Treat VEC_DUPLICATE of a constant just like a vector constant. 28524 Since we are duplicating the element, we don't have to worry about 28525 endian issues. */ 28526 case VEC_DUPLICATE: 28527 { 28528 /* Fail if the vector duplicate is the wrong mode or size. 
*/ 28529 if (GET_MODE (op) != mode 28530 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES) 28531 return false; 28532 28533 machine_mode ele_mode = GET_MODE_INNER (mode); 28534 size_t ele_size = GET_MODE_SIZE (ele_mode); 28535 rtx ele = XEXP (op, 0); 28536 size_t nunits = GET_MODE_NUNITS (mode); 28537 28538 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele)) 28539 return false; 28540 28541 for (size_t num = 0; num < nunits; num++) 28542 { 28543 size_t byte_num = num * ele_size; 28544 28545 if (CONST_INT_P (ele)) 28546 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info); 28547 else 28548 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info); 28549 } 28550 28551 break; 28552 } 28553 28554 /* Any thing else, just return failure. */ 28555 default: 28556 return false; 28557 } 28558 28559 /* Splat the constant to fill 128 bits if desired. */ 28560 if (splat_p && size < VECTOR_128BIT_BYTES) 28561 { 28562 if ((VECTOR_128BIT_BYTES % size) != 0) 28563 return false; 28564 28565 for (size_t offset = size; 28566 offset < VECTOR_128BIT_BYTES; 28567 offset += size) 28568 memcpy ((void *) &info->bytes[offset], 28569 (void *) &info->bytes[0], 28570 size); 28571 } 28572 28573 /* Remember original size. */ 28574 info->original_size = size; 28575 28576 /* Determine if the bytes are all the same. */ 28577 unsigned char first_byte = info->bytes[0]; 28578 info->all_bytes_same = true; 28579 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++) 28580 if (first_byte != info->bytes[i]) 28581 { 28582 info->all_bytes_same = false; 28583 break; 28584 } 28585 28586 /* Pack half words together & determine if all of the half words are the 28587 same. */ 28588 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++) 28589 info->half_words[i] = ((info->bytes[i * 2] << 8) 28590 | info->bytes[(i * 2) + 1]); 28591 28592 unsigned short first_hword = info->half_words[0]; 28593 info->all_half_words_same = true; 28594 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++) 28595 if (first_hword != info->half_words[i]) 28596 { 28597 info->all_half_words_same = false; 28598 break; 28599 } 28600 28601 /* Pack words together & determine if all of the words are the same. */ 28602 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++) 28603 info->words[i] = ((info->bytes[i * 4] << 24) 28604 | (info->bytes[(i * 4) + 1] << 16) 28605 | (info->bytes[(i * 4) + 2] << 8) 28606 | info->bytes[(i * 4) + 3]); 28607 28608 info->all_words_same 28609 = (info->words[0] == info->words[1] 28610 && info->words[0] == info->words[1] 28611 && info->words[0] == info->words[2] 28612 && info->words[0] == info->words[3]); 28613 28614 /* Pack double words together & determine if all of the double words are the 28615 same. */ 28616 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++) 28617 { 28618 unsigned HOST_WIDE_INT d_word = 0; 28619 for (size_t j = 0; j < 8; j++) 28620 d_word = (d_word << 8) | info->bytes[(i * 8) + j]; 28621 28622 info->double_words[i] = d_word; 28623 } 28624 28625 info->all_double_words_same 28626 = (info->double_words[0] == info->double_words[1]); 28627 28628 return true; 28629} 28630 28631/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero 28632 if the LXVKQ instruction cannot be used. Otherwise return the immediate 28633 value to be used with the LXVKQ instruction. */ 28634 28635unsigned 28636constant_generates_lxvkq (vec_const_128bit_type *vsx_const) 28637{ 28638 /* Is the instruction supported with power10 code generation, IEEE 128-bit 28639 floating point hardware and VSX registers are available. 
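     For example, the IEEE 128-bit constant 1.0 has word 0 equal to
     0x3FFF0000 and the remaining words zero, so it maps to immediate 1
     in the table below and can be materialized with a single LXVKQ.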
*/ 28640 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10 28641 || !TARGET_VSX) 28642 return 0; 28643 28644 /* All of the constants that are generated by LXVKQ have the bottom 3 words 28645 that are 0. */ 28646 if (vsx_const->words[1] != 0 28647 || vsx_const->words[2] != 0 28648 || vsx_const->words[3] != 0) 28649 return 0; 28650 28651 /* See if we have a match for the first word. */ 28652 switch (vsx_const->words[0]) 28653 { 28654 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */ 28655 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */ 28656 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */ 28657 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */ 28658 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */ 28659 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */ 28660 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */ 28661 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */ 28662 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */ 28663 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */ 28664 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */ 28665 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */ 28666 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */ 28667 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */ 28668 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */ 28669 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */ 28670 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */ 28671 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */ 28672 28673 /* anything else cannot be loaded. */ 28674 default: 28675 break; 28676 } 28677 28678 return 0; 28679} 28680 28681/* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if 28682 the XXSPLTIW instruction cannot be used. Otherwise return the immediate 28683 value to be used with the XXSPLTIW instruction. */ 28684 28685unsigned 28686constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) 28687{ 28688 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) 28689 return 0; 28690 28691 if (!vsx_const->all_words_same) 28692 return 0; 28693 28694 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */ 28695 if (vsx_const->all_bytes_same) 28696 return 0; 28697 28698 /* See if we can use VSPLTISH or VSPLTISW. */ 28699 if (vsx_const->all_half_words_same) 28700 { 28701 unsigned short h_word = vsx_const->half_words[0]; 28702 short sign_h_word = ((h_word & 0xffff) ^ 0x8000) - 0x8000; 28703 if (EASY_VECTOR_15 (sign_h_word)) 28704 return 0; 28705 } 28706 28707 unsigned int word = vsx_const->words[0]; 28708 int sign_word = ((word & 0xffffffff) ^ 0x80000000) - 0x80000000; 28709 if (EASY_VECTOR_15 (sign_word)) 28710 return 0; 28711 28712 return vsx_const->words[0]; 28713} 28714 28715/* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if 28716 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate 28717 value to be used with the XXSPLTIDP instruction. */ 28718 28719unsigned 28720constant_generates_xxspltidp (vec_const_128bit_type *vsx_const) 28721{ 28722 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) 28723 return 0; 28724 28725 /* Reject if the two 64-bit segments are not the same. */ 28726 if (!vsx_const->all_double_words_same) 28727 return 0; 28728 28729 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP. 28730 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). 
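     Conversely, a V2DF constant such as { 1.5, 1.5 } passes these checks:
     1.5 converts exactly to SFmode, so the single precision image
     0x3fc00000 is returned below and the constant can be loaded with
     XXSPLTIDP (a worked example for illustration).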
*/ 28731 if (vsx_const->all_bytes_same 28732 || vsx_const->all_half_words_same 28733 || vsx_const->all_words_same) 28734 return 0; 28735 28736 unsigned HOST_WIDE_INT value = vsx_const->double_words[0]; 28737 28738 /* Avoid values that look like DFmode NaN's, except for the normal NaN bit 28739 pattern and the signalling NaN bit pattern. Recognize infinity and 28740 negative infinity. */ 28741 28742 /* Bit representation of DFmode normal quiet NaN. */ 28743#define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000) 28744 28745 /* Bit representation of DFmode normal signaling NaN. */ 28746#define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000) 28747 28748 /* Bit representation of DFmode positive infinity. */ 28749#define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000) 28750 28751 /* Bit representation of DFmode negative infinity. */ 28752#define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000) 28753 28754 if (value != RS6000_CONST_DF_NAN 28755 && value != RS6000_CONST_DF_NANS 28756 && value != RS6000_CONST_DF_INF 28757 && value != RS6000_CONST_DF_NEG_INF) 28758 { 28759 /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for 28760 the exponent, and 52 bits for the mantissa (not counting the hidden 28761 bit used for normal numbers). NaN values have the exponent set to all 28762 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */ 28763 28764 int df_exponent = (value >> 52) & 0x7ff; 28765 unsigned HOST_WIDE_INT 28766 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U); 28767 28768 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */ 28769 return 0; 28770 28771 /* Avoid values that are DFmode subnormal values. Subnormal numbers have 28772 the exponent all 0 bits, and the mantissa non-zero. If the value is 28773 subnormal, then the hidden bit in the mantissa is not set. */ 28774 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */ 28775 return 0; 28776 } 28777 28778 /* Change the representation to DFmode constant. */ 28779 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] }; 28780 28781 /* real_from_target takes the target words in target order. */ 28782 if (!BYTES_BIG_ENDIAN) 28783 std::swap (df_words[0], df_words[1]); 28784 28785 REAL_VALUE_TYPE rv_type; 28786 real_from_target (&rv_type, df_words, DFmode); 28787 28788 const REAL_VALUE_TYPE *rv = &rv_type; 28789 28790 /* Validate that the number can be stored as a SFmode value. */ 28791 if (!exact_real_truncate (SFmode, rv)) 28792 return 0; 28793 28794 /* Validate that the number is not a SFmode subnormal value (exponent is 0, 28795 mantissa field is non-zero) which is undefined for the XXSPLTIDP 28796 instruction. */ 28797 long sf_value; 28798 real_to_target (&sf_value, rv, SFmode); 28799 28800 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent, 28801 and 23 bits for the mantissa. Subnormal numbers have the exponent all 28802 0 bits, and the mantissa non-zero. */ 28803 long sf_exponent = (sf_value >> 23) & 0xFF; 28804 long sf_mantissa = sf_value & 0x7FFFFF; 28805 28806 if (sf_exponent == 0 && sf_mantissa != 0) 28807 return 0; 28808 28809 /* Return the immediate to be used. */ 28810 return sf_value; 28811} 28812 28813/* Now we have only two opaque types, they are __vector_quad and 28814 __vector_pair built-in types. They are target specific and 28815 only available when MMA is supported. 
With MMA supported, it
28816    simply returns false; otherwise it checks whether the given gimple
28817    STMT is an assignment, asm or call statement that uses either of
28818    these two opaque types unexpectedly, and if so it raises an error
28819    message and returns true, otherwise it returns false.  */
28820
28821 bool
28822 rs6000_opaque_type_invalid_use_p (gimple *stmt)
28823 {
28824   if (TARGET_MMA)
28825     return false;
28826
28827   /* If the given TYPE is one of the MMA opaque types, emit the
28828      corresponding error message and return true, otherwise return false.  */
28829   auto check_and_error_invalid_use = [](tree type)
28830   {
28831     tree mv = TYPE_MAIN_VARIANT (type);
28832     if (mv == vector_quad_type_node)
28833       {
28834 	error ("type %<__vector_quad%> requires the %qs option", "-mmma");
28835 	return true;
28836       }
28837     else if (mv == vector_pair_type_node)
28838       {
28839 	error ("type %<__vector_pair%> requires the %qs option", "-mmma");
28840 	return true;
28841       }
28842     return false;
28843   };
28844
28845   if (stmt)
28846     {
28847       /* The usage of MMA opaque types is very limited for now;
28848 	 checking gassign, gasm and gcall statements is enough so far.  */
28849       if (gassign *ga = dyn_cast<gassign *> (stmt))
28850 	{
28851 	  tree lhs = gimple_assign_lhs (ga);
28852 	  tree type = TREE_TYPE (lhs);
28853 	  if (check_and_error_invalid_use (type))
28854 	    return true;
28855 	}
28856       else if (gasm *gs = dyn_cast<gasm *> (stmt))
28857 	{
28858 	  unsigned ninputs = gimple_asm_ninputs (gs);
28859 	  for (unsigned i = 0; i < ninputs; i++)
28860 	    {
28861 	      tree op = gimple_asm_input_op (gs, i);
28862 	      tree val = TREE_VALUE (op);
28863 	      tree type = TREE_TYPE (val);
28864 	      if (check_and_error_invalid_use (type))
28865 		return true;
28866 	    }
28867 	  unsigned noutputs = gimple_asm_noutputs (gs);
28868 	  for (unsigned i = 0; i < noutputs; i++)
28869 	    {
28870 	      tree op = gimple_asm_output_op (gs, i);
28871 	      tree val = TREE_VALUE (op);
28872 	      tree type = TREE_TYPE (val);
28873 	      if (check_and_error_invalid_use (type))
28874 		return true;
28875 	    }
28876 	}
28877       else if (gcall *gc = dyn_cast<gcall *> (stmt))
28878 	{
28879 	  unsigned nargs = gimple_call_num_args (gc);
28880 	  for (unsigned i = 0; i < nargs; i++)
28881 	    {
28882 	      tree arg = gimple_call_arg (gc, i);
28883 	      tree type = TREE_TYPE (arg);
28884 	      if (check_and_error_invalid_use (type))
28885 		return true;
28886 	    }
28887 	}
28888     }
28889
28890   return false;
28891 }
28892
28893 struct gcc_target targetm = TARGET_INITIALIZER;
28894
28895 #include "gt-rs6000.h"
28896