/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "rs6000-internal.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) && !defined(POWERPC_NETBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

/* Don't enable PC-relative addressing if the target does not support it.  */
#ifndef PCREL_SUPPORTED_BY_OS
#define PCREL_SUPPORTED_BY_OS 0
#endif

/* Support targetm.vectorize.builtin_mask_for_load.  */
tree altivec_builtin_mask_for_load;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
bool rs6000_passes_ieee128 = false;
#endif

/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
bool rs6000_passes_float = false;
bool rs6000_passes_long_double = false;
/* Flag whether vector values have been passed/returned.  */
bool rs6000_passes_vector = false;
/* Flag whether small (<= 8 byte) structures have been returned.  */
bool rs6000_returns_struct = false;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;

section *toc_section = 0;

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	(RECIP_ALL) },
  { "none",	(RECIP_NONE) },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.0 (power9).  */
  CLONE_ISA_3_1,		/* ISA 3.1 (power10).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask.  */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.0 (power9).  */
  { OPTION_MASK_POWER10,	"arch_3_1" },	/* ISA 3.1 (power10).  */
};


/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

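/* Illustrative note (not part of the backend): the HWCAP names in
   rs6000_clone_map above are the strings a caller tests with
   __builtin_cpu_supports when selecting among function clones.  A rough
   user-level sketch of what the clone dispatch corresponds to, assuming a
   libc new enough to export __parse_hwcap_and_convert_at_platform, could
   look like the following (do_power9, do_power8 and do_default are
   hypothetical functions used only for illustration):

     int
     pick_impl (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))	// ISA 3.0 (power9)
	 return do_power9 ();
       else if (__builtin_cpu_supports ("arch_2_07"))	// ISA 2.07 (power8)
	 return do_power8 ();
       return do_default ();
     }
*/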
/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p = false;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}


/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

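/* Note for readers (not original to this file): COSTS_N_INSNS (N) expands to
   N * 4 in rtl.h, so the cost tables in this file are expressed in
   quarter-instruction units relative to a single integer add.  For example,
   rs64a_cost above rates an SImode multiply at COSTS_N_INSNS (20), i.e.
   roughly twenty adds.  */
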
/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
#if TARGET_MACHO
static tree get_prev_label (tree);
#endif
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							    machine_mode,
							    rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							    enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
static enum non_prefixed_form reg_to_non_prefixed (rtx reg, machine_mode mode);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;



/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* VRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "0", "1", "2", "3", "4", "5", "6", "7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  /* GPRs */
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs */
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* VRs */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  /* lr ctr ca ap */
  "lr", "ctr", "ca", "ap",
  /* cr0..cr7 */
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* vrsave vscr sfp */
  "vrsave", "vscr", "sfp",
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

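/* Illustrative note (not part of the backend): the attributes registered in
   rs6000_attribute_table above are the ones a user can spell in source code.
   A hedged user-level sketch of two of them, with hypothetical declarations
   used only for illustration, might look like:

     // Force calls to this function through a pointer (long-call sequence).
     extern int remote_helper (int) __attribute__ ((longcall));

     // Lay this structure out with Microsoft bitfield rules.
     struct msbits { int a : 3; char b : 5; } __attribute__ ((ms_struct));

   The "altivec" attribute, by contrast, is normally reached indirectly
   through the vector keyword support rather than written by hand.  */
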
/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN rs6000_fold_builtin
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  rs6000_ira_change_pseudo_allocno_class
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_INSN_COST
#define TARGET_INSN_COST rs6000_insn_cost

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_TRANSLATE_MODE_ATTRIBUTE
#define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE rs6000_floatn_mode

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
  rs6000_builtin_md_vectorized_function

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_PREDICT_DOLOOP_P
#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p

#undef TARGET_HAVE_COUNT_REG_DECR_P
#define TARGET_HAVE_COUNT_REG_DECR_P true

/* 1000000000 is infinite cost in IVOPTs.  */
*/ 1668#undef TARGET_DOLOOP_COST_FOR_GENERIC 1669#define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000 1670 1671#undef TARGET_DOLOOP_COST_FOR_ADDRESS 1672#define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000 1673 1674#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 1675#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv 1676 1677#undef TARGET_LIBGCC_CMP_RETURN_MODE 1678#define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode 1679#undef TARGET_LIBGCC_SHIFT_COUNT_MODE 1680#define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode 1681#undef TARGET_UNWIND_WORD_MODE 1682#define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode 1683 1684#undef TARGET_OFFLOAD_OPTIONS 1685#define TARGET_OFFLOAD_OPTIONS rs6000_offload_options 1686 1687#undef TARGET_C_MODE_FOR_SUFFIX 1688#define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix 1689 1690#undef TARGET_INVALID_BINARY_OP 1691#define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op 1692 1693#undef TARGET_OPTAB_SUPPORTED_P 1694#define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p 1695 1696#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1697#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 1698 1699#undef TARGET_COMPARE_VERSION_PRIORITY 1700#define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority 1701 1702#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY 1703#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ 1704 rs6000_generate_version_dispatcher_body 1705 1706#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER 1707#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ 1708 rs6000_get_function_versions_dispatcher 1709 1710#undef TARGET_OPTION_FUNCTION_VERSIONS 1711#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions 1712 1713#undef TARGET_HARD_REGNO_NREGS 1714#define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook 1715#undef TARGET_HARD_REGNO_MODE_OK 1716#define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok 1717 1718#undef TARGET_MODES_TIEABLE_P 1719#define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p 1720 1721#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED 1722#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ 1723 rs6000_hard_regno_call_part_clobbered 1724 1725#undef TARGET_SLOW_UNALIGNED_ACCESS 1726#define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access 1727 1728#undef TARGET_CAN_CHANGE_MODE_CLASS 1729#define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class 1730 1731#undef TARGET_CONSTANT_ALIGNMENT 1732#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment 1733 1734#undef TARGET_STARTING_FRAME_OFFSET 1735#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset 1736 1737#if TARGET_ELF && RS6000_WEAK 1738#undef TARGET_ASM_GLOBALIZE_DECL_NAME 1739#define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name 1740#endif 1741 1742#undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P 1743#define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true 1744 1745#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME 1746#define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name 1747 1748#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P 1749#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \ 1750 rs6000_cannot_substitute_mem_equiv_p 1751 1752#undef TARGET_INVALID_CONVERSION 1753#define TARGET_INVALID_CONVERSION rs6000_invalid_conversion 1754 1755 1756/* Processor table. */ 1757struct rs6000_ptt 1758{ 1759 const char *const name; /* Canonical processor name. */ 1760 const enum processor_type processor; /* Processor type enum value. */ 1761 const HOST_WIDE_INT target_enable; /* Target flags to enable. 
*/ 1762}; 1763 1764static struct rs6000_ptt const processor_target_table[] = 1765{ 1766#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS }, 1767#include "rs6000-cpus.def" 1768#undef RS6000_CPU 1769}; 1770 1771/* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the 1772 name is invalid. */ 1773 1774static int 1775rs6000_cpu_name_lookup (const char *name) 1776{ 1777 size_t i; 1778 1779 if (name != NULL) 1780 { 1781 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) 1782 if (! strcmp (name, processor_target_table[i].name)) 1783 return (int)i; 1784 } 1785 1786 return -1; 1787} 1788 1789 1790/* Return number of consecutive hard regs needed starting at reg REGNO 1791 to hold something of mode MODE. 1792 This is ordinarily the length in words of a value of mode MODE 1793 but can be less for certain modes in special long registers. 1794 1795 POWER and PowerPC GPRs hold 32 bits worth; 1796 PowerPC64 GPRs and FPRs point register holds 64 bits worth. */ 1797 1798static int 1799rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) 1800{ 1801 unsigned HOST_WIDE_INT reg_size; 1802 1803 /* 128-bit floating point usually takes 2 registers, unless it is IEEE 1804 128-bit floating point that can go in vector registers, which has VSX 1805 memory addressing. */ 1806 if (FP_REGNO_P (regno)) 1807 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode) 1808 ? UNITS_PER_VSX_WORD 1809 : UNITS_PER_FP_WORD); 1810 1811 else if (ALTIVEC_REGNO_P (regno)) 1812 reg_size = UNITS_PER_ALTIVEC_WORD; 1813 1814 else 1815 reg_size = UNITS_PER_WORD; 1816 1817 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; 1818} 1819 1820/* Value is 1 if hard register REGNO can hold a value of machine-mode 1821 MODE. */ 1822static int 1823rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) 1824{ 1825 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; 1826 1827 if (COMPLEX_MODE_P (mode)) 1828 mode = GET_MODE_INNER (mode); 1829 1830 /* Vector pair modes need even/odd VSX register pairs. Only allow vector 1831 registers. We need to allow OImode to have the same registers as POImode, 1832 even though we do not enable the move pattern for OImode. */ 1833 if (mode == POImode || mode == OImode) 1834 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0); 1835 1836 /* MMA accumulator modes need FPR registers divisible by 4. We need to allow 1837 XImode to have the same registers as PXImode, even though we do not enable 1838 the move pattern for XImode. */ 1839 if (mode == PXImode || mode == XImode) 1840 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0); 1841 1842 /* PTImode can only go in GPRs. Quad word memory operations require even/odd 1843 register combinations, and use PTImode where we need to deal with quad 1844 word memory operations. Don't allow quad words in the argument or frame 1845 pointer registers, just registers 0..31. */ 1846 if (mode == PTImode) 1847 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) 1848 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) 1849 && ((regno & 1) == 0)); 1850 1851 /* VSX registers that overlap the FPR registers are larger than for non-VSX 1852 implementations. Don't allow an item to be split between a FP register 1853 and an Altivec register. Allow TImode in all VSX registers if the user 1854 asked for it. 
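   Concretely, the checks below require the first and last hard register
   of the value to land in the same bank: a value that starts in a
   traditional FPR must also end in an FPR, and a value that starts in an
   Altivec register must end in an Altivec register.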
*/ 1855 if (TARGET_VSX && VSX_REGNO_P (regno) 1856 && (VECTOR_MEM_VSX_P (mode) 1857 || VECTOR_ALIGNMENT_P (mode) 1858 || reg_addr[mode].scalar_in_vmx_p 1859 || mode == TImode 1860 || (TARGET_VADDUQM && mode == V1TImode))) 1861 { 1862 if (FP_REGNO_P (regno)) 1863 return FP_REGNO_P (last_regno); 1864 1865 if (ALTIVEC_REGNO_P (regno)) 1866 { 1867 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p) 1868 return 0; 1869 1870 return ALTIVEC_REGNO_P (last_regno); 1871 } 1872 } 1873 1874 /* The GPRs can hold any mode, but values bigger than one register 1875 cannot go past R31. */ 1876 if (INT_REGNO_P (regno)) 1877 return INT_REGNO_P (last_regno); 1878 1879 /* The float registers (except for VSX vector modes) can only hold floating 1880 modes and DImode. */ 1881 if (FP_REGNO_P (regno)) 1882 { 1883 if (VECTOR_ALIGNMENT_P (mode)) 1884 return false; 1885 1886 if (SCALAR_FLOAT_MODE_P (mode) 1887 && (mode != TDmode || (regno % 2) == 0) 1888 && FP_REGNO_P (last_regno)) 1889 return 1; 1890 1891 if (GET_MODE_CLASS (mode) == MODE_INT) 1892 { 1893 if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) 1894 return 1; 1895 1896 if (TARGET_P8_VECTOR && (mode == SImode)) 1897 return 1; 1898 1899 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) 1900 return 1; 1901 } 1902 1903 return 0; 1904 } 1905 1906 /* The CR register can only hold CC modes. */ 1907 if (CR_REGNO_P (regno)) 1908 return GET_MODE_CLASS (mode) == MODE_CC; 1909 1910 if (CA_REGNO_P (regno)) 1911 return mode == Pmode || mode == SImode; 1912 1913 /* AltiVec only in AldyVec registers. */ 1914 if (ALTIVEC_REGNO_P (regno)) 1915 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) 1916 || mode == V1TImode); 1917 1918 /* We cannot put non-VSX TImode or PTImode anywhere except general register 1919 and it must be able to fit within the register set. */ 1920 1921 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; 1922} 1923 1924/* Implement TARGET_HARD_REGNO_NREGS. */ 1925 1926static unsigned int 1927rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode) 1928{ 1929 return rs6000_hard_regno_nregs[mode][regno]; 1930} 1931 1932/* Implement TARGET_HARD_REGNO_MODE_OK. */ 1933 1934static bool 1935rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 1936{ 1937 return rs6000_hard_regno_mode_ok_p[mode][regno]; 1938} 1939 1940/* Implement TARGET_MODES_TIEABLE_P. 1941 1942 PTImode cannot tie with other modes because PTImode is restricted to even 1943 GPR registers, and TImode can go in any GPR as well as VSX registers (PR 1944 57744). 1945 1946 Similarly, don't allow POImode (vector pair, restricted to even VSX 1947 registers) or PXImode (vector quad, restricted to FPR registers divisible 1948 by 4) to tie with other modes. 1949 1950 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE 1951 128-bit floating point on VSX systems ties with other vectors. 
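   In other words, apart from the PTImode/POImode/PXImode special cases,
   two modes tie only when they fall in the same group: both Altivec/VSX
   vector modes, both scalar float modes, both CC modes, or neither of
   those.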
*/ 1952 1953static bool 1954rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) 1955{ 1956 if (mode1 == PTImode || mode1 == POImode || mode1 == PXImode 1957 || mode2 == PTImode || mode2 == POImode || mode2 == PXImode) 1958 return mode1 == mode2; 1959 1960 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) 1961 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2); 1962 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2)) 1963 return false; 1964 1965 if (SCALAR_FLOAT_MODE_P (mode1)) 1966 return SCALAR_FLOAT_MODE_P (mode2); 1967 if (SCALAR_FLOAT_MODE_P (mode2)) 1968 return false; 1969 1970 if (GET_MODE_CLASS (mode1) == MODE_CC) 1971 return GET_MODE_CLASS (mode2) == MODE_CC; 1972 if (GET_MODE_CLASS (mode2) == MODE_CC) 1973 return false; 1974 1975 return true; 1976} 1977 1978/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */ 1979 1980static bool 1981rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno, 1982 machine_mode mode) 1983{ 1984 if (TARGET_32BIT 1985 && TARGET_POWERPC64 1986 && GET_MODE_SIZE (mode) > 4 1987 && INT_REGNO_P (regno)) 1988 return true; 1989 1990 if (TARGET_VSX 1991 && FP_REGNO_P (regno) 1992 && GET_MODE_SIZE (mode) > 8 1993 && !FLOAT128_2REG_P (mode)) 1994 return true; 1995 1996 return false; 1997} 1998 1999/* Print interesting facts about registers. */ 2000static void 2001rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) 2002{ 2003 int r, m; 2004 2005 for (r = first_regno; r <= last_regno; ++r) 2006 { 2007 const char *comma = ""; 2008 int len; 2009 2010 if (first_regno == last_regno) 2011 fprintf (stderr, "%s:\t", reg_name); 2012 else 2013 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno); 2014 2015 len = 8; 2016 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2017 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r]) 2018 { 2019 if (len > 70) 2020 { 2021 fprintf (stderr, ",\n\t"); 2022 len = 8; 2023 comma = ""; 2024 } 2025 2026 if (rs6000_hard_regno_nregs[m][r] > 1) 2027 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m), 2028 rs6000_hard_regno_nregs[m][r]); 2029 else 2030 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m)); 2031 2032 comma = ", "; 2033 } 2034 2035 if (call_used_or_fixed_reg_p (r)) 2036 { 2037 if (len > 70) 2038 { 2039 fprintf (stderr, ",\n\t"); 2040 len = 8; 2041 comma = ""; 2042 } 2043 2044 len += fprintf (stderr, "%s%s", comma, "call-used"); 2045 comma = ", "; 2046 } 2047 2048 if (fixed_regs[r]) 2049 { 2050 if (len > 70) 2051 { 2052 fprintf (stderr, ",\n\t"); 2053 len = 8; 2054 comma = ""; 2055 } 2056 2057 len += fprintf (stderr, "%s%s", comma, "fixed"); 2058 comma = ", "; 2059 } 2060 2061 if (len > 70) 2062 { 2063 fprintf (stderr, ",\n\t"); 2064 comma = ""; 2065 } 2066 2067 len += fprintf (stderr, "%sreg-class = %s", comma, 2068 reg_class_names[(int)rs6000_regno_regclass[r]]); 2069 comma = ", "; 2070 2071 if (len > 70) 2072 { 2073 fprintf (stderr, ",\n\t"); 2074 comma = ""; 2075 } 2076 2077 fprintf (stderr, "%sregno = %d\n", comma, r); 2078 } 2079} 2080 2081static const char * 2082rs6000_debug_vector_unit (enum rs6000_vector v) 2083{ 2084 const char *ret; 2085 2086 switch (v) 2087 { 2088 case VECTOR_NONE: ret = "none"; break; 2089 case VECTOR_ALTIVEC: ret = "altivec"; break; 2090 case VECTOR_VSX: ret = "vsx"; break; 2091 case VECTOR_P8_VECTOR: ret = "p8_vector"; break; 2092 default: ret = "unknown"; break; 2093 } 2094 2095 return ret; 2096} 2097 2098/* Inner function printing just the address mask for a particular reload 2099 register class. 
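   The result is a short flag string with one position per bit: 'v' for
   valid, 'm' for needing multiple registers, 'i' for indexed (REG+REG)
   addressing, 'O' for quad offset or 'o' for ordinary offset addressing,
   '+' for pre-increment/decrement, a second '+' for pre-modify, and '&'
   for the AND of -16 form.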
*/ 2100DEBUG_FUNCTION char * 2101rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) 2102{ 2103 static char ret[8]; 2104 char *p = ret; 2105 2106 if ((mask & RELOAD_REG_VALID) != 0) 2107 *p++ = 'v'; 2108 else if (keep_spaces) 2109 *p++ = ' '; 2110 2111 if ((mask & RELOAD_REG_MULTIPLE) != 0) 2112 *p++ = 'm'; 2113 else if (keep_spaces) 2114 *p++ = ' '; 2115 2116 if ((mask & RELOAD_REG_INDEXED) != 0) 2117 *p++ = 'i'; 2118 else if (keep_spaces) 2119 *p++ = ' '; 2120 2121 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) 2122 *p++ = 'O'; 2123 else if ((mask & RELOAD_REG_OFFSET) != 0) 2124 *p++ = 'o'; 2125 else if (keep_spaces) 2126 *p++ = ' '; 2127 2128 if ((mask & RELOAD_REG_PRE_INCDEC) != 0) 2129 *p++ = '+'; 2130 else if (keep_spaces) 2131 *p++ = ' '; 2132 2133 if ((mask & RELOAD_REG_PRE_MODIFY) != 0) 2134 *p++ = '+'; 2135 else if (keep_spaces) 2136 *p++ = ' '; 2137 2138 if ((mask & RELOAD_REG_AND_M16) != 0) 2139 *p++ = '&'; 2140 else if (keep_spaces) 2141 *p++ = ' '; 2142 2143 *p = '\0'; 2144 2145 return ret; 2146} 2147 2148/* Print the address masks in a human readble fashion. */ 2149DEBUG_FUNCTION void 2150rs6000_debug_print_mode (ssize_t m) 2151{ 2152 ssize_t rc; 2153 int spaces = 0; 2154 2155 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); 2156 for (rc = 0; rc < N_RELOAD_REG; rc++) 2157 fprintf (stderr, " %s: %s", reload_reg_map[rc].name, 2158 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); 2159 2160 if ((reg_addr[m].reload_store != CODE_FOR_nothing) 2161 || (reg_addr[m].reload_load != CODE_FOR_nothing)) 2162 { 2163 fprintf (stderr, "%*s Reload=%c%c", spaces, "", 2164 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', 2165 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); 2166 spaces = 0; 2167 } 2168 else 2169 spaces += strlen (" Reload=sl"); 2170 2171 if (reg_addr[m].scalar_in_vmx_p) 2172 { 2173 fprintf (stderr, "%*s Upper=y", spaces, ""); 2174 spaces = 0; 2175 } 2176 else 2177 spaces += strlen (" Upper=y"); 2178 2179 if (rs6000_vector_unit[m] != VECTOR_NONE 2180 || rs6000_vector_mem[m] != VECTOR_NONE) 2181 { 2182 fprintf (stderr, "%*s vector: arith=%-10s mem=%s", 2183 spaces, "", 2184 rs6000_debug_vector_unit (rs6000_vector_unit[m]), 2185 rs6000_debug_vector_unit (rs6000_vector_mem[m])); 2186 } 2187 2188 fputs ("\n", stderr); 2189} 2190 2191#define DEBUG_FMT_ID "%-32s= " 2192#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n" 2193#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: " 2194#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n" 2195 2196/* Print various interesting information with -mdebug=reg. */ 2197static void 2198rs6000_debug_reg_global (void) 2199{ 2200 static const char *const tf[2] = { "false", "true" }; 2201 const char *nl = (const char *)0; 2202 int m; 2203 size_t m1, m2, v; 2204 char costly_num[20]; 2205 char nop_num[20]; 2206 char flags_buffer[40]; 2207 const char *costly_str; 2208 const char *nop_str; 2209 const char *trace_str; 2210 const char *abi_str; 2211 const char *cmodel_str; 2212 struct cl_target_option cl_opts; 2213 2214 /* Modes we want tieable information on. 
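     For each mode in this list, the -mdebug=reg dump below prints the
     other listed modes it can tie with.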
*/ 2215 static const machine_mode print_tieable_modes[] = { 2216 QImode, 2217 HImode, 2218 SImode, 2219 DImode, 2220 TImode, 2221 PTImode, 2222 SFmode, 2223 DFmode, 2224 TFmode, 2225 IFmode, 2226 KFmode, 2227 SDmode, 2228 DDmode, 2229 TDmode, 2230 V2SImode, 2231 V2SFmode, 2232 V16QImode, 2233 V8HImode, 2234 V4SImode, 2235 V2DImode, 2236 V1TImode, 2237 V32QImode, 2238 V16HImode, 2239 V8SImode, 2240 V4DImode, 2241 V2TImode, 2242 V4SFmode, 2243 V2DFmode, 2244 V8SFmode, 2245 V4DFmode, 2246 OImode, 2247 XImode, 2248 POImode, 2249 PXImode, 2250 CCmode, 2251 CCUNSmode, 2252 CCEQmode, 2253 CCFPmode, 2254 }; 2255 2256 /* Virtual regs we are interested in. */ 2257 const static struct { 2258 int regno; /* register number. */ 2259 const char *name; /* register name. */ 2260 } virtual_regs[] = { 2261 { STACK_POINTER_REGNUM, "stack pointer:" }, 2262 { TOC_REGNUM, "toc: " }, 2263 { STATIC_CHAIN_REGNUM, "static chain: " }, 2264 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " }, 2265 { HARD_FRAME_POINTER_REGNUM, "hard frame: " }, 2266 { ARG_POINTER_REGNUM, "arg pointer: " }, 2267 { FRAME_POINTER_REGNUM, "frame pointer:" }, 2268 { FIRST_PSEUDO_REGISTER, "first pseudo: " }, 2269 { FIRST_VIRTUAL_REGISTER, "first virtual:" }, 2270 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" }, 2271 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " }, 2272 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" }, 2273 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" }, 2274 { VIRTUAL_CFA_REGNUM, "cfa (frame): " }, 2275 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" }, 2276 { LAST_VIRTUAL_REGISTER, "last virtual: " }, 2277 }; 2278 2279 fputs ("\nHard register information:\n", stderr); 2280 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr"); 2281 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp"); 2282 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, 2283 LAST_ALTIVEC_REGNO, 2284 "vs"); 2285 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); 2286 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); 2287 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); 2288 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca"); 2289 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); 2290 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); 2291 2292 fputs ("\nVirtual/stack/frame registers:\n", stderr); 2293 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++) 2294 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno); 2295 2296 fprintf (stderr, 2297 "\n" 2298 "d reg_class = %s\n" 2299 "f reg_class = %s\n" 2300 "v reg_class = %s\n" 2301 "wa reg_class = %s\n" 2302 "we reg_class = %s\n" 2303 "wr reg_class = %s\n" 2304 "wx reg_class = %s\n" 2305 "wA reg_class = %s\n" 2306 "\n", 2307 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], 2308 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], 2309 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], 2310 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], 2311 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]], 2312 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]], 2313 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]], 2314 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]); 2315 2316 nl = "\n"; 2317 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2318 rs6000_debug_print_mode (m); 2319 2320 fputs ("\n", stderr); 2321 2322 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++) 2323 { 2324 machine_mode mode1 = print_tieable_modes[m1]; 2325 bool first_time = true; 2326 2327 nl = (const char 
*)0; 2328 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++) 2329 { 2330 machine_mode mode2 = print_tieable_modes[m2]; 2331 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2)) 2332 { 2333 if (first_time) 2334 { 2335 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1)); 2336 nl = "\n"; 2337 first_time = false; 2338 } 2339 2340 fprintf (stderr, " %s", GET_MODE_NAME (mode2)); 2341 } 2342 } 2343 2344 if (!first_time) 2345 fputs ("\n", stderr); 2346 } 2347 2348 if (nl) 2349 fputs (nl, stderr); 2350 2351 if (rs6000_recip_control) 2352 { 2353 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control); 2354 2355 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2356 if (rs6000_recip_bits[m]) 2357 { 2358 fprintf (stderr, 2359 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n", 2360 GET_MODE_NAME (m), 2361 (RS6000_RECIP_AUTO_RE_P (m) 2362 ? "auto" 2363 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")), 2364 (RS6000_RECIP_AUTO_RSQRTE_P (m) 2365 ? "auto" 2366 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none"))); 2367 } 2368 2369 fputs ("\n", stderr); 2370 } 2371 2372 if (rs6000_cpu_index >= 0) 2373 { 2374 const char *name = processor_target_table[rs6000_cpu_index].name; 2375 HOST_WIDE_INT flags 2376 = processor_target_table[rs6000_cpu_index].target_enable; 2377 2378 sprintf (flags_buffer, "-mcpu=%s flags", name); 2379 rs6000_print_isa_options (stderr, 0, flags_buffer, flags); 2380 } 2381 else 2382 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>"); 2383 2384 if (rs6000_tune_index >= 0) 2385 { 2386 const char *name = processor_target_table[rs6000_tune_index].name; 2387 HOST_WIDE_INT flags 2388 = processor_target_table[rs6000_tune_index].target_enable; 2389 2390 sprintf (flags_buffer, "-mtune=%s flags", name); 2391 rs6000_print_isa_options (stderr, 0, flags_buffer, flags); 2392 } 2393 else 2394 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>"); 2395 2396 cl_target_option_save (&cl_opts, &global_options); 2397 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags", 2398 rs6000_isa_flags); 2399 2400 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit", 2401 rs6000_isa_flags_explicit); 2402 2403 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask", 2404 rs6000_builtin_mask); 2405 2406 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); 2407 2408 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default", 2409 OPTION_TARGET_CPU_DEFAULT ? 
OPTION_TARGET_CPU_DEFAULT : "<none>"); 2410 2411 switch (rs6000_sched_costly_dep) 2412 { 2413 case max_dep_latency: 2414 costly_str = "max_dep_latency"; 2415 break; 2416 2417 case no_dep_costly: 2418 costly_str = "no_dep_costly"; 2419 break; 2420 2421 case all_deps_costly: 2422 costly_str = "all_deps_costly"; 2423 break; 2424 2425 case true_store_to_load_dep_costly: 2426 costly_str = "true_store_to_load_dep_costly"; 2427 break; 2428 2429 case store_to_load_dep_costly: 2430 costly_str = "store_to_load_dep_costly"; 2431 break; 2432 2433 default: 2434 costly_str = costly_num; 2435 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep); 2436 break; 2437 } 2438 2439 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str); 2440 2441 switch (rs6000_sched_insert_nops) 2442 { 2443 case sched_finish_regroup_exact: 2444 nop_str = "sched_finish_regroup_exact"; 2445 break; 2446 2447 case sched_finish_pad_groups: 2448 nop_str = "sched_finish_pad_groups"; 2449 break; 2450 2451 case sched_finish_none: 2452 nop_str = "sched_finish_none"; 2453 break; 2454 2455 default: 2456 nop_str = nop_num; 2457 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops); 2458 break; 2459 } 2460 2461 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str); 2462 2463 switch (rs6000_sdata) 2464 { 2465 default: 2466 case SDATA_NONE: 2467 break; 2468 2469 case SDATA_DATA: 2470 fprintf (stderr, DEBUG_FMT_S, "sdata", "data"); 2471 break; 2472 2473 case SDATA_SYSV: 2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv"); 2475 break; 2476 2477 case SDATA_EABI: 2478 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi"); 2479 break; 2480 2481 } 2482 2483 switch (rs6000_traceback) 2484 { 2485 case traceback_default: trace_str = "default"; break; 2486 case traceback_none: trace_str = "none"; break; 2487 case traceback_part: trace_str = "part"; break; 2488 case traceback_full: trace_str = "full"; break; 2489 default: trace_str = "unknown"; break; 2490 } 2491 2492 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str); 2493 2494 switch (rs6000_current_cmodel) 2495 { 2496 case CMODEL_SMALL: cmodel_str = "small"; break; 2497 case CMODEL_MEDIUM: cmodel_str = "medium"; break; 2498 case CMODEL_LARGE: cmodel_str = "large"; break; 2499 default: cmodel_str = "unknown"; break; 2500 } 2501 2502 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str); 2503 2504 switch (rs6000_current_abi) 2505 { 2506 case ABI_NONE: abi_str = "none"; break; 2507 case ABI_AIX: abi_str = "aix"; break; 2508 case ABI_ELFv2: abi_str = "ELFv2"; break; 2509 case ABI_V4: abi_str = "V4"; break; 2510 case ABI_DARWIN: abi_str = "darwin"; break; 2511 default: abi_str = "unknown"; break; 2512 } 2513 2514 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str); 2515 2516 if (rs6000_altivec_abi) 2517 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true"); 2518 2519 if (rs6000_aix_extabi) 2520 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true"); 2521 2522 if (rs6000_darwin64_abi) 2523 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true"); 2524 2525 fprintf (stderr, DEBUG_FMT_S, "soft_float", 2526 (TARGET_SOFT_FLOAT ? "true" : "false")); 2527 2528 if (TARGET_LINK_STACK) 2529 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true"); 2530 2531 if (TARGET_P8_FUSION) 2532 { 2533 char options[80]; 2534 2535 strcpy (options, "power8"); 2536 if (TARGET_P8_FUSION_SIGN) 2537 strcat (options, ", sign"); 2538 2539 fprintf (stderr, DEBUG_FMT_S, "fusion", options); 2540 } 2541 2542 fprintf (stderr, DEBUG_FMT_S, "plt-format", 2543 TARGET_SECURE_PLT ? 
"secure" : "bss"); 2544 fprintf (stderr, DEBUG_FMT_S, "struct-return", 2545 aix_struct_return ? "aix" : "sysv"); 2546 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]); 2547 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]); 2548 fprintf (stderr, DEBUG_FMT_S, "align_branch", 2549 tf[!!rs6000_align_branch_targets]); 2550 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size); 2551 fprintf (stderr, DEBUG_FMT_D, "long_double_size", 2552 rs6000_long_double_type_size); 2553 if (rs6000_long_double_type_size > 64) 2554 { 2555 fprintf (stderr, DEBUG_FMT_S, "long double type", 2556 TARGET_IEEEQUAD ? "IEEE" : "IBM"); 2557 fprintf (stderr, DEBUG_FMT_S, "default long double type", 2558 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM"); 2559 } 2560 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority", 2561 (int)rs6000_sched_restricted_insns_priority); 2562 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins", 2563 (int)END_BUILTINS); 2564 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins", 2565 (int)RS6000_BUILTIN_COUNT); 2566 2567 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX", 2568 (int)TARGET_FLOAT128_ENABLE_TYPE); 2569 2570 if (TARGET_VSX) 2571 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element", 2572 (int)VECTOR_ELEMENT_SCALAR_64BIT); 2573 2574 if (TARGET_DIRECT_MOVE_128) 2575 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", 2576 (int)VECTOR_ELEMENT_MFVSRLD_64BIT); 2577} 2578 2579 2580/* Update the addr mask bits in reg_addr to help secondary reload and go if 2581 legitimate address support to figure out the appropriate addressing to 2582 use. */ 2583 2584static void 2585rs6000_setup_reg_addr_masks (void) 2586{ 2587 ssize_t rc, reg, m, nregs; 2588 addr_mask_type any_addr_mask, addr_mask; 2589 2590 for (m = 0; m < NUM_MACHINE_MODES; ++m) 2591 { 2592 machine_mode m2 = (machine_mode) m; 2593 bool complex_p = false; 2594 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode); 2595 size_t msize; 2596 2597 if (COMPLEX_MODE_P (m2)) 2598 { 2599 complex_p = true; 2600 m2 = GET_MODE_INNER (m2); 2601 } 2602 2603 msize = GET_MODE_SIZE (m2); 2604 2605 /* SDmode is special in that we want to access it only via REG+REG 2606 addressing on power7 and above, since we want to use the LFIWZX and 2607 STFIWZX instructions to load it. */ 2608 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); 2609 2610 any_addr_mask = 0; 2611 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) 2612 { 2613 addr_mask = 0; 2614 reg = reload_reg_map[rc].reg; 2615 2616 /* Can mode values go in the GPR/FPR/Altivec registers? */ 2617 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg]) 2618 { 2619 bool small_int_vsx_p = (small_int_p 2620 && (rc == RELOAD_REG_FPR 2621 || rc == RELOAD_REG_VMX)); 2622 2623 nregs = rs6000_hard_regno_nregs[m][reg]; 2624 addr_mask |= RELOAD_REG_VALID; 2625 2626 /* Indicate if the mode takes more than 1 physical register. If 2627 it takes a single register, indicate it can do REG+REG 2628 addressing. Small integers in VSX registers can only do 2629 REG+REG addressing. */ 2630 if (small_int_vsx_p) 2631 addr_mask |= RELOAD_REG_INDEXED; 2632 else if (nregs > 1 || m == BLKmode || complex_p) 2633 addr_mask |= RELOAD_REG_MULTIPLE; 2634 else 2635 addr_mask |= RELOAD_REG_INDEXED; 2636 2637 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY 2638 addressing. If we allow scalars into Altivec registers, 2639 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. 
2640 2641 For VSX systems, we don't allow update addressing for 2642 DFmode/SFmode if those registers can go in both the 2643 traditional floating point registers and Altivec registers. 2644 The load/store instructions for the Altivec registers do not 2645 have update forms. If we allowed update addressing, it seems 2646 to break IV-OPT code using floating point if the index type is 2647 int instead of long (PR target/81550 and target/84042). */ 2648 2649 if (TARGET_UPDATE 2650 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR) 2651 && msize <= 8 2652 && !VECTOR_MODE_P (m2) 2653 && !VECTOR_ALIGNMENT_P (m2) 2654 && !complex_p 2655 && (m != E_DFmode || !TARGET_VSX) 2656 && (m != E_SFmode || !TARGET_P8_VECTOR) 2657 && !small_int_vsx_p) 2658 { 2659 addr_mask |= RELOAD_REG_PRE_INCDEC; 2660 2661 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that 2662 we don't allow PRE_MODIFY for some multi-register 2663 operations. */ 2664 switch (m) 2665 { 2666 default: 2667 addr_mask |= RELOAD_REG_PRE_MODIFY; 2668 break; 2669 2670 case E_DImode: 2671 if (TARGET_POWERPC64) 2672 addr_mask |= RELOAD_REG_PRE_MODIFY; 2673 break; 2674 2675 case E_DFmode: 2676 case E_DDmode: 2677 if (TARGET_HARD_FLOAT) 2678 addr_mask |= RELOAD_REG_PRE_MODIFY; 2679 break; 2680 } 2681 } 2682 } 2683 2684 /* GPR and FPR registers can do REG+OFFSET addressing, except 2685 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing 2686 for 64-bit scalars and 32-bit SFmode to altivec registers. */ 2687 if ((addr_mask != 0) && !indexed_only_p 2688 && msize <= 8 2689 && (rc == RELOAD_REG_GPR 2690 || ((msize == 8 || m2 == SFmode) 2691 && (rc == RELOAD_REG_FPR 2692 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR))))) 2693 addr_mask |= RELOAD_REG_OFFSET; 2694 2695 /* VSX registers can do REG+OFFSET addresssing if ISA 3.0 2696 instructions are enabled. The offset for 128-bit VSX registers is 2697 only 12-bits. While GPRs can handle the full offset range, VSX 2698 registers can only handle the restricted range. */ 2699 else if ((addr_mask != 0) && !indexed_only_p 2700 && msize == 16 && TARGET_P9_VECTOR 2701 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2) 2702 || (m2 == TImode && TARGET_VSX))) 2703 { 2704 addr_mask |= RELOAD_REG_OFFSET; 2705 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) 2706 addr_mask |= RELOAD_REG_QUAD_OFFSET; 2707 } 2708 2709 /* Vector pairs can do both indexed and offset loads if the 2710 instructions are enabled, otherwise they can only do offset loads 2711 since it will be broken into two vector moves. Vector quads can 2712 only do offset loads. */ 2713 else if ((addr_mask != 0) && TARGET_MMA 2714 && (m2 == POImode || m2 == PXImode)) 2715 { 2716 addr_mask |= RELOAD_REG_OFFSET; 2717 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX) 2718 { 2719 addr_mask |= RELOAD_REG_QUAD_OFFSET; 2720 if (m2 == POImode) 2721 addr_mask |= RELOAD_REG_INDEXED; 2722 } 2723 } 2724 2725 /* VMX registers can do (REG & -16) and ((REG+REG) & -16) 2726 addressing on 128-bit types. */ 2727 if (rc == RELOAD_REG_VMX && msize == 16 2728 && (addr_mask & RELOAD_REG_VALID) != 0) 2729 addr_mask |= RELOAD_REG_AND_M16; 2730 2731 reg_addr[m].addr_mask[rc] = addr_mask; 2732 any_addr_mask |= addr_mask; 2733 } 2734 2735 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask; 2736 } 2737} 2738 2739 2740/* Initialize the various global tables that are based on register size. */ 2741static void 2742rs6000_init_hard_regno_mode_ok (bool global_init_p) 2743{ 2744 ssize_t r, m, c; 2745 int align64; 2746 int align32; 2747 2748 /* Precalculate REGNO_REG_CLASS. 
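     GPR 0 is placed in GENERAL_REGS rather than BASE_REGS because a zero
     in the base register field of a PowerPC load or store means a
     literal 0, so r0 cannot be used as a base register.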
*/ 2749 rs6000_regno_regclass[0] = GENERAL_REGS; 2750 for (r = 1; r < 32; ++r) 2751 rs6000_regno_regclass[r] = BASE_REGS; 2752 2753 for (r = 32; r < 64; ++r) 2754 rs6000_regno_regclass[r] = FLOAT_REGS; 2755 2756 for (r = 64; HARD_REGISTER_NUM_P (r); ++r) 2757 rs6000_regno_regclass[r] = NO_REGS; 2758 2759 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r) 2760 rs6000_regno_regclass[r] = ALTIVEC_REGS; 2761 2762 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS; 2763 for (r = CR1_REGNO; r <= CR7_REGNO; ++r) 2764 rs6000_regno_regclass[r] = CR_REGS; 2765 2766 rs6000_regno_regclass[LR_REGNO] = LINK_REGS; 2767 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; 2768 rs6000_regno_regclass[CA_REGNO] = NO_REGS; 2769 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS; 2770 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS; 2771 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; 2772 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; 2773 2774 /* Precalculate register class to simpler reload register class. We don't 2775 need all of the register classes that are combinations of different 2776 classes, just the simple ones that have constraint letters. */ 2777 for (c = 0; c < N_REG_CLASSES; c++) 2778 reg_class_to_reg_type[c] = NO_REG_TYPE; 2779 2780 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; 2781 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; 2782 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; 2783 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; 2784 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; 2785 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; 2786 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; 2787 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; 2788 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; 2789 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; 2790 2791 if (TARGET_VSX) 2792 { 2793 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; 2794 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; 2795 } 2796 else 2797 { 2798 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; 2799 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; 2800 } 2801 2802 /* Precalculate the valid memory formats as well as the vector information, 2803 this must be set up before the rs6000_hard_regno_nregs_internal calls 2804 below. */ 2805 gcc_assert ((int)VECTOR_NONE == 0); 2806 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); 2807 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem)); 2808 2809 gcc_assert ((int)CODE_FOR_nothing == 0); 2810 memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); 2811 2812 gcc_assert ((int)NO_REGS == 0); 2813 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); 2814 2815 /* The VSX hardware allows native alignment for vectors, but control whether the compiler 2816 believes it can use native alignment or still uses 128-bit alignment. */ 2817 if (TARGET_VSX && !TARGET_VSX_ALIGN_128) 2818 { 2819 align64 = 64; 2820 align32 = 32; 2821 } 2822 else 2823 { 2824 align64 = 128; 2825 align32 = 128; 2826 } 2827 2828 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so 2829 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. 
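     That is, the code below leaves rs6000_vector_unit[] as VECTOR_NONE
     for these modes and sets only the memory and alignment entries, so
     vector instructions are used for loads and stores but not for
     arithmetic.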
*/ 2830 if (TARGET_FLOAT128_TYPE) 2831 { 2832 rs6000_vector_mem[KFmode] = VECTOR_VSX; 2833 rs6000_vector_align[KFmode] = 128; 2834 2835 if (FLOAT128_IEEE_P (TFmode)) 2836 { 2837 rs6000_vector_mem[TFmode] = VECTOR_VSX; 2838 rs6000_vector_align[TFmode] = 128; 2839 } 2840 } 2841 2842 /* V2DF mode, VSX only. */ 2843 if (TARGET_VSX) 2844 { 2845 rs6000_vector_unit[V2DFmode] = VECTOR_VSX; 2846 rs6000_vector_mem[V2DFmode] = VECTOR_VSX; 2847 rs6000_vector_align[V2DFmode] = align64; 2848 } 2849 2850 /* V4SF mode, either VSX or Altivec. */ 2851 if (TARGET_VSX) 2852 { 2853 rs6000_vector_unit[V4SFmode] = VECTOR_VSX; 2854 rs6000_vector_mem[V4SFmode] = VECTOR_VSX; 2855 rs6000_vector_align[V4SFmode] = align32; 2856 } 2857 else if (TARGET_ALTIVEC) 2858 { 2859 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; 2860 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; 2861 rs6000_vector_align[V4SFmode] = align32; 2862 } 2863 2864 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads 2865 and stores. */ 2866 if (TARGET_ALTIVEC) 2867 { 2868 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; 2869 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; 2870 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; 2871 rs6000_vector_align[V4SImode] = align32; 2872 rs6000_vector_align[V8HImode] = align32; 2873 rs6000_vector_align[V16QImode] = align32; 2874 2875 if (TARGET_VSX) 2876 { 2877 rs6000_vector_mem[V4SImode] = VECTOR_VSX; 2878 rs6000_vector_mem[V8HImode] = VECTOR_VSX; 2879 rs6000_vector_mem[V16QImode] = VECTOR_VSX; 2880 } 2881 else 2882 { 2883 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; 2884 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; 2885 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; 2886 } 2887 } 2888 2889 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to 2890 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ 2891 if (TARGET_VSX) 2892 { 2893 rs6000_vector_mem[V2DImode] = VECTOR_VSX; 2894 rs6000_vector_unit[V2DImode] 2895 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; 2896 rs6000_vector_align[V2DImode] = align64; 2897 2898 rs6000_vector_mem[V1TImode] = VECTOR_VSX; 2899 rs6000_vector_unit[V1TImode] 2900 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; 2901 rs6000_vector_align[V1TImode] = 128; 2902 } 2903 2904 /* DFmode, see if we want to use the VSX unit. Memory is handled 2905 differently, so don't set rs6000_vector_mem. */ 2906 if (TARGET_VSX) 2907 { 2908 rs6000_vector_unit[DFmode] = VECTOR_VSX; 2909 rs6000_vector_align[DFmode] = 64; 2910 } 2911 2912 /* SFmode, see if we want to use the VSX unit. */ 2913 if (TARGET_P8_VECTOR) 2914 { 2915 rs6000_vector_unit[SFmode] = VECTOR_VSX; 2916 rs6000_vector_align[SFmode] = 32; 2917 } 2918 2919 /* Allow TImode in VSX register and set the VSX memory macros. */ 2920 if (TARGET_VSX) 2921 { 2922 rs6000_vector_mem[TImode] = VECTOR_VSX; 2923 rs6000_vector_align[TImode] = align64; 2924 } 2925 2926 /* Add support for vector pairs and vector quad registers. */ 2927 if (TARGET_MMA) 2928 { 2929 rs6000_vector_unit[POImode] = VECTOR_NONE; 2930 rs6000_vector_mem[POImode] = VECTOR_VSX; 2931 rs6000_vector_align[POImode] = 256; 2932 2933 rs6000_vector_unit[PXImode] = VECTOR_NONE; 2934 rs6000_vector_mem[PXImode] = VECTOR_VSX; 2935 rs6000_vector_align[PXImode] = 512; 2936 } 2937 2938 /* Register class constraints for the constraints that depend on compile 2939 switches. When the VSX code was added, different constraints were added 2940 based on the type (DFmode, V2DFmode, V4SFmode). 
For the vector types, all 2941 of the VSX registers are used. The register classes for scalar floating 2942 point types is set, based on whether we allow that type into the upper 2943 (Altivec) registers. GCC has register classes to target the Altivec 2944 registers for load/store operations, to select using a VSX memory 2945 operation instead of the traditional floating point operation. The 2946 constraints are: 2947 2948 d - Register class to use with traditional DFmode instructions. 2949 f - Register class to use with traditional SFmode instructions. 2950 v - Altivec register. 2951 wa - Any VSX register. 2952 wc - Reserved to represent individual CR bits (used in LLVM). 2953 wn - always NO_REGS. 2954 wr - GPR if 64-bit mode is permitted. 2955 wx - Float register if we can do 32-bit int stores. */ 2956 2957 if (TARGET_HARD_FLOAT) 2958 { 2959 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */ 2960 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */ 2961 } 2962 2963 if (TARGET_VSX) 2964 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; 2965 2966 /* Add conditional constraints based on various options, to allow us to 2967 collapse multiple insn patterns. */ 2968 if (TARGET_ALTIVEC) 2969 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; 2970 2971 if (TARGET_POWERPC64) 2972 { 2973 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; 2974 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; 2975 } 2976 2977 if (TARGET_STFIWX) 2978 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */ 2979 2980 /* Support for new direct moves (ISA 3.0 + 64bit). */ 2981 if (TARGET_DIRECT_MOVE_128) 2982 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; 2983 2984 /* Set up the reload helper and direct move functions. 
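     Each reg_addr[mode] entry records the insn codes of the reload
     patterns for that mode; the secondary reload machinery
     (rs6000_secondary_reload above) uses them when an access needs an
     intermediate register.  Note that the 64-bit and 32-bit compilers
     install different patterns below (the _di_ versus _si_ variants).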
*/ 2985 if (TARGET_VSX || TARGET_ALTIVEC) 2986 { 2987 if (TARGET_64BIT) 2988 { 2989 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; 2990 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; 2991 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; 2992 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; 2993 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; 2994 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; 2995 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; 2996 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; 2997 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; 2998 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; 2999 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; 3000 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; 3001 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; 3002 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; 3003 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; 3004 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; 3005 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; 3006 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; 3007 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; 3008 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; 3009 3010 if (FLOAT128_VECTOR_P (KFmode)) 3011 { 3012 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store; 3013 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load; 3014 } 3015 3016 if (FLOAT128_VECTOR_P (TFmode)) 3017 { 3018 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store; 3019 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; 3020 } 3021 3022 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are 3023 available. 
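	 This is the same TARGET_NO_SDMODE_STACK condition that makes
	 SDmode indexed-only in rs6000_setup_reg_addr_masks above.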
*/ 3024 if (TARGET_NO_SDMODE_STACK) 3025 { 3026 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; 3027 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; 3028 } 3029 3030 if (TARGET_VSX) 3031 { 3032 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; 3033 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; 3034 } 3035 3036 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) 3037 { 3038 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; 3039 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; 3040 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; 3041 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; 3042 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; 3043 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; 3044 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; 3045 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; 3046 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; 3047 3048 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; 3049 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; 3050 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; 3051 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; 3052 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; 3053 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; 3054 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; 3055 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; 3056 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; 3057 3058 if (FLOAT128_VECTOR_P (KFmode)) 3059 { 3060 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf; 3061 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf; 3062 } 3063 3064 if (FLOAT128_VECTOR_P (TFmode)) 3065 { 3066 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf; 3067 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf; 3068 } 3069 3070 if (TARGET_MMA) 3071 { 3072 reg_addr[POImode].reload_store = CODE_FOR_reload_poi_di_store; 3073 reg_addr[POImode].reload_load = CODE_FOR_reload_poi_di_load; 3074 reg_addr[PXImode].reload_store = CODE_FOR_reload_pxi_di_store; 3075 reg_addr[PXImode].reload_load = CODE_FOR_reload_pxi_di_load; 3076 } 3077 } 3078 } 3079 else 3080 { 3081 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; 3082 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; 3083 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; 3084 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; 3085 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; 3086 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; 3087 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; 3088 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; 3089 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; 3090 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; 3091 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; 3092 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; 3093 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; 3094 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; 3095 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; 3096 
reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; 3097 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; 3098 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; 3099 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; 3100 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; 3101 3102 if (FLOAT128_VECTOR_P (KFmode)) 3103 { 3104 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store; 3105 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load; 3106 } 3107 3108 if (FLOAT128_IEEE_P (TFmode)) 3109 { 3110 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store; 3111 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; 3112 } 3113 3114 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are 3115 available. */ 3116 if (TARGET_NO_SDMODE_STACK) 3117 { 3118 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; 3119 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; 3120 } 3121 3122 if (TARGET_VSX) 3123 { 3124 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; 3125 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; 3126 } 3127 3128 if (TARGET_DIRECT_MOVE) 3129 { 3130 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; 3131 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; 3132 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; 3133 } 3134 } 3135 3136 reg_addr[DFmode].scalar_in_vmx_p = true; 3137 reg_addr[DImode].scalar_in_vmx_p = true; 3138 3139 if (TARGET_P8_VECTOR) 3140 { 3141 reg_addr[SFmode].scalar_in_vmx_p = true; 3142 reg_addr[SImode].scalar_in_vmx_p = true; 3143 3144 if (TARGET_P9_VECTOR) 3145 { 3146 reg_addr[HImode].scalar_in_vmx_p = true; 3147 reg_addr[QImode].scalar_in_vmx_p = true; 3148 } 3149 } 3150 } 3151 3152 /* Precalculate HARD_REGNO_NREGS. */ 3153 for (r = 0; HARD_REGISTER_NUM_P (r); ++r) 3154 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3155 rs6000_hard_regno_nregs[m][r] 3156 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m); 3157 3158 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */ 3159 for (r = 0; HARD_REGISTER_NUM_P (r); ++r) 3160 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3161 rs6000_hard_regno_mode_ok_p[m][r] 3162 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m); 3163 3164 /* Precalculate CLASS_MAX_NREGS sizes. */ 3165 for (c = 0; c < LIM_REG_CLASSES; ++c) 3166 { 3167 int reg_size; 3168 3169 if (TARGET_VSX && VSX_REG_CLASS_P (c)) 3170 reg_size = UNITS_PER_VSX_WORD; 3171 3172 else if (c == ALTIVEC_REGS) 3173 reg_size = UNITS_PER_ALTIVEC_WORD; 3174 3175 else if (c == FLOAT_REGS) 3176 reg_size = UNITS_PER_FP_WORD; 3177 3178 else 3179 reg_size = UNITS_PER_WORD; 3180 3181 for (m = 0; m < NUM_MACHINE_MODES; ++m) 3182 { 3183 machine_mode m2 = (machine_mode)m; 3184 int reg_size2 = reg_size; 3185 3186 /* TDmode & IBM 128-bit floating point always takes 2 registers, even 3187 in VSX. */ 3188 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m)) 3189 reg_size2 = UNITS_PER_FP_WORD; 3190 3191 rs6000_class_max_nregs[m][c] 3192 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2; 3193 } 3194 } 3195 3196 /* Calculate which modes to automatically generate code to use a the 3197 reciprocal divide and square root instructions. In the future, possibly 3198 automatically generate the instructions even if the user did not specify 3199 -mrecip. The older machines double precision reciprocal sqrt estimate is 3200 not accurate enough. 
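     The estimate instructions supply only a few bits of precision; in
     practice the estimate is refined with Newton-Raphson steps, which is
     only profitable (and only safe) under the -ffinite-math-only,
     -fno-trapping-math and -freciprocal-math conditions checked below.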
*/ 3201 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); 3202 if (TARGET_FRES) 3203 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; 3204 if (TARGET_FRE) 3205 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; 3206 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) 3207 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; 3208 if (VECTOR_UNIT_VSX_P (V2DFmode)) 3209 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; 3210 3211 if (TARGET_FRSQRTES) 3212 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3213 if (TARGET_FRSQRTE) 3214 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3215 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) 3216 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3217 if (VECTOR_UNIT_VSX_P (V2DFmode)) 3218 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; 3219 3220 if (rs6000_recip_control) 3221 { 3222 if (!flag_finite_math_only) 3223 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math", 3224 "-ffast-math"); 3225 if (flag_trapping_math) 3226 warning (0, "%qs requires %qs or %qs", "-mrecip", 3227 "-fno-trapping-math", "-ffast-math"); 3228 if (!flag_reciprocal_math) 3229 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math", 3230 "-ffast-math"); 3231 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) 3232 { 3233 if (RS6000_RECIP_HAVE_RE_P (SFmode) 3234 && (rs6000_recip_control & RECIP_SF_DIV) != 0) 3235 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3236 3237 if (RS6000_RECIP_HAVE_RE_P (DFmode) 3238 && (rs6000_recip_control & RECIP_DF_DIV) != 0) 3239 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3240 3241 if (RS6000_RECIP_HAVE_RE_P (V4SFmode) 3242 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) 3243 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3244 3245 if (RS6000_RECIP_HAVE_RE_P (V2DFmode) 3246 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) 3247 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; 3248 3249 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) 3250 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) 3251 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3252 3253 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) 3254 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) 3255 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3256 3257 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) 3258 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) 3259 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3260 3261 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) 3262 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) 3263 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; 3264 } 3265 } 3266 3267 /* Update the addr mask bits in reg_addr to help secondary reload and go if 3268 legitimate address support to figure out the appropriate addressing to 3269 use. 
*/ 3270 rs6000_setup_reg_addr_masks (); 3271 3272 if (global_init_p || TARGET_DEBUG_TARGET) 3273 { 3274 if (TARGET_DEBUG_REG) 3275 rs6000_debug_reg_global (); 3276 3277 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG) 3278 fprintf (stderr, 3279 "SImode variable mult cost = %d\n" 3280 "SImode constant mult cost = %d\n" 3281 "SImode short constant mult cost = %d\n" 3282 "DImode multipliciation cost = %d\n" 3283 "SImode division cost = %d\n" 3284 "DImode division cost = %d\n" 3285 "Simple fp operation cost = %d\n" 3286 "DFmode multiplication cost = %d\n" 3287 "SFmode division cost = %d\n" 3288 "DFmode division cost = %d\n" 3289 "cache line size = %d\n" 3290 "l1 cache size = %d\n" 3291 "l2 cache size = %d\n" 3292 "simultaneous prefetches = %d\n" 3293 "\n", 3294 rs6000_cost->mulsi, 3295 rs6000_cost->mulsi_const, 3296 rs6000_cost->mulsi_const9, 3297 rs6000_cost->muldi, 3298 rs6000_cost->divsi, 3299 rs6000_cost->divdi, 3300 rs6000_cost->fp, 3301 rs6000_cost->dmul, 3302 rs6000_cost->sdiv, 3303 rs6000_cost->ddiv, 3304 rs6000_cost->cache_line_size, 3305 rs6000_cost->l1_cache_size, 3306 rs6000_cost->l2_cache_size, 3307 rs6000_cost->simultaneous_prefetches); 3308 } 3309} 3310 3311#if TARGET_MACHO 3312/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */ 3313 3314static void 3315darwin_rs6000_override_options (void) 3316{ 3317 /* The Darwin ABI always includes AltiVec, can't be (validly) turned 3318 off. */ 3319 rs6000_altivec_abi = 1; 3320 TARGET_ALTIVEC_VRSAVE = 1; 3321 rs6000_current_abi = ABI_DARWIN; 3322 3323 if (DEFAULT_ABI == ABI_DARWIN 3324 && TARGET_64BIT) 3325 darwin_one_byte_bool = 1; 3326 3327 if (TARGET_64BIT && ! TARGET_POWERPC64) 3328 { 3329 rs6000_isa_flags |= OPTION_MASK_POWERPC64; 3330 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64"); 3331 } 3332 3333 /* The linkers [ld64] that support 64Bit do not need the JBSR longcall 3334 optimisation, and will not work with the most generic case (where the 3335 symbol is undefined external, but there is no symbl stub). */ 3336 if (TARGET_64BIT) 3337 rs6000_default_long_calls = 0; 3338 3339 /* ld_classic is (so far) still used for kernel (static) code, and supports 3340 the JBSR longcall / branch islands. */ 3341 if (flag_mkernel) 3342 { 3343 rs6000_default_long_calls = 1; 3344 3345 /* Allow a kext author to do -mkernel -mhard-float. */ 3346 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT)) 3347 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT; 3348 } 3349 3350 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes 3351 Altivec. */ 3352 if (!flag_mkernel && !flag_apple_kext 3353 && TARGET_64BIT 3354 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)) 3355 rs6000_isa_flags |= OPTION_MASK_ALTIVEC; 3356 3357 /* Unless the user (not the configurer) has explicitly overridden 3358 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to 3359 G4 unless targeting the kernel. */ 3360 if (!flag_mkernel 3361 && !flag_apple_kext 3362 && strverscmp (darwin_macosx_version_min, "10.5") >= 0 3363 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC) 3364 && ! global_options_set.x_rs6000_cpu_index) 3365 { 3366 rs6000_isa_flags |= OPTION_MASK_ALTIVEC; 3367 } 3368} 3369#endif 3370 3371/* If not otherwise specified by a target, make 'long double' equivalent to 3372 'double'. */ 3373 3374#ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE 3375#define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64 3376#endif 3377 3378/* Return the builtin mask of the various options used that could affect which 3379 builtins were used. 
In the past we used target_flags, but we've run out of 3380 bits, and some options are no longer in target_flags. */ 3381 3382HOST_WIDE_INT 3383rs6000_builtin_mask_calculate (void) 3384{ 3385 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) 3386 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0) 3387 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) 3388 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) 3389 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) 3390 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) 3391 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) 3392 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) 3393 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) 3394 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) 3395 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) 3396 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) 3397 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) 3398 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) 3399 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0) 3400 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) 3401 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) 3402 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) 3403 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) 3404 | ((TARGET_LONG_DOUBLE_128 3405 && TARGET_HARD_FLOAT 3406 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0) 3407 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0) 3408 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0) 3409 | ((TARGET_MMA) ? RS6000_BTM_MMA : 0) 3410 | ((TARGET_POWER10) ? RS6000_BTM_P10 : 0)); 3411} 3412 3413/* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered 3414 to clobber the XER[CA] bit because clobbering that bit without telling 3415 the compiler worked just fine with versions of GCC before GCC 5, and 3416 breaking a lot of older code in ways that are hard to track down is 3417 not such a great idea. */ 3418 3419static rtx_insn * 3420rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, 3421 vec<const char *> &/*constraints*/, 3422 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) 3423{ 3424 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); 3425 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); 3426 return NULL; 3427} 3428 3429/* This target function is similar to the hook TARGET_OPTION_OVERRIDE 3430 but is called when the optimize level is changed via an attribute or 3431 pragma or when it is reset at the end of the code affected by the 3432 attribute or pragma. It is not called at the beginning of compilation 3433 when TARGET_OPTION_OVERRIDE is called so if you want to perform these 3434 actions then, you should have TARGET_OPTION_OVERRIDE call 3435 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */ 3436 3437static void 3438rs6000_override_options_after_change (void) 3439{ 3440 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and 3441 turns -frename-registers on. */ 3442 if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops) 3443 || (global_options_set.x_flag_unroll_all_loops 3444 && flag_unroll_all_loops)) 3445 { 3446 if (!global_options_set.x_unroll_only_small_loops) 3447 unroll_only_small_loops = 0; 3448 if (!global_options_set.x_flag_rename_registers) 3449 flag_rename_registers = 1; 3450 if (!global_options_set.x_flag_cunroll_grow_size) 3451 flag_cunroll_grow_size = 1; 3452 } 3453 else if (!global_options_set.x_flag_cunroll_grow_size) 3454 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; 3455} 3456 3457/* Override command line options. 
3458 3459 Combine build-specific configuration information with options 3460 specified on the command line to set various state variables which 3461 influence code generation, optimization, and expansion of built-in 3462 functions. Assure that command-line configuration preferences are 3463 compatible with each other and with the build configuration; issue 3464 warnings while adjusting configuration or error messages while 3465 rejecting configuration. 3466 3467 Upon entry to this function: 3468 3469 This function is called once at the beginning of 3470 compilation, and then again at the start and end of compiling 3471 each section of code that has a different configuration, as 3472 indicated, for example, by adding the 3473 3474 __attribute__((__target__("cpu=power9"))) 3475 3476 qualifier to a function definition or, for example, by bracketing 3477 code between 3478 3479 #pragma GCC target("altivec") 3480 3481 and 3482 3483 #pragma GCC reset_options 3484 3485 directives. Parameter global_init_p is true for the initial 3486 invocation, which initializes global variables, and false for all 3487 subsequent invocations. 3488 3489 3490 Various global state information is assumed to be valid. This 3491 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the 3492 default CPU specified at build configure time, TARGET_DEFAULT, 3493 representing the default set of option flags for the default 3494 target, and global_options_set.x_rs6000_isa_flags, representing 3495 which options were requested on the command line. 3496 3497 Upon return from this function: 3498 3499 rs6000_isa_flags_explicit has a non-zero bit for each flag that 3500 was set by name on the command line. Additionally, if certain 3501 attributes are automatically enabled or disabled by this function 3502 in order to assure compatibility between options and 3503 configuration, the flags associated with those attributes are 3504 also set. By setting these "explicit bits", we avoid the risk 3505 that other code might accidentally overwrite these particular 3506 attributes with "default values". 3507 3508 The various bits of rs6000_isa_flags are set to indicate the 3509 target options that have been selected for the most current 3510 compilation efforts. This has the effect of also turning on the 3511 associated TARGET_XXX values since these are macros which are 3512 generally defined to test the corresponding bit of the 3513 rs6000_isa_flags variable. 3514 3515 The variable rs6000_builtin_mask is set to represent the target 3516 options for the most current compilation efforts, consistent with 3517 the current contents of rs6000_isa_flags. This variable controls 3518 expansion of built-in functions. 3519 3520 Various other global variables and fields of global structures 3521 (over 50 in all) are initialized to reflect the desired options 3522 for the most current compilation efforts. */ 3523 3524static bool 3525rs6000_option_override_internal (bool global_init_p) 3526{ 3527 bool ret = true; 3528 3529 HOST_WIDE_INT set_masks; 3530 HOST_WIDE_INT ignore_masks; 3531 int cpu_index = -1; 3532 int tune_index; 3533 struct cl_target_option *main_target_opt 3534 = ((global_init_p || target_option_default_node == NULL) 3535 ? NULL : TREE_TARGET_OPTION (target_option_default_node)); 3536 3537 /* Print defaults. */ 3538 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p) 3539 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); 3540 3541 /* Remember the explicit arguments. 
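   As an illustrative example (the command line is hypothetical, not taken
   from this file): given

     gcc -mcpu=power9 -mno-vsx ...

   the OPTION_MASK_VSX bit is recorded in rs6000_isa_flags_explicit, so the
   cpu-based defaulting later in this function will not silently turn VSX
   back on.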
*/
3542  if (global_init_p)
3543    rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3544
3545  /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3546     library functions, so warn about it.  The flag may be useful for
3547     performance studies from time to time though, so don't disable it
3548     entirely.  */
3549  if (global_options_set.x_rs6000_alignment_flags
3550      && rs6000_alignment_flags == MASK_ALIGN_POWER
3551      && DEFAULT_ABI == ABI_DARWIN
3552      && TARGET_64BIT)
3553    warning (0, "%qs is not supported for 64-bit Darwin;"
3554	     " it is incompatible with the installed C and C++ libraries",
3555	     "-malign-power");
3556
3557  /* Numerous experiments show that IRA-based loop pressure
3558     calculation works better for RTL loop invariant motion on targets
3559     with enough (>= 32) registers.  It is an expensive optimization.
3560     So it is on only for peak performance.  */
3561  if (optimize >= 3 && global_init_p
3562      && !global_options_set.x_flag_ira_loop_pressure)
3563    flag_ira_loop_pressure = 1;
3564
3565  /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3566     for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3567     options were already specified.  */
3568  if (flag_sanitize & SANITIZE_USER_ADDRESS
3569      && !global_options_set.x_flag_asynchronous_unwind_tables)
3570    flag_asynchronous_unwind_tables = 1;
3571
3572  /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3573     loop unroller is active.  It is only checked during unrolling, so
3574     we can just set it on by default.  */
3575  if (!global_options_set.x_flag_variable_expansion_in_unroller)
3576    flag_variable_expansion_in_unroller = 1;
3577
3578  /* Set the pointer size.  */
3579  if (TARGET_64BIT)
3580    {
3581      rs6000_pmode = DImode;
3582      rs6000_pointer_size = 64;
3583    }
3584  else
3585    {
3586      rs6000_pmode = SImode;
3587      rs6000_pointer_size = 32;
3588    }
3589
3590  /* Some OSs don't support saving the high part of 64-bit registers on context
3591     switch.  Other OSs don't support saving Altivec registers.  On those OSs,
3592     we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3593     if the user wants either, the user must explicitly specify them and we
3594     won't interfere with the user's specification.  */
3595
3596  set_masks = POWERPC_MASKS;
3597#ifdef OS_MISSING_POWERPC64
3598  if (OS_MISSING_POWERPC64)
3599    set_masks &= ~OPTION_MASK_POWERPC64;
3600#endif
3601#ifdef OS_MISSING_ALTIVEC
3602  if (OS_MISSING_ALTIVEC)
3603    set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3604		   | OTHER_VSX_VECTOR_MASKS);
3605#endif
3606
3607  /* Don't override flags that were given explicitly with the processor
     default.  */
3608  set_masks &= ~rs6000_isa_flags_explicit;
3609
3610  /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3611     the cpu in a target attribute or pragma, but did not specify a tuning
3612     option, use the cpu for the tuning option rather than the option specified
3613     with -mtune on the command line.  Process a '--with-cpu' configuration
3614     request as an implicit --cpu.
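   A rough illustration of the precedence implemented below (hypothetical
   command lines, assuming a compiler configured without --with-cpu):

     gcc -mcpu=power8 -mtune=power9 ...  # ISA from power8, tuning from power9
     gcc -mcpu=power8 ...                # tuning also defaults to power8
     gcc ...                             # ISA from TARGET_DEFAULT, tuning from
                                         # PROCESSOR_DEFAULT / PROCESSOR_DEFAULT64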
*/ 3615 if (rs6000_cpu_index >= 0) 3616 cpu_index = rs6000_cpu_index; 3617 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) 3618 cpu_index = main_target_opt->x_rs6000_cpu_index; 3619 else if (OPTION_TARGET_CPU_DEFAULT) 3620 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT); 3621 3622 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the 3623 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits 3624 with those from the cpu, except for options that were explicitly set. If 3625 we don't have a cpu, do not override the target bits set in 3626 TARGET_DEFAULT. */ 3627 if (cpu_index >= 0) 3628 { 3629 rs6000_cpu_index = cpu_index; 3630 rs6000_isa_flags &= ~set_masks; 3631 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable 3632 & set_masks); 3633 } 3634 else 3635 { 3636 /* If no -mcpu=<xxx>, inherit any default options that were cleared via 3637 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize 3638 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched 3639 to using rs6000_isa_flags, we need to do the initialization here. 3640 3641 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using 3642 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ 3643 HOST_WIDE_INT flags; 3644 if (TARGET_DEFAULT) 3645 flags = TARGET_DEFAULT; 3646 else 3647 { 3648 /* PowerPC 64-bit LE requires at least ISA 2.07. */ 3649 const char *default_cpu = (!TARGET_POWERPC64 3650 ? "powerpc" 3651 : (BYTES_BIG_ENDIAN 3652 ? "powerpc64" 3653 : "powerpc64le")); 3654 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu); 3655 flags = processor_target_table[default_cpu_index].target_enable; 3656 } 3657 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); 3658 } 3659 3660 if (rs6000_tune_index >= 0) 3661 tune_index = rs6000_tune_index; 3662 else if (cpu_index >= 0) 3663 rs6000_tune_index = tune_index = cpu_index; 3664 else 3665 { 3666 size_t i; 3667 enum processor_type tune_proc 3668 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT); 3669 3670 tune_index = -1; 3671 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) 3672 if (processor_target_table[i].processor == tune_proc) 3673 { 3674 tune_index = i; 3675 break; 3676 } 3677 } 3678 3679 if (cpu_index >= 0) 3680 rs6000_cpu = processor_target_table[cpu_index].processor; 3681 else 3682 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT; 3683 3684 gcc_assert (tune_index >= 0); 3685 rs6000_tune = processor_target_table[tune_index].processor; 3686 3687 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 3688 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 3689 || rs6000_cpu == PROCESSOR_PPCE5500) 3690 { 3691 if (TARGET_ALTIVEC) 3692 error ("AltiVec not supported in this target"); 3693 } 3694 3695 /* If we are optimizing big endian systems for space, use the load/store 3696 multiple instructions. */ 3697 if (BYTES_BIG_ENDIAN && optimize_size) 3698 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE; 3699 3700 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750, 3701 because the hardware doesn't support the instructions used in little 3702 endian mode, and causes an alignment trap. The 750 does not cause an 3703 alignment trap (except when the target is unaligned). 
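   Illustrative example (hypothetical command line): on a little-endian
   target,

     gcc -mcpu=power8 -mmultiple ...

   reaches the block below, which clears OPTION_MASK_MULTIPLE and, because
   the option was given explicitly, also emits the warning; a flag that was
   only enabled implicitly is dropped silently.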
*/ 3704 3705 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE) 3706 { 3707 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE; 3708 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0) 3709 warning (0, "%qs is not supported on little endian systems", 3710 "-mmultiple"); 3711 } 3712 3713 /* If little-endian, default to -mstrict-align on older processors. 3714 Testing for htm matches power8 and later. */ 3715 if (!BYTES_BIG_ENDIAN 3716 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM)) 3717 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; 3718 3719 if (!rs6000_fold_gimple) 3720 fprintf (stderr, 3721 "gimple folding of rs6000 builtins has been disabled.\n"); 3722 3723 /* Add some warnings for VSX. */ 3724 if (TARGET_VSX) 3725 { 3726 const char *msg = NULL; 3727 if (!TARGET_HARD_FLOAT) 3728 { 3729 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3730 msg = N_("%<-mvsx%> requires hardware floating point"); 3731 else 3732 { 3733 rs6000_isa_flags &= ~ OPTION_MASK_VSX; 3734 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3735 } 3736 } 3737 else if (TARGET_AVOID_XFORM > 0) 3738 msg = N_("%<-mvsx%> needs indexed addressing"); 3739 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit 3740 & OPTION_MASK_ALTIVEC)) 3741 { 3742 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3743 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible"); 3744 else 3745 msg = N_("%<-mno-altivec%> disables vsx"); 3746 } 3747 3748 if (msg) 3749 { 3750 warning (0, msg); 3751 rs6000_isa_flags &= ~ OPTION_MASK_VSX; 3752 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3753 } 3754 } 3755 3756 /* If hard-float/altivec/vsx were explicitly turned off then don't allow 3757 the -mcpu setting to enable options that conflict. */ 3758 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) 3759 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT 3760 | OPTION_MASK_ALTIVEC 3761 | OPTION_MASK_VSX)) != 0) 3762 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO 3763 | OPTION_MASK_DIRECT_MOVE) 3764 & ~rs6000_isa_flags_explicit); 3765 3766 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 3767 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); 3768 3769 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn 3770 off all of the options that depend on those flags. */ 3771 ignore_masks = rs6000_disable_incompatible_switches (); 3772 3773 /* For the newer switches (vsx, dfp, etc.) set some of the older options, 3774 unless the user explicitly used the -mno-<option> to disable the code. */ 3775 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC) 3776 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); 3777 else if (TARGET_P9_MINMAX) 3778 { 3779 if (cpu_index >= 0) 3780 { 3781 if (cpu_index == PROCESSOR_POWER9) 3782 { 3783 /* legacy behavior: allow -mcpu=power9 with certain 3784 capabilities explicitly disabled. */ 3785 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); 3786 } 3787 else 3788 error ("power9 target option is incompatible with %<%s=<xxx>%> " 3789 "for <xxx> less than power9", "-mcpu"); 3790 } 3791 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) 3792 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags 3793 & rs6000_isa_flags_explicit)) 3794 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags 3795 were explicitly cleared. 
*/ 3796 error ("%qs incompatible with explicitly disabled options", 3797 "-mpower9-minmax"); 3798 else 3799 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER; 3800 } 3801 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) 3802 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks); 3803 else if (TARGET_VSX) 3804 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks); 3805 else if (TARGET_POPCNTD) 3806 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks); 3807 else if (TARGET_DFP) 3808 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks); 3809 else if (TARGET_CMPB) 3810 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks); 3811 else if (TARGET_FPRND) 3812 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks); 3813 else if (TARGET_POPCNTB) 3814 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks); 3815 else if (TARGET_ALTIVEC) 3816 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); 3817 3818 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a 3819 target attribute or pragma which automatically enables both options, 3820 unless the altivec ABI was set. This is set by default for 64-bit, but 3821 not for 32-bit. Don't move this before the above code using ignore_masks, 3822 since it can reset the cleared VSX/ALTIVEC flag again. */ 3823 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi) 3824 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC) 3825 & ~rs6000_isa_flags_explicit); 3826 3827 if (TARGET_CRYPTO && !TARGET_ALTIVEC) 3828 { 3829 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) 3830 error ("%qs requires %qs", "-mcrypto", "-maltivec"); 3831 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; 3832 } 3833 3834 if (!TARGET_FPRND && TARGET_VSX) 3835 { 3836 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND) 3837 /* TARGET_VSX = 1 implies Power 7 and newer */ 3838 error ("%qs requires %qs", "-mvsx", "-mfprnd"); 3839 rs6000_isa_flags &= ~OPTION_MASK_FPRND; 3840 } 3841 3842 if (TARGET_DIRECT_MOVE && !TARGET_VSX) 3843 { 3844 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) 3845 error ("%qs requires %qs", "-mdirect-move", "-mvsx"); 3846 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; 3847 } 3848 3849 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) 3850 { 3851 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 3852 error ("%qs requires %qs", "-mpower8-vector", "-maltivec"); 3853 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; 3854 } 3855 3856 if (TARGET_P8_VECTOR && !TARGET_VSX) 3857 { 3858 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 3859 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX)) 3860 error ("%qs requires %qs", "-mpower8-vector", "-mvsx"); 3861 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0) 3862 { 3863 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; 3864 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) 3865 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; 3866 } 3867 else 3868 { 3869 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is 3870 not explicit. */ 3871 rs6000_isa_flags |= OPTION_MASK_VSX; 3872 rs6000_isa_flags_explicit |= OPTION_MASK_VSX; 3873 } 3874 } 3875 3876 if (TARGET_DFP && !TARGET_HARD_FLOAT) 3877 { 3878 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP) 3879 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float"); 3880 rs6000_isa_flags &= ~OPTION_MASK_DFP; 3881 } 3882 3883 /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, 3884 silently turn off quad memory mode. 
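   Illustrative example (hypothetical command line): a plain 32-bit
   compilation such as

     gcc -m32 -mquad-memory ...

   falls into the block below; the explicitly requested flag draws the
   "requires 64-bit mode" warning before being cleared, while a flag that
   was only enabled implicitly is dropped without a diagnostic.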
*/ 3885 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) 3886 { 3887 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) 3888 warning (0, N_("%<-mquad-memory%> requires 64-bit mode")); 3889 3890 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) 3891 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode")); 3892 3893 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY 3894 | OPTION_MASK_QUAD_MEMORY_ATOMIC); 3895 } 3896 3897 /* Non-atomic quad memory load/store are disabled for little endian, since 3898 the words are reversed, but atomic operations can still be done by 3899 swapping the words. */ 3900 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN) 3901 { 3902 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) 3903 warning (0, N_("%<-mquad-memory%> is not available in little endian " 3904 "mode")); 3905 3906 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; 3907 } 3908 3909 /* Assume if the user asked for normal quad memory instructions, they want 3910 the atomic versions as well, unless they explicity told us not to use quad 3911 word atomic instructions. */ 3912 if (TARGET_QUAD_MEMORY 3913 && !TARGET_QUAD_MEMORY_ATOMIC 3914 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) 3915 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; 3916 3917 /* If we can shrink-wrap the TOC register save separately, then use 3918 -msave-toc-indirect unless explicitly disabled. */ 3919 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0 3920 && flag_shrink_wrap_separate 3921 && optimize_function_for_speed_p (cfun)) 3922 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT; 3923 3924 /* Enable power8 fusion if we are tuning for power8, even if we aren't 3925 generating power8 instructions. Power9 does not optimize power8 fusion 3926 cases. */ 3927 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) 3928 { 3929 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8) 3930 rs6000_isa_flags |= OPTION_MASK_P8_FUSION; 3931 else 3932 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; 3933 } 3934 3935 /* Setting additional fusion flags turns on base fusion. */ 3936 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN) 3937 { 3938 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) 3939 { 3940 if (TARGET_P8_FUSION_SIGN) 3941 error ("%qs requires %qs", "-mpower8-fusion-sign", 3942 "-mpower8-fusion"); 3943 3944 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; 3945 } 3946 else 3947 rs6000_isa_flags |= OPTION_MASK_P8_FUSION; 3948 } 3949 3950 /* Power8 does not fuse sign extended loads with the addis. If we are 3951 optimizing at high levels for speed, convert a sign extended load into a 3952 zero extending load, and an explicit sign extension. */ 3953 if (TARGET_P8_FUSION 3954 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) 3955 && optimize_function_for_speed_p (cfun) 3956 && optimize >= 3) 3957 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; 3958 3959 /* ISA 3.0 vector instructions include ISA 2.07. */ 3960 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR) 3961 { 3962 /* We prefer to not mention undocumented options in 3963 error messages. However, if users have managed to select 3964 power9-vector without selecting power8-vector, they 3965 already know about undocumented flags. 
*/ 3966 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) && 3967 (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)) 3968 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector"); 3969 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0) 3970 { 3971 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; 3972 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) 3973 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; 3974 } 3975 else 3976 { 3977 /* OPTION_MASK_P9_VECTOR is explicit and 3978 OPTION_MASK_P8_VECTOR is not explicit. */ 3979 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR; 3980 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; 3981 } 3982 } 3983 3984 /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 3985 support. If we only have ISA 2.06 support, and the user did not specify 3986 the switch, leave it set to -1 so the movmisalign patterns are enabled, 3987 but we don't enable the full vectorization support */ 3988 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) 3989 TARGET_ALLOW_MOVMISALIGN = 1; 3990 3991 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) 3992 { 3993 if (TARGET_ALLOW_MOVMISALIGN > 0 3994 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN) 3995 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx"); 3996 3997 TARGET_ALLOW_MOVMISALIGN = 0; 3998 } 3999 4000 /* Determine when unaligned vector accesses are permitted, and when 4001 they are preferred over masked Altivec loads. Note that if 4002 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then 4003 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is 4004 not true. */ 4005 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4006 { 4007 if (!TARGET_VSX) 4008 { 4009 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) 4010 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx"); 4011 4012 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; 4013 } 4014 4015 else if (!TARGET_ALLOW_MOVMISALIGN) 4016 { 4017 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) 4018 error ("%qs requires %qs", "-munefficient-unaligned-vsx", 4019 "-mallow-movmisalign"); 4020 4021 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; 4022 } 4023 } 4024 4025 /* Use long double size to select the appropriate long double. We use 4026 TYPE_PRECISION to differentiate the 3 different long double types. We map 4027 128 into the precision used for TFmode. */ 4028 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64 4029 ? 64 4030 : FLOAT_PRECISION_TFmode); 4031 4032 /* Set long double size before the IEEE 128-bit tests. */ 4033 if (!global_options_set.x_rs6000_long_double_type_size) 4034 { 4035 if (main_target_opt != NULL 4036 && (main_target_opt->x_rs6000_long_double_type_size 4037 != default_long_double_size)) 4038 error ("target attribute or pragma changes %<long double%> size"); 4039 else 4040 rs6000_long_double_type_size = default_long_double_size; 4041 } 4042 else if (rs6000_long_double_type_size == 128) 4043 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode; 4044 4045 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server 4046 systems will also set long double to be IEEE 128-bit. AIX and Darwin 4047 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so 4048 those systems will not pick up this default. Warn if the user changes the 4049 default unless -Wno-psabi. 
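   Illustrative example (hypothetical command line): on a GNU/Linux target
   whose default is IBM extended double,

     gcc -mabi=ieeelongdouble ...

   is accepted below when full ISA 2.06 support (VSX and POPCNTD) is
   available and triggers the one-time "Using IEEE extended precision long
   double" warning, which -Wno-psabi suppresses.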
*/
4050  if (!global_options_set.x_rs6000_ieeequad)
4051    rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4052
4053  else if (TARGET_LONG_DOUBLE_128)
4054    {
4055      if (global_options.x_rs6000_ieeequad
4056	  && (!TARGET_POPCNTD || !TARGET_VSX))
4057	error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4058
4059      if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4060	{
4061	  static bool warned_change_long_double;
4062	  if (!warned_change_long_double)
4063	    {
4064	      warned_change_long_double = true;
4065	      if (TARGET_IEEEQUAD)
4066		warning (OPT_Wpsabi, "Using IEEE extended precision "
4067			 "%<long double%>");
4068	      else
4069		warning (OPT_Wpsabi, "Using IBM extended precision "
4070			 "%<long double%>");
4071	    }
4072	}
4073    }
4074
4075  /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4076     systems.  In GCC 7, we would enable the IEEE 128-bit floating point
4077     infrastructure (-mfloat128-type) but not enable the actual __float128 type
4078     unless the user used the explicit -mfloat128.  In GCC 8, we enable both
4079     the keyword as well as the type.  */
4080  TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4081
4082  /* IEEE 128-bit floating point requires VSX support.  */
4083  if (TARGET_FLOAT128_KEYWORD)
4084    {
4085      if (!TARGET_VSX)
4086	{
4087	  if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4088	    error ("%qs requires VSX support", "-mfloat128");
4089
4090	  TARGET_FLOAT128_TYPE = 0;
4091	  rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4092				| OPTION_MASK_FLOAT128_HW);
4093	}
4094      else if (!TARGET_FLOAT128_TYPE)
4095	{
4096	  TARGET_FLOAT128_TYPE = 1;
4097	  warning (0, "The %<-mfloat128%> option may not be fully supported");
4098	}
4099    }
4100
4101  /* Enable the __float128 keyword under Linux by default.  */
4102  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4103      && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4104    rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4105
4106  /* If we are supporting the float128 type and have full ISA 3.0 support,
4107     enable -mfloat128-hardware by default.  However, don't enable the
4108     __float128 keyword if it was explicitly turned off.  64-bit mode is needed
4109     because sometimes the compiler wants to put things in an integer
4110     container, and if we don't have __int128 support, it is impossible.  */
4111  if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4112      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4113      && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4114    rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4115
4116  if (TARGET_FLOAT128_HW
4117      && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4118    {
4119      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4120	error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4121
4122      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4123    }
4124
4125  if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4126    {
4127      if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4128	error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4129
4130      rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4131    }
4132
4133  /* Enable -mprefixed by default on power10 systems.  */
4134  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4135    rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4136
4137  /* -mprefixed requires -mcpu=power10 (or later).
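   For instance (hypothetical command line), 'gcc -mcpu=power9 -mprefixed'
   reaches the else branch below and is rejected with the "requires
   -mcpu=power10" error, whereas with -mcpu=power10 the flag was already
   enabled by default just above.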
*/ 4138 else if (TARGET_PREFIXED && !TARGET_POWER10) 4139 { 4140 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0) 4141 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10"); 4142 4143 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED; 4144 } 4145 4146 /* -mpcrel requires prefixed load/store addressing. */ 4147 if (TARGET_PCREL && !TARGET_PREFIXED) 4148 { 4149 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0) 4150 error ("%qs requires %qs", "-mpcrel", "-mprefixed"); 4151 4152 rs6000_isa_flags &= ~OPTION_MASK_PCREL; 4153 } 4154 4155 /* Print the options after updating the defaults. */ 4156 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4157 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); 4158 4159 /* E500mc does "better" if we inline more aggressively. Respect the 4160 user's opinion, though. */ 4161 if (rs6000_block_move_inline_limit == 0 4162 && (rs6000_tune == PROCESSOR_PPCE500MC 4163 || rs6000_tune == PROCESSOR_PPCE500MC64 4164 || rs6000_tune == PROCESSOR_PPCE5500 4165 || rs6000_tune == PROCESSOR_PPCE6500)) 4166 rs6000_block_move_inline_limit = 128; 4167 4168 /* store_one_arg depends on expand_block_move to handle at least the 4169 size of reg_parm_stack_space. */ 4170 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) 4171 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32); 4172 4173 if (global_init_p) 4174 { 4175 /* If the appropriate debug option is enabled, replace the target hooks 4176 with debug versions that call the real version and then prints 4177 debugging information. */ 4178 if (TARGET_DEBUG_COST) 4179 { 4180 targetm.rtx_costs = rs6000_debug_rtx_costs; 4181 targetm.address_cost = rs6000_debug_address_cost; 4182 targetm.sched.adjust_cost = rs6000_debug_adjust_cost; 4183 } 4184 4185 if (TARGET_DEBUG_ADDR) 4186 { 4187 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p; 4188 targetm.legitimize_address = rs6000_debug_legitimize_address; 4189 rs6000_secondary_reload_class_ptr 4190 = rs6000_debug_secondary_reload_class; 4191 targetm.secondary_memory_needed 4192 = rs6000_debug_secondary_memory_needed; 4193 targetm.can_change_mode_class 4194 = rs6000_debug_can_change_mode_class; 4195 rs6000_preferred_reload_class_ptr 4196 = rs6000_debug_preferred_reload_class; 4197 rs6000_mode_dependent_address_ptr 4198 = rs6000_debug_mode_dependent_address; 4199 } 4200 4201 if (rs6000_veclibabi_name) 4202 { 4203 if (strcmp (rs6000_veclibabi_name, "mass") == 0) 4204 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass; 4205 else 4206 { 4207 error ("unknown vectorization library ABI type (%qs) for " 4208 "%qs switch", rs6000_veclibabi_name, "-mveclibabi="); 4209 ret = false; 4210 } 4211 } 4212 } 4213 4214 /* Enable Altivec ABI for AIX -maltivec. */ 4215 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX)) 4216 { 4217 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) 4218 error ("target attribute or pragma changes AltiVec ABI"); 4219 else 4220 rs6000_altivec_abi = 1; 4221 } 4222 4223 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For 4224 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can 4225 be explicitly overridden in either case. 
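   As an illustrative sketch of the defaulting below (hypothetical command
   line): on a 32-bit GNU/Linux target,

     gcc -m32 -maltivec ...

   turns the AltiVec ABI on here, and a 64-bit ELF compilation gets it even
   without -maltivec, unless the ABI was chosen explicitly on the command
   line.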
*/ 4226 if (TARGET_ELF) 4227 { 4228 if (!global_options_set.x_rs6000_altivec_abi 4229 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX)) 4230 { 4231 if (main_target_opt != NULL && 4232 !main_target_opt->x_rs6000_altivec_abi) 4233 error ("target attribute or pragma changes AltiVec ABI"); 4234 else 4235 rs6000_altivec_abi = 1; 4236 } 4237 } 4238 4239 /* Set the Darwin64 ABI as default for 64-bit Darwin. 4240 So far, the only darwin64 targets are also MACH-O. */ 4241 if (TARGET_MACHO 4242 && DEFAULT_ABI == ABI_DARWIN 4243 && TARGET_64BIT) 4244 { 4245 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi) 4246 error ("target attribute or pragma changes darwin64 ABI"); 4247 else 4248 { 4249 rs6000_darwin64_abi = 1; 4250 /* Default to natural alignment, for better performance. */ 4251 rs6000_alignment_flags = MASK_ALIGN_NATURAL; 4252 } 4253 } 4254 4255 /* Place FP constants in the constant pool instead of TOC 4256 if section anchors enabled. */ 4257 if (flag_section_anchors 4258 && !global_options_set.x_TARGET_NO_FP_IN_TOC) 4259 TARGET_NO_FP_IN_TOC = 1; 4260 4261 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4262 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags); 4263 4264#ifdef SUBTARGET_OVERRIDE_OPTIONS 4265 SUBTARGET_OVERRIDE_OPTIONS; 4266#endif 4267#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 4268 SUBSUBTARGET_OVERRIDE_OPTIONS; 4269#endif 4270#ifdef SUB3TARGET_OVERRIDE_OPTIONS 4271 SUB3TARGET_OVERRIDE_OPTIONS; 4272#endif 4273 4274 /* If the ABI has support for PC-relative relocations, enable it by default. 4275 This test depends on the sub-target tests above setting the code model to 4276 medium for ELF v2 systems. */ 4277 if (PCREL_SUPPORTED_BY_OS 4278 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0) 4279 rs6000_isa_flags |= OPTION_MASK_PCREL; 4280 4281 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until 4282 after the subtarget override options are done. */ 4283 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM) 4284 { 4285 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0) 4286 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium"); 4287 4288 rs6000_isa_flags &= ~OPTION_MASK_PCREL; 4289 } 4290 4291 /* Enable -mmma by default on power10 systems. */ 4292 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) 4293 rs6000_isa_flags |= OPTION_MASK_MMA; 4294 4295 /* Turn off vector pair/mma options on non-power10 systems. */ 4296 else if (!TARGET_POWER10 && TARGET_MMA) 4297 { 4298 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) 4299 error ("%qs requires %qs", "-mmma", "-mcpu=power10"); 4300 4301 rs6000_isa_flags &= ~OPTION_MASK_MMA; 4302 } 4303 4304 /* MMA requires SIMD support as ISA 3.1 claims and our implementation 4305 such as "*movoo" uses vector pair access which use VSX registers. 4306 So make MMA require VSX support here. 
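   Illustrative examples (hypothetical command lines): '-mmma -mno-vsx' is
   rejected by the block below, while '-mcpu=power10 -mno-vsx', where MMA
   was only enabled by default, silently drops MMA instead.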
*/ 4307 if (TARGET_MMA && !TARGET_VSX) 4308 { 4309 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) 4310 error ("%qs requires %qs", "-mmma", "-mvsx"); 4311 rs6000_isa_flags &= ~OPTION_MASK_MMA; 4312 } 4313 4314 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) 4315 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); 4316 4317 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4 4318 && rs6000_tune != PROCESSOR_POWER5 4319 && rs6000_tune != PROCESSOR_POWER6 4320 && rs6000_tune != PROCESSOR_POWER7 4321 && rs6000_tune != PROCESSOR_POWER8 4322 && rs6000_tune != PROCESSOR_POWER9 4323 && rs6000_tune != PROCESSOR_POWER10 4324 && rs6000_tune != PROCESSOR_PPCA2 4325 && rs6000_tune != PROCESSOR_CELL 4326 && rs6000_tune != PROCESSOR_PPC476); 4327 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4 4328 || rs6000_tune == PROCESSOR_POWER5 4329 || rs6000_tune == PROCESSOR_POWER7 4330 || rs6000_tune == PROCESSOR_POWER8); 4331 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4 4332 || rs6000_tune == PROCESSOR_POWER5 4333 || rs6000_tune == PROCESSOR_POWER6 4334 || rs6000_tune == PROCESSOR_POWER7 4335 || rs6000_tune == PROCESSOR_POWER8 4336 || rs6000_tune == PROCESSOR_POWER9 4337 || rs6000_tune == PROCESSOR_POWER10 4338 || rs6000_tune == PROCESSOR_PPCE500MC 4339 || rs6000_tune == PROCESSOR_PPCE500MC64 4340 || rs6000_tune == PROCESSOR_PPCE5500 4341 || rs6000_tune == PROCESSOR_PPCE6500); 4342 4343 /* Allow debug switches to override the above settings. These are set to -1 4344 in rs6000.opt to indicate the user hasn't directly set the switch. */ 4345 if (TARGET_ALWAYS_HINT >= 0) 4346 rs6000_always_hint = TARGET_ALWAYS_HINT; 4347 4348 if (TARGET_SCHED_GROUPS >= 0) 4349 rs6000_sched_groups = TARGET_SCHED_GROUPS; 4350 4351 if (TARGET_ALIGN_BRANCH_TARGETS >= 0) 4352 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; 4353 4354 rs6000_sched_restricted_insns_priority 4355 = (rs6000_sched_groups ? 1 : 0); 4356 4357 /* Handle -msched-costly-dep option. */ 4358 rs6000_sched_costly_dep 4359 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly); 4360 4361 if (rs6000_sched_costly_dep_str) 4362 { 4363 if (! strcmp (rs6000_sched_costly_dep_str, "no")) 4364 rs6000_sched_costly_dep = no_dep_costly; 4365 else if (! strcmp (rs6000_sched_costly_dep_str, "all")) 4366 rs6000_sched_costly_dep = all_deps_costly; 4367 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load")) 4368 rs6000_sched_costly_dep = true_store_to_load_dep_costly; 4369 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) 4370 rs6000_sched_costly_dep = store_to_load_dep_costly; 4371 else 4372 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost) 4373 atoi (rs6000_sched_costly_dep_str)); 4374 } 4375 4376 /* Handle -minsert-sched-nops option. */ 4377 rs6000_sched_insert_nops 4378 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none); 4379 4380 if (rs6000_sched_insert_nops_str) 4381 { 4382 if (! strcmp (rs6000_sched_insert_nops_str, "no")) 4383 rs6000_sched_insert_nops = sched_finish_none; 4384 else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) 4385 rs6000_sched_insert_nops = sched_finish_pad_groups; 4386 else if (! 
strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) 4387 rs6000_sched_insert_nops = sched_finish_regroup_exact; 4388 else 4389 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion) 4390 atoi (rs6000_sched_insert_nops_str)); 4391 } 4392 4393 /* Handle stack protector */ 4394 if (!global_options_set.x_rs6000_stack_protector_guard) 4395#ifdef TARGET_THREAD_SSP_OFFSET 4396 rs6000_stack_protector_guard = SSP_TLS; 4397#else 4398 rs6000_stack_protector_guard = SSP_GLOBAL; 4399#endif 4400 4401#ifdef TARGET_THREAD_SSP_OFFSET 4402 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; 4403 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; 4404#endif 4405 4406 if (global_options_set.x_rs6000_stack_protector_guard_offset_str) 4407 { 4408 char *endp; 4409 const char *str = rs6000_stack_protector_guard_offset_str; 4410 4411 errno = 0; 4412 long offset = strtol (str, &endp, 0); 4413 if (!*str || *endp || errno) 4414 error ("%qs is not a valid number in %qs", str, 4415 "-mstack-protector-guard-offset="); 4416 4417 if (!IN_RANGE (offset, -0x8000, 0x7fff) 4418 || (TARGET_64BIT && (offset & 3))) 4419 error ("%qs is not a valid offset in %qs", str, 4420 "-mstack-protector-guard-offset="); 4421 4422 rs6000_stack_protector_guard_offset = offset; 4423 } 4424 4425 if (global_options_set.x_rs6000_stack_protector_guard_reg_str) 4426 { 4427 const char *str = rs6000_stack_protector_guard_reg_str; 4428 int reg = decode_reg_name (str); 4429 4430 if (!IN_RANGE (reg, 1, 31)) 4431 error ("%qs is not a valid base register in %qs", str, 4432 "-mstack-protector-guard-reg="); 4433 4434 rs6000_stack_protector_guard_reg = reg; 4435 } 4436 4437 if (rs6000_stack_protector_guard == SSP_TLS 4438 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) 4439 error ("%qs needs a valid base register", "-mstack-protector-guard=tls"); 4440 4441 if (global_init_p) 4442 { 4443#ifdef TARGET_REGNAMES 4444 /* If the user desires alternate register names, copy in the 4445 alternate names now. */ 4446 if (TARGET_REGNAMES) 4447 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names)); 4448#endif 4449 4450 /* Set aix_struct_return last, after the ABI is determined. 4451 If -maix-struct-return or -msvr4-struct-return was explicitly 4452 used, don't override with the ABI default. */ 4453 if (!global_options_set.x_aix_struct_return) 4454 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET); 4455 4456#if 0 4457 /* IBM XL compiler defaults to unsigned bitfields. */ 4458 if (TARGET_XL_COMPAT) 4459 flag_signed_bitfields = 0; 4460#endif 4461 4462 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD) 4463 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format; 4464 4465 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1); 4466 4467 /* We can only guarantee the availability of DI pseudo-ops when 4468 assembling for 64-bit targets. */ 4469 if (!TARGET_64BIT) 4470 { 4471 targetm.asm_out.aligned_op.di = NULL; 4472 targetm.asm_out.unaligned_op.di = NULL; 4473 } 4474 4475 4476 /* Set branch target alignment, if not optimizing for size. */ 4477 if (!optimize_size) 4478 { 4479 /* Cell wants to be aligned 8byte for dual issue. Titan wants to be 4480 aligned 8byte to avoid misprediction by the branch predictor. 
*/ 4481 if (rs6000_tune == PROCESSOR_TITAN 4482 || rs6000_tune == PROCESSOR_CELL) 4483 { 4484 if (flag_align_functions && !str_align_functions) 4485 str_align_functions = "8"; 4486 if (flag_align_jumps && !str_align_jumps) 4487 str_align_jumps = "8"; 4488 if (flag_align_loops && !str_align_loops) 4489 str_align_loops = "8"; 4490 } 4491 if (rs6000_align_branch_targets) 4492 { 4493 if (flag_align_functions && !str_align_functions) 4494 str_align_functions = "16"; 4495 if (flag_align_jumps && !str_align_jumps) 4496 str_align_jumps = "16"; 4497 if (flag_align_loops && !str_align_loops) 4498 { 4499 can_override_loop_align = 1; 4500 str_align_loops = "16"; 4501 } 4502 } 4503 } 4504 4505 /* Arrange to save and restore machine status around nested functions. */ 4506 init_machine_status = rs6000_init_machine_status; 4507 4508 /* We should always be splitting complex arguments, but we can't break 4509 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ 4510 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) 4511 targetm.calls.split_complex_arg = NULL; 4512 4513 /* The AIX and ELFv1 ABIs define standard function descriptors. */ 4514 if (DEFAULT_ABI == ABI_AIX) 4515 targetm.calls.custom_function_descriptors = 0; 4516 } 4517 4518 /* Initialize rs6000_cost with the appropriate target costs. */ 4519 if (optimize_size) 4520 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; 4521 else 4522 switch (rs6000_tune) 4523 { 4524 case PROCESSOR_RS64A: 4525 rs6000_cost = &rs64a_cost; 4526 break; 4527 4528 case PROCESSOR_MPCCORE: 4529 rs6000_cost = &mpccore_cost; 4530 break; 4531 4532 case PROCESSOR_PPC403: 4533 rs6000_cost = &ppc403_cost; 4534 break; 4535 4536 case PROCESSOR_PPC405: 4537 rs6000_cost = &ppc405_cost; 4538 break; 4539 4540 case PROCESSOR_PPC440: 4541 rs6000_cost = &ppc440_cost; 4542 break; 4543 4544 case PROCESSOR_PPC476: 4545 rs6000_cost = &ppc476_cost; 4546 break; 4547 4548 case PROCESSOR_PPC601: 4549 rs6000_cost = &ppc601_cost; 4550 break; 4551 4552 case PROCESSOR_PPC603: 4553 rs6000_cost = &ppc603_cost; 4554 break; 4555 4556 case PROCESSOR_PPC604: 4557 rs6000_cost = &ppc604_cost; 4558 break; 4559 4560 case PROCESSOR_PPC604e: 4561 rs6000_cost = &ppc604e_cost; 4562 break; 4563 4564 case PROCESSOR_PPC620: 4565 rs6000_cost = &ppc620_cost; 4566 break; 4567 4568 case PROCESSOR_PPC630: 4569 rs6000_cost = &ppc630_cost; 4570 break; 4571 4572 case PROCESSOR_CELL: 4573 rs6000_cost = &ppccell_cost; 4574 break; 4575 4576 case PROCESSOR_PPC750: 4577 case PROCESSOR_PPC7400: 4578 rs6000_cost = &ppc750_cost; 4579 break; 4580 4581 case PROCESSOR_PPC7450: 4582 rs6000_cost = &ppc7450_cost; 4583 break; 4584 4585 case PROCESSOR_PPC8540: 4586 case PROCESSOR_PPC8548: 4587 rs6000_cost = &ppc8540_cost; 4588 break; 4589 4590 case PROCESSOR_PPCE300C2: 4591 case PROCESSOR_PPCE300C3: 4592 rs6000_cost = &ppce300c2c3_cost; 4593 break; 4594 4595 case PROCESSOR_PPCE500MC: 4596 rs6000_cost = &ppce500mc_cost; 4597 break; 4598 4599 case PROCESSOR_PPCE500MC64: 4600 rs6000_cost = &ppce500mc64_cost; 4601 break; 4602 4603 case PROCESSOR_PPCE5500: 4604 rs6000_cost = &ppce5500_cost; 4605 break; 4606 4607 case PROCESSOR_PPCE6500: 4608 rs6000_cost = &ppce6500_cost; 4609 break; 4610 4611 case PROCESSOR_TITAN: 4612 rs6000_cost = &titan_cost; 4613 break; 4614 4615 case PROCESSOR_POWER4: 4616 case PROCESSOR_POWER5: 4617 rs6000_cost = &power4_cost; 4618 break; 4619 4620 case PROCESSOR_POWER6: 4621 rs6000_cost = &power6_cost; 4622 break; 4623 4624 case PROCESSOR_POWER7: 4625 rs6000_cost = &power7_cost; 4626 break; 
4627 4628 case PROCESSOR_POWER8: 4629 rs6000_cost = &power8_cost; 4630 break; 4631 4632 case PROCESSOR_POWER9: 4633 case PROCESSOR_POWER10: 4634 rs6000_cost = &power9_cost; 4635 break; 4636 4637 case PROCESSOR_PPCA2: 4638 rs6000_cost = &ppca2_cost; 4639 break; 4640 4641 default: 4642 gcc_unreachable (); 4643 } 4644 4645 if (global_init_p) 4646 { 4647 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4648 param_simultaneous_prefetches, 4649 rs6000_cost->simultaneous_prefetches); 4650 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4651 param_l1_cache_size, 4652 rs6000_cost->l1_cache_size); 4653 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4654 param_l1_cache_line_size, 4655 rs6000_cost->cache_line_size); 4656 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4657 param_l2_cache_size, 4658 rs6000_cost->l2_cache_size); 4659 4660 /* Increase loop peeling limits based on performance analysis. */ 4661 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4662 param_max_peeled_insns, 400); 4663 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4664 param_max_completely_peeled_insns, 400); 4665 4666 /* Use the 'model' -fsched-pressure algorithm by default. */ 4667 SET_OPTION_IF_UNSET (&global_options, &global_options_set, 4668 param_sched_pressure_algorithm, 4669 SCHED_PRESSURE_MODEL); 4670 4671 /* If using typedef char *va_list, signal that 4672 __builtin_va_start (&ap, 0) can be optimized to 4673 ap = __builtin_next_arg (0). */ 4674 if (DEFAULT_ABI != ABI_V4) 4675 targetm.expand_builtin_va_start = NULL; 4676 } 4677 4678 rs6000_override_options_after_change (); 4679 4680 /* If not explicitly specified via option, decide whether to generate indexed 4681 load/store instructions. A value of -1 indicates that the 4682 initial value of this variable has not been overwritten. During 4683 compilation, TARGET_AVOID_XFORM is either 0 or 1. */ 4684 if (TARGET_AVOID_XFORM == -1) 4685 /* Avoid indexed addressing when targeting Power6 in order to avoid the 4686 DERAT mispredict penalty. However the LVE and STVE altivec instructions 4687 need indexed accesses and the type used is the scalar type of the element 4688 being loaded or stored. */ 4689 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB 4690 && !TARGET_ALTIVEC); 4691 4692 /* Set the -mrecip options. */ 4693 if (rs6000_recip_name) 4694 { 4695 char *p = ASTRDUP (rs6000_recip_name); 4696 char *q; 4697 unsigned int mask, i; 4698 bool invert; 4699 4700 while ((q = strtok (p, ",")) != NULL) 4701 { 4702 p = NULL; 4703 if (*q == '!') 4704 { 4705 invert = true; 4706 q++; 4707 } 4708 else 4709 invert = false; 4710 4711 if (!strcmp (q, "default")) 4712 mask = ((TARGET_RECIP_PRECISION) 4713 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION); 4714 else 4715 { 4716 for (i = 0; i < ARRAY_SIZE (recip_options); i++) 4717 if (!strcmp (q, recip_options[i].string)) 4718 { 4719 mask = recip_options[i].mask; 4720 break; 4721 } 4722 4723 if (i == ARRAY_SIZE (recip_options)) 4724 { 4725 error ("unknown option for %<%s=%s%>", "-mrecip", q); 4726 invert = false; 4727 mask = 0; 4728 ret = false; 4729 } 4730 } 4731 4732 if (invert) 4733 rs6000_recip_control &= ~mask; 4734 else 4735 rs6000_recip_control |= mask; 4736 } 4737 } 4738 4739 /* Set the builtin mask of the various options used that could affect which 4740 builtins were used. In the past we used target_flags, but we've run out 4741 of bits, and some options are no longer in target_flags. 
*/
4742  rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4743  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4744    rs6000_print_builtin_options (stderr, 0, "builtin mask",
4745				  rs6000_builtin_mask);
4746
4747  /* Initialize all of the registers.  */
4748  rs6000_init_hard_regno_mode_ok (global_init_p);
4749
4750  /* Save the initial options in case the user uses function-specific
     options.  */
4751  if (global_init_p)
4752    target_option_default_node = target_option_current_node
4753      = build_target_option_node (&global_options);
4754
4755  /* If not explicitly specified via option, decide whether to generate the
4756     extra blr's required to preserve the link stack on some cpus (e.g. 476).  */
4757  if (TARGET_LINK_STACK == -1)
4758    SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4759
4760  /* Deprecate use of -mno-speculate-indirect-jumps.  */
4761  if (!rs6000_speculate_indirect_jumps)
4762    warning (0, "%qs is deprecated and not recommended in any circumstances",
4763	     "-mno-speculate-indirect-jumps");
4764
4765  return ret;
4766}
4767
4768/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
4769   define the target cpu type.  */
4770
4771static void
4772rs6000_option_override (void)
4773{
4774  (void) rs6000_option_override_internal (true);
4775}
4776
4777
4778/* Implement targetm.vectorize.builtin_mask_for_load.  */
4779static tree
4780rs6000_builtin_mask_for_load (void)
4781{
4782  /* Don't use lvsl/vperm for P8 and similarly efficient machines.  */
4783  if ((TARGET_ALTIVEC && !TARGET_VSX)
4784      || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4785    return altivec_builtin_mask_for_load;
4786  else
4787    return 0;
4788}
4789
4790/* Implement LOOP_ALIGN.  */
4791align_flags
4792rs6000_loop_align (rtx label)
4793{
4794  basic_block bb;
4795  int ninsns;
4796
4797  /* Don't override loop alignment if -falign-loops was specified.  */
4798  if (!can_override_loop_align)
4799    return align_loops;
4800
4801  bb = BLOCK_FOR_INSN (label);
4802  ninsns = num_loop_insns (bb->loop_father);
4803
4804  /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default.  */
4805  if (ninsns > 4 && ninsns <= 8
4806      && (rs6000_tune == PROCESSOR_POWER4
4807	  || rs6000_tune == PROCESSOR_POWER5
4808	  || rs6000_tune == PROCESSOR_POWER6
4809	  || rs6000_tune == PROCESSOR_POWER7
4810	  || rs6000_tune == PROCESSOR_POWER8))
4811    return align_flags (5);
4812  else
4813    return align_loops;
4814}
4815
4816/* Return true iff a data reference of TYPE can reach vector alignment (16)
4817   after applying N iterations.  This routine does not determine how many
4818   iterations are required to reach the desired alignment.  */
4819
4820static bool
4821rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4822{
4823  if (is_packed)
4824    return false;
4825
4826  if (TARGET_32BIT)
4827    {
4828      if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4829	return true;
4830
4831      if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4832	return true;
4833
4834      return false;
4835    }
4836  else
4837    {
4838      if (TARGET_MACHO)
4839	return false;
4840
4841      /* Assuming that all other types are naturally aligned.  CHECKME!  */
4842      return true;
4843    }
4844}
4845
4846/* Return true if the vector misalignment factor is supported by the
4847   target.
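   Illustrative behaviour of the hook below (a sketch, not an exhaustive
   description): with VSX but without efficient unaligned accesses (e.g.
   power7), a known misalignment that is a multiple of 4 bytes is accepted,
   an unknown misalignment is accepted only if vector alignment is reachable
   and the elements are 32 or 64 bits wide, and everything else is refused;
   TARGET_EFFICIENT_UNALIGNED_VSX targets (power8 and later) accept any
   misalignment.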
*/ 4848static bool 4849rs6000_builtin_support_vector_misalignment (machine_mode mode, 4850 const_tree type, 4851 int misalignment, 4852 bool is_packed) 4853{ 4854 if (TARGET_VSX) 4855 { 4856 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4857 return true; 4858 4859 /* Return if movmisalign pattern is not supported for this mode. */ 4860 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) 4861 return false; 4862 4863 if (misalignment == -1) 4864 { 4865 /* Misalignment factor is unknown at compile time but we know 4866 it's word aligned. */ 4867 if (rs6000_vector_alignment_reachable (type, is_packed)) 4868 { 4869 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); 4870 4871 if (element_size == 64 || element_size == 32) 4872 return true; 4873 } 4874 4875 return false; 4876 } 4877 4878 /* VSX supports word-aligned vector. */ 4879 if (misalignment % 4 == 0) 4880 return true; 4881 } 4882 return false; 4883} 4884 4885/* Implement targetm.vectorize.builtin_vectorization_cost. */ 4886static int 4887rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, 4888 tree vectype, int misalign) 4889{ 4890 unsigned elements; 4891 tree elem_type; 4892 4893 switch (type_of_cost) 4894 { 4895 case scalar_stmt: 4896 case scalar_store: 4897 case vector_stmt: 4898 case vector_store: 4899 case vec_to_scalar: 4900 case scalar_to_vec: 4901 case cond_branch_not_taken: 4902 return 1; 4903 case scalar_load: 4904 case vector_load: 4905 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 4906 return 2; 4907 4908 case vec_perm: 4909 /* Power7 has only one permute unit, make it a bit expensive. */ 4910 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7) 4911 return 3; 4912 else 4913 return 1; 4914 4915 case vec_promote_demote: 4916 /* Power7 has only one permute/pack unit, make it a bit expensive. */ 4917 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7) 4918 return 4; 4919 else 4920 return 1; 4921 4922 case cond_branch_taken: 4923 return 3; 4924 4925 case unaligned_load: 4926 case vector_gather_load: 4927 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 4928 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4929 return 2; 4930 4931 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) 4932 { 4933 elements = TYPE_VECTOR_SUBPARTS (vectype); 4934 if (elements == 2) 4935 /* Double word aligned. */ 4936 return 4; 4937 4938 if (elements == 4) 4939 { 4940 switch (misalign) 4941 { 4942 case 8: 4943 /* Double word aligned. */ 4944 return 4; 4945 4946 case -1: 4947 /* Unknown misalignment. */ 4948 case 4: 4949 case 12: 4950 /* Word aligned. */ 4951 return 33; 4952 4953 default: 4954 gcc_unreachable (); 4955 } 4956 } 4957 } 4958 4959 if (TARGET_ALTIVEC) 4960 /* Misaligned loads are not supported. */ 4961 gcc_unreachable (); 4962 4963 /* Like rs6000_insn_cost, make load insns cost a bit more. */ 4964 return 4; 4965 4966 case unaligned_store: 4967 case vector_scatter_store: 4968 if (TARGET_EFFICIENT_UNALIGNED_VSX) 4969 return 1; 4970 4971 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) 4972 { 4973 elements = TYPE_VECTOR_SUBPARTS (vectype); 4974 if (elements == 2) 4975 /* Double word aligned. */ 4976 return 2; 4977 4978 if (elements == 4) 4979 { 4980 switch (misalign) 4981 { 4982 case 8: 4983 /* Double word aligned. */ 4984 return 2; 4985 4986 case -1: 4987 /* Unknown misalignment. */ 4988 case 4: 4989 case 12: 4990 /* Word aligned. */ 4991 return 23; 4992 4993 default: 4994 gcc_unreachable (); 4995 } 4996 } 4997 } 4998 4999 if (TARGET_ALTIVEC) 5000 /* Misaligned stores are not supported. 
*/ 5001 gcc_unreachable (); 5002 5003 return 2; 5004 5005 case vec_construct: 5006 /* This is a rough approximation assuming non-constant elements 5007 constructed into a vector via element insertion. FIXME: 5008 vec_construct is not granular enough for uniformly good 5009 decisions. If the initialization is a splat, this is 5010 cheaper than we estimate. Improve this someday. */ 5011 elem_type = TREE_TYPE (vectype); 5012 /* 32-bit vectors loaded into registers are stored as double 5013 precision, so we need 2 permutes, 2 converts, and 1 merge 5014 to construct a vector of short floats from them. */ 5015 if (SCALAR_FLOAT_TYPE_P (elem_type) 5016 && TYPE_PRECISION (elem_type) == 32) 5017 return 5; 5018 /* On POWER9, integer vector types are built up in GPRs and then 5019 use a direct move (2 cycles). For POWER8 this is even worse, 5020 as we need two direct moves and a merge, and the direct moves 5021 are five cycles. */ 5022 else if (INTEGRAL_TYPE_P (elem_type)) 5023 { 5024 if (TARGET_P9_VECTOR) 5025 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2; 5026 else 5027 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5; 5028 } 5029 else 5030 /* V2DFmode doesn't need a direct move. */ 5031 return 2; 5032 5033 default: 5034 gcc_unreachable (); 5035 } 5036} 5037 5038/* Implement targetm.vectorize.preferred_simd_mode. */ 5039 5040static machine_mode 5041rs6000_preferred_simd_mode (scalar_mode mode) 5042{ 5043 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode)); 5044 5045 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ())) 5046 return vmode.require (); 5047 5048 return word_mode; 5049} 5050 5051typedef struct _rs6000_cost_data 5052{ 5053 struct loop *loop_info; 5054 unsigned cost[3]; 5055} rs6000_cost_data; 5056 5057/* Test for likely overcommitment of vector hardware resources. If a 5058 loop iteration is relatively large, and too large a percentage of 5059 instructions in the loop are vectorized, the cost model may not 5060 adequately reflect delays from unavailable vector resources. 5061 Penalize the loop body cost for this case. */ 5062 5063static void 5064rs6000_density_test (rs6000_cost_data *data) 5065{ 5066 const int DENSITY_PCT_THRESHOLD = 85; 5067 const int DENSITY_SIZE_THRESHOLD = 70; 5068 const int DENSITY_PENALTY = 10; 5069 struct loop *loop = data->loop_info; 5070 basic_block *bbs = get_loop_body (loop); 5071 int nbbs = loop->num_nodes; 5072 loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info); 5073 int vec_cost = data->cost[vect_body], not_vec_cost = 0; 5074 int i, density_pct; 5075 5076 for (i = 0; i < nbbs; i++) 5077 { 5078 basic_block bb = bbs[i]; 5079 gimple_stmt_iterator gsi; 5080 5081 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 5082 { 5083 gimple *stmt = gsi_stmt (gsi); 5084 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); 5085 5086 if (!STMT_VINFO_RELEVANT_P (stmt_info) 5087 && !STMT_VINFO_IN_PATTERN_P (stmt_info)) 5088 not_vec_cost++; 5089 } 5090 } 5091 5092 free (bbs); 5093 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost); 5094 5095 if (density_pct > DENSITY_PCT_THRESHOLD 5096 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD) 5097 { 5098 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100; 5099 if (dump_enabled_p ()) 5100 dump_printf_loc (MSG_NOTE, vect_location, 5101 "density %d%%, cost %d exceeds threshold, penalizing " 5102 "loop body cost by %d%%", density_pct, 5103 vec_cost + not_vec_cost, DENSITY_PENALTY); 5104 } 5105} 5106 5107/* Implement targetm.vectorize.init_cost. 
*/ 5108
5109/* For each vectorized loop, this var holds TRUE iff a non-memory vector
5110 instruction is needed by the vectorization. */
5111static bool rs6000_vect_nonmem;
5112
5113static void *
5114rs6000_init_cost (struct loop *loop_info)
5115{
5116 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5117 data->loop_info = loop_info;
5118 data->cost[vect_prologue] = 0;
5119 data->cost[vect_body] = 0;
5120 data->cost[vect_epilogue] = 0;
5121 rs6000_vect_nonmem = false;
5122 return data;
5123}
5124
5125/* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5126 For some statements, we would like to further fine-tune the cost on top of
5127 the rs6000_builtin_vectorization_cost handling, which doesn't have any
5128 information on statement operation codes etc. One typical case here is
5129 COND_EXPR: it has the same cost as a simple FXU instruction when evaluated
5130 for scalar cost, but it should be priced higher whether it is transformed to
5131 compare + branch or to compare + isel instructions. */
5132
5133static unsigned
5134adjust_vectorization_cost (enum vect_cost_for_stmt kind,
5135 struct _stmt_vec_info *stmt_info)
5136{
5137 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5138 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5139 {
5140 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5141 if (subcode == COND_EXPR)
5142 return 2;
5143 }
5144
5145 return 0;
5146}
5147
5148/* Implement targetm.vectorize.add_stmt_cost. */
5149
5150static unsigned
5151rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5152 struct _stmt_vec_info *stmt_info, int misalign,
5153 enum vect_cost_model_location where)
5154{
5155 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5156 unsigned retval = 0;
5157
5158 if (flag_vect_cost_model)
5159 {
5160 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5161 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5162 misalign);
5163 stmt_cost += adjust_vectorization_cost (kind, stmt_info);
5164 /* Statements in an inner loop relative to the loop being
5165 vectorized are weighted more heavily. The value here is
5166 arbitrary and could potentially be improved with analysis. */
5167 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5168 count *= 50; /* FIXME. */
5169
5170 retval = (unsigned) (count * stmt_cost);
5171 cost_data->cost[where] += retval;
5172
5173 /* Check whether we're doing something other than just a copy loop.
5174 Not all such loops may be profitably vectorized; see
5175 rs6000_finish_cost. */
5176 if ((kind == vec_to_scalar || kind == vec_perm
5177 || kind == vec_promote_demote || kind == vec_construct
5178 || kind == scalar_to_vec)
5179 || (where == vect_body && kind == vector_stmt))
5180 rs6000_vect_nonmem = true;
5181 }
5182
5183 return retval;
5184}
5185
5186/* Implement targetm.vectorize.finish_cost. */
5187
5188static void
5189rs6000_finish_cost (void *data, unsigned *prologue_cost,
5190 unsigned *body_cost, unsigned *epilogue_cost)
5191{
5192 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5193
5194 if (cost_data->loop_info)
5195 rs6000_density_test (cost_data);
5196
5197 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5198 that require versioning for any reason. The vectorization is at
5199 best a wash inside the loop, and the versioning checks make
5200 profitability highly unlikely and potentially quite harmful.
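Such a loop is recognized below as one with vectorization factor 2, no statement that set rs6000_vect_nonmem, and a versioning requirement; adding a large constant to the body cost makes the vectorizer reject it.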
*/ 5201 if (cost_data->loop_info) 5202 { 5203 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); 5204 if (!rs6000_vect_nonmem 5205 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 5206 && LOOP_REQUIRES_VERSIONING (vec_info)) 5207 cost_data->cost[vect_body] += 10000; 5208 } 5209 5210 *prologue_cost = cost_data->cost[vect_prologue]; 5211 *body_cost = cost_data->cost[vect_body]; 5212 *epilogue_cost = cost_data->cost[vect_epilogue]; 5213} 5214 5215/* Implement targetm.vectorize.destroy_cost_data. */ 5216 5217static void 5218rs6000_destroy_cost_data (void *data) 5219{ 5220 free (data); 5221} 5222 5223/* Implement targetm.loop_unroll_adjust. */ 5224 5225static unsigned 5226rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) 5227{ 5228 if (unroll_only_small_loops) 5229 { 5230 /* TODO: This is hardcoded to 10 right now. It can be refined, for 5231 example we may want to unroll very small loops more times (4 perhaps). 5232 We also should use a PARAM for this. */ 5233 if (loop->ninsns <= 10) 5234 return MIN (2, nunroll); 5235 else 5236 return 0; 5237 } 5238 5239 return nunroll; 5240} 5241 5242/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a 5243 library with vectorized intrinsics. */ 5244 5245static tree 5246rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out, 5247 tree type_in) 5248{ 5249 char name[32]; 5250 const char *suffix = NULL; 5251 tree fntype, new_fndecl, bdecl = NULL_TREE; 5252 int n_args = 1; 5253 const char *bname; 5254 machine_mode el_mode, in_mode; 5255 int n, in_n; 5256 5257 /* Libmass is suitable for unsafe math only as it does not correctly support 5258 parts of IEEE with the required precision such as denormals. Only support 5259 it if we have VSX to use the simd d2 or f4 functions. 5260 XXX: Add variable length support. 
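For example, with unsafe math optimizations and VSX enabled, a vectorized pow over V2DF becomes a call to the MASS routine powd2, and a vectorized powf over V4SF becomes powf4.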
*/ 5261 if (!flag_unsafe_math_optimizations || !TARGET_VSX) 5262 return NULL_TREE; 5263 5264 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 5265 n = TYPE_VECTOR_SUBPARTS (type_out); 5266 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 5267 in_n = TYPE_VECTOR_SUBPARTS (type_in); 5268 if (el_mode != in_mode 5269 || n != in_n) 5270 return NULL_TREE; 5271 5272 switch (fn) 5273 { 5274 CASE_CFN_ATAN2: 5275 CASE_CFN_HYPOT: 5276 CASE_CFN_POW: 5277 n_args = 2; 5278 gcc_fallthrough (); 5279 5280 CASE_CFN_ACOS: 5281 CASE_CFN_ACOSH: 5282 CASE_CFN_ASIN: 5283 CASE_CFN_ASINH: 5284 CASE_CFN_ATAN: 5285 CASE_CFN_ATANH: 5286 CASE_CFN_CBRT: 5287 CASE_CFN_COS: 5288 CASE_CFN_COSH: 5289 CASE_CFN_ERF: 5290 CASE_CFN_ERFC: 5291 CASE_CFN_EXP2: 5292 CASE_CFN_EXP: 5293 CASE_CFN_EXPM1: 5294 CASE_CFN_LGAMMA: 5295 CASE_CFN_LOG10: 5296 CASE_CFN_LOG1P: 5297 CASE_CFN_LOG2: 5298 CASE_CFN_LOG: 5299 CASE_CFN_SIN: 5300 CASE_CFN_SINH: 5301 CASE_CFN_SQRT: 5302 CASE_CFN_TAN: 5303 CASE_CFN_TANH: 5304 if (el_mode == DFmode && n == 2) 5305 { 5306 bdecl = mathfn_built_in (double_type_node, fn); 5307 suffix = "d2"; /* pow -> powd2 */ 5308 } 5309 else if (el_mode == SFmode && n == 4) 5310 { 5311 bdecl = mathfn_built_in (float_type_node, fn); 5312 suffix = "4"; /* powf -> powf4 */ 5313 } 5314 else 5315 return NULL_TREE; 5316 if (!bdecl) 5317 return NULL_TREE; 5318 break; 5319 5320 default: 5321 return NULL_TREE; 5322 } 5323 5324 gcc_assert (suffix != NULL); 5325 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)); 5326 if (!bname) 5327 return NULL_TREE; 5328 5329 strcpy (name, bname + strlen ("__builtin_")); 5330 strcat (name, suffix); 5331 5332 if (n_args == 1) 5333 fntype = build_function_type_list (type_out, type_in, NULL); 5334 else if (n_args == 2) 5335 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 5336 else 5337 gcc_unreachable (); 5338 5339 /* Build a function declaration for the vectorized function. */ 5340 new_fndecl = build_decl (BUILTINS_LOCATION, 5341 FUNCTION_DECL, get_identifier (name), fntype); 5342 TREE_PUBLIC (new_fndecl) = 1; 5343 DECL_EXTERNAL (new_fndecl) = 1; 5344 DECL_IS_NOVOPS (new_fndecl) = 1; 5345 TREE_READONLY (new_fndecl) = 1; 5346 5347 return new_fndecl; 5348} 5349 5350/* Returns a function decl for a vectorized version of the builtin function 5351 with builtin function code FN and the result vector type TYPE, or NULL_TREE 5352 if it is not available. 
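For example, copysign on a V2DF type maps to the VSX xvcpsgndp builtin, and ceil on V4SF maps to xvrspip (or vrfip when only Altivec is available).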
*/ 5353 5354static tree 5355rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, 5356 tree type_in) 5357{ 5358 machine_mode in_mode, out_mode; 5359 int in_n, out_n; 5360 5361 if (TARGET_DEBUG_BUILTIN) 5362 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n", 5363 combined_fn_name (combined_fn (fn)), 5364 GET_MODE_NAME (TYPE_MODE (type_out)), 5365 GET_MODE_NAME (TYPE_MODE (type_in))); 5366 5367 if (TREE_CODE (type_out) != VECTOR_TYPE 5368 || TREE_CODE (type_in) != VECTOR_TYPE) 5369 return NULL_TREE; 5370 5371 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 5372 out_n = TYPE_VECTOR_SUBPARTS (type_out); 5373 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 5374 in_n = TYPE_VECTOR_SUBPARTS (type_in); 5375 5376 switch (fn) 5377 { 5378 CASE_CFN_COPYSIGN: 5379 if (VECTOR_UNIT_VSX_P (V2DFmode) 5380 && out_mode == DFmode && out_n == 2 5381 && in_mode == DFmode && in_n == 2) 5382 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP]; 5383 if (VECTOR_UNIT_VSX_P (V4SFmode) 5384 && out_mode == SFmode && out_n == 4 5385 && in_mode == SFmode && in_n == 4) 5386 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP]; 5387 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5388 && out_mode == SFmode && out_n == 4 5389 && in_mode == SFmode && in_n == 4) 5390 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; 5391 break; 5392 CASE_CFN_CEIL: 5393 if (VECTOR_UNIT_VSX_P (V2DFmode) 5394 && out_mode == DFmode && out_n == 2 5395 && in_mode == DFmode && in_n == 2) 5396 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP]; 5397 if (VECTOR_UNIT_VSX_P (V4SFmode) 5398 && out_mode == SFmode && out_n == 4 5399 && in_mode == SFmode && in_n == 4) 5400 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP]; 5401 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5402 && out_mode == SFmode && out_n == 4 5403 && in_mode == SFmode && in_n == 4) 5404 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP]; 5405 break; 5406 CASE_CFN_FLOOR: 5407 if (VECTOR_UNIT_VSX_P (V2DFmode) 5408 && out_mode == DFmode && out_n == 2 5409 && in_mode == DFmode && in_n == 2) 5410 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM]; 5411 if (VECTOR_UNIT_VSX_P (V4SFmode) 5412 && out_mode == SFmode && out_n == 4 5413 && in_mode == SFmode && in_n == 4) 5414 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM]; 5415 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5416 && out_mode == SFmode && out_n == 4 5417 && in_mode == SFmode && in_n == 4) 5418 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM]; 5419 break; 5420 CASE_CFN_FMA: 5421 if (VECTOR_UNIT_VSX_P (V2DFmode) 5422 && out_mode == DFmode && out_n == 2 5423 && in_mode == DFmode && in_n == 2) 5424 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP]; 5425 if (VECTOR_UNIT_VSX_P (V4SFmode) 5426 && out_mode == SFmode && out_n == 4 5427 && in_mode == SFmode && in_n == 4) 5428 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP]; 5429 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5430 && out_mode == SFmode && out_n == 4 5431 && in_mode == SFmode && in_n == 4) 5432 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP]; 5433 break; 5434 CASE_CFN_TRUNC: 5435 if (VECTOR_UNIT_VSX_P (V2DFmode) 5436 && out_mode == DFmode && out_n == 2 5437 && in_mode == DFmode && in_n == 2) 5438 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ]; 5439 if (VECTOR_UNIT_VSX_P (V4SFmode) 5440 && out_mode == SFmode && out_n == 4 5441 && in_mode == SFmode && in_n == 4) 5442 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ]; 5443 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) 5444 && out_mode == SFmode && out_n == 4 5445 && in_mode == SFmode && in_n == 4) 5446 return 
rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ]; 5447 break; 5448 CASE_CFN_NEARBYINT: 5449 if (VECTOR_UNIT_VSX_P (V2DFmode) 5450 && flag_unsafe_math_optimizations 5451 && out_mode == DFmode && out_n == 2 5452 && in_mode == DFmode && in_n == 2) 5453 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI]; 5454 if (VECTOR_UNIT_VSX_P (V4SFmode) 5455 && flag_unsafe_math_optimizations 5456 && out_mode == SFmode && out_n == 4 5457 && in_mode == SFmode && in_n == 4) 5458 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI]; 5459 break; 5460 CASE_CFN_RINT: 5461 if (VECTOR_UNIT_VSX_P (V2DFmode) 5462 && !flag_trapping_math 5463 && out_mode == DFmode && out_n == 2 5464 && in_mode == DFmode && in_n == 2) 5465 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]; 5466 if (VECTOR_UNIT_VSX_P (V4SFmode) 5467 && !flag_trapping_math 5468 && out_mode == SFmode && out_n == 4 5469 && in_mode == SFmode && in_n == 4) 5470 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC]; 5471 break; 5472 default: 5473 break; 5474 } 5475 5476 /* Generate calls to libmass if appropriate. */ 5477 if (rs6000_veclib_handler) 5478 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in); 5479 5480 return NULL_TREE; 5481} 5482 5483/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */ 5484 5485static tree 5486rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, 5487 tree type_in) 5488{ 5489 machine_mode in_mode, out_mode; 5490 int in_n, out_n; 5491 5492 if (TARGET_DEBUG_BUILTIN) 5493 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", 5494 IDENTIFIER_POINTER (DECL_NAME (fndecl)), 5495 GET_MODE_NAME (TYPE_MODE (type_out)), 5496 GET_MODE_NAME (TYPE_MODE (type_in))); 5497 5498 if (TREE_CODE (type_out) != VECTOR_TYPE 5499 || TREE_CODE (type_in) != VECTOR_TYPE) 5500 return NULL_TREE; 5501 5502 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 5503 out_n = TYPE_VECTOR_SUBPARTS (type_out); 5504 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 5505 in_n = TYPE_VECTOR_SUBPARTS (type_in); 5506 5507 enum rs6000_builtins fn 5508 = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl); 5509 switch (fn) 5510 { 5511 case RS6000_BUILTIN_RSQRTF: 5512 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) 5513 && out_mode == SFmode && out_n == 4 5514 && in_mode == SFmode && in_n == 4) 5515 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP]; 5516 break; 5517 case RS6000_BUILTIN_RSQRT: 5518 if (VECTOR_UNIT_VSX_P (V2DFmode) 5519 && out_mode == DFmode && out_n == 2 5520 && in_mode == DFmode && in_n == 2) 5521 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; 5522 break; 5523 case RS6000_BUILTIN_RECIPF: 5524 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) 5525 && out_mode == SFmode && out_n == 4 5526 && in_mode == SFmode && in_n == 4) 5527 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP]; 5528 break; 5529 case RS6000_BUILTIN_RECIP: 5530 if (VECTOR_UNIT_VSX_P (V2DFmode) 5531 && out_mode == DFmode && out_n == 2 5532 && in_mode == DFmode && in_n == 2) 5533 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF]; 5534 break; 5535 default: 5536 break; 5537 } 5538 return NULL_TREE; 5539} 5540 5541/* Default CPU string for rs6000*_file_start functions. 
*/ 5542static const char *rs6000_default_cpu; 5543 5544#ifdef USING_ELFOS_H 5545const char *rs6000_machine; 5546 5547const char * 5548rs6000_machine_from_flags (void) 5549{ 5550 /* e300 and e500 */ 5551 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3) 5552 return "e300"; 5553 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548) 5554 return "e500"; 5555 if (rs6000_cpu == PROCESSOR_PPCE500MC) 5556 return "e500mc"; 5557 if (rs6000_cpu == PROCESSOR_PPCE500MC64) 5558 return "e500mc64"; 5559 if (rs6000_cpu == PROCESSOR_PPCE5500) 5560 return "e5500"; 5561 if (rs6000_cpu == PROCESSOR_PPCE6500) 5562 return "e6500"; 5563 5564 /* 400 series */ 5565 if (rs6000_cpu == PROCESSOR_PPC403) 5566 return "\"403\""; 5567 if (rs6000_cpu == PROCESSOR_PPC405) 5568 return "\"405\""; 5569 if (rs6000_cpu == PROCESSOR_PPC440) 5570 return "\"440\""; 5571 if (rs6000_cpu == PROCESSOR_PPC476) 5572 return "\"476\""; 5573 5574 /* A2 */ 5575 if (rs6000_cpu == PROCESSOR_PPCA2) 5576 return "a2"; 5577 5578 /* Cell BE */ 5579 if (rs6000_cpu == PROCESSOR_CELL) 5580 return "cell"; 5581 5582 /* Titan */ 5583 if (rs6000_cpu == PROCESSOR_TITAN) 5584 return "titan"; 5585 5586 /* 500 series and 800 series */ 5587 if (rs6000_cpu == PROCESSOR_MPCCORE) 5588 return "\"821\""; 5589 5590#if 0 5591 /* This (and ppc64 below) are disabled here (for now at least) because 5592 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON 5593 are #define'd as some of these. Untangling that is a job for later. */ 5594 5595 /* 600 series and 700 series, "classic" */ 5596 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 5597 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e 5598 || rs6000_cpu == PROCESSOR_PPC750) 5599 return "ppc"; 5600#endif 5601 5602 /* Classic with AltiVec, "G4" */ 5603 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450) 5604 return "\"7450\""; 5605 5606#if 0 5607 /* The older 64-bit CPUs */ 5608 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630 5609 || rs6000_cpu == PROCESSOR_RS64A) 5610 return "ppc64"; 5611#endif 5612 5613 HOST_WIDE_INT flags = rs6000_isa_flags; 5614 5615 /* Disable the flags that should never influence the .machine selection. */ 5616 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL); 5617 5618 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0) 5619 return "power10"; 5620 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0) 5621 return "power9"; 5622 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0) 5623 return "power8"; 5624 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0) 5625 return "power7"; 5626 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0) 5627 return "power6"; 5628 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0) 5629 return "power5"; 5630 if ((flags & ISA_2_1_MASKS) != 0) 5631 return "power4"; 5632 if ((flags & OPTION_MASK_POWERPC64) != 0) 5633 return "ppc64"; 5634 return "ppc"; 5635} 5636 5637void 5638emit_asm_machine (void) 5639{ 5640 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine); 5641} 5642#endif 5643 5644/* Do anything needed at the start of the asm file. 
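For example, a 64-bit ELFv2 compile with -fverbose-asm and -mcpu=power9 typically starts the file with a "# rs6000/powerpc options: -mcpu=power9" comment, a ".machine power9" directive, and ".abiversion 2".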
*/ 5645 5646static void 5647rs6000_file_start (void) 5648{ 5649 char buffer[80]; 5650 const char *start = buffer; 5651 FILE *file = asm_out_file; 5652 5653 rs6000_default_cpu = TARGET_CPU_DEFAULT; 5654 5655 default_file_start (); 5656 5657 if (flag_verbose_asm) 5658 { 5659 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START); 5660 5661 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') 5662 { 5663 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu); 5664 start = ""; 5665 } 5666 5667 if (global_options_set.x_rs6000_cpu_index) 5668 { 5669 fprintf (file, "%s -mcpu=%s", start, 5670 processor_target_table[rs6000_cpu_index].name); 5671 start = ""; 5672 } 5673 5674 if (global_options_set.x_rs6000_tune_index) 5675 { 5676 fprintf (file, "%s -mtune=%s", start, 5677 processor_target_table[rs6000_tune_index].name); 5678 start = ""; 5679 } 5680 5681 if (PPC405_ERRATUM77) 5682 { 5683 fprintf (file, "%s PPC405CR_ERRATUM77", start); 5684 start = ""; 5685 } 5686 5687#ifdef USING_ELFOS_H 5688 switch (rs6000_sdata) 5689 { 5690 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break; 5691 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break; 5692 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break; 5693 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break; 5694 } 5695 5696 if (rs6000_sdata && g_switch_value) 5697 { 5698 fprintf (file, "%s -G %d", start, 5699 g_switch_value); 5700 start = ""; 5701 } 5702#endif 5703 5704 if (*start == '\0') 5705 putc ('\n', file); 5706 } 5707 5708#ifdef USING_ELFOS_H 5709 rs6000_machine = rs6000_machine_from_flags (); 5710 emit_asm_machine (); 5711#endif 5712 5713 if (DEFAULT_ABI == ABI_ELFv2) 5714 fprintf (file, "\t.abiversion 2\n"); 5715} 5716 5717 5718/* Return nonzero if this function is known to have a null epilogue. */ 5719 5720int 5721direct_return (void) 5722{ 5723 if (reload_completed) 5724 { 5725 rs6000_stack_t *info = rs6000_stack_info (); 5726 5727 if (info->first_gp_reg_save == 32 5728 && info->first_fp_reg_save == 64 5729 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1 5730 && ! info->lr_save_p 5731 && ! info->cr_save_p 5732 && info->vrsave_size == 0 5733 && ! info->push_p) 5734 return 1; 5735 } 5736 5737 return 0; 5738} 5739 5740/* Helper for num_insns_constant. Calculate number of instructions to 5741 load VALUE to a single gpr using combinations of addi, addis, ori, 5742 oris and sldi instructions. */ 5743 5744static int 5745num_insns_constant_gpr (HOST_WIDE_INT value) 5746{ 5747 /* signed constant loadable with addi */ 5748 if (SIGNED_INTEGER_16BIT_P (value)) 5749 return 1; 5750 5751 /* constant loadable with addis */ 5752 else if ((value & 0xffff) == 0 5753 && (value >> 31 == -1 || value >> 31 == 0)) 5754 return 1; 5755 5756 /* PADDI can support up to 34 bit signed integers. */ 5757 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value)) 5758 return 1; 5759 5760 else if (TARGET_POWERPC64) 5761 { 5762 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; 5763 HOST_WIDE_INT high = value >> 31; 5764 5765 if (high == 0 || high == -1) 5766 return 2; 5767 5768 high >>= 1; 5769 5770 if (low == 0) 5771 return num_insns_constant_gpr (high) + 1; 5772 else if (high == 0) 5773 return num_insns_constant_gpr (low) + 1; 5774 else 5775 return (num_insns_constant_gpr (high) 5776 + num_insns_constant_gpr (low) + 1); 5777 } 5778 5779 else 5780 return 2; 5781} 5782 5783/* Helper for num_insns_constant. 
Allow constants formed by the 5784 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm, 5785 and handle modes that require multiple gprs. */ 5786 5787static int 5788num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode) 5789{ 5790 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5791 int total = 0; 5792 while (nregs-- > 0) 5793 { 5794 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD); 5795 int insns = num_insns_constant_gpr (low); 5796 if (insns > 2 5797 /* We won't get more than 2 from num_insns_constant_gpr 5798 except when TARGET_POWERPC64 and mode is DImode or 5799 wider, so the register mode must be DImode. */ 5800 && rs6000_is_valid_and_mask (GEN_INT (low), DImode)) 5801 insns = 2; 5802 total += insns; 5803 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing 5804 it all at once would be UB. */ 5805 value >>= (BITS_PER_WORD - 1); 5806 value >>= 1; 5807 } 5808 return total; 5809} 5810 5811/* Return the number of instructions it takes to form a constant in as 5812 many gprs are needed for MODE. */ 5813 5814int 5815num_insns_constant (rtx op, machine_mode mode) 5816{ 5817 HOST_WIDE_INT val; 5818 5819 switch (GET_CODE (op)) 5820 { 5821 case CONST_INT: 5822 val = INTVAL (op); 5823 break; 5824 5825 case CONST_WIDE_INT: 5826 { 5827 int insns = 0; 5828 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++) 5829 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i), 5830 DImode); 5831 return insns; 5832 } 5833 5834 case CONST_DOUBLE: 5835 { 5836 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op); 5837 5838 if (mode == SFmode || mode == SDmode) 5839 { 5840 long l; 5841 5842 if (mode == SDmode) 5843 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l); 5844 else 5845 REAL_VALUE_TO_TARGET_SINGLE (*rv, l); 5846 /* See the first define_split in rs6000.md handling a 5847 const_double_operand. */ 5848 val = l; 5849 mode = SImode; 5850 } 5851 else if (mode == DFmode || mode == DDmode) 5852 { 5853 long l[2]; 5854 5855 if (mode == DDmode) 5856 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l); 5857 else 5858 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l); 5859 5860 /* See the second (32-bit) and third (64-bit) define_split 5861 in rs6000.md handling a const_double_operand. */ 5862 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32; 5863 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL; 5864 mode = DImode; 5865 } 5866 else if (mode == TFmode || mode == TDmode 5867 || mode == KFmode || mode == IFmode) 5868 { 5869 long l[4]; 5870 int insns; 5871 5872 if (mode == TDmode) 5873 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l); 5874 else 5875 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l); 5876 5877 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32; 5878 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL; 5879 insns = num_insns_constant_multi (val, DImode); 5880 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32; 5881 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL; 5882 insns += num_insns_constant_multi (val, DImode); 5883 return insns; 5884 } 5885 else 5886 gcc_unreachable (); 5887 } 5888 break; 5889 5890 default: 5891 gcc_unreachable (); 5892 } 5893 5894 return num_insns_constant_multi (val, mode); 5895} 5896 5897/* Interpret element ELT of the CONST_VECTOR OP as an integer value. 5898 If the mode of OP is MODE_VECTOR_INT, this simply returns the 5899 corresponding element of the vector, but for V4SFmode, the 5900 corresponding "float" is interpreted as an SImode integer. 
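For example, a V4SF element holding 1.0f is returned as 0x3f800000.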
*/ 5901
5902HOST_WIDE_INT
5903const_vector_elt_as_int (rtx op, unsigned int elt)
5904{
5905 rtx tmp;
5906
5907 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5908 gcc_assert (GET_MODE (op) != V2DImode
5909 && GET_MODE (op) != V2DFmode);
5910
5911 tmp = CONST_VECTOR_ELT (op, elt);
5912 if (GET_MODE (op) == V4SFmode)
5913 tmp = gen_lowpart (SImode, tmp);
5914 return INTVAL (tmp);
5915}
5916
5917/* Return true if OP can be synthesized with a particular vspltisb, vspltish
5918 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5919 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5920 all items are set to the same value and contain COPIES replicas of the
5921 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5922 operand and the others are set to the value of the operand's msb. */
5923
5924static bool
5925vspltis_constant (rtx op, unsigned step, unsigned copies)
5926{
5927 machine_mode mode = GET_MODE (op);
5928 machine_mode inner = GET_MODE_INNER (mode);
5929
5930 unsigned i;
5931 unsigned nunits;
5932 unsigned bitsize;
5933 unsigned mask;
5934
5935 HOST_WIDE_INT val;
5936 HOST_WIDE_INT splat_val;
5937 HOST_WIDE_INT msb_val;
5938
5939 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5940 return false;
5941
5942 nunits = GET_MODE_NUNITS (mode);
5943 bitsize = GET_MODE_BITSIZE (inner);
5944 mask = GET_MODE_MASK (inner);
5945
5946 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5947 splat_val = val;
5948 msb_val = val >= 0 ? 0 : -1;
5949
5950 /* Construct the value to be splatted, if possible. If not, return false. */
5951 for (i = 2; i <= copies; i *= 2)
5952 {
5953 HOST_WIDE_INT small_val;
5954 bitsize /= 2;
5955 small_val = splat_val >> bitsize;
5956 mask >>= bitsize;
5957 if (splat_val != ((HOST_WIDE_INT)
5958 ((unsigned HOST_WIDE_INT) small_val << bitsize)
5959 | (small_val & mask)))
5960 return false;
5961 splat_val = small_val;
5962 }
5963
5964 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5965 if (EASY_VECTOR_15 (splat_val))
5966 ;
5967
5968 /* Also check if we can splat, and then add the result to itself. Do so if
5969 the value is positive, or if the splat instruction is using OP's mode;
5970 for splat_val < 0, the splat and the add should use the same mode. */
5971 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5972 && (splat_val >= 0 || (step == 1 && copies == 1)))
5973 ;
5974
5975 /* Also check if we are loading up the most significant bit which can be done
5976 by loading up -1 and shifting the value left by -1. */
5977 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1 && copies == 1)
5978 ;
5979
5980 else
5981 return false;
5982
5983 /* Check if VAL is present in every STEP-th element, and the
5984 other elements are filled with its most significant bit. */
5985 for (i = 1; i < nunits; ++i)
5986 {
5987 HOST_WIDE_INT desired_val;
5988 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5989 if ((i & (step - 1)) == 0)
5990 desired_val = val;
5991 else
5992 desired_val = msb_val;
5993
5994 if (desired_val != const_vector_elt_as_int (op, elt))
5995 return false;
5996 }
5997
5998 return true;
5999}
6000
6001/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6002 instruction, filling in the bottom elements with 0 or -1.
6003
6004 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6005 for the number of zeroes to shift in, or negative for the number of 0xff
6006 bytes to shift in.
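For example, on a big-endian target the V4SI constant { 1, 0, 0, 0 } can be built as a vspltisw of 1 followed by a VSLDOI that shifts in 12 bytes of zeroes, so we return 12.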
6007 6008 OP is a CONST_VECTOR. */ 6009 6010int 6011vspltis_shifted (rtx op) 6012{ 6013 machine_mode mode = GET_MODE (op); 6014 machine_mode inner = GET_MODE_INNER (mode); 6015 6016 unsigned i, j; 6017 unsigned nunits; 6018 unsigned mask; 6019 6020 HOST_WIDE_INT val; 6021 6022 if (mode != V16QImode && mode != V8HImode && mode != V4SImode) 6023 return false; 6024 6025 /* We need to create pseudo registers to do the shift, so don't recognize 6026 shift vector constants after reload. Don't match it even before RA 6027 after split1 is done, because there won't be further splitting pass 6028 before RA to do the splitting. */ 6029 if (!can_create_pseudo_p () 6030 || (cfun->curr_properties & PROP_rtl_split_insns)) 6031 return false; 6032 6033 nunits = GET_MODE_NUNITS (mode); 6034 mask = GET_MODE_MASK (inner); 6035 6036 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); 6037 6038 /* Check if the value can really be the operand of a vspltis[bhw]. */ 6039 if (EASY_VECTOR_15 (val)) 6040 ; 6041 6042 /* Also check if we are loading up the most significant bit which can be done 6043 by loading up -1 and shifting the value left by -1. */ 6044 else if (EASY_VECTOR_MSB (val, inner)) 6045 ; 6046 6047 else 6048 return 0; 6049 6050 /* Check if VAL is present in every STEP-th element until we find elements 6051 that are 0 or all 1 bits. */ 6052 for (i = 1; i < nunits; ++i) 6053 { 6054 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; 6055 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); 6056 6057 /* If the value isn't the splat value, check for the remaining elements 6058 being 0/-1. */ 6059 if (val != elt_val) 6060 { 6061 if (elt_val == 0) 6062 { 6063 for (j = i+1; j < nunits; ++j) 6064 { 6065 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; 6066 if (const_vector_elt_as_int (op, elt2) != 0) 6067 return 0; 6068 } 6069 6070 return (nunits - i) * GET_MODE_SIZE (inner); 6071 } 6072 6073 else if ((elt_val & mask) == mask) 6074 { 6075 for (j = i+1; j < nunits; ++j) 6076 { 6077 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; 6078 if ((const_vector_elt_as_int (op, elt2) & mask) != mask) 6079 return 0; 6080 } 6081 6082 return -((nunits - i) * GET_MODE_SIZE (inner)); 6083 } 6084 6085 else 6086 return 0; 6087 } 6088 } 6089 6090 /* If all elements are equal, we don't need to do VLSDOI. */ 6091 return 0; 6092} 6093 6094 6095/* Return true if OP is of the given MODE and can be synthesized 6096 with a vspltisb, vspltish or vspltisw. */ 6097 6098bool 6099easy_altivec_constant (rtx op, machine_mode mode) 6100{ 6101 unsigned step, copies; 6102 6103 if (mode == VOIDmode) 6104 mode = GET_MODE (op); 6105 else if (mode != GET_MODE (op)) 6106 return false; 6107 6108 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy 6109 constants. */ 6110 if (mode == V2DFmode) 6111 return zero_constant (op, mode); 6112 6113 else if (mode == V2DImode) 6114 { 6115 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0)) 6116 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1))) 6117 return false; 6118 6119 if (zero_constant (op, mode)) 6120 return true; 6121 6122 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 6123 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) 6124 return true; 6125 6126 return false; 6127 } 6128 6129 /* V1TImode is a special container for TImode. Ignore for now. */ 6130 else if (mode == V1TImode) 6131 return false; 6132 6133 /* Start with a vspltisw. 
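With step = nunits / 4 and copies = 1, a V4SI constant must have all four elements equal, while a V16QI constant must hold the splat value in every fourth byte and its sign in the remaining bytes.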
*/ 6134 step = GET_MODE_NUNITS (mode) / 4; 6135 copies = 1; 6136 6137 if (vspltis_constant (op, step, copies)) 6138 return true; 6139 6140 /* Then try with a vspltish. */ 6141 if (step == 1) 6142 copies <<= 1; 6143 else 6144 step >>= 1; 6145 6146 if (vspltis_constant (op, step, copies)) 6147 return true; 6148 6149 /* And finally a vspltisb. */ 6150 if (step == 1) 6151 copies <<= 1; 6152 else 6153 step >>= 1; 6154 6155 if (vspltis_constant (op, step, copies)) 6156 return true; 6157 6158 if (vspltis_shifted (op) != 0) 6159 return true; 6160 6161 return false; 6162} 6163 6164/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose 6165 result is OP. Abort if it is not possible. */ 6166 6167rtx 6168gen_easy_altivec_constant (rtx op) 6169{ 6170 machine_mode mode = GET_MODE (op); 6171 int nunits = GET_MODE_NUNITS (mode); 6172 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); 6173 unsigned step = nunits / 4; 6174 unsigned copies = 1; 6175 6176 /* Start with a vspltisw. */ 6177 if (vspltis_constant (op, step, copies)) 6178 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); 6179 6180 /* Then try with a vspltish. */ 6181 if (step == 1) 6182 copies <<= 1; 6183 else 6184 step >>= 1; 6185 6186 if (vspltis_constant (op, step, copies)) 6187 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); 6188 6189 /* And finally a vspltisb. */ 6190 if (step == 1) 6191 copies <<= 1; 6192 else 6193 step >>= 1; 6194 6195 if (vspltis_constant (op, step, copies)) 6196 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); 6197 6198 gcc_unreachable (); 6199} 6200 6201/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0 6202 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d). 6203 6204 Return the number of instructions needed (1 or 2) into the address pointed 6205 via NUM_INSNS_PTR. 6206 6207 Return the constant that is being split via CONSTANT_PTR. */ 6208 6209bool 6210xxspltib_constant_p (rtx op, 6211 machine_mode mode, 6212 int *num_insns_ptr, 6213 int *constant_ptr) 6214{ 6215 size_t nunits = GET_MODE_NUNITS (mode); 6216 size_t i; 6217 HOST_WIDE_INT value; 6218 rtx element; 6219 6220 /* Set the returned values to out of bound values. */ 6221 *num_insns_ptr = -1; 6222 *constant_ptr = 256; 6223 6224 if (!TARGET_P9_VECTOR) 6225 return false; 6226 6227 if (mode == VOIDmode) 6228 mode = GET_MODE (op); 6229 6230 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode) 6231 return false; 6232 6233 /* Handle (vec_duplicate <constant>). */ 6234 if (GET_CODE (op) == VEC_DUPLICATE) 6235 { 6236 if (mode != V16QImode && mode != V8HImode && mode != V4SImode 6237 && mode != V2DImode) 6238 return false; 6239 6240 element = XEXP (op, 0); 6241 if (!CONST_INT_P (element)) 6242 return false; 6243 6244 value = INTVAL (element); 6245 if (!IN_RANGE (value, -128, 127)) 6246 return false; 6247 } 6248 6249 /* Handle (const_vector [...]). 
*/ 6250 else if (GET_CODE (op) == CONST_VECTOR) 6251 { 6252 if (mode != V16QImode && mode != V8HImode && mode != V4SImode 6253 && mode != V2DImode) 6254 return false; 6255 6256 element = CONST_VECTOR_ELT (op, 0); 6257 if (!CONST_INT_P (element)) 6258 return false; 6259 6260 value = INTVAL (element); 6261 if (!IN_RANGE (value, -128, 127)) 6262 return false; 6263 6264 for (i = 1; i < nunits; i++) 6265 { 6266 element = CONST_VECTOR_ELT (op, i); 6267 if (!CONST_INT_P (element)) 6268 return false; 6269 6270 if (value != INTVAL (element)) 6271 return false; 6272 } 6273 } 6274 6275 /* Handle integer constants being loaded into the upper part of the VSX 6276 register as a scalar. If the value isn't 0/-1, only allow it if the mode 6277 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLITIB. */ 6278 else if (CONST_INT_P (op)) 6279 { 6280 if (!SCALAR_INT_MODE_P (mode)) 6281 return false; 6282 6283 value = INTVAL (op); 6284 if (!IN_RANGE (value, -128, 127)) 6285 return false; 6286 6287 if (!IN_RANGE (value, -1, 0)) 6288 { 6289 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID)) 6290 return false; 6291 6292 if (EASY_VECTOR_15 (value)) 6293 return false; 6294 } 6295 } 6296 6297 else 6298 return false; 6299 6300 /* See if we could generate vspltisw/vspltish directly instead of xxspltib + 6301 sign extend. Special case 0/-1 to allow getting any VSX register instead 6302 of an Altivec register. */ 6303 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0) 6304 && EASY_VECTOR_15 (value)) 6305 return false; 6306 6307 /* Return # of instructions and the constant byte for XXSPLTIB. */ 6308 if (mode == V16QImode) 6309 *num_insns_ptr = 1; 6310 6311 else if (IN_RANGE (value, -1, 0)) 6312 *num_insns_ptr = 1; 6313 6314 else 6315 *num_insns_ptr = 2; 6316 6317 *constant_ptr = (int) value; 6318 return true; 6319} 6320 6321const char * 6322output_vec_const_move (rtx *operands) 6323{ 6324 int shift; 6325 machine_mode mode; 6326 rtx dest, vec; 6327 6328 dest = operands[0]; 6329 vec = operands[1]; 6330 mode = GET_MODE (dest); 6331 6332 if (TARGET_VSX) 6333 { 6334 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest)); 6335 int xxspltib_value = 256; 6336 int num_insns = -1; 6337 6338 if (zero_constant (vec, mode)) 6339 { 6340 if (TARGET_P9_VECTOR) 6341 return "xxspltib %x0,0"; 6342 6343 else if (dest_vmx_p) 6344 return "vspltisw %0,0"; 6345 6346 else 6347 return "xxlxor %x0,%x0,%x0"; 6348 } 6349 6350 if (all_ones_constant (vec, mode)) 6351 { 6352 if (TARGET_P9_VECTOR) 6353 return "xxspltib %x0,255"; 6354 6355 else if (dest_vmx_p) 6356 return "vspltisw %0,-1"; 6357 6358 else if (TARGET_P8_VECTOR) 6359 return "xxlorc %x0,%x0,%x0"; 6360 6361 else 6362 gcc_unreachable (); 6363 } 6364 6365 if (TARGET_P9_VECTOR 6366 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value)) 6367 { 6368 if (num_insns == 1) 6369 { 6370 operands[2] = GEN_INT (xxspltib_value & 0xff); 6371 return "xxspltib %x0,%2"; 6372 } 6373 6374 return "#"; 6375 } 6376 } 6377 6378 if (TARGET_ALTIVEC) 6379 { 6380 rtx splat_vec; 6381 6382 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest))); 6383 if (zero_constant (vec, mode)) 6384 return "vspltisw %0,0"; 6385 6386 if (all_ones_constant (vec, mode)) 6387 return "vspltisw %0,-1"; 6388 6389 /* Do we need to construct a value using VSLDOI? 
*/ 6390 shift = vspltis_shifted (vec); 6391 if (shift != 0) 6392 return "#"; 6393 6394 splat_vec = gen_easy_altivec_constant (vec); 6395 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); 6396 operands[1] = XEXP (splat_vec, 0); 6397 if (!EASY_VECTOR_15 (INTVAL (operands[1]))) 6398 return "#"; 6399 6400 switch (GET_MODE (splat_vec)) 6401 { 6402 case E_V4SImode: 6403 return "vspltisw %0,%1"; 6404 6405 case E_V8HImode: 6406 return "vspltish %0,%1"; 6407 6408 case E_V16QImode: 6409 return "vspltisb %0,%1"; 6410 6411 default: 6412 gcc_unreachable (); 6413 } 6414 } 6415 6416 gcc_unreachable (); 6417} 6418 6419/* Initialize vector TARGET to VALS. */ 6420 6421void 6422rs6000_expand_vector_init (rtx target, rtx vals) 6423{ 6424 machine_mode mode = GET_MODE (target); 6425 machine_mode inner_mode = GET_MODE_INNER (mode); 6426 int n_elts = GET_MODE_NUNITS (mode); 6427 int n_var = 0, one_var = -1; 6428 bool all_same = true, all_const_zero = true; 6429 rtx x, mem; 6430 int i; 6431 6432 for (i = 0; i < n_elts; ++i) 6433 { 6434 x = XVECEXP (vals, 0, i); 6435 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) 6436 ++n_var, one_var = i; 6437 else if (x != CONST0_RTX (inner_mode)) 6438 all_const_zero = false; 6439 6440 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 6441 all_same = false; 6442 } 6443 6444 if (n_var == 0) 6445 { 6446 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); 6447 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); 6448 if ((int_vector_p || TARGET_VSX) && all_const_zero) 6449 { 6450 /* Zero register. */ 6451 emit_move_insn (target, CONST0_RTX (mode)); 6452 return; 6453 } 6454 else if (int_vector_p && easy_vector_constant (const_vec, mode)) 6455 { 6456 /* Splat immediate. */ 6457 emit_insn (gen_rtx_SET (target, const_vec)); 6458 return; 6459 } 6460 else 6461 { 6462 /* Load from constant pool. */ 6463 emit_move_insn (target, const_vec); 6464 return; 6465 } 6466 } 6467 6468 /* Double word values on VSX can use xxpermdi or lxvdsx. */ 6469 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode)) 6470 { 6471 rtx op[2]; 6472 size_t i; 6473 size_t num_elements = all_same ? 1 : 2; 6474 for (i = 0; i < num_elements; i++) 6475 { 6476 op[i] = XVECEXP (vals, 0, i); 6477 /* Just in case there is a SUBREG with a smaller mode, do a 6478 conversion. */ 6479 if (GET_MODE (op[i]) != inner_mode) 6480 { 6481 rtx tmp = gen_reg_rtx (inner_mode); 6482 convert_move (tmp, op[i], 0); 6483 op[i] = tmp; 6484 } 6485 /* Allow load with splat double word. */ 6486 else if (MEM_P (op[i])) 6487 { 6488 if (!all_same) 6489 op[i] = force_reg (inner_mode, op[i]); 6490 } 6491 else if (!REG_P (op[i])) 6492 op[i] = force_reg (inner_mode, op[i]); 6493 } 6494 6495 if (all_same) 6496 { 6497 if (mode == V2DFmode) 6498 emit_insn (gen_vsx_splat_v2df (target, op[0])); 6499 else 6500 emit_insn (gen_vsx_splat_v2di (target, op[0])); 6501 } 6502 else 6503 { 6504 if (mode == V2DFmode) 6505 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1])); 6506 else 6507 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1])); 6508 } 6509 return; 6510 } 6511 6512 /* Special case initializing vector int if we are on 64-bit systems with 6513 direct move or we have the ISA 3.0 instructions. 
*/ 6514 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode) 6515 && TARGET_DIRECT_MOVE_64BIT) 6516 { 6517 if (all_same) 6518 { 6519 rtx element0 = XVECEXP (vals, 0, 0); 6520 if (MEM_P (element0)) 6521 element0 = rs6000_force_indexed_or_indirect_mem (element0); 6522 else 6523 element0 = force_reg (SImode, element0); 6524 6525 if (TARGET_P9_VECTOR) 6526 emit_insn (gen_vsx_splat_v4si (target, element0)); 6527 else 6528 { 6529 rtx tmp = gen_reg_rtx (DImode); 6530 emit_insn (gen_zero_extendsidi2 (tmp, element0)); 6531 emit_insn (gen_vsx_splat_v4si_di (target, tmp)); 6532 } 6533 return; 6534 } 6535 else 6536 { 6537 rtx elements[4]; 6538 size_t i; 6539 6540 for (i = 0; i < 4; i++) 6541 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i)); 6542 6543 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1], 6544 elements[2], elements[3])); 6545 return; 6546 } 6547 } 6548 6549 /* With single precision floating point on VSX, know that internally single 6550 precision is actually represented as a double, and either make 2 V2DF 6551 vectors, and convert these vectors to single precision, or do one 6552 conversion, and splat the result to the other elements. */ 6553 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode)) 6554 { 6555 if (all_same) 6556 { 6557 rtx element0 = XVECEXP (vals, 0, 0); 6558 6559 if (TARGET_P9_VECTOR) 6560 { 6561 if (MEM_P (element0)) 6562 element0 = rs6000_force_indexed_or_indirect_mem (element0); 6563 6564 emit_insn (gen_vsx_splat_v4sf (target, element0)); 6565 } 6566 6567 else 6568 { 6569 rtx freg = gen_reg_rtx (V4SFmode); 6570 rtx sreg = force_reg (SFmode, element0); 6571 rtx cvt = (TARGET_XSCVDPSPN 6572 ? gen_vsx_xscvdpspn_scalar (freg, sreg) 6573 : gen_vsx_xscvdpsp_scalar (freg, sreg)); 6574 6575 emit_insn (cvt); 6576 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, 6577 const0_rtx)); 6578 } 6579 } 6580 else 6581 { 6582 rtx dbl_even = gen_reg_rtx (V2DFmode); 6583 rtx dbl_odd = gen_reg_rtx (V2DFmode); 6584 rtx flt_even = gen_reg_rtx (V4SFmode); 6585 rtx flt_odd = gen_reg_rtx (V4SFmode); 6586 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0)); 6587 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1)); 6588 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2)); 6589 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3)); 6590 6591 /* Use VMRGEW if we can instead of doing a permute. */ 6592 if (TARGET_P8_VECTOR) 6593 { 6594 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2)); 6595 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3)); 6596 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); 6597 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); 6598 if (BYTES_BIG_ENDIAN) 6599 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd)); 6600 else 6601 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even)); 6602 } 6603 else 6604 { 6605 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1)); 6606 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3)); 6607 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even)); 6608 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd)); 6609 rs6000_expand_extract_even (target, flt_even, flt_odd); 6610 } 6611 } 6612 return; 6613 } 6614 6615 /* Special case initializing vector short/char that are splats if we are on 6616 64-bit systems with direct move. 
*/ 6617 if (all_same && TARGET_DIRECT_MOVE_64BIT 6618 && (mode == V16QImode || mode == V8HImode)) 6619 { 6620 rtx op0 = XVECEXP (vals, 0, 0); 6621 rtx di_tmp = gen_reg_rtx (DImode); 6622 6623 if (!REG_P (op0)) 6624 op0 = force_reg (GET_MODE_INNER (mode), op0); 6625 6626 if (mode == V16QImode) 6627 { 6628 emit_insn (gen_zero_extendqidi2 (di_tmp, op0)); 6629 emit_insn (gen_vsx_vspltb_di (target, di_tmp)); 6630 return; 6631 } 6632 6633 if (mode == V8HImode) 6634 { 6635 emit_insn (gen_zero_extendhidi2 (di_tmp, op0)); 6636 emit_insn (gen_vsx_vsplth_di (target, di_tmp)); 6637 return; 6638 } 6639 } 6640 6641 /* Store value to stack temp. Load vector element. Splat. However, splat 6642 of 64-bit items is not supported on Altivec. */ 6643 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) 6644 { 6645 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); 6646 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), 6647 XVECEXP (vals, 0, 0)); 6648 x = gen_rtx_UNSPEC (VOIDmode, 6649 gen_rtvec (1, const0_rtx), UNSPEC_LVE); 6650 emit_insn (gen_rtx_PARALLEL (VOIDmode, 6651 gen_rtvec (2, 6652 gen_rtx_SET (target, mem), 6653 x))); 6654 x = gen_rtx_VEC_SELECT (inner_mode, target, 6655 gen_rtx_PARALLEL (VOIDmode, 6656 gen_rtvec (1, const0_rtx))); 6657 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x))); 6658 return; 6659 } 6660 6661 /* One field is non-constant. Load constant then overwrite 6662 varying field. */ 6663 if (n_var == 1) 6664 { 6665 rtx copy = copy_rtx (vals); 6666 6667 /* Load constant part of vector, substitute neighboring value for 6668 varying element. */ 6669 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); 6670 rs6000_expand_vector_init (target, copy); 6671 6672 /* Insert variable. */ 6673 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var); 6674 return; 6675 } 6676 6677 /* Construct the vector in memory one field at a time 6678 and load the whole vector. */ 6679 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 6680 for (i = 0; i < n_elts; i++) 6681 emit_move_insn (adjust_address_nv (mem, inner_mode, 6682 i * GET_MODE_SIZE (inner_mode)), 6683 XVECEXP (vals, 0, i)); 6684 emit_move_insn (target, mem); 6685} 6686 6687/* Set field ELT of TARGET to VAL. */ 6688 6689void 6690rs6000_expand_vector_set (rtx target, rtx val, int elt) 6691{ 6692 machine_mode mode = GET_MODE (target); 6693 machine_mode inner_mode = GET_MODE_INNER (mode); 6694 rtx reg = gen_reg_rtx (mode); 6695 rtx mask, mem, x; 6696 int width = GET_MODE_SIZE (inner_mode); 6697 int i; 6698 6699 val = force_reg (GET_MODE (val), val); 6700 6701 if (VECTOR_MEM_VSX_P (mode)) 6702 { 6703 rtx insn = NULL_RTX; 6704 rtx elt_rtx = GEN_INT (elt); 6705 6706 if (mode == V2DFmode) 6707 insn = gen_vsx_set_v2df (target, target, val, elt_rtx); 6708 6709 else if (mode == V2DImode) 6710 insn = gen_vsx_set_v2di (target, target, val, elt_rtx); 6711 6712 else if (TARGET_P9_VECTOR && TARGET_POWERPC64) 6713 { 6714 if (mode == V4SImode) 6715 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx); 6716 else if (mode == V8HImode) 6717 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx); 6718 else if (mode == V16QImode) 6719 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx); 6720 else if (mode == V4SFmode) 6721 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx); 6722 } 6723 6724 if (insn) 6725 { 6726 emit_insn (insn); 6727 return; 6728 } 6729 } 6730 6731 /* Simplify setting single element vectors like V1TImode. 
*/ 6732 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0) 6733 { 6734 emit_move_insn (target, gen_lowpart (mode, val)); 6735 return; 6736 } 6737 6738 /* Load single variable value. */ 6739 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); 6740 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); 6741 x = gen_rtx_UNSPEC (VOIDmode, 6742 gen_rtvec (1, const0_rtx), UNSPEC_LVE); 6743 emit_insn (gen_rtx_PARALLEL (VOIDmode, 6744 gen_rtvec (2, 6745 gen_rtx_SET (reg, mem), 6746 x))); 6747 6748 /* Linear sequence. */ 6749 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 6750 for (i = 0; i < 16; ++i) 6751 XVECEXP (mask, 0, i) = GEN_INT (i); 6752 6753 /* Set permute mask to insert element into target. */ 6754 for (i = 0; i < width; ++i) 6755 XVECEXP (mask, 0, elt*width + i) 6756 = GEN_INT (i + 0x10); 6757 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0)); 6758 6759 if (BYTES_BIG_ENDIAN) 6760 x = gen_rtx_UNSPEC (mode, 6761 gen_rtvec (3, target, reg, 6762 force_reg (V16QImode, x)), 6763 UNSPEC_VPERM); 6764 else 6765 { 6766 if (TARGET_P9_VECTOR) 6767 x = gen_rtx_UNSPEC (mode, 6768 gen_rtvec (3, reg, target, 6769 force_reg (V16QImode, x)), 6770 UNSPEC_VPERMR); 6771 else 6772 { 6773 /* Invert selector. We prefer to generate VNAND on P8 so 6774 that future fusion opportunities can kick in, but must 6775 generate VNOR elsewhere. */ 6776 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x)); 6777 rtx iorx = (TARGET_P8_VECTOR 6778 ? gen_rtx_IOR (V16QImode, notx, notx) 6779 : gen_rtx_AND (V16QImode, notx, notx)); 6780 rtx tmp = gen_reg_rtx (V16QImode); 6781 emit_insn (gen_rtx_SET (tmp, iorx)); 6782 6783 /* Permute with operands reversed and adjusted selector. */ 6784 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp), 6785 UNSPEC_VPERM); 6786 } 6787 } 6788 6789 emit_insn (gen_rtx_SET (target, x)); 6790} 6791 6792/* Extract field ELT from VEC into TARGET. 
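Constant element numbers on VSX targets use the vsx_extract_* patterns directly; variable element numbers with 64-bit direct moves use the *_var patterns; otherwise the vector is spilled to a stack temporary and the field is loaded back from memory.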
*/ 6793 6794void 6795rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) 6796{ 6797 machine_mode mode = GET_MODE (vec); 6798 machine_mode inner_mode = GET_MODE_INNER (mode); 6799 rtx mem; 6800 6801 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt)) 6802 { 6803 switch (mode) 6804 { 6805 default: 6806 break; 6807 case E_V1TImode: 6808 emit_move_insn (target, gen_lowpart (TImode, vec)); 6809 break; 6810 case E_V2DFmode: 6811 emit_insn (gen_vsx_extract_v2df (target, vec, elt)); 6812 return; 6813 case E_V2DImode: 6814 emit_insn (gen_vsx_extract_v2di (target, vec, elt)); 6815 return; 6816 case E_V4SFmode: 6817 emit_insn (gen_vsx_extract_v4sf (target, vec, elt)); 6818 return; 6819 case E_V16QImode: 6820 if (TARGET_DIRECT_MOVE_64BIT) 6821 { 6822 emit_insn (gen_vsx_extract_v16qi (target, vec, elt)); 6823 return; 6824 } 6825 else 6826 break; 6827 case E_V8HImode: 6828 if (TARGET_DIRECT_MOVE_64BIT) 6829 { 6830 emit_insn (gen_vsx_extract_v8hi (target, vec, elt)); 6831 return; 6832 } 6833 else 6834 break; 6835 case E_V4SImode: 6836 if (TARGET_DIRECT_MOVE_64BIT) 6837 { 6838 emit_insn (gen_vsx_extract_v4si (target, vec, elt)); 6839 return; 6840 } 6841 break; 6842 } 6843 } 6844 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt) 6845 && TARGET_DIRECT_MOVE_64BIT) 6846 { 6847 if (GET_MODE (elt) != DImode) 6848 { 6849 rtx tmp = gen_reg_rtx (DImode); 6850 convert_move (tmp, elt, 0); 6851 elt = tmp; 6852 } 6853 else if (!REG_P (elt)) 6854 elt = force_reg (DImode, elt); 6855 6856 switch (mode) 6857 { 6858 case E_V1TImode: 6859 emit_move_insn (target, gen_lowpart (TImode, vec)); 6860 return; 6861 6862 case E_V2DFmode: 6863 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt)); 6864 return; 6865 6866 case E_V2DImode: 6867 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt)); 6868 return; 6869 6870 case E_V4SFmode: 6871 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt)); 6872 return; 6873 6874 case E_V4SImode: 6875 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt)); 6876 return; 6877 6878 case E_V8HImode: 6879 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt)); 6880 return; 6881 6882 case E_V16QImode: 6883 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt)); 6884 return; 6885 6886 default: 6887 gcc_unreachable (); 6888 } 6889 } 6890 6891 /* Allocate mode-sized buffer. */ 6892 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 6893 6894 emit_move_insn (mem, vec); 6895 if (CONST_INT_P (elt)) 6896 { 6897 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode); 6898 6899 /* Add offset to field within buffer matching vector element. */ 6900 mem = adjust_address_nv (mem, inner_mode, 6901 modulo_elt * GET_MODE_SIZE (inner_mode)); 6902 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); 6903 } 6904 else 6905 { 6906 unsigned int ele_size = GET_MODE_SIZE (inner_mode); 6907 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1); 6908 rtx new_addr = gen_reg_rtx (Pmode); 6909 6910 elt = gen_rtx_AND (Pmode, elt, num_ele_m1); 6911 if (ele_size > 1) 6912 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size)); 6913 new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt); 6914 new_addr = change_address (mem, inner_mode, new_addr); 6915 emit_move_insn (target, new_addr); 6916 } 6917} 6918 6919/* Return the offset within a memory object (MEM) of a vector type to a given 6920 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If 6921 the element is constant, we return a constant integer. 
6922
6923 Otherwise, we use a base register temporary to calculate the offset after
6924 masking it to fit within the bounds of the vector and scaling it. The
6925 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
6926 built-in function. */
6927
6928static rtx
6929get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
6930{
6931 if (CONST_INT_P (element))
6932 return GEN_INT (INTVAL (element) * scalar_size);
6933
6934 /* All insns should use the 'Q' constraint (address is a single register) if
6935 the element number is not a constant. */
6936 gcc_assert (satisfies_constraint_Q (mem));
6937
6938 /* Mask the element to make sure the element number is between 0 and the
6939 maximum number of elements - 1 so that we don't generate an address
6940 outside the vector. */
6941 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
6942 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
6943 emit_insn (gen_rtx_SET (base_tmp, and_op));
6944
6945 /* Shift the element to get the byte offset from the element number. */
6946 int shift = exact_log2 (scalar_size);
6947 gcc_assert (shift >= 0);
6948
6949 if (shift > 0)
6950 {
6951 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
6952 emit_insn (gen_rtx_SET (base_tmp, shift_op));
6953 }
6954
6955 return base_tmp;
6956}
6957
6958/* Helper function to update a PC-relative address when we are adjusting a
6959 memory address (ADDR) of a vector to point to a scalar field within the
6960 vector with a constant offset (ELEMENT_OFFSET). If the address is not valid,
6961 we can use the base register temporary (BASE_TMP) to form the address. */
6962
6963static rtx
6964adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
6965{
6966 rtx new_addr = NULL;
6967
6968 gcc_assert (CONST_INT_P (element_offset));
6969
6970 if (GET_CODE (addr) == CONST)
6971 addr = XEXP (addr, 0);
6972
6973 if (GET_CODE (addr) == PLUS)
6974 {
6975 rtx op0 = XEXP (addr, 0);
6976 rtx op1 = XEXP (addr, 1);
6977
6978 if (CONST_INT_P (op1))
6979 {
6980 HOST_WIDE_INT offset
6981 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
6982
6983 if (offset == 0)
6984 new_addr = op0;
6985
6986 else
6987 {
6988 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
6989 new_addr = gen_rtx_CONST (Pmode, plus);
6990 }
6991 }
6992
6993 else
6994 {
6995 emit_move_insn (base_tmp, addr);
6996 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
6997 }
6998 }
6999
7000 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7001 {
7002 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7003 new_addr = gen_rtx_CONST (Pmode, plus);
7004 }
7005
7006 else
7007 gcc_unreachable ();
7008
7009 return new_addr;
7010}
7011
7012/* Adjust a memory address (MEM) of a vector type to point to a scalar field
7013 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7014 temporary (BASE_TMP) to fix up the address. Return the new memory address
7015 that is valid for reads or writes to a given register (SCALAR_REG).
7016
7017 This function is expected to be called after reload is completed when we are
7018 splitting insns. The temporary BASE_TMP might be set multiple times with
7019 this code.
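For example, a D-FORM address such as 16(r9) with a request for element 2 of a V4SI vector simply folds to 24(r9), while a variable element number is first masked and scaled into BASE_TMP by get_vector_offset and then added to the base address.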
*/ 7020 7021rtx 7022rs6000_adjust_vec_address (rtx scalar_reg, 7023 rtx mem, 7024 rtx element, 7025 rtx base_tmp, 7026 machine_mode scalar_mode) 7027{ 7028 unsigned scalar_size = GET_MODE_SIZE (scalar_mode); 7029 rtx addr = XEXP (mem, 0); 7030 rtx new_addr; 7031 7032 gcc_assert (!reg_mentioned_p (base_tmp, addr)); 7033 gcc_assert (!reg_mentioned_p (base_tmp, element)); 7034 7035 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */ 7036 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC); 7037 7038 /* Calculate what we need to add to the address to get the element 7039 address. */ 7040 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size); 7041 7042 /* Create the new address pointing to the element within the vector. If we 7043 are adding 0, we don't have to change the address. */ 7044 if (element_offset == const0_rtx) 7045 new_addr = addr; 7046 7047 /* A simple indirect address can be converted into a reg + offset 7048 address. */ 7049 else if (REG_P (addr) || SUBREG_P (addr)) 7050 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset); 7051 7052 /* For references to local static variables, fold a constant offset into the 7053 address. */ 7054 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset)) 7055 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp); 7056 7057 /* Optimize D-FORM addresses with constant offset with a constant element, to 7058 include the element offset in the address directly. */ 7059 else if (GET_CODE (addr) == PLUS) 7060 { 7061 rtx op0 = XEXP (addr, 0); 7062 rtx op1 = XEXP (addr, 1); 7063 7064 gcc_assert (REG_P (op0) || SUBREG_P (op0)); 7065 if (CONST_INT_P (op1) && CONST_INT_P (element_offset)) 7066 { 7067 /* op0 should never be r0, because r0+offset is not valid. But it 7068 doesn't hurt to make sure it is not r0. */ 7069 gcc_assert (reg_or_subregno (op0) != 0); 7070 7071 /* D-FORM address with constant element number. */ 7072 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset); 7073 rtx offset_rtx = GEN_INT (offset); 7074 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx); 7075 } 7076 else 7077 { 7078 /* If we don't have a D-FORM address with a constant element number, 7079 add the two elements in the current address. Then add the offset. 7080 7081 Previously, we tried to add the offset to OP1 and change the 7082 address to an X-FORM format adding OP0 and BASE_TMP, but it became 7083 complicated because we had to verify that op1 was not GPR0 and we 7084 had a constant element offset (due to the way ADDI is defined). 7085 By doing the add of OP0 and OP1 first, and then adding in the 7086 offset, it has the benefit that if D-FORM instructions are 7087 allowed, the offset is part of the memory access to the vector 7088 element. */ 7089 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1))); 7090 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); 7091 } 7092 } 7093 7094 else 7095 { 7096 emit_move_insn (base_tmp, addr); 7097 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset); 7098 } 7099 7100 /* If the address isn't valid, move the address into the temporary base 7101 register. Some reasons it could not be valid include: 7102 7103 The address offset overflowed the 16 or 34 bit offset size; 7104 We need to use a DS-FORM load, and the bottom 2 bits are non-zero; 7105 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero; 7106 Only X_FORM loads can be done, and the address is D_FORM. 
*/ 7107 7108 enum insn_form iform 7109 = address_to_insn_form (new_addr, scalar_mode, 7110 reg_to_non_prefixed (scalar_reg, scalar_mode)); 7111 7112 if (iform == INSN_FORM_BAD) 7113 { 7114 emit_move_insn (base_tmp, new_addr); 7115 new_addr = base_tmp; 7116 } 7117 7118 return change_address (mem, scalar_mode, new_addr); 7119} 7120 7121/* Split a variable vec_extract operation into the component instructions. */ 7122 7123void 7124rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr, 7125 rtx tmp_altivec) 7126{ 7127 machine_mode mode = GET_MODE (src); 7128 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src)); 7129 unsigned scalar_size = GET_MODE_SIZE (scalar_mode); 7130 int byte_shift = exact_log2 (scalar_size); 7131 7132 gcc_assert (byte_shift >= 0); 7133 7134 /* If we are given a memory address, optimize to load just the element. We 7135 don't have to adjust the vector element number on little endian 7136 systems. */ 7137 if (MEM_P (src)) 7138 { 7139 emit_move_insn (dest, 7140 rs6000_adjust_vec_address (dest, src, element, tmp_gpr, 7141 scalar_mode)); 7142 return; 7143 } 7144 7145 else if (REG_P (src) || SUBREG_P (src)) 7146 { 7147 int num_elements = GET_MODE_NUNITS (mode); 7148 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode)); 7149 int bit_shift = 7 - exact_log2 (num_elements); 7150 rtx element2; 7151 unsigned int dest_regno = reg_or_subregno (dest); 7152 unsigned int src_regno = reg_or_subregno (src); 7153 unsigned int element_regno = reg_or_subregno (element); 7154 7155 gcc_assert (REG_P (tmp_gpr)); 7156 7157 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in 7158 a general purpose register. */ 7159 if (TARGET_P9_VECTOR 7160 && (mode == V16QImode || mode == V8HImode || mode == V4SImode) 7161 && INT_REGNO_P (dest_regno) 7162 && ALTIVEC_REGNO_P (src_regno) 7163 && INT_REGNO_P (element_regno)) 7164 { 7165 rtx dest_si = gen_rtx_REG (SImode, dest_regno); 7166 rtx element_si = gen_rtx_REG (SImode, element_regno); 7167 7168 if (mode == V16QImode) 7169 emit_insn (BYTES_BIG_ENDIAN 7170 ? gen_vextublx (dest_si, element_si, src) 7171 : gen_vextubrx (dest_si, element_si, src)); 7172 7173 else if (mode == V8HImode) 7174 { 7175 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); 7176 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx)); 7177 emit_insn (BYTES_BIG_ENDIAN 7178 ? gen_vextuhlx (dest_si, tmp_gpr_si, src) 7179 : gen_vextuhrx (dest_si, tmp_gpr_si, src)); 7180 } 7181 7182 7183 else 7184 { 7185 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr)); 7186 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx)); 7187 emit_insn (BYTES_BIG_ENDIAN 7188 ? gen_vextuwlx (dest_si, tmp_gpr_si, src) 7189 : gen_vextuwrx (dest_si, tmp_gpr_si, src)); 7190 } 7191 7192 return; 7193 } 7194 7195 7196 gcc_assert (REG_P (tmp_altivec)); 7197 7198 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use 7199 an XOR, otherwise we need to subtract. The shift amount is so VSLO 7200 will shift the element into the upper position (adding 3 to convert a 7201 byte shift into a bit shift). */ 7202 if (scalar_size == 8) 7203 { 7204 if (!BYTES_BIG_ENDIAN) 7205 { 7206 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx)); 7207 element2 = tmp_gpr; 7208 } 7209 else 7210 element2 = element; 7211 7212 /* Generate RLDIC directly to shift left 6 bits and retrieve 1 7213 bit. 
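   As a worked example, on big-endian V2DImode an element number of 1 gives
   (1 << 6) & 64 = 64, i.e. a 64-bit (8-byte) shift count that makes VSLO
   move the second doubleword into the upper half of the register.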
*/ 7214 emit_insn (gen_rtx_SET (tmp_gpr, 7215 gen_rtx_AND (DImode, 7216 gen_rtx_ASHIFT (DImode, 7217 element2, 7218 GEN_INT (6)), 7219 GEN_INT (64)))); 7220 } 7221 else 7222 { 7223 if (!BYTES_BIG_ENDIAN) 7224 { 7225 rtx num_ele_m1 = GEN_INT (num_elements - 1); 7226 7227 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1)); 7228 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr)); 7229 element2 = tmp_gpr; 7230 } 7231 else 7232 element2 = element; 7233 7234 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift))); 7235 } 7236 7237 /* Get the value into the lower byte of the Altivec register where VSLO 7238 expects it. */ 7239 if (TARGET_P9_VECTOR) 7240 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr)); 7241 else if (can_create_pseudo_p ()) 7242 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr)); 7243 else 7244 { 7245 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7246 emit_move_insn (tmp_di, tmp_gpr); 7247 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di)); 7248 } 7249 7250 /* Do the VSLO to get the value into the final location. */ 7251 switch (mode) 7252 { 7253 case E_V2DFmode: 7254 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec)); 7255 return; 7256 7257 case E_V2DImode: 7258 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec)); 7259 return; 7260 7261 case E_V4SFmode: 7262 { 7263 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7264 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec)); 7265 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); 7266 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, 7267 tmp_altivec)); 7268 7269 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf)); 7270 return; 7271 } 7272 7273 case E_V4SImode: 7274 case E_V8HImode: 7275 case E_V16QImode: 7276 { 7277 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec)); 7278 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src)); 7279 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest)); 7280 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di, 7281 tmp_altivec)); 7282 emit_move_insn (tmp_gpr_di, tmp_altivec_di); 7283 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di, 7284 GEN_INT (64 - bits_in_element))); 7285 return; 7286 } 7287 7288 default: 7289 gcc_unreachable (); 7290 } 7291 7292 return; 7293 } 7294 else 7295 gcc_unreachable (); 7296 } 7297 7298/* Return alignment of TYPE. Existing alignment is ALIGN. HOW 7299 selects whether the alignment is abi mandated, optional, or 7300 both abi and optional alignment. */ 7301 7302unsigned int 7303rs6000_data_alignment (tree type, unsigned int align, enum data_align how) 7304{ 7305 if (how != align_opt) 7306 { 7307 if (TREE_CODE (type) == VECTOR_TYPE && align < 128) 7308 align = 128; 7309 } 7310 7311 if (how != align_abi) 7312 { 7313 if (TREE_CODE (type) == ARRAY_TYPE 7314 && TYPE_MODE (TREE_TYPE (type)) == QImode) 7315 { 7316 if (align < BITS_PER_WORD) 7317 align = BITS_PER_WORD; 7318 } 7319 } 7320 7321 return align; 7322} 7323 7324/* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory 7325 instructions simply ignore the low bits; VSX memory instructions 7326 are aligned to 4 or 8 bytes. 
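   For example, when the target lacks efficient unaligned VSX accesses, a
   DFmode access with less than 32-bit alignment is reported as slow, as is
   a vector access whose alignment is below the vector alignment of its
   mode.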
*/ 7327 7328static bool 7329rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) 7330{ 7331 return (STRICT_ALIGNMENT 7332 || (!TARGET_EFFICIENT_UNALIGNED_VSX 7333 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) 7334 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)) 7335 && (int) align < VECTOR_ALIGN (mode))))); 7336} 7337 7338/* Previous GCC releases forced all vector types to have 16-byte alignment. */ 7339 7340bool 7341rs6000_special_adjust_field_align_p (tree type, unsigned int computed) 7342{ 7343 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE) 7344 { 7345 if (computed != 128) 7346 { 7347 static bool warned; 7348 if (!warned && warn_psabi) 7349 { 7350 warned = true; 7351 inform (input_location, 7352 "the layout of aggregates containing vectors with" 7353 " %d-byte alignment has changed in GCC 5", 7354 computed / BITS_PER_UNIT); 7355 } 7356 } 7357 /* In current GCC there is no special case. */ 7358 return false; 7359 } 7360 7361 return false; 7362} 7363 7364/* AIX increases natural record alignment to doubleword if the first 7365 field is an FP double while the FP fields remain word aligned. */ 7366 7367unsigned int 7368rs6000_special_round_type_align (tree type, unsigned int computed, 7369 unsigned int specified) 7370{ 7371 unsigned int align = MAX (computed, specified); 7372 tree field = TYPE_FIELDS (type); 7373 7374 /* Skip all non field decls */ 7375 while (field != NULL 7376 && (TREE_CODE (field) != FIELD_DECL 7377 || DECL_FIELD_ABI_IGNORED (field))) 7378 field = DECL_CHAIN (field); 7379 7380 if (field != NULL && field != type) 7381 { 7382 type = TREE_TYPE (field); 7383 while (TREE_CODE (type) == ARRAY_TYPE) 7384 type = TREE_TYPE (type); 7385 7386 if (type != error_mark_node && TYPE_MODE (type) == DFmode) 7387 align = MAX (align, 64); 7388 } 7389 7390 return align; 7391} 7392 7393/* Darwin increases record alignment to the natural alignment of 7394 the first field. */ 7395 7396unsigned int 7397darwin_rs6000_special_round_type_align (tree type, unsigned int computed, 7398 unsigned int specified) 7399{ 7400 unsigned int align = MAX (computed, specified); 7401 7402 if (TYPE_PACKED (type)) 7403 return align; 7404 7405 /* Find the first field, looking down into aggregates. */ 7406 do { 7407 tree field = TYPE_FIELDS (type); 7408 /* Skip all non field decls */ 7409 while (field != NULL 7410 && (TREE_CODE (field) != FIELD_DECL 7411 || DECL_FIELD_ABI_IGNORED (field))) 7412 field = DECL_CHAIN (field); 7413 if (! field) 7414 break; 7415 /* A packed field does not contribute any extra alignment. */ 7416 if (DECL_PACKED (field)) 7417 return align; 7418 type = TREE_TYPE (field); 7419 while (TREE_CODE (type) == ARRAY_TYPE) 7420 type = TREE_TYPE (type); 7421 } while (AGGREGATE_TYPE_P (type)); 7422 7423 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type) 7424 && ! TYPE_PACKED (type) && maximum_field_alignment == 0) 7425 align = MAX (align, TYPE_ALIGN (type)); 7426 7427 return align; 7428} 7429 7430/* Return 1 for an operand in small memory on V.4/eabi. 
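   That is, return nonzero when OP (a symbol, or symbol plus constant) is
   placed in the small data area and the referenced address can be reached
   with a 16-bit offset from _SDA_BASE_.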
*/ 7431 7432int 7433small_data_operand (rtx op ATTRIBUTE_UNUSED, 7434 machine_mode mode ATTRIBUTE_UNUSED) 7435{ 7436#if TARGET_ELF 7437 rtx sym_ref; 7438 7439 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA) 7440 return 0; 7441 7442 if (DEFAULT_ABI != ABI_V4) 7443 return 0; 7444 7445 if (SYMBOL_REF_P (op)) 7446 sym_ref = op; 7447 7448 else if (GET_CODE (op) != CONST 7449 || GET_CODE (XEXP (op, 0)) != PLUS 7450 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0)) 7451 || !CONST_INT_P (XEXP (XEXP (op, 0), 1))) 7452 return 0; 7453 7454 else 7455 { 7456 rtx sum = XEXP (op, 0); 7457 HOST_WIDE_INT summand; 7458 7459 /* We have to be careful here, because it is the referenced address 7460 that must be 32k from _SDA_BASE_, not just the symbol. */ 7461 summand = INTVAL (XEXP (sum, 1)); 7462 if (summand < 0 || summand > g_switch_value) 7463 return 0; 7464 7465 sym_ref = XEXP (sum, 0); 7466 } 7467 7468 return SYMBOL_REF_SMALL_P (sym_ref); 7469#else 7470 return 0; 7471#endif 7472} 7473 7474/* Return true if either operand is a general purpose register. */ 7475 7476bool 7477gpr_or_gpr_p (rtx op0, rtx op1) 7478{ 7479 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0))) 7480 || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); 7481} 7482 7483/* Return true if this is a move direct operation between GPR registers and 7484 floating point/VSX registers. */ 7485 7486bool 7487direct_move_p (rtx op0, rtx op1) 7488{ 7489 if (!REG_P (op0) || !REG_P (op1)) 7490 return false; 7491 7492 if (!TARGET_DIRECT_MOVE) 7493 return false; 7494 7495 int regno0 = REGNO (op0); 7496 int regno1 = REGNO (op1); 7497 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1)) 7498 return false; 7499 7500 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1)) 7501 return true; 7502 7503 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1)) 7504 return true; 7505 7506 return false; 7507} 7508 7509/* Return true if the ADDR is an acceptable address for a quad memory 7510 operation of mode MODE (either LQ/STQ for general purpose registers, or 7511 LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address 7512 is intended for LQ/STQ. If it is false, the address is intended for the ISA 7513 3.0 LXV/STXV instruction. */ 7514 7515bool 7516quad_address_p (rtx addr, machine_mode mode, bool strict) 7517{ 7518 rtx op0, op1; 7519 7520 if (GET_MODE_SIZE (mode) < 16) 7521 return false; 7522 7523 if (legitimate_indirect_address_p (addr, strict)) 7524 return true; 7525 7526 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode)) 7527 return false; 7528 7529 /* Is this a valid prefixed address? If the bottom four bits of the offset 7530 are non-zero, we could use a prefixed instruction (which does not have the 7531 DQ-form constraint that the traditional instruction had) instead of 7532 forcing the unaligned offset to a GPR. */ 7533 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ)) 7534 return true; 7535 7536 if (GET_CODE (addr) != PLUS) 7537 return false; 7538 7539 op0 = XEXP (addr, 0); 7540 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict)) 7541 return false; 7542 7543 op1 = XEXP (addr, 1); 7544 if (!CONST_INT_P (op1)) 7545 return false; 7546 7547 return quad_address_offset_p (INTVAL (op1)); 7548} 7549 7550/* Return true if this is a load or store quad operation. This function does 7551 not handle the atomic quad memory instructions. 
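   Roughly speaking, a TImode move between an even-numbered GPR pair and a
   quad-word aligned memory operand qualifies when -mquad-memory is enabled.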
*/ 7552 7553bool 7554quad_load_store_p (rtx op0, rtx op1) 7555{ 7556 bool ret; 7557 7558 if (!TARGET_QUAD_MEMORY) 7559 ret = false; 7560 7561 else if (REG_P (op0) && MEM_P (op1)) 7562 ret = (quad_int_reg_operand (op0, GET_MODE (op0)) 7563 && quad_memory_operand (op1, GET_MODE (op1)) 7564 && !reg_overlap_mentioned_p (op0, op1)); 7565 7566 else if (MEM_P (op0) && REG_P (op1)) 7567 ret = (quad_memory_operand (op0, GET_MODE (op0)) 7568 && quad_int_reg_operand (op1, GET_MODE (op1))); 7569 7570 else 7571 ret = false; 7572 7573 if (TARGET_DEBUG_ADDR) 7574 { 7575 fprintf (stderr, "\n========== quad_load_store, return %s\n", 7576 ret ? "true" : "false"); 7577 debug_rtx (gen_rtx_SET (op0, op1)); 7578 } 7579 7580 return ret; 7581} 7582 7583/* Given an address, return a constant offset term if one exists. */ 7584 7585static rtx 7586address_offset (rtx op) 7587{ 7588 if (GET_CODE (op) == PRE_INC 7589 || GET_CODE (op) == PRE_DEC) 7590 op = XEXP (op, 0); 7591 else if (GET_CODE (op) == PRE_MODIFY 7592 || GET_CODE (op) == LO_SUM) 7593 op = XEXP (op, 1); 7594 7595 if (GET_CODE (op) == CONST) 7596 op = XEXP (op, 0); 7597 7598 if (GET_CODE (op) == PLUS) 7599 op = XEXP (op, 1); 7600 7601 if (CONST_INT_P (op)) 7602 return op; 7603 7604 return NULL_RTX; 7605} 7606 7607/* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for 7608 the mode. If we can't find (or don't know) the alignment of the symbol 7609 we assume (optimistically) that it's sufficiently aligned [??? maybe we 7610 should be pessimistic]. Offsets are validated in the same way as for 7611 reg + offset. */ 7612static bool 7613darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode) 7614{ 7615 /* We should not get here with this. */ 7616 gcc_checking_assert (! mode_supports_dq_form (mode)); 7617 7618 if (GET_CODE (x) == CONST) 7619 x = XEXP (x, 0); 7620 7621 /* If we are building PIC code, then any symbol must be wrapped in an 7622 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */ 7623 bool machopic_offs_p = false; 7624 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) 7625 { 7626 x = XVECEXP (x, 0, 0); 7627 machopic_offs_p = true; 7628 } 7629 7630 rtx sym = NULL_RTX; 7631 unsigned HOST_WIDE_INT offset = 0; 7632 7633 if (GET_CODE (x) == PLUS) 7634 { 7635 sym = XEXP (x, 0); 7636 if (! SYMBOL_REF_P (sym)) 7637 return false; 7638 if (!CONST_INT_P (XEXP (x, 1))) 7639 return false; 7640 offset = INTVAL (XEXP (x, 1)); 7641 } 7642 else if (SYMBOL_REF_P (x)) 7643 sym = x; 7644 else if (CONST_INT_P (x)) 7645 offset = INTVAL (x); 7646 else if (GET_CODE (x) == LABEL_REF) 7647 offset = 0; // We assume code labels are Pmode aligned 7648 else 7649 return false; // not sure what we have here. 7650 7651 /* If we don't know the alignment of the thing to which the symbol refers, 7652 we assume optimistically it is "enough". 7653 ??? maybe we should be pessimistic instead. */ 7654 unsigned align = 0; 7655 7656 if (sym) 7657 { 7658 tree decl = SYMBOL_REF_DECL (sym); 7659 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */ 7660 if (TARGET_MACHO && flag_pic && !machopic_offs_p) 7661 return false; 7662#if TARGET_MACHO 7663 if (MACHO_SYMBOL_INDIRECTION_P (sym)) 7664 /* The decl in an indirection symbol is the original one, which might 7665 be less aligned than the indirection. Our indirections are always 7666 pointer-aligned. 
*/ 7667 ; 7668 else 7669#endif 7670 if (decl && DECL_ALIGN (decl)) 7671 align = DECL_ALIGN_UNIT (decl); 7672 } 7673 7674 unsigned int extra = 0; 7675 switch (mode) 7676 { 7677 case E_DFmode: 7678 case E_DDmode: 7679 case E_DImode: 7680 /* If we are using VSX scalar loads, restrict ourselves to reg+reg 7681 addressing. */ 7682 if (VECTOR_MEM_VSX_P (mode)) 7683 return false; 7684 7685 if (!TARGET_POWERPC64) 7686 extra = 4; 7687 else if ((offset & 3) || (align & 3)) 7688 return false; 7689 break; 7690 7691 case E_TFmode: 7692 case E_IFmode: 7693 case E_KFmode: 7694 case E_TDmode: 7695 case E_TImode: 7696 case E_PTImode: 7697 extra = 8; 7698 if (!TARGET_POWERPC64) 7699 extra = 12; 7700 else if ((offset & 3) || (align & 3)) 7701 return false; 7702 break; 7703 7704 default: 7705 break; 7706 } 7707 7708 /* We only care if the access(es) would cause a change to the high part. */ 7709 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 7710 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 7711} 7712 7713/* Return true if the MEM operand is a memory operand suitable for use 7714 with a (full width, possibly multiple) gpr load/store. On 7715 powerpc64 this means the offset must be divisible by 4. 7716 Implements 'Y' constraint. 7717 7718 Accept direct, indexed, offset, lo_sum and tocref. Since this is 7719 a constraint function we know the operand has satisfied a suitable 7720 memory predicate. 7721 7722 Offsetting a lo_sum should not be allowed, except where we know by 7723 alignment that a 32k boundary is not crossed. Note that by 7724 "offsetting" here we mean a further offset to access parts of the 7725 MEM. It's fine to have a lo_sum where the inner address is offset 7726 from a sym, since the same sym+offset will appear in the high part 7727 of the address calculation. */ 7728 7729bool 7730mem_operand_gpr (rtx op, machine_mode mode) 7731{ 7732 unsigned HOST_WIDE_INT offset; 7733 int extra; 7734 rtx addr = XEXP (op, 0); 7735 7736 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */ 7737 if (TARGET_UPDATE 7738 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 7739 && mode_supports_pre_incdec_p (mode) 7740 && legitimate_indirect_address_p (XEXP (addr, 0), false)) 7741 return true; 7742 7743 /* Allow prefixed instructions if supported. If the bottom two bits of the 7744 offset are non-zero, we could use a prefixed instruction (which does not 7745 have the DS-form constraint that the traditional instruction had) instead 7746 of forcing the unaligned offset to a GPR. */ 7747 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS)) 7748 return true; 7749 7750 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is 7751 really OK. Doing this early avoids teaching all the other machinery 7752 about them. */ 7753 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM) 7754 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode); 7755 7756 /* Only allow offsettable addresses. See PRs 83969 and 84279. */ 7757 if (!rs6000_offsettable_memref_p (op, mode, false)) 7758 return false; 7759 7760 op = address_offset (addr); 7761 if (op == NULL_RTX) 7762 return true; 7763 7764 offset = INTVAL (op); 7765 if (TARGET_POWERPC64 && (offset & 3) != 0) 7766 return false; 7767 7768 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; 7769 if (extra < 0) 7770 extra = 0; 7771 7772 if (GET_CODE (addr) == LO_SUM) 7773 /* For lo_sum addresses, we must allow any offset except one that 7774 causes a wrap, so test only the low 16 bits. 
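   (The expression below sign-extends the low 16 bits of the offset: for
   example a low half of 0xfffc becomes -4, while 0x7ffc is unchanged.)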
*/ 7775 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 7776 7777 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 7778} 7779 7780/* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr, 7781 enforce an offset divisible by 4 even for 32-bit. */ 7782 7783bool 7784mem_operand_ds_form (rtx op, machine_mode mode) 7785{ 7786 unsigned HOST_WIDE_INT offset; 7787 int extra; 7788 rtx addr = XEXP (op, 0); 7789 7790 /* Allow prefixed instructions if supported. If the bottom two bits of the 7791 offset are non-zero, we could use a prefixed instruction (which does not 7792 have the DS-form constraint that the traditional instruction had) instead 7793 of forcing the unaligned offset to a GPR. */ 7794 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS)) 7795 return true; 7796 7797 if (!offsettable_address_p (false, mode, addr)) 7798 return false; 7799 7800 op = address_offset (addr); 7801 if (op == NULL_RTX) 7802 return true; 7803 7804 offset = INTVAL (op); 7805 if ((offset & 3) != 0) 7806 return false; 7807 7808 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD; 7809 if (extra < 0) 7810 extra = 0; 7811 7812 if (GET_CODE (addr) == LO_SUM) 7813 /* For lo_sum addresses, we must allow any offset except one that 7814 causes a wrap, so test only the low 16 bits. */ 7815 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000; 7816 7817 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 7818} 7819 7820/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */ 7821 7822static bool 7823reg_offset_addressing_ok_p (machine_mode mode) 7824{ 7825 switch (mode) 7826 { 7827 case E_V16QImode: 7828 case E_V8HImode: 7829 case E_V4SFmode: 7830 case E_V4SImode: 7831 case E_V2DFmode: 7832 case E_V2DImode: 7833 case E_V1TImode: 7834 case E_TImode: 7835 case E_TFmode: 7836 case E_KFmode: 7837 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the 7838 ISA 3.0 vector d-form addressing mode was added. While TImode is not 7839 a vector mode, if we want to use the VSX registers to move it around, 7840 we need to restrict ourselves to reg+reg addressing. Similarly for 7841 IEEE 128-bit floating point that is passed in a single vector 7842 register. */ 7843 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)) 7844 return mode_supports_dq_form (mode); 7845 break; 7846 7847 /* The vector pair/quad types support offset addressing if the 7848 underlying vectors support offset addressing. */ 7849 case E_POImode: 7850 case E_PXImode: 7851 return TARGET_MMA; 7852 7853 case E_SDmode: 7854 /* If we can do direct load/stores of SDmode, restrict it to reg+reg 7855 addressing for the LFIWZX and STFIWX instructions. */ 7856 if (TARGET_NO_SDMODE_STACK) 7857 return false; 7858 break; 7859 7860 default: 7861 break; 7862 } 7863 7864 return true; 7865} 7866 7867static bool 7868virtual_stack_registers_memory_p (rtx op) 7869{ 7870 int regnum; 7871 7872 if (REG_P (op)) 7873 regnum = REGNO (op); 7874 7875 else if (GET_CODE (op) == PLUS 7876 && REG_P (XEXP (op, 0)) 7877 && CONST_INT_P (XEXP (op, 1))) 7878 regnum = REGNO (XEXP (op, 0)); 7879 7880 else 7881 return false; 7882 7883 return (regnum >= FIRST_VIRTUAL_REGISTER 7884 && regnum <= LAST_VIRTUAL_POINTER_REGISTER); 7885} 7886 7887/* Return true if a MODE sized memory accesses to OP plus OFFSET 7888 is known to not straddle a 32k boundary. This function is used 7889 to determine whether -mcmodel=medium code can use TOC pointer 7890 relative addressing for OP. 
This means the alignment of the TOC 7891 pointer must also be taken into account, and unfortunately that is 7892 only 8 bytes. */ 7893 7894#ifndef POWERPC64_TOC_POINTER_ALIGNMENT 7895#define POWERPC64_TOC_POINTER_ALIGNMENT 8 7896#endif 7897 7898static bool 7899offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset, 7900 machine_mode mode) 7901{ 7902 tree decl; 7903 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask; 7904 7905 if (!SYMBOL_REF_P (op)) 7906 return false; 7907 7908 /* ISA 3.0 vector d-form addressing is restricted, don't allow 7909 SYMBOL_REF. */ 7910 if (mode_supports_dq_form (mode)) 7911 return false; 7912 7913 dsize = GET_MODE_SIZE (mode); 7914 decl = SYMBOL_REF_DECL (op); 7915 if (!decl) 7916 { 7917 if (dsize == 0) 7918 return false; 7919 7920 /* -fsection-anchors loses the original SYMBOL_REF_DECL when 7921 replacing memory addresses with an anchor plus offset. We 7922 could find the decl by rummaging around in the block->objects 7923 VEC for the given offset but that seems like too much work. */ 7924 dalign = BITS_PER_UNIT; 7925 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op) 7926 && SYMBOL_REF_ANCHOR_P (op) 7927 && SYMBOL_REF_BLOCK (op) != NULL) 7928 { 7929 struct object_block *block = SYMBOL_REF_BLOCK (op); 7930 7931 dalign = block->alignment; 7932 offset += SYMBOL_REF_BLOCK_OFFSET (op); 7933 } 7934 else if (CONSTANT_POOL_ADDRESS_P (op)) 7935 { 7936 /* It would be nice to have get_pool_align().. */ 7937 machine_mode cmode = get_pool_mode (op); 7938 7939 dalign = GET_MODE_ALIGNMENT (cmode); 7940 } 7941 } 7942 else if (DECL_P (decl)) 7943 { 7944 dalign = DECL_ALIGN (decl); 7945 7946 if (dsize == 0) 7947 { 7948 /* Allow BLKmode when the entire object is known to not 7949 cross a 32k boundary. */ 7950 if (!DECL_SIZE_UNIT (decl)) 7951 return false; 7952 7953 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl))) 7954 return false; 7955 7956 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl)); 7957 if (dsize > 32768) 7958 return false; 7959 7960 dalign /= BITS_PER_UNIT; 7961 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) 7962 dalign = POWERPC64_TOC_POINTER_ALIGNMENT; 7963 return dalign >= dsize; 7964 } 7965 } 7966 else 7967 gcc_unreachable (); 7968 7969 /* Find how many bits of the alignment we know for this access. */ 7970 dalign /= BITS_PER_UNIT; 7971 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT) 7972 dalign = POWERPC64_TOC_POINTER_ALIGNMENT; 7973 mask = dalign - 1; 7974 lsb = offset & -offset; 7975 mask &= lsb - 1; 7976 dalign = mask + 1; 7977 7978 return dalign >= dsize; 7979} 7980 7981static bool 7982constant_pool_expr_p (rtx op) 7983{ 7984 rtx base, offset; 7985 7986 split_const (op, &base, &offset); 7987 return (SYMBOL_REF_P (base) 7988 && CONSTANT_POOL_ADDRESS_P (base) 7989 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode)); 7990} 7991 7992/* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null, 7993 use that as the register to put the HIGH value into if register allocation 7994 is already done. 
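   For -mcmodel=small, or before register allocation, the result is the bare
   (unspec [symbol, toc-register] UNSPEC_TOCREL); for the medium and large
   code models after reload it is a (lo_sum (high ...) ...) form of the same
   unspec.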
*/ 7995 7996rtx 7997create_TOC_reference (rtx symbol, rtx largetoc_reg) 7998{ 7999 rtx tocrel, tocreg, hi; 8000 8001 gcc_assert (TARGET_TOC); 8002 8003 if (TARGET_DEBUG_ADDR) 8004 { 8005 if (SYMBOL_REF_P (symbol)) 8006 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n", 8007 XSTR (symbol, 0)); 8008 else 8009 { 8010 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n", 8011 GET_RTX_NAME (GET_CODE (symbol))); 8012 debug_rtx (symbol); 8013 } 8014 } 8015 8016 if (!can_create_pseudo_p ()) 8017 df_set_regs_ever_live (TOC_REGISTER, true); 8018 8019 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER); 8020 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL); 8021 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ()) 8022 return tocrel; 8023 8024 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel)); 8025 if (largetoc_reg != NULL) 8026 { 8027 emit_move_insn (largetoc_reg, hi); 8028 hi = largetoc_reg; 8029 } 8030 return gen_rtx_LO_SUM (Pmode, hi, tocrel); 8031} 8032 8033/* These are only used to pass through from print_operand/print_operand_address 8034 to rs6000_output_addr_const_extra over the intervening function 8035 output_addr_const which is not target code. */ 8036static const_rtx tocrel_base_oac, tocrel_offset_oac; 8037 8038/* Return true if OP is a toc pointer relative address (the output 8039 of create_TOC_reference). If STRICT, do not match non-split 8040 -mcmodel=large/medium toc pointer relative addresses. If the pointers 8041 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and 8042 TOCREL_OFFSET_RET respectively. */ 8043 8044bool 8045toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret, 8046 const_rtx *tocrel_offset_ret) 8047{ 8048 if (!TARGET_TOC) 8049 return false; 8050 8051 if (TARGET_CMODEL != CMODEL_SMALL) 8052 { 8053 /* When strict ensure we have everything tidy. */ 8054 if (strict 8055 && !(GET_CODE (op) == LO_SUM 8056 && REG_P (XEXP (op, 0)) 8057 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))) 8058 return false; 8059 8060 /* When not strict, allow non-split TOC addresses and also allow 8061 (lo_sum (high ..)) TOC addresses created during reload. */ 8062 if (GET_CODE (op) == LO_SUM) 8063 op = XEXP (op, 1); 8064 } 8065 8066 const_rtx tocrel_base = op; 8067 const_rtx tocrel_offset = const0_rtx; 8068 8069 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op))) 8070 { 8071 tocrel_base = XEXP (op, 0); 8072 tocrel_offset = XEXP (op, 1); 8073 } 8074 8075 if (tocrel_base_ret) 8076 *tocrel_base_ret = tocrel_base; 8077 if (tocrel_offset_ret) 8078 *tocrel_offset_ret = tocrel_offset; 8079 8080 return (GET_CODE (tocrel_base) == UNSPEC 8081 && XINT (tocrel_base, 1) == UNSPEC_TOCREL 8082 && REG_P (XVECEXP (tocrel_base, 0, 1)) 8083 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER); 8084} 8085 8086/* Return true if X is a constant pool address, and also for cmodel=medium 8087 if X is a toc-relative address known to be offsettable within MODE. 
*/ 8088 8089bool 8090legitimate_constant_pool_address_p (const_rtx x, machine_mode mode, 8091 bool strict) 8092{ 8093 const_rtx tocrel_base, tocrel_offset; 8094 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset) 8095 && (TARGET_CMODEL != CMODEL_MEDIUM 8096 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0)) 8097 || mode == QImode 8098 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0), 8099 INTVAL (tocrel_offset), mode))); 8100} 8101 8102static bool 8103legitimate_small_data_p (machine_mode mode, rtx x) 8104{ 8105 return (DEFAULT_ABI == ABI_V4 8106 && !flag_pic && !TARGET_TOC 8107 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST) 8108 && small_data_operand (x, mode)); 8109} 8110 8111bool 8112rs6000_legitimate_offset_address_p (machine_mode mode, rtx x, 8113 bool strict, bool worst_case) 8114{ 8115 unsigned HOST_WIDE_INT offset; 8116 unsigned int extra; 8117 8118 if (GET_CODE (x) != PLUS) 8119 return false; 8120 if (!REG_P (XEXP (x, 0))) 8121 return false; 8122 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) 8123 return false; 8124 if (mode_supports_dq_form (mode)) 8125 return quad_address_p (x, mode, strict); 8126 if (!reg_offset_addressing_ok_p (mode)) 8127 return virtual_stack_registers_memory_p (x); 8128 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress)) 8129 return true; 8130 if (!CONST_INT_P (XEXP (x, 1))) 8131 return false; 8132 8133 offset = INTVAL (XEXP (x, 1)); 8134 extra = 0; 8135 switch (mode) 8136 { 8137 case E_DFmode: 8138 case E_DDmode: 8139 case E_DImode: 8140 /* If we are using VSX scalar loads, restrict ourselves to reg+reg 8141 addressing. */ 8142 if (VECTOR_MEM_VSX_P (mode)) 8143 return false; 8144 8145 if (!worst_case) 8146 break; 8147 if (!TARGET_POWERPC64) 8148 extra = 4; 8149 else if (offset & 3) 8150 return false; 8151 break; 8152 8153 case E_TFmode: 8154 case E_IFmode: 8155 case E_KFmode: 8156 case E_TDmode: 8157 case E_TImode: 8158 case E_PTImode: 8159 extra = 8; 8160 if (!worst_case) 8161 break; 8162 if (!TARGET_POWERPC64) 8163 extra = 12; 8164 else if (offset & 3) 8165 return false; 8166 break; 8167 8168 default: 8169 break; 8170 } 8171 8172 if (TARGET_PREFIXED) 8173 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra); 8174 else 8175 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra); 8176} 8177 8178bool 8179legitimate_indexed_address_p (rtx x, int strict) 8180{ 8181 rtx op0, op1; 8182 8183 if (GET_CODE (x) != PLUS) 8184 return false; 8185 8186 op0 = XEXP (x, 0); 8187 op1 = XEXP (x, 1); 8188 8189 return (REG_P (op0) && REG_P (op1) 8190 && ((INT_REG_OK_FOR_BASE_P (op0, strict) 8191 && INT_REG_OK_FOR_INDEX_P (op1, strict)) 8192 || (INT_REG_OK_FOR_BASE_P (op1, strict) 8193 && INT_REG_OK_FOR_INDEX_P (op0, strict)))); 8194} 8195 8196bool 8197avoiding_indexed_address_p (machine_mode mode) 8198{ 8199 unsigned int msize = GET_MODE_SIZE (mode); 8200 8201 /* Avoid indexed addressing for modes that have non-indexed load/store 8202 instruction forms. On power10, vector pairs have an indexed 8203 form, but vector quads don't. 
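   Hence the size check below: a 32-byte vector pair may still use an
   indexed address, while a 64-byte vector quad may not.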
*/ 8204 if (msize > 16) 8205 return msize != 32; 8206 8207 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode)); 8208} 8209 8210bool 8211legitimate_indirect_address_p (rtx x, int strict) 8212{ 8213 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict); 8214} 8215 8216bool 8217macho_lo_sum_memory_operand (rtx x, machine_mode mode) 8218{ 8219 if (!TARGET_MACHO || !flag_pic 8220 || mode != SImode || !MEM_P (x)) 8221 return false; 8222 x = XEXP (x, 0); 8223 8224 if (GET_CODE (x) != LO_SUM) 8225 return false; 8226 if (!REG_P (XEXP (x, 0))) 8227 return false; 8228 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0)) 8229 return false; 8230 x = XEXP (x, 1); 8231 8232 return CONSTANT_P (x); 8233} 8234 8235static bool 8236legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) 8237{ 8238 if (GET_CODE (x) != LO_SUM) 8239 return false; 8240 if (!REG_P (XEXP (x, 0))) 8241 return false; 8242 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) 8243 return false; 8244 /* quad word addresses are restricted, and we can't use LO_SUM. */ 8245 if (mode_supports_dq_form (mode)) 8246 return false; 8247 x = XEXP (x, 1); 8248 8249 if (TARGET_ELF) 8250 { 8251 bool large_toc_ok; 8252 8253 if (DEFAULT_ABI == ABI_V4 && flag_pic) 8254 return false; 8255 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls 8256 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS 8257 recognizes some LO_SUM addresses as valid although this 8258 function says opposite. In most cases, LRA through different 8259 transformations can generate correct code for address reloads. 8260 It cannot manage only some LO_SUM cases. So we need to add 8261 code here saying that some addresses are still valid. */ 8262 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL 8263 && small_toc_ref (x, VOIDmode)); 8264 if (TARGET_TOC && ! large_toc_ok) 8265 return false; 8266 if (GET_MODE_NUNITS (mode) != 1) 8267 return false; 8268 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 8269 && !(/* ??? Assume floating point reg based on mode? */ 8270 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 8271 return false; 8272 8273 return CONSTANT_P (x) || large_toc_ok; 8274 } 8275 else if (TARGET_MACHO) 8276 { 8277 if (GET_MODE_NUNITS (mode) != 1) 8278 return false; 8279 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 8280 && !(/* see above */ 8281 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 8282 return false; 8283#if TARGET_MACHO 8284 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic) 8285 return CONSTANT_P (x); 8286#endif 8287 /* Macho-O PIC code from here. */ 8288 if (GET_CODE (x) == CONST) 8289 x = XEXP (x, 0); 8290 8291 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */ 8292 if (SYMBOL_REF_P (x)) 8293 return false; 8294 8295 /* So this is OK if the wrapped object is const. */ 8296 if (GET_CODE (x) == UNSPEC 8297 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) 8298 return CONSTANT_P (XVECEXP (x, 0, 0)); 8299 return CONSTANT_P (x); 8300 } 8301 return false; 8302} 8303 8304 8305/* Try machine-dependent ways of modifying an illegitimate address 8306 to be legitimate. If we find one, return the new, valid address. 8307 This is used from only one place: `memory_address' in explow.c. 8308 8309 OLDX is the address as it was before break_out_memory_refs was 8310 called. In some cases it is useful to look at this to decide what 8311 needs to be done. 8312 8313 It is always safe for this function to do nothing. It exists to 8314 recognize opportunities to optimize the output. 
8315 8316 On RS/6000, first check for the sum of a register with a constant 8317 integer that is out of range. If so, generate code to add the 8318 constant with the low-order 16 bits masked to the register and force 8319 this result into another register (this can be done with `cau'). 8320 Then generate an address of REG+(CONST&0xffff), allowing for the 8321 possibility of bit 16 being a one. 8322 8323 Then check for the sum of a register and something not constant, try to 8324 load the other things into a register and return the sum. */ 8325 8326static rtx 8327rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 8328 machine_mode mode) 8329{ 8330 unsigned int extra; 8331 8332 if (!reg_offset_addressing_ok_p (mode) 8333 || mode_supports_dq_form (mode)) 8334 { 8335 if (virtual_stack_registers_memory_p (x)) 8336 return x; 8337 8338 /* In theory we should not be seeing addresses of the form reg+0, 8339 but just in case it is generated, optimize it away. */ 8340 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) 8341 return force_reg (Pmode, XEXP (x, 0)); 8342 8343 /* For TImode with load/store quad, restrict addresses to just a single 8344 pointer, so it works with both GPRs and VSX registers. */ 8345 /* Make sure both operands are registers. */ 8346 else if (GET_CODE (x) == PLUS 8347 && (mode != TImode || !TARGET_VSX)) 8348 return gen_rtx_PLUS (Pmode, 8349 force_reg (Pmode, XEXP (x, 0)), 8350 force_reg (Pmode, XEXP (x, 1))); 8351 else 8352 return force_reg (Pmode, x); 8353 } 8354 if (SYMBOL_REF_P (x) && !TARGET_MACHO) 8355 { 8356 enum tls_model model = SYMBOL_REF_TLS_MODEL (x); 8357 if (model != 0) 8358 return rs6000_legitimize_tls_address (x, model); 8359 } 8360 8361 extra = 0; 8362 switch (mode) 8363 { 8364 case E_TFmode: 8365 case E_TDmode: 8366 case E_TImode: 8367 case E_PTImode: 8368 case E_IFmode: 8369 case E_KFmode: 8370 /* As in legitimate_offset_address_p we do not assume 8371 worst-case. The mode here is just a hint as to the registers 8372 used. A TImode is usually in gprs, but may actually be in 8373 fprs. Leave worst-case scenario for reload to handle via 8374 insn constraints. PTImode is only GPRs. */ 8375 extra = 8; 8376 break; 8377 default: 8378 break; 8379 } 8380 8381 if (GET_CODE (x) == PLUS 8382 && REG_P (XEXP (x, 0)) 8383 && CONST_INT_P (XEXP (x, 1)) 8384 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) 8385 >= 0x10000 - extra)) 8386 { 8387 HOST_WIDE_INT high_int, low_int; 8388 rtx sum; 8389 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000; 8390 if (low_int >= 0x8000 - extra) 8391 low_int = 0; 8392 high_int = INTVAL (XEXP (x, 1)) - low_int; 8393 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), 8394 gen_int_mode (high_int, Pmode)), 0); 8395 return plus_constant (Pmode, sum, low_int); 8396 } 8397 else if (GET_CODE (x) == PLUS 8398 && REG_P (XEXP (x, 0)) 8399 && !CONST_INT_P (XEXP (x, 1)) 8400 && GET_MODE_NUNITS (mode) == 1 8401 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD 8402 || (/* ??? Assume floating point reg based on mode? 
*/ 8403 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) 8404 && !avoiding_indexed_address_p (mode)) 8405 { 8406 return gen_rtx_PLUS (Pmode, XEXP (x, 0), 8407 force_reg (Pmode, force_operand (XEXP (x, 1), 0))); 8408 } 8409 else if ((TARGET_ELF 8410#if TARGET_MACHO 8411 || !MACHO_DYNAMIC_NO_PIC_P 8412#endif 8413 ) 8414 && TARGET_32BIT 8415 && TARGET_NO_TOC_OR_PCREL 8416 && !flag_pic 8417 && !CONST_INT_P (x) 8418 && !CONST_WIDE_INT_P (x) 8419 && !CONST_DOUBLE_P (x) 8420 && CONSTANT_P (x) 8421 && GET_MODE_NUNITS (mode) == 1 8422 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD 8423 || (/* ??? Assume floating point reg based on mode? */ 8424 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))) 8425 { 8426 rtx reg = gen_reg_rtx (Pmode); 8427 if (TARGET_ELF) 8428 emit_insn (gen_elf_high (reg, x)); 8429 else 8430 emit_insn (gen_macho_high (Pmode, reg, x)); 8431 return gen_rtx_LO_SUM (Pmode, reg, x); 8432 } 8433 else if (TARGET_TOC 8434 && SYMBOL_REF_P (x) 8435 && constant_pool_expr_p (x) 8436 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode)) 8437 return create_TOC_reference (x, NULL_RTX); 8438 else 8439 return x; 8440} 8441 8442/* Debug version of rs6000_legitimize_address. */ 8443static rtx 8444rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) 8445{ 8446 rtx ret; 8447 rtx_insn *insns; 8448 8449 start_sequence (); 8450 ret = rs6000_legitimize_address (x, oldx, mode); 8451 insns = get_insns (); 8452 end_sequence (); 8453 8454 if (ret != x) 8455 { 8456 fprintf (stderr, 8457 "\nrs6000_legitimize_address: mode %s, old code %s, " 8458 "new code %s, modified\n", 8459 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)), 8460 GET_RTX_NAME (GET_CODE (ret))); 8461 8462 fprintf (stderr, "Original address:\n"); 8463 debug_rtx (x); 8464 8465 fprintf (stderr, "oldx:\n"); 8466 debug_rtx (oldx); 8467 8468 fprintf (stderr, "New address:\n"); 8469 debug_rtx (ret); 8470 8471 if (insns) 8472 { 8473 fprintf (stderr, "Insns added:\n"); 8474 debug_rtx_list (insns, 20); 8475 } 8476 } 8477 else 8478 { 8479 fprintf (stderr, 8480 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n", 8481 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x))); 8482 8483 debug_rtx (x); 8484 } 8485 8486 if (insns) 8487 emit_insn (insns); 8488 8489 return ret; 8490} 8491 8492/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 8493 We need to emit DTP-relative relocations. */ 8494 8495static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 8496static void 8497rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x) 8498{ 8499 switch (size) 8500 { 8501 case 4: 8502 fputs ("\t.long\t", file); 8503 break; 8504 case 8: 8505 fputs (DOUBLE_INT_ASM_OP, file); 8506 break; 8507 default: 8508 gcc_unreachable (); 8509 } 8510 output_addr_const (file, x); 8511 if (TARGET_ELF) 8512 fputs ("@dtprel+0x8000", file); 8513} 8514 8515/* Return true if X is a symbol that refers to real (rather than emulated) 8516 TLS. */ 8517 8518static bool 8519rs6000_real_tls_symbol_ref_p (rtx x) 8520{ 8521 return (SYMBOL_REF_P (x) 8522 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL); 8523} 8524 8525/* In the name of slightly smaller debug output, and to cater to 8526 general assembler lossage, recognize various UNSPEC sequences 8527 and turn them back into a direct symbol reference. 
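   For example, a TOC-relative reference (an UNSPEC_TOCREL, possibly wrapped
   in a lo_sum and offset by a constant) is folded back to the underlying
   SYMBOL_REF plus that constant for debug output.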
*/ 8528 8529static rtx 8530rs6000_delegitimize_address (rtx orig_x) 8531{ 8532 rtx x, y, offset; 8533 8534 if (GET_CODE (orig_x) == UNSPEC && XINT (orig_x, 1) == UNSPEC_FUSION_GPR) 8535 orig_x = XVECEXP (orig_x, 0, 0); 8536 8537 orig_x = delegitimize_mem_from_attrs (orig_x); 8538 8539 x = orig_x; 8540 if (MEM_P (x)) 8541 x = XEXP (x, 0); 8542 8543 y = x; 8544 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM) 8545 y = XEXP (y, 1); 8546 8547 offset = NULL_RTX; 8548 if (GET_CODE (y) == PLUS 8549 && GET_MODE (y) == Pmode 8550 && CONST_INT_P (XEXP (y, 1))) 8551 { 8552 offset = XEXP (y, 1); 8553 y = XEXP (y, 0); 8554 } 8555 8556 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL) 8557 { 8558 y = XVECEXP (y, 0, 0); 8559 8560#ifdef HAVE_AS_TLS 8561 /* Do not associate thread-local symbols with the original 8562 constant pool symbol. */ 8563 if (TARGET_XCOFF 8564 && SYMBOL_REF_P (y) 8565 && CONSTANT_POOL_ADDRESS_P (y) 8566 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y))) 8567 return orig_x; 8568#endif 8569 8570 if (offset != NULL_RTX) 8571 y = gen_rtx_PLUS (Pmode, y, offset); 8572 if (!MEM_P (orig_x)) 8573 return y; 8574 else 8575 return replace_equiv_address_nv (orig_x, y); 8576 } 8577 8578 if (TARGET_MACHO 8579 && GET_CODE (orig_x) == LO_SUM 8580 && GET_CODE (XEXP (orig_x, 1)) == CONST) 8581 { 8582 y = XEXP (XEXP (orig_x, 1), 0); 8583 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET) 8584 return XVECEXP (y, 0, 0); 8585 } 8586 8587 return orig_x; 8588} 8589 8590/* Return true if X shouldn't be emitted into the debug info. 8591 The linker doesn't like .toc section references from 8592 .debug_* sections, so reject .toc section symbols. */ 8593 8594static bool 8595rs6000_const_not_ok_for_debug_p (rtx x) 8596{ 8597 if (GET_CODE (x) == UNSPEC) 8598 return true; 8599 if (SYMBOL_REF_P (x) 8600 && CONSTANT_POOL_ADDRESS_P (x)) 8601 { 8602 rtx c = get_pool_constant (x); 8603 machine_mode cmode = get_pool_mode (x); 8604 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode)) 8605 return true; 8606 } 8607 8608 return false; 8609} 8610 8611/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ 8612 8613static bool 8614rs6000_legitimate_combined_insn (rtx_insn *insn) 8615{ 8616 int icode = INSN_CODE (insn); 8617 8618 /* Reject creating doloop insns. Combine should not be allowed 8619 to create these for a number of reasons: 8620 1) In a nested loop, if combine creates one of these in an 8621 outer loop and the register allocator happens to allocate ctr 8622 to the outer loop insn, then the inner loop can't use ctr. 8623 Inner loops ought to be more highly optimized. 8624 2) Combine often wants to create one of these from what was 8625 originally a three insn sequence, first combining the three 8626 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not 8627 allocated ctr, the splitter takes use back to the three insn 8628 sequence. It's better to stop combine at the two insn 8629 sequence. 8630 3) Faced with not being able to allocate ctr for ctrsi/crtdi 8631 insns, the register allocator sometimes uses floating point 8632 or vector registers for the pseudo. Since ctrsi/ctrdi is a 8633 jump insn and output reloads are not implemented for jumps, 8634 the ctrsi/ctrdi splitters need to handle all possible cases. 8635 That's a pain, and it gets to be seriously difficult when a 8636 splitter that runs after reload needs memory to transfer from 8637 a gpr to fpr. See PR70098 and PR71763 which are not fixed 8638 for the difficult case. 
It's better to not create problems 8639 in the first place. */ 8640 if (icode != CODE_FOR_nothing 8641 && (icode == CODE_FOR_bdz_si 8642 || icode == CODE_FOR_bdz_di 8643 || icode == CODE_FOR_bdnz_si 8644 || icode == CODE_FOR_bdnz_di 8645 || icode == CODE_FOR_bdztf_si 8646 || icode == CODE_FOR_bdztf_di 8647 || icode == CODE_FOR_bdnztf_si 8648 || icode == CODE_FOR_bdnztf_di)) 8649 return false; 8650 8651 return true; 8652} 8653 8654/* Construct the SYMBOL_REF for the tls_get_addr function. */ 8655 8656static GTY(()) rtx rs6000_tls_symbol; 8657static rtx 8658rs6000_tls_get_addr (void) 8659{ 8660 if (!rs6000_tls_symbol) 8661 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr"); 8662 8663 return rs6000_tls_symbol; 8664} 8665 8666/* Construct the SYMBOL_REF for TLS GOT references. */ 8667 8668static GTY(()) rtx rs6000_got_symbol; 8669rtx 8670rs6000_got_sym (void) 8671{ 8672 if (!rs6000_got_symbol) 8673 { 8674 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 8675 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL; 8676 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL; 8677 } 8678 8679 return rs6000_got_symbol; 8680} 8681 8682/* AIX Thread-Local Address support. */ 8683 8684static rtx 8685rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model) 8686{ 8687 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr; 8688 const char *name; 8689 char *tlsname; 8690 8691 name = XSTR (addr, 0); 8692 /* Append TLS CSECT qualifier, unless the symbol already is qualified 8693 or the symbol will be in TLS private data section. */ 8694 if (name[strlen (name) - 1] != ']' 8695 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr)) 8696 || bss_initializer_p (SYMBOL_REF_DECL (addr)))) 8697 { 8698 tlsname = XALLOCAVEC (char, strlen (name) + 4); 8699 strcpy (tlsname, name); 8700 strcat (tlsname, 8701 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]"); 8702 tlsaddr = copy_rtx (addr); 8703 XSTR (tlsaddr, 0) = ggc_strdup (tlsname); 8704 } 8705 else 8706 tlsaddr = addr; 8707 8708 /* Place addr into TOC constant pool. */ 8709 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr); 8710 8711 /* Output the TOC entry and create the MEM referencing the value. */ 8712 if (constant_pool_expr_p (XEXP (sym, 0)) 8713 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode)) 8714 { 8715 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX); 8716 mem = gen_const_mem (Pmode, tocref); 8717 set_mem_alias_set (mem, get_TOC_alias_set ()); 8718 } 8719 else 8720 return sym; 8721 8722 /* Use global-dynamic for local-dynamic. */ 8723 if (model == TLS_MODEL_GLOBAL_DYNAMIC 8724 || model == TLS_MODEL_LOCAL_DYNAMIC) 8725 { 8726 /* Create new TOC reference for @m symbol. 
*/ 8727 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0); 8728 tlsname = XALLOCAVEC (char, strlen (name) + 1); 8729 strcpy (tlsname, "*LCM"); 8730 strcat (tlsname, name + 3); 8731 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname)); 8732 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL; 8733 tocref = create_TOC_reference (modaddr, NULL_RTX); 8734 rtx modmem = gen_const_mem (Pmode, tocref); 8735 set_mem_alias_set (modmem, get_TOC_alias_set ()); 8736 8737 rtx modreg = gen_reg_rtx (Pmode); 8738 emit_insn (gen_rtx_SET (modreg, modmem)); 8739 8740 tmpreg = gen_reg_rtx (Pmode); 8741 emit_insn (gen_rtx_SET (tmpreg, mem)); 8742 8743 dest = gen_reg_rtx (Pmode); 8744 if (TARGET_32BIT) 8745 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg)); 8746 else 8747 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg)); 8748 return dest; 8749 } 8750 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */ 8751 else if (TARGET_32BIT) 8752 { 8753 tlsreg = gen_reg_rtx (SImode); 8754 emit_insn (gen_tls_get_tpointer (tlsreg)); 8755 } 8756 else 8757 tlsreg = gen_rtx_REG (DImode, 13); 8758 8759 /* Load the TOC value into temporary register. */ 8760 tmpreg = gen_reg_rtx (Pmode); 8761 emit_insn (gen_rtx_SET (tmpreg, mem)); 8762 set_unique_reg_note (get_last_insn (), REG_EQUAL, 8763 gen_rtx_MINUS (Pmode, addr, tlsreg)); 8764 8765 /* Add TOC symbol value to TLS pointer. */ 8766 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg)); 8767 8768 return dest; 8769} 8770 8771/* Passes the tls arg value for global dynamic and local dynamic 8772 emit_library_call_value in rs6000_legitimize_tls_address to 8773 rs6000_call_aix and rs6000_call_sysv. This is used to emit the 8774 marker relocs put on __tls_get_addr calls. */ 8775static rtx global_tlsarg; 8776 8777/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 8778 this (thread-local) address. */ 8779 8780static rtx 8781rs6000_legitimize_tls_address (rtx addr, enum tls_model model) 8782{ 8783 rtx dest, insn; 8784 8785 if (TARGET_XCOFF) 8786 return rs6000_legitimize_tls_address_aix (addr, model); 8787 8788 dest = gen_reg_rtx (Pmode); 8789 if (model == TLS_MODEL_LOCAL_EXEC 8790 && (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun))) 8791 { 8792 rtx tlsreg; 8793 8794 if (TARGET_64BIT) 8795 { 8796 tlsreg = gen_rtx_REG (Pmode, 13); 8797 insn = gen_tls_tprel_64 (dest, tlsreg, addr); 8798 } 8799 else 8800 { 8801 tlsreg = gen_rtx_REG (Pmode, 2); 8802 insn = gen_tls_tprel_32 (dest, tlsreg, addr); 8803 } 8804 emit_insn (insn); 8805 } 8806 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32) 8807 { 8808 rtx tlsreg, tmp; 8809 8810 tmp = gen_reg_rtx (Pmode); 8811 if (TARGET_64BIT) 8812 { 8813 tlsreg = gen_rtx_REG (Pmode, 13); 8814 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr); 8815 } 8816 else 8817 { 8818 tlsreg = gen_rtx_REG (Pmode, 2); 8819 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr); 8820 } 8821 emit_insn (insn); 8822 if (TARGET_64BIT) 8823 insn = gen_tls_tprel_lo_64 (dest, tmp, addr); 8824 else 8825 insn = gen_tls_tprel_lo_32 (dest, tmp, addr); 8826 emit_insn (insn); 8827 } 8828 else 8829 { 8830 rtx got, tga, tmp1, tmp2; 8831 8832 /* We currently use relocations like @got@tlsgd for tls, which 8833 means the linker will handle allocation of tls entries, placing 8834 them in the .got section. So use a pointer to the .got section, 8835 not one to secondary TOC sections used by 64-bit -mminimal-toc, 8836 or to secondary GOT sections used by 32-bit -fPIC. 
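   (When generating pc-relative code no GOT pointer is needed at all, so
   const0_rtx is used as a stand-in below.)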
*/ 8837 if (rs6000_pcrel_p (cfun)) 8838 got = const0_rtx; 8839 else if (TARGET_64BIT) 8840 got = gen_rtx_REG (Pmode, 2); 8841 else 8842 { 8843 if (flag_pic == 1) 8844 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 8845 else 8846 { 8847 rtx gsym = rs6000_got_sym (); 8848 got = gen_reg_rtx (Pmode); 8849 if (flag_pic == 0) 8850 rs6000_emit_move (got, gsym, Pmode); 8851 else 8852 { 8853 rtx mem, lab; 8854 8855 tmp1 = gen_reg_rtx (Pmode); 8856 tmp2 = gen_reg_rtx (Pmode); 8857 mem = gen_const_mem (Pmode, tmp1); 8858 lab = gen_label_rtx (); 8859 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab)); 8860 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); 8861 if (TARGET_LINK_STACK) 8862 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4))); 8863 emit_move_insn (tmp2, mem); 8864 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2)); 8865 set_unique_reg_note (last, REG_EQUAL, gsym); 8866 } 8867 } 8868 } 8869 8870 if (model == TLS_MODEL_GLOBAL_DYNAMIC) 8871 { 8872 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got), 8873 UNSPEC_TLSGD); 8874 tga = rs6000_tls_get_addr (); 8875 rtx argreg = gen_rtx_REG (Pmode, 3); 8876 emit_insn (gen_rtx_SET (argreg, arg)); 8877 global_tlsarg = arg; 8878 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode); 8879 global_tlsarg = NULL_RTX; 8880 8881 /* Make a note so that the result of this call can be CSEd. */ 8882 rtvec vec = gen_rtvec (1, copy_rtx (arg)); 8883 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR); 8884 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns); 8885 } 8886 else if (model == TLS_MODEL_LOCAL_DYNAMIC) 8887 { 8888 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD); 8889 tga = rs6000_tls_get_addr (); 8890 tmp1 = gen_reg_rtx (Pmode); 8891 rtx argreg = gen_rtx_REG (Pmode, 3); 8892 emit_insn (gen_rtx_SET (argreg, arg)); 8893 global_tlsarg = arg; 8894 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode); 8895 global_tlsarg = NULL_RTX; 8896 8897 /* Make a note so that the result of this call can be CSEd. */ 8898 rtvec vec = gen_rtvec (1, copy_rtx (arg)); 8899 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR); 8900 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns); 8901 8902 if (rs6000_tls_size == 16 || rs6000_pcrel_p (cfun)) 8903 { 8904 if (TARGET_64BIT) 8905 insn = gen_tls_dtprel_64 (dest, tmp1, addr); 8906 else 8907 insn = gen_tls_dtprel_32 (dest, tmp1, addr); 8908 } 8909 else if (rs6000_tls_size == 32) 8910 { 8911 tmp2 = gen_reg_rtx (Pmode); 8912 if (TARGET_64BIT) 8913 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr); 8914 else 8915 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr); 8916 emit_insn (insn); 8917 if (TARGET_64BIT) 8918 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr); 8919 else 8920 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr); 8921 } 8922 else 8923 { 8924 tmp2 = gen_reg_rtx (Pmode); 8925 if (TARGET_64BIT) 8926 insn = gen_tls_got_dtprel_64 (tmp2, got, addr); 8927 else 8928 insn = gen_tls_got_dtprel_32 (tmp2, got, addr); 8929 emit_insn (insn); 8930 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1)); 8931 } 8932 emit_insn (insn); 8933 } 8934 else 8935 { 8936 /* IE, or 64-bit offset LE. 
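   That is, initial-exec, or local-exec with -mtls-size=64, where the
   thread-pointer offset is likewise loaded from the GOT.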
*/ 8937 tmp2 = gen_reg_rtx (Pmode); 8938 if (TARGET_64BIT) 8939 insn = gen_tls_got_tprel_64 (tmp2, got, addr); 8940 else 8941 insn = gen_tls_got_tprel_32 (tmp2, got, addr); 8942 emit_insn (insn); 8943 if (rs6000_pcrel_p (cfun)) 8944 { 8945 if (TARGET_64BIT) 8946 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr); 8947 else 8948 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr); 8949 } 8950 else if (TARGET_64BIT) 8951 insn = gen_tls_tls_64 (dest, tmp2, addr); 8952 else 8953 insn = gen_tls_tls_32 (dest, tmp2, addr); 8954 emit_insn (insn); 8955 } 8956 } 8957 8958 return dest; 8959} 8960 8961/* Only create the global variable for the stack protect guard if we are using 8962 the global flavor of that guard. */ 8963static tree 8964rs6000_init_stack_protect_guard (void) 8965{ 8966 if (rs6000_stack_protector_guard == SSP_GLOBAL) 8967 return default_stack_protect_guard (); 8968 8969 return NULL_TREE; 8970} 8971 8972/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ 8973 8974static bool 8975rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) 8976{ 8977 if (GET_CODE (x) == HIGH 8978 && GET_CODE (XEXP (x, 0)) == UNSPEC) 8979 return true; 8980 8981 /* A TLS symbol in the TOC cannot contain a sum. */ 8982 if (GET_CODE (x) == CONST 8983 && GET_CODE (XEXP (x, 0)) == PLUS 8984 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0)) 8985 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0) 8986 return true; 8987 8988 /* Do not place an ELF TLS symbol in the constant pool. */ 8989 return TARGET_ELF && tls_referenced_p (x); 8990} 8991 8992/* Return true iff the given SYMBOL_REF refers to a constant pool entry 8993 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF 8994 can be addressed relative to the toc pointer. */ 8995 8996static bool 8997use_toc_relative_ref (rtx sym, machine_mode mode) 8998{ 8999 return ((constant_pool_expr_p (sym) 9000 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), 9001 get_pool_mode (sym))) 9002 || (TARGET_CMODEL == CMODEL_MEDIUM 9003 && SYMBOL_REF_LOCAL_P (sym) 9004 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT)); 9005} 9006 9007/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression 9008 that is a valid memory address for an instruction. 9009 The MODE argument is the machine mode for the MEM expression 9010 that wants to use this address. 9011 9012 On the RS/6000, there are four valid address: a SYMBOL_REF that 9013 refers to a constant pool entry of an address (or the sum of it 9014 plus a constant), a short (16-bit signed) constant plus a register, 9015 the sum of two registers, or a register indirect, possibly with an 9016 auto-increment. For DFmode, DDmode and DImode with a constant plus 9017 register, we must ensure that both words are addressable or PowerPC64 9018 with offset word aligned. 9019 9020 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs, 9021 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used 9022 because adjacent memory cells are accessed by adding word-sized offsets 9023 during assembly output. */ 9024static bool 9025rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict) 9026{ 9027 bool reg_offset_p = reg_offset_addressing_ok_p (mode); 9028 bool quad_offset_p = mode_supports_dq_form (mode); 9029 9030 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) 9031 return 0; 9032 9033 /* Handle unaligned altivec lvx/stvx type addresses. 
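   These appear as (and address -16) because lvx/stvx ignore the low four
   address bits; the AND is stripped and the inner address is checked
   below.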
*/ 9034 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) 9035 && GET_CODE (x) == AND 9036 && CONST_INT_P (XEXP (x, 1)) 9037 && INTVAL (XEXP (x, 1)) == -16) 9038 { 9039 x = XEXP (x, 0); 9040 return (legitimate_indirect_address_p (x, reg_ok_strict) 9041 || legitimate_indexed_address_p (x, reg_ok_strict) 9042 || virtual_stack_registers_memory_p (x)); 9043 } 9044 9045 if (legitimate_indirect_address_p (x, reg_ok_strict)) 9046 return 1; 9047 if (TARGET_UPDATE 9048 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC) 9049 && mode_supports_pre_incdec_p (mode) 9050 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) 9051 return 1; 9052 9053 /* Handle prefixed addresses (PC-relative or 34-bit offset). */ 9054 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT)) 9055 return 1; 9056 9057 /* Handle restricted vector d-form offsets in ISA 3.0. */ 9058 if (quad_offset_p) 9059 { 9060 if (quad_address_p (x, mode, reg_ok_strict)) 9061 return 1; 9062 } 9063 else if (virtual_stack_registers_memory_p (x)) 9064 return 1; 9065 9066 else if (reg_offset_p) 9067 { 9068 if (legitimate_small_data_p (mode, x)) 9069 return 1; 9070 if (legitimate_constant_pool_address_p (x, mode, 9071 reg_ok_strict || lra_in_progress)) 9072 return 1; 9073 } 9074 9075 /* For TImode, if we have TImode in VSX registers, only allow register 9076 indirect addresses. This will allow the values to go in either GPRs 9077 or VSX registers without reloading. The vector types would tend to 9078 go into VSX registers, so we allow REG+REG, while TImode seems 9079 somewhat split, in that some uses are GPR based, and some VSX based. */ 9080 /* FIXME: We could loosen this by changing the following to 9081 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX) 9082 but currently we cannot allow REG+REG addressing for TImode. See 9083 PR72827 for complete details on how this ends up hoodwinking DSE. */ 9084 if (mode == TImode && TARGET_VSX) 9085 return 0; 9086 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */ 9087 if (! reg_ok_strict 9088 && reg_offset_p 9089 && GET_CODE (x) == PLUS 9090 && REG_P (XEXP (x, 0)) 9091 && (XEXP (x, 0) == virtual_stack_vars_rtx 9092 || XEXP (x, 0) == arg_pointer_rtx) 9093 && CONST_INT_P (XEXP (x, 1))) 9094 return 1; 9095 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false)) 9096 return 1; 9097 if (!FLOAT128_2REG_P (mode) 9098 && (TARGET_HARD_FLOAT 9099 || TARGET_POWERPC64 9100 || (mode != DFmode && mode != DDmode)) 9101 && (TARGET_POWERPC64 || mode != DImode) 9102 && (mode != TImode || VECTOR_MEM_VSX_P (TImode)) 9103 && mode != PTImode 9104 && !avoiding_indexed_address_p (mode) 9105 && legitimate_indexed_address_p (x, reg_ok_strict)) 9106 return 1; 9107 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY 9108 && mode_supports_pre_modify_p (mode) 9109 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict) 9110 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1), 9111 reg_ok_strict, false) 9112 || (!avoiding_indexed_address_p (mode) 9113 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict))) 9114 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) 9115 { 9116 /* There is no prefixed version of the load/store with update. */ 9117 rtx addr = XEXP (x, 1); 9118 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT); 9119 } 9120 if (reg_offset_p && !quad_offset_p 9121 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict)) 9122 return 1; 9123 return 0; 9124} 9125 9126/* Debug version of rs6000_legitimate_address_p. 
*/ 9127static bool 9128rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, 9129 bool reg_ok_strict) 9130{ 9131 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); 9132 fprintf (stderr, 9133 "\nrs6000_legitimate_address_p: return = %s, mode = %s, " 9134 "strict = %d, reload = %s, code = %s\n", 9135 ret ? "true" : "false", 9136 GET_MODE_NAME (mode), 9137 reg_ok_strict, 9138 (reload_completed ? "after" : "before"), 9139 GET_RTX_NAME (GET_CODE (x))); 9140 debug_rtx (x); 9141 9142 return ret; 9143} 9144 9145/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */ 9146 9147static bool 9148rs6000_mode_dependent_address_p (const_rtx addr, 9149 addr_space_t as ATTRIBUTE_UNUSED) 9150{ 9151 return rs6000_mode_dependent_address_ptr (addr); 9152} 9153 9154/* Go to LABEL if ADDR (a legitimate address expression) 9155 has an effect that depends on the machine mode it is used for. 9156 9157 On the RS/6000 this is true of all integral offsets (since AltiVec 9158 and VSX modes don't allow them) or is a pre-increment or decrement. 9159 9160 ??? Except that due to conceptual problems in offsettable_address_p 9161 we can't really report the problems of integral offsets. So leave 9162 this assuming that the adjustable offset must be valid for the 9163 sub-words of a TFmode operand, which is what we had before. */ 9164 9165static bool 9166rs6000_mode_dependent_address (const_rtx addr) 9167{ 9168 switch (GET_CODE (addr)) 9169 { 9170 case PLUS: 9171 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx 9172 is considered a legitimate address before reload, so there 9173 are no offset restrictions in that case. Note that this 9174 condition is safe in strict mode because any address involving 9175 virtual_stack_vars_rtx or arg_pointer_rtx would already have 9176 been rejected as illegitimate. */ 9177 if (XEXP (addr, 0) != virtual_stack_vars_rtx 9178 && XEXP (addr, 0) != arg_pointer_rtx 9179 && CONST_INT_P (XEXP (addr, 1))) 9180 { 9181 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); 9182 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12; 9183 if (TARGET_PREFIXED) 9184 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra); 9185 else 9186 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra); 9187 } 9188 break; 9189 9190 case LO_SUM: 9191 /* Anything in the constant pool is sufficiently aligned that 9192 all bytes have the same high part address. */ 9193 return !legitimate_constant_pool_address_p (addr, QImode, false); 9194 9195 /* Auto-increment cases are now treated generically in recog.c. */ 9196 case PRE_MODIFY: 9197 return TARGET_UPDATE; 9198 9199 /* AND is only allowed in Altivec loads. */ 9200 case AND: 9201 return true; 9202 9203 default: 9204 break; 9205 } 9206 9207 return false; 9208} 9209 9210/* Debug version of rs6000_mode_dependent_address. */ 9211static bool 9212rs6000_debug_mode_dependent_address (const_rtx addr) 9213{ 9214 bool ret = rs6000_mode_dependent_address (addr); 9215 9216 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n", 9217 ret ? "true" : "false"); 9218 debug_rtx (addr); 9219 9220 return ret; 9221} 9222 9223/* Implement FIND_BASE_TERM. */ 9224 9225rtx 9226rs6000_find_base_term (rtx op) 9227{ 9228 rtx base; 9229 9230 base = op; 9231 if (GET_CODE (base) == CONST) 9232 base = XEXP (base, 0); 9233 if (GET_CODE (base) == PLUS) 9234 base = XEXP (base, 0); 9235 if (GET_CODE (base) == UNSPEC) 9236 switch (XINT (base, 1)) 9237 { 9238 case UNSPEC_TOCREL: 9239 case UNSPEC_MACHOPIC_OFFSET: 9240 /* OP represents SYM [+ OFFSET] - ANCHOR. 
SYM is the base term 9241 for aliasing purposes. */ 9242 return XVECEXP (base, 0, 0); 9243 } 9244 9245 return op; 9246} 9247 9248/* More elaborate version of recog's offsettable_memref_p predicate 9249 that works around the ??? note of rs6000_mode_dependent_address. 9250 In particular it accepts 9251 9252 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8]))) 9253 9254 in 32-bit mode, that the recog predicate rejects. */ 9255 9256static bool 9257rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict) 9258{ 9259 bool worst_case; 9260 9261 if (!MEM_P (op)) 9262 return false; 9263 9264 /* First mimic offsettable_memref_p. */ 9265 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0))) 9266 return true; 9267 9268 /* offsettable_address_p invokes rs6000_mode_dependent_address, but 9269 the latter predicate knows nothing about the mode of the memory 9270 reference and, therefore, assumes that it is the largest supported 9271 mode (TFmode). As a consequence, legitimate offsettable memory 9272 references are rejected. rs6000_legitimate_offset_address_p contains 9273 the correct logic for the PLUS case of rs6000_mode_dependent_address, 9274 at least with a little bit of help here given that we know the 9275 actual registers used. */ 9276 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT) 9277 || GET_MODE_SIZE (reg_mode) == 4); 9278 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0), 9279 strict, worst_case); 9280} 9281 9282/* Determine the reassociation width to be used in reassociate_bb. 9283 This takes into account how many parallel operations we 9284 can actually do of a given type, and also the latency. 9285 P8: 9286 int add/sub 6/cycle 9287 mul 2/cycle 9288 vect add/sub/mul 2/cycle 9289 fp add/sub/mul 2/cycle 9290 dfp 1/cycle 9291*/ 9292 9293static int 9294rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, 9295 machine_mode mode) 9296{ 9297 switch (rs6000_tune) 9298 { 9299 case PROCESSOR_POWER8: 9300 case PROCESSOR_POWER9: 9301 case PROCESSOR_POWER10: 9302 if (DECIMAL_FLOAT_MODE_P (mode)) 9303 return 1; 9304 if (VECTOR_MODE_P (mode)) 9305 return 4; 9306 if (INTEGRAL_MODE_P (mode)) 9307 return 1; 9308 if (FLOAT_MODE_P (mode)) 9309 return 4; 9310 break; 9311 default: 9312 break; 9313 } 9314 return 1; 9315} 9316 9317/* Change register usage conditional on target flags. */ 9318static void 9319rs6000_conditional_register_usage (void) 9320{ 9321 int i; 9322 9323 if (TARGET_DEBUG_TARGET) 9324 fprintf (stderr, "rs6000_conditional_register_usage called\n"); 9325 9326 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */ 9327 if (TARGET_64BIT) 9328 fixed_regs[13] = call_used_regs[13] = 1; 9329 9330 /* Conditionally disable FPRs. */ 9331 if (TARGET_SOFT_FLOAT) 9332 for (i = 32; i < 64; i++) 9333 fixed_regs[i] = call_used_regs[i] = 1; 9334 9335 /* The TOC register is not killed across calls in a way that is 9336 visible to the compiler. 
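     For AIX and ELFv2 we therefore leave GPR2 out of call_used_regs below;
     in effect the call sequence itself (the TOC save slot and the restore
     after the call) is what keeps it valid across calls.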
*/ 9337 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 9338 call_used_regs[2] = 0; 9339 9340 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2) 9341 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 9342 9343 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1) 9344 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] 9345 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 9346 9347 if (DEFAULT_ABI == ABI_DARWIN && flag_pic) 9348 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] 9349 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 9350 9351 if (TARGET_TOC && TARGET_MINIMAL_TOC) 9352 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1; 9353 9354 if (!TARGET_ALTIVEC && !TARGET_VSX) 9355 { 9356 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) 9357 fixed_regs[i] = call_used_regs[i] = 1; 9358 call_used_regs[VRSAVE_REGNO] = 1; 9359 } 9360 9361 if (TARGET_ALTIVEC || TARGET_VSX) 9362 global_regs[VSCR_REGNO] = 1; 9363 9364 if (TARGET_ALTIVEC_ABI) 9365 { 9366 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i) 9367 call_used_regs[i] = 1; 9368 9369 /* AIX reserves VR20:31 in non-extended ABI mode. */ 9370 if (TARGET_XCOFF && !rs6000_aix_extabi) 9371 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i) 9372 fixed_regs[i] = call_used_regs[i] = 1; 9373 } 9374} 9375 9376 9377/* Output insns to set DEST equal to the constant SOURCE as a series of 9378 lis, ori and shl instructions and return TRUE. */ 9379 9380bool 9381rs6000_emit_set_const (rtx dest, rtx source) 9382{ 9383 machine_mode mode = GET_MODE (dest); 9384 rtx temp, set; 9385 rtx_insn *insn; 9386 HOST_WIDE_INT c; 9387 9388 gcc_checking_assert (CONST_INT_P (source)); 9389 c = INTVAL (source); 9390 switch (mode) 9391 { 9392 case E_QImode: 9393 case E_HImode: 9394 emit_insn (gen_rtx_SET (dest, source)); 9395 return true; 9396 9397 case E_SImode: 9398 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode); 9399 9400 emit_insn (gen_rtx_SET (copy_rtx (temp), 9401 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff))); 9402 emit_insn (gen_rtx_SET (dest, 9403 gen_rtx_IOR (SImode, copy_rtx (temp), 9404 GEN_INT (c & 0xffff)))); 9405 break; 9406 9407 case E_DImode: 9408 if (!TARGET_POWERPC64) 9409 { 9410 rtx hi, lo; 9411 9412 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0, 9413 DImode); 9414 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, 9415 DImode); 9416 emit_move_insn (hi, GEN_INT (c >> 32)); 9417 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000; 9418 emit_move_insn (lo, GEN_INT (c)); 9419 } 9420 else 9421 rs6000_emit_set_long_const (dest, c); 9422 break; 9423 9424 default: 9425 gcc_unreachable (); 9426 } 9427 9428 insn = get_last_insn (); 9429 set = single_set (insn); 9430 if (! CONSTANT_P (SET_SRC (set))) 9431 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c)); 9432 9433 return true; 9434} 9435 9436/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. 9437 Output insns to set DEST equal to the constant C as a series of 9438 lis, ori and shl instructions. */ 9439 9440static void 9441rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) 9442{ 9443 rtx temp; 9444 HOST_WIDE_INT ud1, ud2, ud3, ud4; 9445 9446 ud1 = c & 0xffff; 9447 c = c >> 16; 9448 ud2 = c & 0xffff; 9449 c = c >> 16; 9450 ud3 = c & 0xffff; 9451 c = c >> 16; 9452 ud4 = c & 0xffff; 9453 9454 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) 9455 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! 
(ud1 & 0x8000))) 9456 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000)); 9457 9458 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) 9459 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) 9460 { 9461 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 9462 9463 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, 9464 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); 9465 if (ud1 != 0) 9466 emit_move_insn (dest, 9467 gen_rtx_IOR (DImode, copy_rtx (temp), 9468 GEN_INT (ud1))); 9469 } 9470 else if (ud3 == 0 && ud4 == 0) 9471 { 9472 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 9473 9474 gcc_assert (ud2 & 0x8000); 9475 emit_move_insn (copy_rtx (temp), 9476 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000)); 9477 if (ud1 != 0) 9478 emit_move_insn (copy_rtx (temp), 9479 gen_rtx_IOR (DImode, copy_rtx (temp), 9480 GEN_INT (ud1))); 9481 emit_move_insn (dest, 9482 gen_rtx_ZERO_EXTEND (DImode, 9483 gen_lowpart (SImode, 9484 copy_rtx (temp)))); 9485 } 9486 else if (ud1 == ud3 && ud2 == ud4) 9487 { 9488 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 9489 HOST_WIDE_INT num = (ud2 << 16) | ud1; 9490 rs6000_emit_set_long_const (temp, (num ^ 0x80000000) - 0x80000000); 9491 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); 9492 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); 9493 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); 9494 } 9495 else if ((ud4 == 0xffff && (ud3 & 0x8000)) 9496 || (ud4 == 0 && ! (ud3 & 0x8000))) 9497 { 9498 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 9499 9500 emit_move_insn (copy_rtx (temp), 9501 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000)); 9502 if (ud2 != 0) 9503 emit_move_insn (copy_rtx (temp), 9504 gen_rtx_IOR (DImode, copy_rtx (temp), 9505 GEN_INT (ud2))); 9506 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, 9507 gen_rtx_ASHIFT (DImode, copy_rtx (temp), 9508 GEN_INT (16))); 9509 if (ud1 != 0) 9510 emit_move_insn (dest, 9511 gen_rtx_IOR (DImode, copy_rtx (temp), 9512 GEN_INT (ud1))); 9513 } 9514 else 9515 { 9516 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); 9517 9518 emit_move_insn (copy_rtx (temp), 9519 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000)); 9520 if (ud3 != 0) 9521 emit_move_insn (copy_rtx (temp), 9522 gen_rtx_IOR (DImode, copy_rtx (temp), 9523 GEN_INT (ud3))); 9524 9525 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest, 9526 gen_rtx_ASHIFT (DImode, copy_rtx (temp), 9527 GEN_INT (32))); 9528 if (ud2 != 0) 9529 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest, 9530 gen_rtx_IOR (DImode, copy_rtx (temp), 9531 GEN_INT (ud2 << 16))); 9532 if (ud1 != 0) 9533 emit_move_insn (dest, 9534 gen_rtx_IOR (DImode, copy_rtx (temp), 9535 GEN_INT (ud1))); 9536 } 9537} 9538 9539/* Helper for the following. Get rid of [r+r] memory refs 9540 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */ 9541 9542static void 9543rs6000_eliminate_indexed_memrefs (rtx operands[2]) 9544{ 9545 if (MEM_P (operands[0]) 9546 && !REG_P (XEXP (operands[0], 0)) 9547 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), 9548 GET_MODE (operands[0]), false)) 9549 operands[0] 9550 = replace_equiv_address (operands[0], 9551 copy_addr_to_reg (XEXP (operands[0], 0))); 9552 9553 if (MEM_P (operands[1]) 9554 && !REG_P (XEXP (operands[1], 0)) 9555 && ! 
legitimate_constant_pool_address_p (XEXP (operands[1], 0),
9556					    GET_MODE (operands[1]), false))
9557    operands[1]
9558      = replace_equiv_address (operands[1],
9559			       copy_addr_to_reg (XEXP (operands[1], 0)));
9560}
9561
9562/* Generate a vector of constants to permute MODE for a little-endian
9563   storage operation by swapping the two halves of a vector.  */
9564static rtvec
9565rs6000_const_vec (machine_mode mode)
9566{
9567  int i, subparts;
9568  rtvec v;
9569
9570  switch (mode)
9571    {
9572    case E_V1TImode:
9573      subparts = 1;
9574      break;
9575    case E_V2DFmode:
9576    case E_V2DImode:
9577      subparts = 2;
9578      break;
9579    case E_V4SFmode:
9580    case E_V4SImode:
9581      subparts = 4;
9582      break;
9583    case E_V8HImode:
9584      subparts = 8;
9585      break;
9586    case E_V16QImode:
9587      subparts = 16;
9588      break;
9589    default:
9590      gcc_unreachable();
9591    }
9592
9593  v = rtvec_alloc (subparts);
9594
9595  for (i = 0; i < subparts / 2; ++i)
9596    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
9597  for (i = subparts / 2; i < subparts; ++i)
9598    RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
9599
9600  return v;
9601}
9602
9603/* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
9604   store operation.  */
9605void
9606rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
9607{
9608  gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
9609  gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
9610
9611  /* Scalar permutations are easier to express in integer modes rather than
9612     floating-point modes, so cast them here.  We use V1TImode instead
9613     of TImode to ensure that the values don't go through GPRs.  */
9614  if (FLOAT128_VECTOR_P (mode))
9615    {
9616      dest = gen_lowpart (V1TImode, dest);
9617      source = gen_lowpart (V1TImode, source);
9618      mode = V1TImode;
9619    }
9620
9621  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
9622     scalar.  */
9623  if (mode == TImode || mode == V1TImode)
9624    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
9625						  GEN_INT (64))));
9626  else
9627    {
9628      rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
9629      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
9630    }
9631}
9632
9633/* Emit a little-endian load from vector memory location SOURCE to VSX
9634   register DEST in mode MODE.  The load is done with two permuting
9635   insns that represent an lxvd2x and xxpermdi.  */
9636void
9637rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
9638{
9639  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9640     V1TImode).  */
9641  if (mode == TImode || mode == V1TImode)
9642    {
9643      mode = V2DImode;
9644      dest = gen_lowpart (V2DImode, dest);
9645      source = adjust_address (source, V2DImode, 0);
9646    }
9647
9648  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
9649  rs6000_emit_le_vsx_permute (tmp, source, mode);
9650  rs6000_emit_le_vsx_permute (dest, tmp, mode);
9651}
9652
9653/* Emit a little-endian store to vector memory location DEST from VSX
9654   register SOURCE in mode MODE.  The store is done with two permuting
9655   insns that represent an xxpermdi and an stxvd2x.  */
9656void
9657rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
9658{
9659  /* This should never be called during or after LRA, because it does
9660     not re-permute the source register.  It is intended only for use
9661     during expand.  */
9662  gcc_assert (!lra_in_progress && !reload_completed);
9663
9664  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
9665     V1TImode).  */
9666  if (mode == TImode || mode == V1TImode)
9667    {
9668      mode = V2DImode;
9669      dest = adjust_address (dest, V2DImode, 0);
9670      source = gen_lowpart (V2DImode, source);
9671    }
9672
9673  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
9674  rs6000_emit_le_vsx_permute (tmp, source, mode);
9675  rs6000_emit_le_vsx_permute (dest, tmp, mode);
9676}
9677
9678/* Emit a sequence representing a little-endian VSX load or store,
9679   moving data from SOURCE to DEST in mode MODE.  This is done
9680   separately from rs6000_emit_move to ensure it is called only
9681   during expand.  LE VSX loads and stores introduced later are
9682   handled with a split.  The expand-time RTL generation allows
9683   us to optimize away redundant pairs of register-permutes.  */
9684void
9685rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
9686{
9687  gcc_assert (!BYTES_BIG_ENDIAN
9688	      && VECTOR_MEM_VSX_P (mode)
9689	      && !TARGET_P9_VECTOR
9690	      && !gpr_or_gpr_p (dest, source)
9691	      && (MEM_P (source) ^ MEM_P (dest)));
9692
9693  if (MEM_P (source))
9694    {
9695      gcc_assert (REG_P (dest) || SUBREG_P (dest));
9696      rs6000_emit_le_vsx_load (dest, source, mode);
9697    }
9698  else
9699    {
9700      if (!REG_P (source))
9701	source = force_reg (mode, source);
9702      rs6000_emit_le_vsx_store (dest, source, mode);
9703    }
9704}
9705
9706/* Return whether an SFmode or SImode move can be done without converting one
9707   mode to another.  This arises when we have:
9708
9709	(SUBREG:SF (REG:SI ...))
9710	(SUBREG:SI (REG:SF ...))
9711
9712   and one of the values is in a floating point/vector register, where SFmode
9713   scalars are stored in DFmode format.  */
9714
9715bool
9716valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
9717{
9718  if (TARGET_ALLOW_SF_SUBREG)
9719    return true;
9720
9721  if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
9722    return true;
9723
9724  if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
9725    return true;
9726
9727  /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))).  */
9728  if (SUBREG_P (dest))
9729    {
9730      rtx dest_subreg = SUBREG_REG (dest);
9731      rtx src_subreg = SUBREG_REG (src);
9732      return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
9733    }
9734
9735  return false;
9736}
9737
9738
9739/* Helper function to change moves with:
9740
9741	(SUBREG:SF (REG:SI)) and
9742	(SUBREG:SI (REG:SF))
9743
9744   into separate UNSPEC insns.  In the PowerPC architecture, scalar SFmode
9745   values are stored as DFmode values in the VSX registers.  We need to convert
9746   the bits before we can use a direct move or operate on the bits in the
9747   vector register as an integer type.
9748
9749   Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))).
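   The conversion itself is done by the movsi_from_sf and movsf_from_si
   patterns used below, which convert between the DFmode storage format and
   the SImode bit pattern rather than simply reinterpreting the register
   bits.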
*/
9750
9751static bool
9752rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
9753{
9754  if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
9755      && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
9756      && SUBREG_P (source) && sf_subreg_operand (source, mode))
9757    {
9758      rtx inner_source = SUBREG_REG (source);
9759      machine_mode inner_mode = GET_MODE (inner_source);
9760
9761      if (mode == SImode && inner_mode == SFmode)
9762	{
9763	  emit_insn (gen_movsi_from_sf (dest, inner_source));
9764	  return true;
9765	}
9766
9767      if (mode == SFmode && inner_mode == SImode)
9768	{
9769	  emit_insn (gen_movsf_from_si (dest, inner_source));
9770	  return true;
9771	}
9772    }
9773
9774  return false;
9775}
9776
9777/* Emit a move from SOURCE to DEST in mode MODE.  */
9778void
9779rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
9780{
9781  rtx operands[2];
9782  operands[0] = dest;
9783  operands[1] = source;
9784
9785  if (TARGET_DEBUG_ADDR)
9786    {
9787      fprintf (stderr,
9788	       "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
9789	       "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
9790	       GET_MODE_NAME (mode),
9791	       lra_in_progress,
9792	       reload_completed,
9793	       can_create_pseudo_p ());
9794      debug_rtx (dest);
9795      fprintf (stderr, "source:\n");
9796      debug_rtx (source);
9797    }
9798
9799  /* Check that we get CONST_WIDE_INT only when we should.  */
9800  if (CONST_WIDE_INT_P (operands[1])
9801      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
9802    gcc_unreachable ();
9803
9804#ifdef HAVE_AS_GNU_ATTRIBUTE
9805  /* If we use a long double type, set the flags in .gnu_attribute that say
9806     what the long double type is.  This is to allow the linker's warning
9807     message for the wrong long double to be useful, even if the function does
9808     not do a call (for example, doing a 128-bit add on power9 if the long
9809     double type is IEEE 128-bit).  Do not set this if __ibm128 or __float128
9810     are used but they are not the default long double type.  */
9811  if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
9812    {
9813      if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
9814	rs6000_passes_float = rs6000_passes_long_double = true;
9815
9816      else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
9817	rs6000_passes_float = rs6000_passes_long_double = true;
9818    }
9819#endif
9820
9821  /* See if we need to special case SImode/SFmode SUBREG moves.  */
9822  if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
9823      && rs6000_emit_move_si_sf_subreg (dest, source, mode))
9824    return;
9825
9826  /* Check if GCC is setting up a block move that will end up using FP
9827     registers as temporaries.  We must make sure this is acceptable.  */
9828  if (MEM_P (operands[0])
9829      && MEM_P (operands[1])
9830      && mode == DImode
9831      && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
9832	  || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
9833      && ! (rs6000_slow_unaligned_access (SImode,
9834					  (MEM_ALIGN (operands[0]) > 32
9835					   ? 32 : MEM_ALIGN (operands[0])))
9836	    || rs6000_slow_unaligned_access (SImode,
9837					     (MEM_ALIGN (operands[1]) > 32
9838					      ? 32 : MEM_ALIGN (operands[1]))))
9839      && ! MEM_VOLATILE_P (operands [0])
9840      && ! 
MEM_VOLATILE_P (operands [1])) 9841 { 9842 emit_move_insn (adjust_address (operands[0], SImode, 0), 9843 adjust_address (operands[1], SImode, 0)); 9844 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), 9845 adjust_address (copy_rtx (operands[1]), SImode, 4)); 9846 return; 9847 } 9848 9849 if (can_create_pseudo_p () && MEM_P (operands[0]) 9850 && !gpc_reg_operand (operands[1], mode)) 9851 operands[1] = force_reg (mode, operands[1]); 9852 9853 /* Recognize the case where operand[1] is a reference to thread-local 9854 data and load its address to a register. */ 9855 if (tls_referenced_p (operands[1])) 9856 { 9857 enum tls_model model; 9858 rtx tmp = operands[1]; 9859 rtx addend = NULL; 9860 9861 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) 9862 { 9863 addend = XEXP (XEXP (tmp, 0), 1); 9864 tmp = XEXP (XEXP (tmp, 0), 0); 9865 } 9866 9867 gcc_assert (SYMBOL_REF_P (tmp)); 9868 model = SYMBOL_REF_TLS_MODEL (tmp); 9869 gcc_assert (model != 0); 9870 9871 tmp = rs6000_legitimize_tls_address (tmp, model); 9872 if (addend) 9873 { 9874 tmp = gen_rtx_PLUS (mode, tmp, addend); 9875 tmp = force_operand (tmp, operands[0]); 9876 } 9877 operands[1] = tmp; 9878 } 9879 9880 /* 128-bit constant floating-point values on Darwin should really be loaded 9881 as two parts. However, this premature splitting is a problem when DFmode 9882 values can go into Altivec registers. */ 9883 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode) 9884 && !reg_addr[DFmode].scalar_in_vmx_p) 9885 { 9886 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), 9887 simplify_gen_subreg (DFmode, operands[1], mode, 0), 9888 DFmode); 9889 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 9890 GET_MODE_SIZE (DFmode)), 9891 simplify_gen_subreg (DFmode, operands[1], mode, 9892 GET_MODE_SIZE (DFmode)), 9893 DFmode); 9894 return; 9895 } 9896 9897 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), 9898 p1:SD) if p1 is not of floating point class and p0 is spilled as 9899 we can have no analogous movsd_store for this. */ 9900 if (lra_in_progress && mode == DDmode 9901 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0]) 9902 && reg_preferred_class (REGNO (operands[0])) == NO_REGS 9903 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])) 9904 && GET_MODE (SUBREG_REG (operands[1])) == SDmode) 9905 { 9906 enum reg_class cl; 9907 int regno = REGNO (SUBREG_REG (operands[1])); 9908 9909 if (!HARD_REGISTER_NUM_P (regno)) 9910 { 9911 cl = reg_preferred_class (regno); 9912 regno = reg_renumber[regno]; 9913 if (regno < 0) 9914 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; 9915 } 9916 if (regno >= 0 && ! 
FP_REGNO_P (regno)) 9917 { 9918 mode = SDmode; 9919 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); 9920 operands[1] = SUBREG_REG (operands[1]); 9921 } 9922 } 9923 if (lra_in_progress 9924 && mode == SDmode 9925 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0]) 9926 && reg_preferred_class (REGNO (operands[0])) == NO_REGS 9927 && (REG_P (operands[1]) 9928 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))))) 9929 { 9930 int regno = reg_or_subregno (operands[1]); 9931 enum reg_class cl; 9932 9933 if (!HARD_REGISTER_NUM_P (regno)) 9934 { 9935 cl = reg_preferred_class (regno); 9936 gcc_assert (cl != NO_REGS); 9937 regno = reg_renumber[regno]; 9938 if (regno < 0) 9939 regno = ira_class_hard_regs[cl][0]; 9940 } 9941 if (FP_REGNO_P (regno)) 9942 { 9943 if (GET_MODE (operands[0]) != DDmode) 9944 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); 9945 emit_insn (gen_movsd_store (operands[0], operands[1])); 9946 } 9947 else if (INT_REGNO_P (regno)) 9948 emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); 9949 else 9950 gcc_unreachable(); 9951 return; 9952 } 9953 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD 9954 p:DD)) if p0 is not of floating point class and p1 is spilled as 9955 we can have no analogous movsd_load for this. */ 9956 if (lra_in_progress && mode == DDmode 9957 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0])) 9958 && GET_MODE (SUBREG_REG (operands[0])) == SDmode 9959 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1]) 9960 && reg_preferred_class (REGNO (operands[1])) == NO_REGS) 9961 { 9962 enum reg_class cl; 9963 int regno = REGNO (SUBREG_REG (operands[0])); 9964 9965 if (!HARD_REGISTER_NUM_P (regno)) 9966 { 9967 cl = reg_preferred_class (regno); 9968 regno = reg_renumber[regno]; 9969 if (regno < 0) 9970 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; 9971 } 9972 if (regno >= 0 && ! FP_REGNO_P (regno)) 9973 { 9974 mode = SDmode; 9975 operands[0] = SUBREG_REG (operands[0]); 9976 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); 9977 } 9978 } 9979 if (lra_in_progress 9980 && mode == SDmode 9981 && (REG_P (operands[0]) 9982 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0])))) 9983 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1]) 9984 && reg_preferred_class (REGNO (operands[1])) == NO_REGS) 9985 { 9986 int regno = reg_or_subregno (operands[0]); 9987 enum reg_class cl; 9988 9989 if (!HARD_REGISTER_NUM_P (regno)) 9990 { 9991 cl = reg_preferred_class (regno); 9992 gcc_assert (cl != NO_REGS); 9993 regno = reg_renumber[regno]; 9994 if (regno < 0) 9995 regno = ira_class_hard_regs[cl][0]; 9996 } 9997 if (FP_REGNO_P (regno)) 9998 { 9999 if (GET_MODE (operands[1]) != DDmode) 10000 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); 10001 emit_insn (gen_movsd_load (operands[0], operands[1])); 10002 } 10003 else if (INT_REGNO_P (regno)) 10004 emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); 10005 else 10006 gcc_unreachable(); 10007 return; 10008 } 10009 10010 /* FIXME: In the long term, this switch statement should go away 10011 and be replaced by a sequence of tests based on things like 10012 mode == Pmode. 
*/ 10013 switch (mode) 10014 { 10015 case E_HImode: 10016 case E_QImode: 10017 if (CONSTANT_P (operands[1]) 10018 && !CONST_INT_P (operands[1])) 10019 operands[1] = force_const_mem (mode, operands[1]); 10020 break; 10021 10022 case E_TFmode: 10023 case E_TDmode: 10024 case E_IFmode: 10025 case E_KFmode: 10026 if (FLOAT128_2REG_P (mode)) 10027 rs6000_eliminate_indexed_memrefs (operands); 10028 /* fall through */ 10029 10030 case E_DFmode: 10031 case E_DDmode: 10032 case E_SFmode: 10033 case E_SDmode: 10034 if (CONSTANT_P (operands[1]) 10035 && ! easy_fp_constant (operands[1], mode)) 10036 operands[1] = force_const_mem (mode, operands[1]); 10037 break; 10038 10039 case E_V16QImode: 10040 case E_V8HImode: 10041 case E_V4SFmode: 10042 case E_V4SImode: 10043 case E_V2DFmode: 10044 case E_V2DImode: 10045 case E_V1TImode: 10046 if (CONSTANT_P (operands[1]) 10047 && !easy_vector_constant (operands[1], mode)) 10048 operands[1] = force_const_mem (mode, operands[1]); 10049 break; 10050 10051 case E_POImode: 10052 case E_PXImode: 10053 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0) 10054 error ("%qs is an opaque type, and you can't set it to other values.", 10055 (mode == POImode) ? "__vector_pair" : "__vector_quad"); 10056 break; 10057 10058 case E_SImode: 10059 case E_DImode: 10060 /* Use default pattern for address of ELF small data */ 10061 if (TARGET_ELF 10062 && mode == Pmode 10063 && DEFAULT_ABI == ABI_V4 10064 && (SYMBOL_REF_P (operands[1]) 10065 || GET_CODE (operands[1]) == CONST) 10066 && small_data_operand (operands[1], mode)) 10067 { 10068 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10069 return; 10070 } 10071 10072 /* Use the default pattern for loading up PC-relative addresses. */ 10073 if (TARGET_PCREL && mode == Pmode 10074 && pcrel_local_or_external_address (operands[1], Pmode)) 10075 { 10076 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10077 return; 10078 } 10079 10080 if (DEFAULT_ABI == ABI_V4 10081 && mode == Pmode && mode == SImode 10082 && flag_pic == 1 && got_operand (operands[1], mode)) 10083 { 10084 emit_insn (gen_movsi_got (operands[0], operands[1])); 10085 return; 10086 } 10087 10088 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN) 10089 && TARGET_NO_TOC_OR_PCREL 10090 && ! flag_pic 10091 && mode == Pmode 10092 && CONSTANT_P (operands[1]) 10093 && GET_CODE (operands[1]) != HIGH 10094 && !CONST_INT_P (operands[1])) 10095 { 10096 rtx target = (!can_create_pseudo_p () 10097 ? operands[0] 10098 : gen_reg_rtx (mode)); 10099 10100 /* If this is a function address on -mcall-aixdesc, 10101 convert it to the address of the descriptor. */ 10102 if (DEFAULT_ABI == ABI_AIX 10103 && SYMBOL_REF_P (operands[1]) 10104 && XSTR (operands[1], 0)[0] == '.') 10105 { 10106 const char *name = XSTR (operands[1], 0); 10107 rtx new_ref; 10108 while (*name == '.') 10109 name++; 10110 new_ref = gen_rtx_SYMBOL_REF (Pmode, name); 10111 CONSTANT_POOL_ADDRESS_P (new_ref) 10112 = CONSTANT_POOL_ADDRESS_P (operands[1]); 10113 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]); 10114 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]); 10115 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]); 10116 operands[1] = new_ref; 10117 } 10118 10119 if (DEFAULT_ABI == ABI_DARWIN) 10120 { 10121#if TARGET_MACHO 10122 /* This is not PIC code, but could require the subset of 10123 indirections used by mdynamic-no-pic. */ 10124 if (MACHO_DYNAMIC_NO_PIC_P) 10125 { 10126 /* Take care of any required data indirection. 
*/ 10127 operands[1] = rs6000_machopic_legitimize_pic_address ( 10128 operands[1], mode, operands[0]); 10129 if (operands[0] != operands[1]) 10130 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10131 return; 10132 } 10133#endif 10134 emit_insn (gen_macho_high (Pmode, target, operands[1])); 10135 emit_insn (gen_macho_low (Pmode, operands[0], 10136 target, operands[1])); 10137 return; 10138 } 10139 10140 emit_insn (gen_elf_high (target, operands[1])); 10141 emit_insn (gen_elf_low (operands[0], target, operands[1])); 10142 return; 10143 } 10144 10145 /* If this is a SYMBOL_REF that refers to a constant pool entry, 10146 and we have put it in the TOC, we just need to make a TOC-relative 10147 reference to it. */ 10148 if (TARGET_TOC 10149 && SYMBOL_REF_P (operands[1]) 10150 && use_toc_relative_ref (operands[1], mode)) 10151 operands[1] = create_TOC_reference (operands[1], operands[0]); 10152 else if (mode == Pmode 10153 && CONSTANT_P (operands[1]) 10154 && GET_CODE (operands[1]) != HIGH 10155 && ((REG_P (operands[0]) 10156 && FP_REGNO_P (REGNO (operands[0]))) 10157 || !CONST_INT_P (operands[1]) 10158 || (num_insns_constant (operands[1], mode) 10159 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2))) 10160 && !toc_relative_expr_p (operands[1], false, NULL, NULL) 10161 && (TARGET_CMODEL == CMODEL_SMALL 10162 || can_create_pseudo_p () 10163 || (REG_P (operands[0]) 10164 && INT_REG_OK_FOR_BASE_P (operands[0], true)))) 10165 { 10166 10167#if TARGET_MACHO 10168 /* Darwin uses a special PIC legitimizer. */ 10169 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT) 10170 { 10171 operands[1] = 10172 rs6000_machopic_legitimize_pic_address (operands[1], mode, 10173 operands[0]); 10174 if (operands[0] != operands[1]) 10175 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10176 return; 10177 } 10178#endif 10179 10180 /* If we are to limit the number of things we put in the TOC and 10181 this is a symbol plus a constant we can add in one insn, 10182 just put the symbol in the TOC and add the constant. */ 10183 if (GET_CODE (operands[1]) == CONST 10184 && TARGET_NO_SUM_IN_TOC 10185 && GET_CODE (XEXP (operands[1], 0)) == PLUS 10186 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) 10187 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF 10188 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0))) 10189 && ! side_effects_p (operands[0])) 10190 { 10191 rtx sym = 10192 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0)); 10193 rtx other = XEXP (XEXP (operands[1], 0), 1); 10194 10195 sym = force_reg (mode, sym); 10196 emit_insn (gen_add3_insn (operands[0], sym, other)); 10197 return; 10198 } 10199 10200 operands[1] = force_const_mem (mode, operands[1]); 10201 10202 if (TARGET_TOC 10203 && SYMBOL_REF_P (XEXP (operands[1], 0)) 10204 && use_toc_relative_ref (XEXP (operands[1], 0), mode)) 10205 { 10206 rtx tocref = create_TOC_reference (XEXP (operands[1], 0), 10207 operands[0]); 10208 operands[1] = gen_const_mem (mode, tocref); 10209 set_mem_alias_set (operands[1], get_TOC_alias_set ()); 10210 } 10211 } 10212 break; 10213 10214 case E_TImode: 10215 if (!VECTOR_MEM_VSX_P (TImode)) 10216 rs6000_eliminate_indexed_memrefs (operands); 10217 break; 10218 10219 case E_PTImode: 10220 rs6000_eliminate_indexed_memrefs (operands); 10221 break; 10222 10223 default: 10224 fatal_insn ("bad move", gen_rtx_SET (dest, source)); 10225 } 10226 10227 /* Above, we may have called force_const_mem which may have returned 10228 an invalid address. If we can, fix this up; otherwise, reload will 10229 have to deal with it. 
*/ 10230 if (MEM_P (operands[1])) 10231 operands[1] = validize_mem (operands[1]); 10232 10233 emit_insn (gen_rtx_SET (operands[0], operands[1])); 10234} 10235 10236 10237/* Set up AIX/Darwin/64-bit Linux quad floating point routines. */ 10238static void 10239init_float128_ibm (machine_mode mode) 10240{ 10241 if (!TARGET_XL_COMPAT) 10242 { 10243 set_optab_libfunc (add_optab, mode, "__gcc_qadd"); 10244 set_optab_libfunc (sub_optab, mode, "__gcc_qsub"); 10245 set_optab_libfunc (smul_optab, mode, "__gcc_qmul"); 10246 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv"); 10247 10248 if (!TARGET_HARD_FLOAT) 10249 { 10250 set_optab_libfunc (neg_optab, mode, "__gcc_qneg"); 10251 set_optab_libfunc (eq_optab, mode, "__gcc_qeq"); 10252 set_optab_libfunc (ne_optab, mode, "__gcc_qne"); 10253 set_optab_libfunc (gt_optab, mode, "__gcc_qgt"); 10254 set_optab_libfunc (ge_optab, mode, "__gcc_qge"); 10255 set_optab_libfunc (lt_optab, mode, "__gcc_qlt"); 10256 set_optab_libfunc (le_optab, mode, "__gcc_qle"); 10257 set_optab_libfunc (unord_optab, mode, "__gcc_qunord"); 10258 10259 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq"); 10260 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq"); 10261 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos"); 10262 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod"); 10263 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi"); 10264 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou"); 10265 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq"); 10266 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq"); 10267 } 10268 } 10269 else 10270 { 10271 set_optab_libfunc (add_optab, mode, "_xlqadd"); 10272 set_optab_libfunc (sub_optab, mode, "_xlqsub"); 10273 set_optab_libfunc (smul_optab, mode, "_xlqmul"); 10274 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv"); 10275 } 10276 10277 /* Add various conversions for IFmode to use the traditional TFmode 10278 names. */ 10279 if (mode == IFmode) 10280 { 10281 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf"); 10282 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf"); 10283 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf"); 10284 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd"); 10285 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd"); 10286 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd"); 10287 10288 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi"); 10289 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi"); 10290 10291 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf"); 10292 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf"); 10293 10294 if (TARGET_POWERPC64) 10295 { 10296 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti"); 10297 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti"); 10298 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf"); 10299 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf"); 10300 } 10301 } 10302} 10303 10304/* Create a decl for either complex long double multiply or complex long double 10305 divide when long double is IEEE 128-bit floating point. We can't use 10306 __multc3 and __divtc3 because the original long double using IBM extended 10307 double used those names. The complex multiply/divide functions are encoded 10308 as builtin functions with a complex result and 4 scalar inputs. 
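   For example, __mulkc3 (a, b, c, d) returns the complex product
   (a + b*i) * (c + d*i), and __divkc3 returns the corresponding quotient.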
*/ 10309 10310static void 10311create_complex_muldiv (const char *name, built_in_function fncode, tree fntype) 10312{ 10313 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL, 10314 name, NULL_TREE); 10315 10316 set_builtin_decl (fncode, fndecl, true); 10317 10318 if (TARGET_DEBUG_BUILTIN) 10319 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode); 10320 10321 return; 10322} 10323 10324/* Set up IEEE 128-bit floating point routines. Use different names if the 10325 arguments can be passed in a vector register. The historical PowerPC 10326 implementation of IEEE 128-bit floating point used _q_<op> for the names, so 10327 continue to use that if we aren't using vector registers to pass IEEE 10328 128-bit floating point. */ 10329 10330static void 10331init_float128_ieee (machine_mode mode) 10332{ 10333 if (FLOAT128_VECTOR_P (mode)) 10334 { 10335 static bool complex_muldiv_init_p = false; 10336 10337 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If 10338 we have clone or target attributes, this will be called a second 10339 time. We want to create the built-in function only once. */ 10340 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p) 10341 { 10342 complex_muldiv_init_p = true; 10343 built_in_function fncode_mul = 10344 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode 10345 - MIN_MODE_COMPLEX_FLOAT); 10346 built_in_function fncode_div = 10347 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode 10348 - MIN_MODE_COMPLEX_FLOAT); 10349 10350 tree fntype = build_function_type_list (complex_long_double_type_node, 10351 long_double_type_node, 10352 long_double_type_node, 10353 long_double_type_node, 10354 long_double_type_node, 10355 NULL_TREE); 10356 10357 create_complex_muldiv ("__mulkc3", fncode_mul, fntype); 10358 create_complex_muldiv ("__divkc3", fncode_div, fntype); 10359 } 10360 10361 set_optab_libfunc (add_optab, mode, "__addkf3"); 10362 set_optab_libfunc (sub_optab, mode, "__subkf3"); 10363 set_optab_libfunc (neg_optab, mode, "__negkf2"); 10364 set_optab_libfunc (smul_optab, mode, "__mulkf3"); 10365 set_optab_libfunc (sdiv_optab, mode, "__divkf3"); 10366 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2"); 10367 set_optab_libfunc (abs_optab, mode, "__abskf2"); 10368 set_optab_libfunc (powi_optab, mode, "__powikf2"); 10369 10370 set_optab_libfunc (eq_optab, mode, "__eqkf2"); 10371 set_optab_libfunc (ne_optab, mode, "__nekf2"); 10372 set_optab_libfunc (gt_optab, mode, "__gtkf2"); 10373 set_optab_libfunc (ge_optab, mode, "__gekf2"); 10374 set_optab_libfunc (lt_optab, mode, "__ltkf2"); 10375 set_optab_libfunc (le_optab, mode, "__lekf2"); 10376 set_optab_libfunc (unord_optab, mode, "__unordkf2"); 10377 10378 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2"); 10379 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2"); 10380 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2"); 10381 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2"); 10382 10383 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2"); 10384 if (mode != TFmode && FLOAT128_IBM_P (TFmode)) 10385 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2"); 10386 10387 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2"); 10388 if (mode != TFmode && FLOAT128_IBM_P (TFmode)) 10389 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2"); 10390 10391 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf"); 10392 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf"); 10393 
set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf"); 10394 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd"); 10395 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd"); 10396 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd"); 10397 10398 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi"); 10399 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi"); 10400 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi"); 10401 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi"); 10402 10403 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf"); 10404 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf"); 10405 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf"); 10406 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf"); 10407 10408 if (TARGET_POWERPC64) 10409 { 10410 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti"); 10411 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti"); 10412 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf"); 10413 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf"); 10414 } 10415 } 10416 10417 else 10418 { 10419 set_optab_libfunc (add_optab, mode, "_q_add"); 10420 set_optab_libfunc (sub_optab, mode, "_q_sub"); 10421 set_optab_libfunc (neg_optab, mode, "_q_neg"); 10422 set_optab_libfunc (smul_optab, mode, "_q_mul"); 10423 set_optab_libfunc (sdiv_optab, mode, "_q_div"); 10424 if (TARGET_PPC_GPOPT) 10425 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt"); 10426 10427 set_optab_libfunc (eq_optab, mode, "_q_feq"); 10428 set_optab_libfunc (ne_optab, mode, "_q_fne"); 10429 set_optab_libfunc (gt_optab, mode, "_q_fgt"); 10430 set_optab_libfunc (ge_optab, mode, "_q_fge"); 10431 set_optab_libfunc (lt_optab, mode, "_q_flt"); 10432 set_optab_libfunc (le_optab, mode, "_q_fle"); 10433 10434 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq"); 10435 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq"); 10436 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos"); 10437 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod"); 10438 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi"); 10439 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou"); 10440 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq"); 10441 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq"); 10442 } 10443} 10444 10445static void 10446rs6000_init_libfuncs (void) 10447{ 10448 /* __float128 support. */ 10449 if (TARGET_FLOAT128_TYPE) 10450 { 10451 init_float128_ibm (IFmode); 10452 init_float128_ieee (KFmode); 10453 } 10454 10455 /* AIX/Darwin/64-bit Linux quad floating point routines. */ 10456 if (TARGET_LONG_DOUBLE_128) 10457 { 10458 if (!TARGET_IEEEQUAD) 10459 init_float128_ibm (TFmode); 10460 10461 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */ 10462 else 10463 init_float128_ieee (TFmode); 10464 } 10465} 10466 10467/* Emit a potentially record-form instruction, setting DST from SRC. 10468 If DOT is 0, that is all; otherwise, set CCREG to the result of the 10469 signed comparison of DST with zero. If DOT is 1, the generated RTL 10470 doesn't care about the DST result; if DOT is 2, it does. If CCREG 10471 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and 10472 a separate COMPARE. 
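   For example, with DOT equal to 2 and CCREG being CR0, an addition SRC
   becomes a record-form add ("add. DST,x,y") that both writes DST and sets
   CR0 from the signed comparison of the result with zero; with DOT equal
   to 1 only the CR0 result is kept and DST is merely clobbered.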
*/ 10473 10474void 10475rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) 10476{ 10477 if (dot == 0) 10478 { 10479 emit_move_insn (dst, src); 10480 return; 10481 } 10482 10483 if (cc_reg_not_cr0_operand (ccreg, CCmode)) 10484 { 10485 emit_move_insn (dst, src); 10486 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx)); 10487 return; 10488 } 10489 10490 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx)); 10491 if (dot == 1) 10492 { 10493 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst); 10494 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber))); 10495 } 10496 else 10497 { 10498 rtx set = gen_rtx_SET (dst, src); 10499 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set))); 10500 } 10501} 10502 10503 10504/* A validation routine: say whether CODE, a condition code, and MODE 10505 match. The other alternatives either don't make sense or should 10506 never be generated. */ 10507 10508void 10509validate_condition_mode (enum rtx_code code, machine_mode mode) 10510{ 10511 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE 10512 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE) 10513 && GET_MODE_CLASS (mode) == MODE_CC); 10514 10515 /* These don't make sense. */ 10516 gcc_assert ((code != GT && code != LT && code != GE && code != LE) 10517 || mode != CCUNSmode); 10518 10519 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU) 10520 || mode == CCUNSmode); 10521 10522 gcc_assert (mode == CCFPmode 10523 || (code != ORDERED && code != UNORDERED 10524 && code != UNEQ && code != LTGT 10525 && code != UNGT && code != UNLT 10526 && code != UNGE && code != UNLE)); 10527 10528 /* These are invalid; the information is not there. */ 10529 gcc_assert (mode != CCEQmode || code == EQ || code == NE); 10530} 10531 10532 10533/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, 10534 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is 10535 not zero, store there the bit offset (counted from the right) where 10536 the single stretch of 1 bits begins; and similarly for B, the bit 10537 offset where it ends. */ 10538 10539bool 10540rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode) 10541{ 10542 unsigned HOST_WIDE_INT val = INTVAL (mask); 10543 unsigned HOST_WIDE_INT bit; 10544 int nb, ne; 10545 int n = GET_MODE_PRECISION (mode); 10546 10547 if (mode != DImode && mode != SImode) 10548 return false; 10549 10550 if (INTVAL (mask) >= 0) 10551 { 10552 bit = val & -val; 10553 ne = exact_log2 (bit); 10554 nb = exact_log2 (val + bit); 10555 } 10556 else if (val + 1 == 0) 10557 { 10558 nb = n; 10559 ne = 0; 10560 } 10561 else if (val & 1) 10562 { 10563 val = ~val; 10564 bit = val & -val; 10565 nb = exact_log2 (bit); 10566 ne = exact_log2 (val + bit); 10567 } 10568 else 10569 { 10570 bit = val & -val; 10571 ne = exact_log2 (bit); 10572 if (val + bit == 0) 10573 nb = n; 10574 else 10575 nb = 0; 10576 } 10577 10578 nb--; 10579 10580 if (nb < 0 || ne < 0 || nb >= n || ne >= n) 10581 return false; 10582 10583 if (b) 10584 *b = nb; 10585 if (e) 10586 *e = ne; 10587 10588 return true; 10589} 10590 10591/* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl, 10592 or rldicr instruction, to implement an AND with it in mode MODE. 
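   For example, in DImode the mask 0xffffffff (nb = 31, ne = 0) can be done
   with rldicl and 0xffffffff00000000 (nb = 63, ne = 32) with rldicr, while
   a mask like 0xffff00000000 is rejected here and has to be handled by the
   two-insn path further down.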
*/ 10593 10594bool 10595rs6000_is_valid_and_mask (rtx mask, machine_mode mode) 10596{ 10597 int nb, ne; 10598 10599 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 10600 return false; 10601 10602 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that 10603 does not wrap. */ 10604 if (mode == DImode) 10605 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb)); 10606 10607 /* For SImode, rlwinm can do everything. */ 10608 if (mode == SImode) 10609 return (nb < 32 && ne < 32); 10610 10611 return false; 10612} 10613 10614/* Return the instruction template for an AND with mask in mode MODE, with 10615 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 10616 10617const char * 10618rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot) 10619{ 10620 int nb, ne; 10621 10622 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode)) 10623 gcc_unreachable (); 10624 10625 if (mode == DImode && ne == 0) 10626 { 10627 operands[3] = GEN_INT (63 - nb); 10628 if (dot) 10629 return "rldicl. %0,%1,0,%3"; 10630 return "rldicl %0,%1,0,%3"; 10631 } 10632 10633 if (mode == DImode && nb == 63) 10634 { 10635 operands[3] = GEN_INT (63 - ne); 10636 if (dot) 10637 return "rldicr. %0,%1,0,%3"; 10638 return "rldicr %0,%1,0,%3"; 10639 } 10640 10641 if (nb < 32 && ne < 32) 10642 { 10643 operands[3] = GEN_INT (31 - nb); 10644 operands[4] = GEN_INT (31 - ne); 10645 if (dot) 10646 return "rlwinm. %0,%1,0,%3,%4"; 10647 return "rlwinm %0,%1,0,%3,%4"; 10648 } 10649 10650 gcc_unreachable (); 10651} 10652 10653/* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm, 10654 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with 10655 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */ 10656 10657bool 10658rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode) 10659{ 10660 int nb, ne; 10661 10662 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 10663 return false; 10664 10665 int n = GET_MODE_PRECISION (mode); 10666 int sh = -1; 10667 10668 if (CONST_INT_P (XEXP (shift, 1))) 10669 { 10670 sh = INTVAL (XEXP (shift, 1)); 10671 if (sh < 0 || sh >= n) 10672 return false; 10673 } 10674 10675 rtx_code code = GET_CODE (shift); 10676 10677 /* Convert any shift by 0 to a rotate, to simplify below code. */ 10678 if (sh == 0) 10679 code = ROTATE; 10680 10681 /* Convert rotate to simple shift if we can, to make analysis simpler. */ 10682 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) 10683 code = ASHIFT; 10684 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) 10685 { 10686 code = LSHIFTRT; 10687 sh = n - sh; 10688 } 10689 10690 /* DImode rotates need rld*. */ 10691 if (mode == DImode && code == ROTATE) 10692 return (nb == 63 || ne == 0 || ne == sh); 10693 10694 /* SImode rotates need rlw*. */ 10695 if (mode == SImode && code == ROTATE) 10696 return (nb < 32 && ne < 32 && sh < 32); 10697 10698 /* Wrap-around masks are only okay for rotates. */ 10699 if (ne > nb) 10700 return false; 10701 10702 /* Variable shifts are only okay for rotates. */ 10703 if (sh < 0) 10704 return false; 10705 10706 /* Don't allow ASHIFT if the mask is wrong for that. */ 10707 if (code == ASHIFT && ne < sh) 10708 return false; 10709 10710 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT 10711 if the mask is wrong for that. */ 10712 if (nb < 32 && ne < 32 && sh < 32 10713 && !(code == LSHIFTRT && nb >= 32 - sh)) 10714 return true; 10715 10716 /* If we can do it with an rld*, we can do it. 
Don't allow LSHIFTRT 10717 if the mask is wrong for that. */ 10718 if (code == LSHIFTRT) 10719 sh = 64 - sh; 10720 if (nb == 63 || ne == 0 || ne == sh) 10721 return !(code == LSHIFTRT && nb >= sh); 10722 10723 return false; 10724} 10725 10726/* Return the instruction template for a shift with mask in mode MODE, with 10727 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 10728 10729const char * 10730rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot) 10731{ 10732 int nb, ne; 10733 10734 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) 10735 gcc_unreachable (); 10736 10737 if (mode == DImode && ne == 0) 10738 { 10739 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 10740 operands[2] = GEN_INT (64 - INTVAL (operands[2])); 10741 operands[3] = GEN_INT (63 - nb); 10742 if (dot) 10743 return "rld%I2cl. %0,%1,%2,%3"; 10744 return "rld%I2cl %0,%1,%2,%3"; 10745 } 10746 10747 if (mode == DImode && nb == 63) 10748 { 10749 operands[3] = GEN_INT (63 - ne); 10750 if (dot) 10751 return "rld%I2cr. %0,%1,%2,%3"; 10752 return "rld%I2cr %0,%1,%2,%3"; 10753 } 10754 10755 if (mode == DImode 10756 && GET_CODE (operands[4]) != LSHIFTRT 10757 && CONST_INT_P (operands[2]) 10758 && ne == INTVAL (operands[2])) 10759 { 10760 operands[3] = GEN_INT (63 - nb); 10761 if (dot) 10762 return "rld%I2c. %0,%1,%2,%3"; 10763 return "rld%I2c %0,%1,%2,%3"; 10764 } 10765 10766 if (nb < 32 && ne < 32) 10767 { 10768 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 10769 operands[2] = GEN_INT (32 - INTVAL (operands[2])); 10770 operands[3] = GEN_INT (31 - nb); 10771 operands[4] = GEN_INT (31 - ne); 10772 /* This insn can also be a 64-bit rotate with mask that really makes 10773 it just a shift right (with mask); the %h below are to adjust for 10774 that situation (shift count is >= 32 in that case). */ 10775 if (dot) 10776 return "rlw%I2nm. %0,%1,%h2,%3,%4"; 10777 return "rlw%I2nm %0,%1,%h2,%3,%4"; 10778 } 10779 10780 gcc_unreachable (); 10781} 10782 10783/* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or 10784 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE, 10785 ASHIFT, or LSHIFTRT) in mode MODE. */ 10786 10787bool 10788rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode) 10789{ 10790 int nb, ne; 10791 10792 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) 10793 return false; 10794 10795 int n = GET_MODE_PRECISION (mode); 10796 10797 int sh = INTVAL (XEXP (shift, 1)); 10798 if (sh < 0 || sh >= n) 10799 return false; 10800 10801 rtx_code code = GET_CODE (shift); 10802 10803 /* Convert any shift by 0 to a rotate, to simplify below code. */ 10804 if (sh == 0) 10805 code = ROTATE; 10806 10807 /* Convert rotate to simple shift if we can, to make analysis simpler. */ 10808 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh) 10809 code = ASHIFT; 10810 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh) 10811 { 10812 code = LSHIFTRT; 10813 sh = n - sh; 10814 } 10815 10816 /* DImode rotates need rldimi. */ 10817 if (mode == DImode && code == ROTATE) 10818 return (ne == sh); 10819 10820 /* SImode rotates need rlwimi. */ 10821 if (mode == SImode && code == ROTATE) 10822 return (nb < 32 && ne < 32 && sh < 32); 10823 10824 /* Wrap-around masks are only okay for rotates. */ 10825 if (ne > nb) 10826 return false; 10827 10828 /* Don't allow ASHIFT if the mask is wrong for that. */ 10829 if (code == ASHIFT && ne < sh) 10830 return false; 10831 10832 /* If we can do it with an rlwimi, we can do it. 
Don't allow LSHIFTRT 10833 if the mask is wrong for that. */ 10834 if (nb < 32 && ne < 32 && sh < 32 10835 && !(code == LSHIFTRT && nb >= 32 - sh)) 10836 return true; 10837 10838 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT 10839 if the mask is wrong for that. */ 10840 if (code == LSHIFTRT) 10841 sh = 64 - sh; 10842 if (ne == sh) 10843 return !(code == LSHIFTRT && nb >= sh); 10844 10845 return false; 10846} 10847 10848/* Return the instruction template for an insert with mask in mode MODE, with 10849 operands OPERANDS. If DOT is true, make it a record-form instruction. */ 10850 10851const char * 10852rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot) 10853{ 10854 int nb, ne; 10855 10856 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) 10857 gcc_unreachable (); 10858 10859 /* Prefer rldimi because rlwimi is cracked. */ 10860 if (TARGET_POWERPC64 10861 && (!dot || mode == DImode) 10862 && GET_CODE (operands[4]) != LSHIFTRT 10863 && ne == INTVAL (operands[2])) 10864 { 10865 operands[3] = GEN_INT (63 - nb); 10866 if (dot) 10867 return "rldimi. %0,%1,%2,%3"; 10868 return "rldimi %0,%1,%2,%3"; 10869 } 10870 10871 if (nb < 32 && ne < 32) 10872 { 10873 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) 10874 operands[2] = GEN_INT (32 - INTVAL (operands[2])); 10875 operands[3] = GEN_INT (31 - nb); 10876 operands[4] = GEN_INT (31 - ne); 10877 if (dot) 10878 return "rlwimi. %0,%1,%2,%3,%4"; 10879 return "rlwimi %0,%1,%2,%3,%4"; 10880 } 10881 10882 gcc_unreachable (); 10883} 10884 10885/* Return whether an AND with C (a CONST_INT) in mode MODE can be done 10886 using two machine instructions. */ 10887 10888bool 10889rs6000_is_valid_2insn_and (rtx c, machine_mode mode) 10890{ 10891 /* There are two kinds of AND we can handle with two insns: 10892 1) those we can do with two rl* insn; 10893 2) ori[s];xori[s]. 10894 10895 We do not handle that last case yet. */ 10896 10897 /* If there is just one stretch of ones, we can do it. */ 10898 if (rs6000_is_valid_mask (c, NULL, NULL, mode)) 10899 return true; 10900 10901 /* Otherwise, fill in the lowest "hole"; if we can do the result with 10902 one insn, we can do the whole thing with two. */ 10903 unsigned HOST_WIDE_INT val = INTVAL (c); 10904 unsigned HOST_WIDE_INT bit1 = val & -val; 10905 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; 10906 unsigned HOST_WIDE_INT val1 = (val + bit1) & val; 10907 unsigned HOST_WIDE_INT bit3 = val1 & -val1; 10908 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode); 10909} 10910 10911/* Emit the two insns to do an AND in mode MODE, with operands OPERANDS. 10912 If EXPAND is true, split rotate-and-mask instructions we generate to 10913 their constituent parts as well (this is used during expand); if DOT 10914 is 1, make the last insn a record-form instruction clobbering the 10915 destination GPR and setting the CC reg (from operands[3]); if 2, set 10916 that GPR as well as the CC reg. */ 10917 10918void 10919rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot) 10920{ 10921 gcc_assert (!(expand && dot)); 10922 10923 unsigned HOST_WIDE_INT val = INTVAL (operands[2]); 10924 10925 /* If it is one stretch of ones, it is DImode; shift left, mask, then 10926 shift right. This generates better code than doing the masks without 10927 shifts, or shifting first right and then left. 
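E.g. a mask of 0x000000ffff000000 gives nb = 39 and ne = 24, so we shift left by 63 - nb = 24, AND with 0xffff000000000000 (a valid rldicr mask), and shift right by 24 to put the bits back where they belong.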
*/ 10928 int nb, ne; 10929 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne) 10930 { 10931 gcc_assert (mode == DImode); 10932 10933 int shift = 63 - nb; 10934 if (expand) 10935 { 10936 rtx tmp1 = gen_reg_rtx (DImode); 10937 rtx tmp2 = gen_reg_rtx (DImode); 10938 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift))); 10939 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift))); 10940 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift))); 10941 } 10942 else 10943 { 10944 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift)); 10945 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift)); 10946 emit_move_insn (operands[0], tmp); 10947 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift)); 10948 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 10949 } 10950 return; 10951 } 10952 10953 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1 10954 that does the rest. */ 10955 unsigned HOST_WIDE_INT bit1 = val & -val; 10956 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; 10957 unsigned HOST_WIDE_INT val1 = (val + bit1) & val; 10958 unsigned HOST_WIDE_INT bit3 = val1 & -val1; 10959 10960 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1; 10961 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2; 10962 10963 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode)); 10964 10965 /* Two "no-rotate"-and-mask instructions, for SImode. */ 10966 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode)) 10967 { 10968 gcc_assert (mode == SImode); 10969 10970 rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; 10971 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1)); 10972 emit_move_insn (reg, tmp); 10973 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); 10974 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 10975 return; 10976 } 10977 10978 gcc_assert (mode == DImode); 10979 10980 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm 10981 insns; we have to do the first in SImode, because it wraps. */ 10982 if (mask2 <= 0xffffffff 10983 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode)) 10984 { 10985 rtx reg = expand ? gen_reg_rtx (mode) : operands[0]; 10986 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]), 10987 GEN_INT (mask1)); 10988 rtx reg_low = gen_lowpart (SImode, reg); 10989 emit_move_insn (reg_low, tmp); 10990 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2)); 10991 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 10992 return; 10993 } 10994 10995 /* Two rld* insns: rotate, clear the hole in the middle (which now is 10996 at the top end), rotate back and clear the other hole. */ 10997 int right = exact_log2 (bit3); 10998 int left = 64 - right; 10999 11000 /* Rotate the mask too. 
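The value was rotated left by "left" bits, so mask1 has to be rotated left by the same amount; the low "right" bits of mask1 are exactly bit2 - 1, which gives the expression below.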
*/ 11001 mask1 = (mask1 >> right) | ((bit2 - 1) << left); 11002 11003 if (expand) 11004 { 11005 rtx tmp1 = gen_reg_rtx (DImode); 11006 rtx tmp2 = gen_reg_rtx (DImode); 11007 rtx tmp3 = gen_reg_rtx (DImode); 11008 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left))); 11009 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1))); 11010 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right))); 11011 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2))); 11012 } 11013 else 11014 { 11015 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left)); 11016 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1)); 11017 emit_move_insn (operands[0], tmp); 11018 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right)); 11019 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2)); 11020 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0); 11021 } 11022} 11023 11024/* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates 11025 for lfq and stfq insns iff the registers are hard registers. */ 11026 11027int 11028registers_ok_for_quad_peep (rtx reg1, rtx reg2) 11029{ 11030 /* We might have been passed a SUBREG. */ 11031 if (!REG_P (reg1) || !REG_P (reg2)) 11032 return 0; 11033 11034 /* We might have been passed non floating point registers. */ 11035 if (!FP_REGNO_P (REGNO (reg1)) 11036 || !FP_REGNO_P (REGNO (reg2))) 11037 return 0; 11038 11039 return (REGNO (reg1) == REGNO (reg2) - 1); 11040} 11041 11042/* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn. 11043 addr1 and addr2 must be in consecutive memory locations 11044 (addr2 == addr1 + 8). */ 11045 11046int 11047mems_ok_for_quad_peep (rtx mem1, rtx mem2) 11048{ 11049 rtx addr1, addr2; 11050 unsigned int reg1, reg2; 11051 int offset1, offset2; 11052 11053 /* The mems cannot be volatile. */ 11054 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 11055 return 0; 11056 11057 addr1 = XEXP (mem1, 0); 11058 addr2 = XEXP (mem2, 0); 11059 11060 /* Extract an offset (if used) from the first addr. */ 11061 if (GET_CODE (addr1) == PLUS) 11062 { 11063 /* If not a REG, return zero. */ 11064 if (!REG_P (XEXP (addr1, 0))) 11065 return 0; 11066 else 11067 { 11068 reg1 = REGNO (XEXP (addr1, 0)); 11069 /* The offset must be constant! */ 11070 if (!CONST_INT_P (XEXP (addr1, 1))) 11071 return 0; 11072 offset1 = INTVAL (XEXP (addr1, 1)); 11073 } 11074 } 11075 else if (!REG_P (addr1)) 11076 return 0; 11077 else 11078 { 11079 reg1 = REGNO (addr1); 11080 /* This was a simple (mem (reg)) expression. Offset is 0. */ 11081 offset1 = 0; 11082 } 11083 11084 /* And now for the second addr. */ 11085 if (GET_CODE (addr2) == PLUS) 11086 { 11087 /* If not a REG, return zero. */ 11088 if (!REG_P (XEXP (addr2, 0))) 11089 return 0; 11090 else 11091 { 11092 reg2 = REGNO (XEXP (addr2, 0)); 11093 /* The offset must be constant. */ 11094 if (!CONST_INT_P (XEXP (addr2, 1))) 11095 return 0; 11096 offset2 = INTVAL (XEXP (addr2, 1)); 11097 } 11098 } 11099 else if (!REG_P (addr2)) 11100 return 0; 11101 else 11102 { 11103 reg2 = REGNO (addr2); 11104 /* This was a simple (mem (reg)) expression. Offset is 0. */ 11105 offset2 = 0; 11106 } 11107 11108 /* Both of these must have the same base register. */ 11109 if (reg1 != reg2) 11110 return 0; 11111 11112 /* The offset for the second addr must be 8 more than the first addr. */ 11113 if (offset2 != offset1 + 8) 11114 return 0; 11115 11116 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq 11117 instructions. */ 11118 return 1; 11119} 11120 11121/* Implement TARGET_SECONDARY_RELOAD_NEEDED_MODE. 
For SDmode values we 11122 need to use DDmode, in all other cases we can use the same mode. */ 11123static machine_mode 11124rs6000_secondary_memory_needed_mode (machine_mode mode) 11125{ 11126 if (lra_in_progress && mode == SDmode) 11127 return DDmode; 11128 return mode; 11129} 11130 11131/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work 11132 on traditional floating point registers, and the VMRGOW/VMRGEW instructions 11133 only work on the traditional altivec registers, note if an altivec register 11134 was chosen. */ 11135 11136static enum rs6000_reg_type 11137register_to_reg_type (rtx reg, bool *is_altivec) 11138{ 11139 HOST_WIDE_INT regno; 11140 enum reg_class rclass; 11141 11142 if (SUBREG_P (reg)) 11143 reg = SUBREG_REG (reg); 11144 11145 if (!REG_P (reg)) 11146 return NO_REG_TYPE; 11147 11148 regno = REGNO (reg); 11149 if (!HARD_REGISTER_NUM_P (regno)) 11150 { 11151 if (!lra_in_progress && !reload_completed) 11152 return PSEUDO_REG_TYPE; 11153 11154 regno = true_regnum (reg); 11155 if (regno < 0 || !HARD_REGISTER_NUM_P (regno)) 11156 return PSEUDO_REG_TYPE; 11157 } 11158 11159 gcc_assert (regno >= 0); 11160 11161 if (is_altivec && ALTIVEC_REGNO_P (regno)) 11162 *is_altivec = true; 11163 11164 rclass = rs6000_regno_regclass[regno]; 11165 return reg_class_to_reg_type[(int)rclass]; 11166} 11167 11168/* Helper function to return the cost of adding a TOC entry address. */ 11169 11170static inline int 11171rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) 11172{ 11173 int ret; 11174 11175 if (TARGET_CMODEL != CMODEL_SMALL) 11176 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; 11177 11178 else 11179 ret = (TARGET_MINIMAL_TOC) ? 6 : 3; 11180 11181 return ret; 11182} 11183 11184/* Helper function for rs6000_secondary_reload to determine whether the memory 11185 address (ADDR) with a given register class (RCLASS) and machine mode (MODE) 11186 needs reloading. Return negative if the memory is not handled by the memory 11187 helper functions and to try a different reload method, 0 if no additional 11188 instructions are need, and positive to give the extra cost for the 11189 memory. */ 11190 11191static int 11192rs6000_secondary_reload_memory (rtx addr, 11193 enum reg_class rclass, 11194 machine_mode mode) 11195{ 11196 int extra_cost = 0; 11197 rtx reg, and_arg, plus_arg0, plus_arg1; 11198 addr_mask_type addr_mask; 11199 const char *type = NULL; 11200 const char *fail_msg = NULL; 11201 11202 if (GPR_REG_CLASS_P (rclass)) 11203 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; 11204 11205 else if (rclass == FLOAT_REGS) 11206 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; 11207 11208 else if (rclass == ALTIVEC_REGS) 11209 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; 11210 11211 /* For the combined VSX_REGS, turn off Altivec AND -16. */ 11212 else if (rclass == VSX_REGS) 11213 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX] 11214 & ~RELOAD_REG_AND_M16); 11215 11216 /* If the register allocator hasn't made up its mind yet on the register 11217 class to use, settle on defaults to use. */ 11218 else if (rclass == NO_REGS) 11219 { 11220 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY] 11221 & ~RELOAD_REG_AND_M16); 11222 11223 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0) 11224 addr_mask &= ~(RELOAD_REG_INDEXED 11225 | RELOAD_REG_PRE_INCDEC 11226 | RELOAD_REG_PRE_MODIFY); 11227 } 11228 11229 else 11230 addr_mask = 0; 11231 11232 /* If the register isn't valid in this register class, just return now. 
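Returning -1 makes rs6000_secondary_reload fall back to the default secondary reload handling.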
*/ 11233 if ((addr_mask & RELOAD_REG_VALID) == 0) 11234 { 11235 if (TARGET_DEBUG_ADDR) 11236 { 11237 fprintf (stderr, 11238 "rs6000_secondary_reload_memory: mode = %s, class = %s, " 11239 "not valid in class\n", 11240 GET_MODE_NAME (mode), reg_class_names[rclass]); 11241 debug_rtx (addr); 11242 } 11243 11244 return -1; 11245 } 11246 11247 switch (GET_CODE (addr)) 11248 { 11249 /* Does the register class supports auto update forms for this mode? We 11250 don't need a scratch register, since the powerpc only supports 11251 PRE_INC, PRE_DEC, and PRE_MODIFY. */ 11252 case PRE_INC: 11253 case PRE_DEC: 11254 reg = XEXP (addr, 0); 11255 if (!base_reg_operand (addr, GET_MODE (reg))) 11256 { 11257 fail_msg = "no base register #1"; 11258 extra_cost = -1; 11259 } 11260 11261 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) 11262 { 11263 extra_cost = 1; 11264 type = "update"; 11265 } 11266 break; 11267 11268 case PRE_MODIFY: 11269 reg = XEXP (addr, 0); 11270 plus_arg1 = XEXP (addr, 1); 11271 if (!base_reg_operand (reg, GET_MODE (reg)) 11272 || GET_CODE (plus_arg1) != PLUS 11273 || !rtx_equal_p (reg, XEXP (plus_arg1, 0))) 11274 { 11275 fail_msg = "bad PRE_MODIFY"; 11276 extra_cost = -1; 11277 } 11278 11279 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) 11280 { 11281 extra_cost = 1; 11282 type = "update"; 11283 } 11284 break; 11285 11286 /* Do we need to simulate AND -16 to clear the bottom address bits used 11287 in VMX load/stores? Only allow the AND for vector sizes. */ 11288 case AND: 11289 and_arg = XEXP (addr, 0); 11290 if (GET_MODE_SIZE (mode) != 16 11291 || !CONST_INT_P (XEXP (addr, 1)) 11292 || INTVAL (XEXP (addr, 1)) != -16) 11293 { 11294 fail_msg = "bad Altivec AND #1"; 11295 extra_cost = -1; 11296 } 11297 11298 if (rclass != ALTIVEC_REGS) 11299 { 11300 if (legitimate_indirect_address_p (and_arg, false)) 11301 extra_cost = 1; 11302 11303 else if (legitimate_indexed_address_p (and_arg, false)) 11304 extra_cost = 2; 11305 11306 else 11307 { 11308 fail_msg = "bad Altivec AND #2"; 11309 extra_cost = -1; 11310 } 11311 11312 type = "and"; 11313 } 11314 break; 11315 11316 /* If this is an indirect address, make sure it is a base register. */ 11317 case REG: 11318 case SUBREG: 11319 if (!legitimate_indirect_address_p (addr, false)) 11320 { 11321 extra_cost = 1; 11322 type = "move"; 11323 } 11324 break; 11325 11326 /* If this is an indexed address, make sure the register class can handle 11327 indexed addresses for this mode. */ 11328 case PLUS: 11329 plus_arg0 = XEXP (addr, 0); 11330 plus_arg1 = XEXP (addr, 1); 11331 11332 /* (plus (plus (reg) (constant)) (constant)) is generated during 11333 push_reload processing, so handle it now. */ 11334 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1)) 11335 { 11336 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 11337 { 11338 extra_cost = 1; 11339 type = "offset"; 11340 } 11341 } 11342 11343 /* (plus (plus (reg) (constant)) (reg)) is also generated during 11344 push_reload processing, so handle it now. 
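Like a plain indexed address, it costs an extra instruction if the register class cannot do indexed addressing in this mode.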
*/ 11345 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1)) 11346 { 11347 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 11348 { 11349 extra_cost = 1; 11350 type = "indexed #2"; 11351 } 11352 } 11353 11354 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0))) 11355 { 11356 fail_msg = "no base register #2"; 11357 extra_cost = -1; 11358 } 11359 11360 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1))) 11361 { 11362 if ((addr_mask & RELOAD_REG_INDEXED) == 0 11363 || !legitimate_indexed_address_p (addr, false)) 11364 { 11365 extra_cost = 1; 11366 type = "indexed"; 11367 } 11368 } 11369 11370 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0 11371 && CONST_INT_P (plus_arg1)) 11372 { 11373 if (!quad_address_offset_p (INTVAL (plus_arg1))) 11374 { 11375 extra_cost = 1; 11376 type = "vector d-form offset"; 11377 } 11378 } 11379 11380 /* Make sure the register class can handle offset addresses. */ 11381 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) 11382 { 11383 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 11384 { 11385 extra_cost = 1; 11386 type = "offset #2"; 11387 } 11388 } 11389 11390 else 11391 { 11392 fail_msg = "bad PLUS"; 11393 extra_cost = -1; 11394 } 11395 11396 break; 11397 11398 case LO_SUM: 11399 /* Quad offsets are restricted and can't handle normal addresses. */ 11400 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 11401 { 11402 extra_cost = -1; 11403 type = "vector d-form lo_sum"; 11404 } 11405 11406 else if (!legitimate_lo_sum_address_p (mode, addr, false)) 11407 { 11408 fail_msg = "bad LO_SUM"; 11409 extra_cost = -1; 11410 } 11411 11412 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 11413 { 11414 extra_cost = 1; 11415 type = "lo_sum"; 11416 } 11417 break; 11418 11419 /* Static addresses need to create a TOC entry. */ 11420 case CONST: 11421 case SYMBOL_REF: 11422 case LABEL_REF: 11423 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 11424 { 11425 extra_cost = -1; 11426 type = "vector d-form lo_sum #2"; 11427 } 11428 11429 else 11430 { 11431 type = "address"; 11432 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask); 11433 } 11434 break; 11435 11436 /* TOC references look like offsetable memory. */ 11437 case UNSPEC: 11438 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL) 11439 { 11440 fail_msg = "bad UNSPEC"; 11441 extra_cost = -1; 11442 } 11443 11444 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0) 11445 { 11446 extra_cost = -1; 11447 type = "vector d-form lo_sum #3"; 11448 } 11449 11450 else if ((addr_mask & RELOAD_REG_OFFSET) == 0) 11451 { 11452 extra_cost = 1; 11453 type = "toc reference"; 11454 } 11455 break; 11456 11457 default: 11458 { 11459 fail_msg = "bad address"; 11460 extra_cost = -1; 11461 } 11462 } 11463 11464 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */) 11465 { 11466 if (extra_cost < 0) 11467 fprintf (stderr, 11468 "rs6000_secondary_reload_memory error: mode = %s, " 11469 "class = %s, addr_mask = '%s', %s\n", 11470 GET_MODE_NAME (mode), 11471 reg_class_names[rclass], 11472 rs6000_debug_addr_mask (addr_mask, false), 11473 (fail_msg != NULL) ? fail_msg : "<bad address>"); 11474 11475 else 11476 fprintf (stderr, 11477 "rs6000_secondary_reload_memory: mode = %s, class = %s, " 11478 "addr_mask = '%s', extra cost = %d, %s\n", 11479 GET_MODE_NAME (mode), 11480 reg_class_names[rclass], 11481 rs6000_debug_addr_mask (addr_mask, false), 11482 extra_cost, 11483 (type) ? 
type : "<none>");
11484
11485      debug_rtx (addr);
11486    }
11487
11488  return extra_cost;
11489}
11490
11491/* Helper function for rs6000_secondary_reload to return true if a move to a
11492   different register class is really a simple move.  */
11493
11494static bool
11495rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
11496				     enum rs6000_reg_type from_type,
11497				     machine_mode mode)
11498{
11499  int size = GET_MODE_SIZE (mode);
11500
11501  /* Add support for various direct moves available.  In this function, we only
11502     look at cases where we don't need any extra registers, and one or more
11503     simple move insns are issued.  Originally small integers are not allowed
11504     in FPR/VSX registers.  Single precision binary floating is not a simple
11505     move because we need to convert to the single precision memory layout.
11506     The 4-byte SDmode can be moved.  TDmode values are disallowed since they
11507     need special direct move handling, which we do not support yet.  */
11508  if (TARGET_DIRECT_MOVE
11509      && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11510	  || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
11511    {
11512      if (TARGET_POWERPC64)
11513	{
11514	  /* ISA 2.07: MTVSRD or MFVSRD.  */
11515	  if (size == 8)
11516	    return true;
11517
11518	  /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD.  */
11519	  if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
11520	    return true;
11521	}
11522
11523      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
11524      if (TARGET_P8_VECTOR)
11525	{
11526	  if (mode == SImode)
11527	    return true;
11528
11529	  if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
11530	    return true;
11531	}
11532
11533      /* ISA 2.07: MTVSRWZ or MFVSRWZ.  */
11534      if (mode == SDmode)
11535	return true;
11536    }
11537
11538  /* Move to/from SPR.  */
11539  else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
11540	   && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
11541	       || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
11542    return true;
11543
11544  return false;
11545}
11546
11547/* Direct move helper function for rs6000_secondary_reload, handle all of the
11548   special direct moves that involve allocating an extra register.  Return
11549   true if there is a helper insn for the move (recording its insn code and
11550   extra cost in SRI), false if not.  */
11551
11552static bool
11553rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
11554				     enum rs6000_reg_type from_type,
11555				     machine_mode mode,
11556				     secondary_reload_info *sri,
11557				     bool altivec_p)
11558{
11559  bool ret = false;
11560  enum insn_code icode = CODE_FOR_nothing;
11561  int cost = 0;
11562  int size = GET_MODE_SIZE (mode);
11563
11564  if (TARGET_POWERPC64 && size == 16)
11565    {
11566      /* Handle moving 128-bit values from GPRs to VSX registers on
11567	 ISA 2.07 (power8, power9) when running in 64-bit mode using
11568	 XXPERMDI to glue the two 64-bit values back together.  */
11569      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
11570	{
11571	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
11572	  icode = reg_addr[mode].reload_vsx_gpr;
11573	}
11574
11575      /* Handle moving 128-bit values from VSX registers to GPRs on
11576	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
11577	 bottom 64-bit value.  */
11578      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
11579	{
11580	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi. 
*/ 11581 icode = reg_addr[mode].reload_gpr_vsx; 11582 } 11583 } 11584 11585 else if (TARGET_POWERPC64 && mode == SFmode) 11586 { 11587 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) 11588 { 11589 cost = 3; /* xscvdpspn, mfvsrd, and. */ 11590 icode = reg_addr[mode].reload_gpr_vsx; 11591 } 11592 11593 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) 11594 { 11595 cost = 2; /* mtvsrz, xscvspdpn. */ 11596 icode = reg_addr[mode].reload_vsx_gpr; 11597 } 11598 } 11599 11600 else if (!TARGET_POWERPC64 && size == 8) 11601 { 11602 /* Handle moving 64-bit values from GPRs to floating point registers on 11603 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two 11604 32-bit values back together. Altivec register classes must be handled 11605 specially since a different instruction is used, and the secondary 11606 reload support requires a single instruction class in the scratch 11607 register constraint. However, right now TFmode is not allowed in 11608 Altivec registers, so the pattern will never match. */ 11609 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p) 11610 { 11611 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */ 11612 icode = reg_addr[mode].reload_fpr_gpr; 11613 } 11614 } 11615 11616 if (icode != CODE_FOR_nothing) 11617 { 11618 ret = true; 11619 if (sri) 11620 { 11621 sri->icode = icode; 11622 sri->extra_cost = cost; 11623 } 11624 } 11625 11626 return ret; 11627} 11628 11629/* Return whether a move between two register classes can be done either 11630 directly (simple move) or via a pattern that uses a single extra temporary 11631 (using ISA 2.07's direct move in this case. */ 11632 11633static bool 11634rs6000_secondary_reload_move (enum rs6000_reg_type to_type, 11635 enum rs6000_reg_type from_type, 11636 machine_mode mode, 11637 secondary_reload_info *sri, 11638 bool altivec_p) 11639{ 11640 /* Fall back to load/store reloads if either type is not a register. */ 11641 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) 11642 return false; 11643 11644 /* If we haven't allocated registers yet, assume the move can be done for the 11645 standard register types. */ 11646 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) 11647 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) 11648 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) 11649 return true; 11650 11651 /* Moves to the same set of registers is a simple move for non-specialized 11652 registers. */ 11653 if (to_type == from_type && IS_STD_REG_TYPE (to_type)) 11654 return true; 11655 11656 /* Check whether a simple move can be done directly. */ 11657 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) 11658 { 11659 if (sri) 11660 { 11661 sri->icode = CODE_FOR_nothing; 11662 sri->extra_cost = 0; 11663 } 11664 return true; 11665 } 11666 11667 /* Now check if we can do it in a few steps. */ 11668 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, 11669 altivec_p); 11670} 11671 11672/* Inform reload about cases where moving X with a mode MODE to a register in 11673 RCLASS requires an extra scratch or immediate register. Return the class 11674 needed for the immediate register. 11675 11676 For VSX and Altivec, we may need a register to convert sp+offset into 11677 reg+sp. 11678 11679 For misaligned 64-bit gpr loads and stores we need a register to 11680 convert an offset address to indirect. 
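For example, a 64-bit gpr load or store can only use the DS-form ld/std instructions when the offset is a multiple of 4; for other offsets a scratch register is needed to form the address so an indirect or indexed access can be used instead.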
*/ 11681 11682static reg_class_t 11683rs6000_secondary_reload (bool in_p, 11684 rtx x, 11685 reg_class_t rclass_i, 11686 machine_mode mode, 11687 secondary_reload_info *sri) 11688{ 11689 enum reg_class rclass = (enum reg_class) rclass_i; 11690 reg_class_t ret = ALL_REGS; 11691 enum insn_code icode; 11692 bool default_p = false; 11693 bool done_p = false; 11694 11695 /* Allow subreg of memory before/during reload. */ 11696 bool memory_p = (MEM_P (x) 11697 || (!reload_completed && SUBREG_P (x) 11698 && MEM_P (SUBREG_REG (x)))); 11699 11700 sri->icode = CODE_FOR_nothing; 11701 sri->t_icode = CODE_FOR_nothing; 11702 sri->extra_cost = 0; 11703 icode = ((in_p) 11704 ? reg_addr[mode].reload_load 11705 : reg_addr[mode].reload_store); 11706 11707 if (REG_P (x) || register_operand (x, mode)) 11708 { 11709 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass]; 11710 bool altivec_p = (rclass == ALTIVEC_REGS); 11711 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p); 11712 11713 if (!in_p) 11714 std::swap (to_type, from_type); 11715 11716 /* Can we do a direct move of some sort? */ 11717 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri, 11718 altivec_p)) 11719 { 11720 icode = (enum insn_code)sri->icode; 11721 default_p = false; 11722 done_p = true; 11723 ret = NO_REGS; 11724 } 11725 } 11726 11727 /* Make sure 0.0 is not reloaded or forced into memory. */ 11728 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass)) 11729 { 11730 ret = NO_REGS; 11731 default_p = false; 11732 done_p = true; 11733 } 11734 11735 /* If this is a scalar floating point value and we want to load it into the 11736 traditional Altivec registers, do it via a move via a traditional floating 11737 point register, unless we have D-form addressing. Also make sure that 11738 non-zero constants use a FPR. */ 11739 if (!done_p && reg_addr[mode].scalar_in_vmx_p 11740 && !mode_supports_vmx_dform (mode) 11741 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS) 11742 && (memory_p || CONST_DOUBLE_P (x))) 11743 { 11744 ret = FLOAT_REGS; 11745 default_p = false; 11746 done_p = true; 11747 } 11748 11749 /* Handle reload of load/stores if we have reload helper functions. */ 11750 if (!done_p && icode != CODE_FOR_nothing && memory_p) 11751 { 11752 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass, 11753 mode); 11754 11755 if (extra_cost >= 0) 11756 { 11757 done_p = true; 11758 ret = NO_REGS; 11759 if (extra_cost > 0) 11760 { 11761 sri->extra_cost = extra_cost; 11762 sri->icode = icode; 11763 } 11764 } 11765 } 11766 11767 /* Handle unaligned loads and stores of integer registers. */ 11768 if (!done_p && TARGET_POWERPC64 11769 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE 11770 && memory_p 11771 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD) 11772 { 11773 rtx addr = XEXP (x, 0); 11774 rtx off = address_offset (addr); 11775 11776 if (off != NULL_RTX) 11777 { 11778 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; 11779 unsigned HOST_WIDE_INT offset = INTVAL (off); 11780 11781 /* We need a secondary reload when our legitimate_address_p 11782 says the address is good (as otherwise the entire address 11783 will be reloaded), and the offset is not a multiple of 11784 four or we have an address wrap. Address wrap will only 11785 occur for LO_SUMs since legitimate_offset_address_p 11786 rejects addresses for 16-byte mems that will wrap. */ 11787 if (GET_CODE (addr) == LO_SUM 11788 ? 
(1 /* legitimate_address_p allows any offset for lo_sum */ 11789 && ((offset & 3) != 0 11790 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra)) 11791 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */ 11792 && (offset & 3) != 0)) 11793 { 11794 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */ 11795 if (in_p) 11796 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load 11797 : CODE_FOR_reload_di_load); 11798 else 11799 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store 11800 : CODE_FOR_reload_di_store); 11801 sri->extra_cost = 2; 11802 ret = NO_REGS; 11803 done_p = true; 11804 } 11805 else 11806 default_p = true; 11807 } 11808 else 11809 default_p = true; 11810 } 11811 11812 if (!done_p && !TARGET_POWERPC64 11813 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE 11814 && memory_p 11815 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) 11816 { 11817 rtx addr = XEXP (x, 0); 11818 rtx off = address_offset (addr); 11819 11820 if (off != NULL_RTX) 11821 { 11822 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD; 11823 unsigned HOST_WIDE_INT offset = INTVAL (off); 11824 11825 /* We need a secondary reload when our legitimate_address_p 11826 says the address is good (as otherwise the entire address 11827 will be reloaded), and we have a wrap. 11828 11829 legitimate_lo_sum_address_p allows LO_SUM addresses to 11830 have any offset so test for wrap in the low 16 bits. 11831 11832 legitimate_offset_address_p checks for the range 11833 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7] 11834 for mode size of 16. We wrap at [0x7ffc,0x7fff] and 11835 [0x7ff4,0x7fff] respectively, so test for the 11836 intersection of these ranges, [0x7ffc,0x7fff] and 11837 [0x7ff4,0x7ff7] respectively. 11838 11839 Note that the address we see here may have been 11840 manipulated by legitimize_reload_address. */ 11841 if (GET_CODE (addr) == LO_SUM 11842 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra 11843 : offset - (0x8000 - extra) < UNITS_PER_WORD) 11844 { 11845 if (in_p) 11846 sri->icode = CODE_FOR_reload_si_load; 11847 else 11848 sri->icode = CODE_FOR_reload_si_store; 11849 sri->extra_cost = 2; 11850 ret = NO_REGS; 11851 done_p = true; 11852 } 11853 else 11854 default_p = true; 11855 } 11856 else 11857 default_p = true; 11858 } 11859 11860 if (!done_p) 11861 default_p = true; 11862 11863 if (default_p) 11864 ret = default_secondary_reload (in_p, x, rclass, mode, sri); 11865 11866 gcc_assert (ret != ALL_REGS); 11867 11868 if (TARGET_DEBUG_ADDR) 11869 { 11870 fprintf (stderr, 11871 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, " 11872 "mode = %s", 11873 reg_class_names[ret], 11874 in_p ? "true" : "false", 11875 reg_class_names[rclass], 11876 GET_MODE_NAME (mode)); 11877 11878 if (reload_completed) 11879 fputs (", after reload", stderr); 11880 11881 if (!done_p) 11882 fputs (", done_p not set", stderr); 11883 11884 if (default_p) 11885 fputs (", default secondary reload", stderr); 11886 11887 if (sri->icode != CODE_FOR_nothing) 11888 fprintf (stderr, ", reload func = %s, extra cost = %d", 11889 insn_data[sri->icode].name, sri->extra_cost); 11890 11891 else if (sri->extra_cost > 0) 11892 fprintf (stderr, ", extra cost = %d", sri->extra_cost); 11893 11894 fputs ("\n", stderr); 11895 debug_rtx (x); 11896 } 11897 11898 return ret; 11899} 11900 11901/* Better tracing for rs6000_secondary_reload_inner. 
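Dump the reload as a (set ...) together with a (clobber ...) of the scratch register, so the operands can be examined with debug_rtx.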
*/ 11902 11903static void 11904rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, 11905 bool store_p) 11906{ 11907 rtx set, clobber; 11908 11909 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX); 11910 11911 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line, 11912 store_p ? "store" : "load"); 11913 11914 if (store_p) 11915 set = gen_rtx_SET (mem, reg); 11916 else 11917 set = gen_rtx_SET (reg, mem); 11918 11919 clobber = gen_rtx_CLOBBER (VOIDmode, scratch); 11920 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); 11921} 11922 11923static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool) 11924 ATTRIBUTE_NORETURN; 11925 11926static void 11927rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch, 11928 bool store_p) 11929{ 11930 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p); 11931 gcc_unreachable (); 11932} 11933 11934/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have 11935 reload helper functions. These were identified in 11936 rs6000_secondary_reload_memory, and if reload decided to use the secondary 11937 reload, it calls the insns: 11938 reload_<RELOAD:mode>_<P:mptrsize>_store 11939 reload_<RELOAD:mode>_<P:mptrsize>_load 11940 11941 which in turn calls this function, to do whatever is necessary to create 11942 valid addresses. */ 11943 11944void 11945rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) 11946{ 11947 int regno = true_regnum (reg); 11948 machine_mode mode = GET_MODE (reg); 11949 addr_mask_type addr_mask; 11950 rtx addr; 11951 rtx new_addr; 11952 rtx op_reg, op0, op1; 11953 rtx and_op; 11954 rtx cc_clobber; 11955 rtvec rv; 11956 11957 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem) 11958 || !base_reg_operand (scratch, GET_MODE (scratch))) 11959 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 11960 11961 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)) 11962 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR]; 11963 11964 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO)) 11965 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR]; 11966 11967 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)) 11968 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX]; 11969 11970 else 11971 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 11972 11973 /* Make sure the mode is valid in this register class. */ 11974 if ((addr_mask & RELOAD_REG_VALID) == 0) 11975 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 11976 11977 if (TARGET_DEBUG_ADDR) 11978 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p); 11979 11980 new_addr = addr = XEXP (mem, 0); 11981 switch (GET_CODE (addr)) 11982 { 11983 /* Does the register class support auto update forms for this mode? If 11984 not, do the update now. We don't need a scratch register, since the 11985 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. 
*/ 11986 case PRE_INC: 11987 case PRE_DEC: 11988 op_reg = XEXP (addr, 0); 11989 if (!base_reg_operand (op_reg, Pmode)) 11990 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 11991 11992 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0) 11993 { 11994 int delta = GET_MODE_SIZE (mode); 11995 if (GET_CODE (addr) == PRE_DEC) 11996 delta = -delta; 11997 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta))); 11998 new_addr = op_reg; 11999 } 12000 break; 12001 12002 case PRE_MODIFY: 12003 op0 = XEXP (addr, 0); 12004 op1 = XEXP (addr, 1); 12005 if (!base_reg_operand (op0, Pmode) 12006 || GET_CODE (op1) != PLUS 12007 || !rtx_equal_p (op0, XEXP (op1, 0))) 12008 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12009 12010 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0) 12011 { 12012 emit_insn (gen_rtx_SET (op0, op1)); 12013 new_addr = reg; 12014 } 12015 break; 12016 12017 /* Do we need to simulate AND -16 to clear the bottom address bits used 12018 in VMX load/stores? */ 12019 case AND: 12020 op0 = XEXP (addr, 0); 12021 op1 = XEXP (addr, 1); 12022 if ((addr_mask & RELOAD_REG_AND_M16) == 0) 12023 { 12024 if (REG_P (op0) || SUBREG_P (op0)) 12025 op_reg = op0; 12026 12027 else if (GET_CODE (op1) == PLUS) 12028 { 12029 emit_insn (gen_rtx_SET (scratch, op1)); 12030 op_reg = scratch; 12031 } 12032 12033 else 12034 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12035 12036 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1); 12037 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode)); 12038 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber); 12039 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv)); 12040 new_addr = scratch; 12041 } 12042 break; 12043 12044 /* If this is an indirect address, make sure it is a base register. */ 12045 case REG: 12046 case SUBREG: 12047 if (!base_reg_operand (addr, GET_MODE (addr))) 12048 { 12049 emit_insn (gen_rtx_SET (scratch, addr)); 12050 new_addr = scratch; 12051 } 12052 break; 12053 12054 /* If this is an indexed address, make sure the register class can handle 12055 indexed addresses for this mode. */ 12056 case PLUS: 12057 op0 = XEXP (addr, 0); 12058 op1 = XEXP (addr, 1); 12059 if (!base_reg_operand (op0, Pmode)) 12060 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12061 12062 else if (int_reg_operand (op1, Pmode)) 12063 { 12064 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 12065 { 12066 emit_insn (gen_rtx_SET (scratch, addr)); 12067 new_addr = scratch; 12068 } 12069 } 12070 12071 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1)) 12072 { 12073 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0) 12074 || !quad_address_p (addr, mode, false)) 12075 { 12076 emit_insn (gen_rtx_SET (scratch, addr)); 12077 new_addr = scratch; 12078 } 12079 } 12080 12081 /* Make sure the register class can handle offset addresses. 
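If it cannot, move the whole address into the scratch register and use that as the address instead.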
*/ 12082 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true)) 12083 { 12084 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12085 { 12086 emit_insn (gen_rtx_SET (scratch, addr)); 12087 new_addr = scratch; 12088 } 12089 } 12090 12091 else 12092 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12093 12094 break; 12095 12096 case LO_SUM: 12097 op0 = XEXP (addr, 0); 12098 op1 = XEXP (addr, 1); 12099 if (!base_reg_operand (op0, Pmode)) 12100 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12101 12102 else if (int_reg_operand (op1, Pmode)) 12103 { 12104 if ((addr_mask & RELOAD_REG_INDEXED) == 0) 12105 { 12106 emit_insn (gen_rtx_SET (scratch, addr)); 12107 new_addr = scratch; 12108 } 12109 } 12110 12111 /* Quad offsets are restricted and can't handle normal addresses. */ 12112 else if (mode_supports_dq_form (mode)) 12113 { 12114 emit_insn (gen_rtx_SET (scratch, addr)); 12115 new_addr = scratch; 12116 } 12117 12118 /* Make sure the register class can handle offset addresses. */ 12119 else if (legitimate_lo_sum_address_p (mode, addr, false)) 12120 { 12121 if ((addr_mask & RELOAD_REG_OFFSET) == 0) 12122 { 12123 emit_insn (gen_rtx_SET (scratch, addr)); 12124 new_addr = scratch; 12125 } 12126 } 12127 12128 else 12129 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12130 12131 break; 12132 12133 case SYMBOL_REF: 12134 case CONST: 12135 case LABEL_REF: 12136 rs6000_emit_move (scratch, addr, Pmode); 12137 new_addr = scratch; 12138 break; 12139 12140 default: 12141 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p); 12142 } 12143 12144 /* Adjust the address if it changed. */ 12145 if (addr != new_addr) 12146 { 12147 mem = replace_equiv_address_nv (mem, new_addr); 12148 if (TARGET_DEBUG_ADDR) 12149 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); 12150 } 12151 12152 /* Now create the move. */ 12153 if (store_p) 12154 emit_insn (gen_rtx_SET (mem, reg)); 12155 else 12156 emit_insn (gen_rtx_SET (reg, mem)); 12157 12158 return; 12159} 12160 12161/* Convert reloads involving 64-bit gprs and misaligned offset 12162 addressing, or multiple 32-bit gprs and offsets that are too large, 12163 to use indirect addressing. */ 12164 12165void 12166rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p) 12167{ 12168 int regno = true_regnum (reg); 12169 enum reg_class rclass; 12170 rtx addr; 12171 rtx scratch_or_premodify = scratch; 12172 12173 if (TARGET_DEBUG_ADDR) 12174 { 12175 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n", 12176 store_p ? 
"store" : "load"); 12177 fprintf (stderr, "reg:\n"); 12178 debug_rtx (reg); 12179 fprintf (stderr, "mem:\n"); 12180 debug_rtx (mem); 12181 fprintf (stderr, "scratch:\n"); 12182 debug_rtx (scratch); 12183 } 12184 12185 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno)); 12186 gcc_assert (MEM_P (mem)); 12187 rclass = REGNO_REG_CLASS (regno); 12188 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS); 12189 addr = XEXP (mem, 0); 12190 12191 if (GET_CODE (addr) == PRE_MODIFY) 12192 { 12193 gcc_assert (REG_P (XEXP (addr, 0)) 12194 && GET_CODE (XEXP (addr, 1)) == PLUS 12195 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0)); 12196 scratch_or_premodify = XEXP (addr, 0); 12197 addr = XEXP (addr, 1); 12198 } 12199 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM); 12200 12201 rs6000_emit_move (scratch_or_premodify, addr, Pmode); 12202 12203 mem = replace_equiv_address_nv (mem, scratch_or_premodify); 12204 12205 /* Now create the move. */ 12206 if (store_p) 12207 emit_insn (gen_rtx_SET (mem, reg)); 12208 else 12209 emit_insn (gen_rtx_SET (reg, mem)); 12210 12211 return; 12212} 12213 12214/* Given an rtx X being reloaded into a reg required to be 12215 in class CLASS, return the class of reg to actually use. 12216 In general this is just CLASS; but on some machines 12217 in some cases it is preferable to use a more restrictive class. 12218 12219 On the RS/6000, we have to return NO_REGS when we want to reload a 12220 floating-point CONST_DOUBLE to force it to be copied to memory. 12221 12222 We also don't want to reload integer values into floating-point 12223 registers if we can at all help it. In fact, this can 12224 cause reload to die, if it tries to generate a reload of CTR 12225 into a FP register and discovers it doesn't have the memory location 12226 required. 12227 12228 ??? Would it be a good idea to have reload do the converse, that is 12229 try to reload floating modes into FP registers if possible? 12230 */ 12231 12232static enum reg_class 12233rs6000_preferred_reload_class (rtx x, enum reg_class rclass) 12234{ 12235 machine_mode mode = GET_MODE (x); 12236 bool is_constant = CONSTANT_P (x); 12237 12238 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred 12239 reload class for it. */ 12240 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS) 12241 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0) 12242 return NO_REGS; 12243 12244 if ((rclass == FLOAT_REGS || rclass == VSX_REGS) 12245 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0) 12246 return NO_REGS; 12247 12248 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow 12249 the reloading of address expressions using PLUS into floating point 12250 registers. */ 12251 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS) 12252 { 12253 if (is_constant) 12254 { 12255 /* Zero is always allowed in all VSX registers. */ 12256 if (x == CONST0_RTX (mode)) 12257 return rclass; 12258 12259 /* If this is a vector constant that can be formed with a few Altivec 12260 instructions, we want altivec registers. */ 12261 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode)) 12262 return ALTIVEC_REGS; 12263 12264 /* If this is an integer constant that can easily be loaded into 12265 vector registers, allow it. */ 12266 if (CONST_INT_P (x)) 12267 { 12268 HOST_WIDE_INT value = INTVAL (x); 12269 12270 /* ISA 2.07 can generate -1 in all registers with XXLORC. 
ISA 12271 2.06 can generate it in the Altivec registers with 12272 VSPLTI<x>. */ 12273 if (value == -1) 12274 { 12275 if (TARGET_P8_VECTOR) 12276 return rclass; 12277 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS) 12278 return ALTIVEC_REGS; 12279 else 12280 return NO_REGS; 12281 } 12282 12283 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and 12284 a sign extend in the Altivec registers. */ 12285 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR 12286 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)) 12287 return ALTIVEC_REGS; 12288 } 12289 12290 /* Force constant to memory. */ 12291 return NO_REGS; 12292 } 12293 12294 /* D-form addressing can easily reload the value. */ 12295 if (mode_supports_vmx_dform (mode) 12296 || mode_supports_dq_form (mode)) 12297 return rclass; 12298 12299 /* If this is a scalar floating point value and we don't have D-form 12300 addressing, prefer the traditional floating point registers so that we 12301 can use D-form (register+offset) addressing. */ 12302 if (rclass == VSX_REGS 12303 && (mode == SFmode || GET_MODE_SIZE (mode) == 8)) 12304 return FLOAT_REGS; 12305 12306 /* Prefer the Altivec registers if Altivec is handling the vector 12307 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec 12308 loads. */ 12309 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) 12310 || mode == V1TImode) 12311 return ALTIVEC_REGS; 12312 12313 return rclass; 12314 } 12315 12316 if (is_constant || GET_CODE (x) == PLUS) 12317 { 12318 if (reg_class_subset_p (GENERAL_REGS, rclass)) 12319 return GENERAL_REGS; 12320 if (reg_class_subset_p (BASE_REGS, rclass)) 12321 return BASE_REGS; 12322 return NO_REGS; 12323 } 12324 12325 /* For the vector pair and vector quad modes, prefer their natural register 12326 (VSX or FPR) rather than GPR registers. For other integer types, prefer 12327 the GPR registers. */ 12328 if (rclass == GEN_OR_FLOAT_REGS) 12329 { 12330 if (mode == POImode) 12331 return VSX_REGS; 12332 12333 if (mode == PXImode) 12334 return FLOAT_REGS; 12335 12336 if (GET_MODE_CLASS (mode) == MODE_INT) 12337 return GENERAL_REGS; 12338 } 12339 12340 return rclass; 12341} 12342 12343/* Debug version of rs6000_preferred_reload_class. */ 12344static enum reg_class 12345rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) 12346{ 12347 enum reg_class ret = rs6000_preferred_reload_class (x, rclass); 12348 12349 fprintf (stderr, 12350 "\nrs6000_preferred_reload_class, return %s, rclass = %s, " 12351 "mode = %s, x:\n", 12352 reg_class_names[ret], reg_class_names[rclass], 12353 GET_MODE_NAME (GET_MODE (x))); 12354 debug_rtx (x); 12355 12356 return ret; 12357} 12358 12359/* If we are copying between FP or AltiVec registers and anything else, we need 12360 a memory location. The exception is when we are targeting ppc64 and the 12361 move to/from fpr to gpr instructions are available. Also, under VSX, you 12362 can copy vector registers from the FP register set to the Altivec register 12363 set and vice versa. 
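For example, moving a DImode value between a GPR and a floating point register has to go through memory unless the ISA 2.07 direct move instructions can be used.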
*/ 12364 12365static bool 12366rs6000_secondary_memory_needed (machine_mode mode, 12367 reg_class_t from_class, 12368 reg_class_t to_class) 12369{ 12370 enum rs6000_reg_type from_type, to_type; 12371 bool altivec_p = ((from_class == ALTIVEC_REGS) 12372 || (to_class == ALTIVEC_REGS)); 12373 12374 /* If a simple/direct move is available, we don't need secondary memory */ 12375 from_type = reg_class_to_reg_type[(int)from_class]; 12376 to_type = reg_class_to_reg_type[(int)to_class]; 12377 12378 if (rs6000_secondary_reload_move (to_type, from_type, mode, 12379 (secondary_reload_info *)0, altivec_p)) 12380 return false; 12381 12382 /* If we have a floating point or vector register class, we need to use 12383 memory to transfer the data. */ 12384 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) 12385 return true; 12386 12387 return false; 12388} 12389 12390/* Debug version of rs6000_secondary_memory_needed. */ 12391static bool 12392rs6000_debug_secondary_memory_needed (machine_mode mode, 12393 reg_class_t from_class, 12394 reg_class_t to_class) 12395{ 12396 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class); 12397 12398 fprintf (stderr, 12399 "rs6000_secondary_memory_needed, return: %s, from_class = %s, " 12400 "to_class = %s, mode = %s\n", 12401 ret ? "true" : "false", 12402 reg_class_names[from_class], 12403 reg_class_names[to_class], 12404 GET_MODE_NAME (mode)); 12405 12406 return ret; 12407} 12408 12409/* Return the register class of a scratch register needed to copy IN into 12410 or out of a register in RCLASS in MODE. If it can be done directly, 12411 NO_REGS is returned. */ 12412 12413static enum reg_class 12414rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode, 12415 rtx in) 12416{ 12417 int regno; 12418 12419 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN 12420#if TARGET_MACHO 12421 && MACHOPIC_INDIRECT 12422#endif 12423 )) 12424 { 12425 /* We cannot copy a symbolic operand directly into anything 12426 other than BASE_REGS for TARGET_ELF. So indicate that a 12427 register from BASE_REGS is needed as an intermediate 12428 register. 12429 12430 On Darwin, pic addresses require a load from memory, which 12431 needs a base register. */ 12432 if (rclass != BASE_REGS 12433 && (SYMBOL_REF_P (in) 12434 || GET_CODE (in) == HIGH 12435 || GET_CODE (in) == LABEL_REF 12436 || GET_CODE (in) == CONST)) 12437 return BASE_REGS; 12438 } 12439 12440 if (REG_P (in)) 12441 { 12442 regno = REGNO (in); 12443 if (!HARD_REGISTER_NUM_P (regno)) 12444 { 12445 regno = true_regnum (in); 12446 if (!HARD_REGISTER_NUM_P (regno)) 12447 regno = -1; 12448 } 12449 } 12450 else if (SUBREG_P (in)) 12451 { 12452 regno = true_regnum (in); 12453 if (!HARD_REGISTER_NUM_P (regno)) 12454 regno = -1; 12455 } 12456 else 12457 regno = -1; 12458 12459 /* If we have VSX register moves, prefer moving scalar values between 12460 Altivec registers and GPR by going via an FPR (and then via memory) 12461 instead of reloading the secondary memory address for Altivec moves. */ 12462 if (TARGET_VSX 12463 && GET_MODE_SIZE (mode) < 16 12464 && !mode_supports_vmx_dform (mode) 12465 && (((rclass == GENERAL_REGS || rclass == BASE_REGS) 12466 && (regno >= 0 && ALTIVEC_REGNO_P (regno))) 12467 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS) 12468 && (regno >= 0 && INT_REGNO_P (regno))))) 12469 return FLOAT_REGS; 12470 12471 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS 12472 into anything. 
*/ 12473 if (rclass == GENERAL_REGS || rclass == BASE_REGS 12474 || (regno >= 0 && INT_REGNO_P (regno))) 12475 return NO_REGS; 12476 12477 /* Constants, memory, and VSX registers can go into VSX registers (both the 12478 traditional floating point and the altivec registers). */ 12479 if (rclass == VSX_REGS 12480 && (regno == -1 || VSX_REGNO_P (regno))) 12481 return NO_REGS; 12482 12483 /* Constants, memory, and FP registers can go into FP registers. */ 12484 if ((regno == -1 || FP_REGNO_P (regno)) 12485 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS)) 12486 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS; 12487 12488 /* Memory, and AltiVec registers can go into AltiVec registers. */ 12489 if ((regno == -1 || ALTIVEC_REGNO_P (regno)) 12490 && rclass == ALTIVEC_REGS) 12491 return NO_REGS; 12492 12493 /* We can copy among the CR registers. */ 12494 if ((rclass == CR_REGS || rclass == CR0_REGS) 12495 && regno >= 0 && CR_REGNO_P (regno)) 12496 return NO_REGS; 12497 12498 /* Otherwise, we need GENERAL_REGS. */ 12499 return GENERAL_REGS; 12500} 12501 12502/* Debug version of rs6000_secondary_reload_class. */ 12503static enum reg_class 12504rs6000_debug_secondary_reload_class (enum reg_class rclass, 12505 machine_mode mode, rtx in) 12506{ 12507 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in); 12508 fprintf (stderr, 12509 "\nrs6000_secondary_reload_class, return %s, rclass = %s, " 12510 "mode = %s, input rtx:\n", 12511 reg_class_names[ret], reg_class_names[rclass], 12512 GET_MODE_NAME (mode)); 12513 debug_rtx (in); 12514 12515 return ret; 12516} 12517 12518/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 12519 12520static bool 12521rs6000_can_change_mode_class (machine_mode from, 12522 machine_mode to, 12523 reg_class_t rclass) 12524{ 12525 unsigned from_size = GET_MODE_SIZE (from); 12526 unsigned to_size = GET_MODE_SIZE (to); 12527 12528 if (from_size != to_size) 12529 { 12530 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; 12531 12532 if (reg_classes_intersect_p (xclass, rclass)) 12533 { 12534 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to); 12535 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from); 12536 bool to_float128_vector_p = FLOAT128_VECTOR_P (to); 12537 bool from_float128_vector_p = FLOAT128_VECTOR_P (from); 12538 12539 /* Don't allow 64-bit types to overlap with 128-bit types that take a 12540 single register under VSX because the scalar part of the register 12541 is in the upper 64-bits, and not the lower 64-bits. Types like 12542 TFmode/TDmode that take 2 scalar register can overlap. 128-bit 12543 IEEE floating point can't overlap, and neither can small 12544 values. */ 12545 12546 if (to_float128_vector_p && from_float128_vector_p) 12547 return true; 12548 12549 else if (to_float128_vector_p || from_float128_vector_p) 12550 return false; 12551 12552 /* TDmode in floating-mode registers must always go into a register 12553 pair with the most significant word in the even-numbered register 12554 to match ISA requirements. In little-endian mode, this does not 12555 match subreg numbering, so we cannot allow subregs. */ 12556 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode)) 12557 return false; 12558 12559 /* Allow SD<->DD changes, since SDmode values are stored in 12560 the low half of the DDmode, just like target-independent 12561 code expects. We need to allow at least SD->DD since 12562 rs6000_secondary_memory_needed_mode asks for that change 12563 to be made for SD reloads. 
*/ 12564 if ((to == DDmode && from == SDmode) 12565 || (to == SDmode && from == DDmode)) 12566 return true; 12567 12568 if (from_size < 8 || to_size < 8) 12569 return false; 12570 12571 if (from_size == 8 && (8 * to_nregs) != to_size) 12572 return false; 12573 12574 if (to_size == 8 && (8 * from_nregs) != from_size) 12575 return false; 12576 12577 return true; 12578 } 12579 else 12580 return true; 12581 } 12582 12583 /* Since the VSX register set includes traditional floating point registers 12584 and altivec registers, just check for the size being different instead of 12585 trying to check whether the modes are vector modes. Otherwise it won't 12586 allow say DF and DI to change classes. For types like TFmode and TDmode 12587 that take 2 64-bit registers, rather than a single 128-bit register, don't 12588 allow subregs of those types to other 128 bit types. */ 12589 if (TARGET_VSX && VSX_REG_CLASS_P (rclass)) 12590 { 12591 unsigned num_regs = (from_size + 15) / 16; 12592 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs 12593 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs) 12594 return false; 12595 12596 return (from_size == 8 || from_size == 16); 12597 } 12598 12599 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS 12600 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1) 12601 return false; 12602 12603 return true; 12604} 12605 12606/* Debug version of rs6000_can_change_mode_class. */ 12607static bool 12608rs6000_debug_can_change_mode_class (machine_mode from, 12609 machine_mode to, 12610 reg_class_t rclass) 12611{ 12612 bool ret = rs6000_can_change_mode_class (from, to, rclass); 12613 12614 fprintf (stderr, 12615 "rs6000_can_change_mode_class, return %s, from = %s, " 12616 "to = %s, rclass = %s\n", 12617 ret ? "true" : "false", 12618 GET_MODE_NAME (from), GET_MODE_NAME (to), 12619 reg_class_names[rclass]); 12620 12621 return ret; 12622} 12623 12624/* Return a string to do a move operation of 128 bits of data. */ 12625 12626const char * 12627rs6000_output_move_128bit (rtx operands[]) 12628{ 12629 rtx dest = operands[0]; 12630 rtx src = operands[1]; 12631 machine_mode mode = GET_MODE (dest); 12632 int dest_regno; 12633 int src_regno; 12634 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; 12635 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; 12636 12637 if (REG_P (dest)) 12638 { 12639 dest_regno = REGNO (dest); 12640 dest_gpr_p = INT_REGNO_P (dest_regno); 12641 dest_fp_p = FP_REGNO_P (dest_regno); 12642 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); 12643 dest_vsx_p = dest_fp_p | dest_vmx_p; 12644 } 12645 else 12646 { 12647 dest_regno = -1; 12648 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; 12649 } 12650 12651 if (REG_P (src)) 12652 { 12653 src_regno = REGNO (src); 12654 src_gpr_p = INT_REGNO_P (src_regno); 12655 src_fp_p = FP_REGNO_P (src_regno); 12656 src_vmx_p = ALTIVEC_REGNO_P (src_regno); 12657 src_vsx_p = src_fp_p | src_vmx_p; 12658 } 12659 else 12660 { 12661 src_regno = -1; 12662 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; 12663 } 12664 12665 /* Register moves. */ 12666 if (dest_regno >= 0 && src_regno >= 0) 12667 { 12668 if (dest_gpr_p) 12669 { 12670 if (src_gpr_p) 12671 return "#"; 12672 12673 if (TARGET_DIRECT_MOVE_128 && src_vsx_p) 12674 return (WORDS_BIG_ENDIAN 12675 ? 
"mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" 12676 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); 12677 12678 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) 12679 return "#"; 12680 } 12681 12682 else if (TARGET_VSX && dest_vsx_p) 12683 { 12684 if (src_vsx_p) 12685 return "xxlor %x0,%x1,%x1"; 12686 12687 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) 12688 return (WORDS_BIG_ENDIAN 12689 ? "mtvsrdd %x0,%1,%L1" 12690 : "mtvsrdd %x0,%L1,%1"); 12691 12692 else if (TARGET_DIRECT_MOVE && src_gpr_p) 12693 return "#"; 12694 } 12695 12696 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) 12697 return "vor %0,%1,%1"; 12698 12699 else if (dest_fp_p && src_fp_p) 12700 return "#"; 12701 } 12702 12703 /* Loads. */ 12704 else if (dest_regno >= 0 && MEM_P (src)) 12705 { 12706 if (dest_gpr_p) 12707 { 12708 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) 12709 return "lq %0,%1"; 12710 else 12711 return "#"; 12712 } 12713 12714 else if (TARGET_ALTIVEC && dest_vmx_p 12715 && altivec_indexed_or_indirect_operand (src, mode)) 12716 return "lvx %0,%y1"; 12717 12718 else if (TARGET_VSX && dest_vsx_p) 12719 { 12720 if (mode_supports_dq_form (mode) 12721 && quad_address_p (XEXP (src, 0), mode, true)) 12722 return "lxv %x0,%1"; 12723 12724 else if (TARGET_P9_VECTOR) 12725 return "lxvx %x0,%y1"; 12726 12727 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) 12728 return "lxvw4x %x0,%y1"; 12729 12730 else 12731 return "lxvd2x %x0,%y1"; 12732 } 12733 12734 else if (TARGET_ALTIVEC && dest_vmx_p) 12735 return "lvx %0,%y1"; 12736 12737 else if (dest_fp_p) 12738 return "#"; 12739 } 12740 12741 /* Stores. */ 12742 else if (src_regno >= 0 && MEM_P (dest)) 12743 { 12744 if (src_gpr_p) 12745 { 12746 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) 12747 return "stq %1,%0"; 12748 else 12749 return "#"; 12750 } 12751 12752 else if (TARGET_ALTIVEC && src_vmx_p 12753 && altivec_indexed_or_indirect_operand (dest, mode)) 12754 return "stvx %1,%y0"; 12755 12756 else if (TARGET_VSX && src_vsx_p) 12757 { 12758 if (mode_supports_dq_form (mode) 12759 && quad_address_p (XEXP (dest, 0), mode, true)) 12760 return "stxv %x1,%0"; 12761 12762 else if (TARGET_P9_VECTOR) 12763 return "stxvx %x1,%y0"; 12764 12765 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) 12766 return "stxvw4x %x1,%y0"; 12767 12768 else 12769 return "stxvd2x %x1,%y0"; 12770 } 12771 12772 else if (TARGET_ALTIVEC && src_vmx_p) 12773 return "stvx %1,%y0"; 12774 12775 else if (src_fp_p) 12776 return "#"; 12777 } 12778 12779 /* Constants. */ 12780 else if (dest_regno >= 0 12781 && (CONST_INT_P (src) 12782 || CONST_WIDE_INT_P (src) 12783 || CONST_DOUBLE_P (src) 12784 || GET_CODE (src) == CONST_VECTOR)) 12785 { 12786 if (dest_gpr_p) 12787 return "#"; 12788 12789 else if ((dest_vmx_p && TARGET_ALTIVEC) 12790 || (dest_vsx_p && TARGET_VSX)) 12791 return output_vec_const_move (operands); 12792 } 12793 12794 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); 12795} 12796 12797/* Validate a 128-bit move. */ 12798bool 12799rs6000_move_128bit_ok_p (rtx operands[]) 12800{ 12801 machine_mode mode = GET_MODE (operands[0]); 12802 return (gpc_reg_operand (operands[0], mode) 12803 || gpc_reg_operand (operands[1], mode)); 12804} 12805 12806/* Return true if a 128-bit move needs to be split. 
*/ 12807bool 12808rs6000_split_128bit_ok_p (rtx operands[]) 12809{ 12810 if (!reload_completed) 12811 return false; 12812 12813 if (!gpr_or_gpr_p (operands[0], operands[1])) 12814 return false; 12815 12816 if (quad_load_store_p (operands[0], operands[1])) 12817 return false; 12818 12819 return true; 12820} 12821 12822 12823/* Given a comparison operation, return the bit number in CCR to test. We 12824 know this is a valid comparison. 12825 12826 SCC_P is 1 if this is for an scc. That means that %D will have been 12827 used instead of %C, so the bits will be in different places. 12828 12829 Return -1 if OP isn't a valid comparison for some reason. */ 12830 12831int 12832ccr_bit (rtx op, int scc_p) 12833{ 12834 enum rtx_code code = GET_CODE (op); 12835 machine_mode cc_mode; 12836 int cc_regnum; 12837 int base_bit; 12838 rtx reg; 12839 12840 if (!COMPARISON_P (op)) 12841 return -1; 12842 12843 reg = XEXP (op, 0); 12844 12845 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg))) 12846 return -1; 12847 12848 cc_mode = GET_MODE (reg); 12849 cc_regnum = REGNO (reg); 12850 base_bit = 4 * (cc_regnum - CR0_REGNO); 12851 12852 validate_condition_mode (code, cc_mode); 12853 12854 /* When generating a sCOND operation, only positive conditions are 12855 allowed. */ 12856 if (scc_p) 12857 switch (code) 12858 { 12859 case EQ: 12860 case GT: 12861 case LT: 12862 case UNORDERED: 12863 case GTU: 12864 case LTU: 12865 break; 12866 default: 12867 return -1; 12868 } 12869 12870 switch (code) 12871 { 12872 case NE: 12873 return scc_p ? base_bit + 3 : base_bit + 2; 12874 case EQ: 12875 return base_bit + 2; 12876 case GT: case GTU: case UNLE: 12877 return base_bit + 1; 12878 case LT: case LTU: case UNGE: 12879 return base_bit; 12880 case ORDERED: case UNORDERED: 12881 return base_bit + 3; 12882 12883 case GE: case GEU: 12884 /* If scc, we will have done a cror to put the bit in the 12885 unordered position. So test that bit. For integer, this is ! LT 12886 unless this is an scc insn. */ 12887 return scc_p ? base_bit + 3 : base_bit; 12888 12889 case LE: case LEU: 12890 return scc_p ? base_bit + 3 : base_bit + 1; 12891 12892 default: 12893 return -1; 12894 } 12895} 12896 12897/* Return the GOT register. */ 12898 12899rtx 12900rs6000_got_register (rtx value ATTRIBUTE_UNUSED) 12901{ 12902 /* The second flow pass currently (June 1999) can't update 12903 regs_ever_live without disturbing other parts of the compiler, so 12904 update it here to make the prolog/epilogue code happy. */ 12905 if (!can_create_pseudo_p () 12906 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) 12907 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true); 12908 12909 crtl->uses_pic_offset_table = 1; 12910 12911 return pic_offset_table_rtx; 12912} 12913 12914#define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode) 12915 12916/* Write out a function code label. */ 12917 12918void 12919rs6000_output_function_entry (FILE *file, const char *fname) 12920{ 12921 if (fname[0] != '.') 12922 { 12923 switch (DEFAULT_ABI) 12924 { 12925 default: 12926 gcc_unreachable (); 12927 12928 case ABI_AIX: 12929 if (DOT_SYMBOLS) 12930 putc ('.', file); 12931 else 12932 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); 12933 break; 12934 12935 case ABI_ELFv2: 12936 case ABI_V4: 12937 case ABI_DARWIN: 12938 break; 12939 } 12940 } 12941 12942 RS6000_OUTPUT_BASENAME (file, fname); 12943} 12944 12945/* Print an operand. Recognize special options, documented below. 
*/ 12946 12947#if TARGET_ELF 12948/* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is 12949 only introduced by the linker, when applying the sda21 12950 relocation. */ 12951#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel") 12952#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13) 12953#else 12954#define SMALL_DATA_RELOC "sda21" 12955#define SMALL_DATA_REG 0 12956#endif 12957 12958void 12959print_operand (FILE *file, rtx x, int code) 12960{ 12961 int i; 12962 unsigned HOST_WIDE_INT uval; 12963 12964 switch (code) 12965 { 12966 /* %a is output_address. */ 12967 12968 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise 12969 output_operand. */ 12970 12971 case 'A': 12972 /* Write the MMA accumulator number associated with VSX register X. */ 12973 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0) 12974 output_operand_lossage ("invalid %%A value"); 12975 else 12976 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4); 12977 return; 12978 12979 case 'D': 12980 /* Like 'J' but get to the GT bit only. */ 12981 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 12982 { 12983 output_operand_lossage ("invalid %%D value"); 12984 return; 12985 } 12986 12987 /* Bit 1 is GT bit. */ 12988 i = 4 * (REGNO (x) - CR0_REGNO) + 1; 12989 12990 /* Add one for shift count in rlinm for scc. */ 12991 fprintf (file, "%d", i + 1); 12992 return; 12993 12994 case 'e': 12995 /* If the low 16 bits are 0, but some other bit is set, write 's'. */ 12996 if (! INT_P (x)) 12997 { 12998 output_operand_lossage ("invalid %%e value"); 12999 return; 13000 } 13001 13002 uval = INTVAL (x); 13003 if ((uval & 0xffff) == 0 && uval != 0) 13004 putc ('s', file); 13005 return; 13006 13007 case 'E': 13008 /* X is a CR register. Print the number of the EQ bit of the CR */ 13009 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13010 output_operand_lossage ("invalid %%E value"); 13011 else 13012 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2); 13013 return; 13014 13015 case 'f': 13016 /* X is a CR register. Print the shift count needed to move it 13017 to the high-order four bits. */ 13018 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13019 output_operand_lossage ("invalid %%f value"); 13020 else 13021 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO)); 13022 return; 13023 13024 case 'F': 13025 /* Similar, but print the count for the rotate in the opposite 13026 direction. */ 13027 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13028 output_operand_lossage ("invalid %%F value"); 13029 else 13030 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO)); 13031 return; 13032 13033 case 'G': 13034 /* X is a constant integer. If it is negative, print "m", 13035 otherwise print "z". This is to make an aze or ame insn. */ 13036 if (!CONST_INT_P (x)) 13037 output_operand_lossage ("invalid %%G value"); 13038 else if (INTVAL (x) >= 0) 13039 putc ('z', file); 13040 else 13041 putc ('m', file); 13042 return; 13043 13044 case 'h': 13045 /* If constant, output low-order five bits. Otherwise, write 13046 normally. */ 13047 if (INT_P (x)) 13048 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31); 13049 else 13050 print_operand (file, x, 0); 13051 return; 13052 13053 case 'H': 13054 /* If constant, output low-order six bits. Otherwise, write 13055 normally. */ 13056 if (INT_P (x)) 13057 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63); 13058 else 13059 print_operand (file, x, 0); 13060 return; 13061 13062 case 'I': 13063 /* Print `i' if this is a constant, else nothing. 
*/ 13064 if (INT_P (x)) 13065 putc ('i', file); 13066 return; 13067 13068 case 'j': 13069 /* Write the bit number in CCR for jump. */ 13070 i = ccr_bit (x, 0); 13071 if (i == -1) 13072 output_operand_lossage ("invalid %%j code"); 13073 else 13074 fprintf (file, "%d", i); 13075 return; 13076 13077 case 'J': 13078 /* Similar, but add one for shift count in rlinm for scc and pass 13079 scc flag to `ccr_bit'. */ 13080 i = ccr_bit (x, 1); 13081 if (i == -1) 13082 output_operand_lossage ("invalid %%J code"); 13083 else 13084 /* If we want bit 31, write a shift count of zero, not 32. */ 13085 fprintf (file, "%d", i == 31 ? 0 : i + 1); 13086 return; 13087 13088 case 'k': 13089 /* X must be a constant. Write the 1's complement of the 13090 constant. */ 13091 if (! INT_P (x)) 13092 output_operand_lossage ("invalid %%k value"); 13093 else 13094 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x)); 13095 return; 13096 13097 case 'K': 13098 /* X must be a symbolic constant on ELF. Write an 13099 expression suitable for an 'addi' that adds in the low 16 13100 bits of the MEM. */ 13101 if (GET_CODE (x) == CONST) 13102 { 13103 if (GET_CODE (XEXP (x, 0)) != PLUS 13104 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0)) 13105 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF) 13106 || !CONST_INT_P (XEXP (XEXP (x, 0), 1))) 13107 output_operand_lossage ("invalid %%K value"); 13108 } 13109 print_operand_address (file, x); 13110 fputs ("@l", file); 13111 return; 13112 13113 /* %l is output_asm_label. */ 13114 13115 case 'L': 13116 /* Write second word of DImode or DFmode reference. Works on register 13117 or non-indexed memory only. */ 13118 if (REG_P (x)) 13119 fputs (reg_names[REGNO (x) + 1], file); 13120 else if (MEM_P (x)) 13121 { 13122 machine_mode mode = GET_MODE (x); 13123 /* Handle possible auto-increment. Since it is pre-increment and 13124 we have already done it, we can just use an offset of word. */ 13125 if (GET_CODE (XEXP (x, 0)) == PRE_INC 13126 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 13127 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 13128 UNITS_PER_WORD)); 13129 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 13130 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 13131 UNITS_PER_WORD)); 13132 else 13133 output_address (mode, XEXP (adjust_address_nv (x, SImode, 13134 UNITS_PER_WORD), 13135 0)); 13136 13137 if (small_data_operand (x, GET_MODE (x))) 13138 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 13139 reg_names[SMALL_DATA_REG]); 13140 } 13141 return; 13142 13143 case 'N': /* Unused */ 13144 /* Write the number of elements in the vector times 4. */ 13145 if (GET_CODE (x) != PARALLEL) 13146 output_operand_lossage ("invalid %%N value"); 13147 else 13148 fprintf (file, "%d", XVECLEN (x, 0) * 4); 13149 return; 13150 13151 case 'O': /* Unused */ 13152 /* Similar, but subtract 1 first. */ 13153 if (GET_CODE (x) != PARALLEL) 13154 output_operand_lossage ("invalid %%O value"); 13155 else 13156 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4); 13157 return; 13158 13159 case 'p': 13160 /* X is a CONST_INT that is a power of two. Output the logarithm. */ 13161 if (! INT_P (x) 13162 || INTVAL (x) < 0 13163 || (i = exact_log2 (INTVAL (x))) < 0) 13164 output_operand_lossage ("invalid %%p value"); 13165 else 13166 fprintf (file, "%d", i); 13167 return; 13168 13169 case 'P': 13170 /* The operand must be an indirect memory reference. The result 13171 is the register name. 
*/ 13172 if (!MEM_P (x) || !REG_P (XEXP (x, 0)) 13173 || REGNO (XEXP (x, 0)) >= 32) 13174 output_operand_lossage ("invalid %%P value"); 13175 else 13176 fputs (reg_names[REGNO (XEXP (x, 0))], file); 13177 return; 13178 13179 case 'q': 13180 /* This outputs the logical code corresponding to a boolean 13181 expression. The expression may have one or both operands 13182 negated (if one, only the first one). For condition register 13183 logical operations, it will also treat the negated 13184 CR codes as NOTs, but not handle NOTs of them. */ 13185 { 13186 const char *const *t = 0; 13187 const char *s; 13188 enum rtx_code code = GET_CODE (x); 13189 static const char * const tbl[3][3] = { 13190 { "and", "andc", "nor" }, 13191 { "or", "orc", "nand" }, 13192 { "xor", "eqv", "xor" } }; 13193 13194 if (code == AND) 13195 t = tbl[0]; 13196 else if (code == IOR) 13197 t = tbl[1]; 13198 else if (code == XOR) 13199 t = tbl[2]; 13200 else 13201 output_operand_lossage ("invalid %%q value"); 13202 13203 if (GET_CODE (XEXP (x, 0)) != NOT) 13204 s = t[0]; 13205 else 13206 { 13207 if (GET_CODE (XEXP (x, 1)) == NOT) 13208 s = t[2]; 13209 else 13210 s = t[1]; 13211 } 13212 13213 fputs (s, file); 13214 } 13215 return; 13216 13217 case 'Q': 13218 if (! TARGET_MFCRF) 13219 return; 13220 fputc (',', file); 13221 /* FALLTHRU */ 13222 13223 case 'R': 13224 /* X is a CR register. Print the mask for `mtcrf'. */ 13225 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13226 output_operand_lossage ("invalid %%R value"); 13227 else 13228 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO)); 13229 return; 13230 13231 case 's': 13232 /* Low 5 bits of 32 - value */ 13233 if (! INT_P (x)) 13234 output_operand_lossage ("invalid %%s value"); 13235 else 13236 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31); 13237 return; 13238 13239 case 't': 13240 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */ 13241 if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) 13242 { 13243 output_operand_lossage ("invalid %%t value"); 13244 return; 13245 } 13246 13247 /* Bit 3 is OV bit. */ 13248 i = 4 * (REGNO (x) - CR0_REGNO) + 3; 13249 13250 /* If we want bit 31, write a shift count of zero, not 32. */ 13251 fprintf (file, "%d", i == 31 ? 0 : i + 1); 13252 return; 13253 13254 case 'T': 13255 /* Print the symbolic name of a branch target register. */ 13256 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 13257 x = XVECEXP (x, 0, 0); 13258 if (!REG_P (x) || (REGNO (x) != LR_REGNO 13259 && REGNO (x) != CTR_REGNO)) 13260 output_operand_lossage ("invalid %%T value"); 13261 else if (REGNO (x) == LR_REGNO) 13262 fputs ("lr", file); 13263 else 13264 fputs ("ctr", file); 13265 return; 13266 13267 case 'u': 13268 /* High-order or low-order 16 bits of constant, whichever is non-zero, 13269 for use in unsigned operand. */ 13270 if (! INT_P (x)) 13271 { 13272 output_operand_lossage ("invalid %%u value"); 13273 return; 13274 } 13275 13276 uval = INTVAL (x); 13277 if ((uval & 0xffff) == 0) 13278 uval >>= 16; 13279 13280 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff); 13281 return; 13282 13283 case 'v': 13284 /* High-order 16 bits of constant for use in signed operand. */ 13285 if (! INT_P (x)) 13286 output_operand_lossage ("invalid %%v value"); 13287 else 13288 fprintf (file, HOST_WIDE_INT_PRINT_HEX, 13289 (INTVAL (x) >> 16) & 0xffff); 13290 return; 13291 13292 case 'U': 13293 /* Print `u' if this has an auto-increment or auto-decrement. 
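	 For example, output templates in the machine description use this to
	 select the update form of a storage access, so a template such as
	 "stw%U0%X0 %1,%0" (shown for illustration) assembles to the update
	 form (stwu or stwux) when the address is a PRE_INC, PRE_DEC or
	 PRE_MODIFY.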
*/ 13294 if (MEM_P (x) 13295 && (GET_CODE (XEXP (x, 0)) == PRE_INC 13296 || GET_CODE (XEXP (x, 0)) == PRE_DEC 13297 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY)) 13298 putc ('u', file); 13299 return; 13300 13301 case 'V': 13302 /* Print the trap code for this operand. */ 13303 switch (GET_CODE (x)) 13304 { 13305 case EQ: 13306 fputs ("eq", file); /* 4 */ 13307 break; 13308 case NE: 13309 fputs ("ne", file); /* 24 */ 13310 break; 13311 case LT: 13312 fputs ("lt", file); /* 16 */ 13313 break; 13314 case LE: 13315 fputs ("le", file); /* 20 */ 13316 break; 13317 case GT: 13318 fputs ("gt", file); /* 8 */ 13319 break; 13320 case GE: 13321 fputs ("ge", file); /* 12 */ 13322 break; 13323 case LTU: 13324 fputs ("llt", file); /* 2 */ 13325 break; 13326 case LEU: 13327 fputs ("lle", file); /* 6 */ 13328 break; 13329 case GTU: 13330 fputs ("lgt", file); /* 1 */ 13331 break; 13332 case GEU: 13333 fputs ("lge", file); /* 5 */ 13334 break; 13335 default: 13336 output_operand_lossage ("invalid %%V value"); 13337 } 13338 break; 13339 13340 case 'w': 13341 /* If constant, low-order 16 bits of constant, signed. Otherwise, write 13342 normally. */ 13343 if (INT_P (x)) 13344 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 13345 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000); 13346 else 13347 print_operand (file, x, 0); 13348 return; 13349 13350 case 'x': 13351 /* X is a FPR or Altivec register used in a VSX context. */ 13352 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x))) 13353 output_operand_lossage ("invalid %%x value"); 13354 else 13355 { 13356 int reg = REGNO (x); 13357 int vsx_reg = (FP_REGNO_P (reg) 13358 ? reg - 32 13359 : reg - FIRST_ALTIVEC_REGNO + 32); 13360 13361#ifdef TARGET_REGNAMES 13362 if (TARGET_REGNAMES) 13363 fprintf (file, "%%vs%d", vsx_reg); 13364 else 13365#endif 13366 fprintf (file, "%d", vsx_reg); 13367 } 13368 return; 13369 13370 case 'X': 13371 if (MEM_P (x) 13372 && (legitimate_indexed_address_p (XEXP (x, 0), 0) 13373 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY 13374 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0)))) 13375 putc ('x', file); 13376 return; 13377 13378 case 'Y': 13379 /* Like 'L', for third word of TImode/PTImode */ 13380 if (REG_P (x)) 13381 fputs (reg_names[REGNO (x) + 2], file); 13382 else if (MEM_P (x)) 13383 { 13384 machine_mode mode = GET_MODE (x); 13385 if (GET_CODE (XEXP (x, 0)) == PRE_INC 13386 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 13387 output_address (mode, plus_constant (Pmode, 13388 XEXP (XEXP (x, 0), 0), 8)); 13389 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 13390 output_address (mode, plus_constant (Pmode, 13391 XEXP (XEXP (x, 0), 0), 8)); 13392 else 13393 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0)); 13394 if (small_data_operand (x, GET_MODE (x))) 13395 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 13396 reg_names[SMALL_DATA_REG]); 13397 } 13398 return; 13399 13400 case 'z': 13401 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 13402 x = XVECEXP (x, 0, 1); 13403 /* X is a SYMBOL_REF. Write out the name preceded by a 13404 period and without any trailing data in brackets. Used for function 13405 names. If we are configured for System V (or the embedded ABI) on 13406 the PowerPC, do not emit the period, since those systems do not use 13407 TOCs and the like. */ 13408 if (!SYMBOL_REF_P (x)) 13409 { 13410 output_operand_lossage ("invalid %%z value"); 13411 return; 13412 } 13413 13414 /* For macho, check to see if we need a stub. 
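	 That is, whether the call must be assembled against a Mach-O
	 indirection stub (the name produced by machopic_indirection_name
	 below) rather than against the symbol itself, which is only needed
	 for indirect PIC references to functions not defined in the current
	 translation unit.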
*/ 13415 if (TARGET_MACHO) 13416 { 13417 const char *name = XSTR (x, 0); 13418#if TARGET_MACHO 13419 if (darwin_symbol_stubs 13420 && MACHOPIC_INDIRECT 13421 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 13422 name = machopic_indirection_name (x, /*stub_p=*/true); 13423#endif 13424 assemble_name (file, name); 13425 } 13426 else if (!DOT_SYMBOLS) 13427 assemble_name (file, XSTR (x, 0)); 13428 else 13429 rs6000_output_function_entry (file, XSTR (x, 0)); 13430 return; 13431 13432 case 'Z': 13433 /* Like 'L', for last word of TImode/PTImode. */ 13434 if (REG_P (x)) 13435 fputs (reg_names[REGNO (x) + 3], file); 13436 else if (MEM_P (x)) 13437 { 13438 machine_mode mode = GET_MODE (x); 13439 if (GET_CODE (XEXP (x, 0)) == PRE_INC 13440 || GET_CODE (XEXP (x, 0)) == PRE_DEC) 13441 output_address (mode, plus_constant (Pmode, 13442 XEXP (XEXP (x, 0), 0), 12)); 13443 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 13444 output_address (mode, plus_constant (Pmode, 13445 XEXP (XEXP (x, 0), 0), 12)); 13446 else 13447 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0)); 13448 if (small_data_operand (x, GET_MODE (x))) 13449 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 13450 reg_names[SMALL_DATA_REG]); 13451 } 13452 return; 13453 13454 /* Print AltiVec memory operand. */ 13455 case 'y': 13456 { 13457 rtx tmp; 13458 13459 gcc_assert (MEM_P (x)); 13460 13461 tmp = XEXP (x, 0); 13462 13463 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x)) 13464 && GET_CODE (tmp) == AND 13465 && CONST_INT_P (XEXP (tmp, 1)) 13466 && INTVAL (XEXP (tmp, 1)) == -16) 13467 tmp = XEXP (tmp, 0); 13468 else if (VECTOR_MEM_VSX_P (GET_MODE (x)) 13469 && GET_CODE (tmp) == PRE_MODIFY) 13470 tmp = XEXP (tmp, 1); 13471 if (REG_P (tmp)) 13472 fprintf (file, "0,%s", reg_names[REGNO (tmp)]); 13473 else 13474 { 13475 if (GET_CODE (tmp) != PLUS 13476 || !REG_P (XEXP (tmp, 0)) 13477 || !REG_P (XEXP (tmp, 1))) 13478 { 13479 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint"); 13480 break; 13481 } 13482 13483 if (REGNO (XEXP (tmp, 0)) == 0) 13484 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ], 13485 reg_names[ REGNO (XEXP (tmp, 0)) ]); 13486 else 13487 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ], 13488 reg_names[ REGNO (XEXP (tmp, 1)) ]); 13489 } 13490 break; 13491 } 13492 13493 case 0: 13494 if (REG_P (x)) 13495 fprintf (file, "%s", reg_names[REGNO (x)]); 13496 else if (MEM_P (x)) 13497 { 13498 /* We need to handle PRE_INC and PRE_DEC here, since we need to 13499 know the width from the mode. */ 13500 if (GET_CODE (XEXP (x, 0)) == PRE_INC) 13501 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), 13502 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); 13503 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC) 13504 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)), 13505 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); 13506 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY) 13507 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); 13508 else 13509 output_address (GET_MODE (x), XEXP (x, 0)); 13510 } 13511 else if (toc_relative_expr_p (x, false, 13512 &tocrel_base_oac, &tocrel_offset_oac)) 13513 /* This hack along with a corresponding hack in 13514 rs6000_output_addr_const_extra arranges to output addends 13515 where the assembler expects to find them. eg. 13516 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4) 13517 without this hack would be output as "x@toc+4". We 13518 want "x+4@toc". 
*/ 13519 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac)); 13520 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD) 13521 output_addr_const (file, XVECEXP (x, 0, 0)); 13522 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ) 13523 output_addr_const (file, XVECEXP (x, 0, 1)); 13524 else 13525 output_addr_const (file, x); 13526 return; 13527 13528 case '&': 13529 if (const char *name = get_some_local_dynamic_name ()) 13530 assemble_name (file, name); 13531 else 13532 output_operand_lossage ("'%%&' used without any " 13533 "local dynamic TLS references"); 13534 return; 13535 13536 default: 13537 output_operand_lossage ("invalid %%xn code"); 13538 } 13539} 13540 13541/* Print the address of an operand. */ 13542 13543void 13544print_operand_address (FILE *file, rtx x) 13545{ 13546 if (REG_P (x)) 13547 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]); 13548 13549 /* Is it a PC-relative address? */ 13550 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode)) 13551 { 13552 HOST_WIDE_INT offset; 13553 13554 if (GET_CODE (x) == CONST) 13555 x = XEXP (x, 0); 13556 13557 if (GET_CODE (x) == PLUS) 13558 { 13559 offset = INTVAL (XEXP (x, 1)); 13560 x = XEXP (x, 0); 13561 } 13562 else 13563 offset = 0; 13564 13565 output_addr_const (file, x); 13566 13567 if (offset) 13568 fprintf (file, "%+" PRId64, offset); 13569 13570 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x)) 13571 fprintf (file, "@got"); 13572 13573 fprintf (file, "@pcrel"); 13574 } 13575 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST 13576 || GET_CODE (x) == LABEL_REF) 13577 { 13578 output_addr_const (file, x); 13579 if (small_data_operand (x, GET_MODE (x))) 13580 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, 13581 reg_names[SMALL_DATA_REG]); 13582 else 13583 gcc_assert (!TARGET_TOC); 13584 } 13585 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) 13586 && REG_P (XEXP (x, 1))) 13587 { 13588 if (REGNO (XEXP (x, 0)) == 0) 13589 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ], 13590 reg_names[ REGNO (XEXP (x, 0)) ]); 13591 else 13592 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ], 13593 reg_names[ REGNO (XEXP (x, 1)) ]); 13594 } 13595 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) 13596 && CONST_INT_P (XEXP (x, 1))) 13597 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", 13598 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]); 13599#if TARGET_MACHO 13600 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) 13601 && CONSTANT_P (XEXP (x, 1))) 13602 { 13603 fprintf (file, "lo16("); 13604 output_addr_const (file, XEXP (x, 1)); 13605 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); 13606 } 13607#endif 13608#if TARGET_ELF 13609 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0)) 13610 && CONSTANT_P (XEXP (x, 1))) 13611 { 13612 output_addr_const (file, XEXP (x, 1)); 13613 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]); 13614 } 13615#endif 13616 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac)) 13617 { 13618 /* This hack along with a corresponding hack in 13619 rs6000_output_addr_const_extra arranges to output addends 13620 where the assembler expects to find them. eg. 13621 (lo_sum (reg 9) 13622 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8)) 13623 without this hack would be output as "x@toc+8@l(9)". We 13624 want "x+8@toc@l(9)". 
*/ 13625 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac)); 13626 if (GET_CODE (x) == LO_SUM) 13627 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]); 13628 else 13629 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]); 13630 } 13631 else 13632 output_addr_const (file, x); 13633} 13634 13635/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ 13636 13637bool 13638rs6000_output_addr_const_extra (FILE *file, rtx x) 13639{ 13640 if (GET_CODE (x) == UNSPEC) 13641 switch (XINT (x, 1)) 13642 { 13643 case UNSPEC_TOCREL: 13644 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0)) 13645 && REG_P (XVECEXP (x, 0, 1)) 13646 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER); 13647 output_addr_const (file, XVECEXP (x, 0, 0)); 13648 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx) 13649 { 13650 if (INTVAL (tocrel_offset_oac) >= 0) 13651 fprintf (file, "+"); 13652 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac)); 13653 } 13654 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC)) 13655 { 13656 putc ('-', file); 13657 assemble_name (file, toc_label_name); 13658 need_toc_init = 1; 13659 } 13660 else if (TARGET_ELF) 13661 fputs ("@toc", file); 13662 return true; 13663 13664#if TARGET_MACHO 13665 case UNSPEC_MACHOPIC_OFFSET: 13666 output_addr_const (file, XVECEXP (x, 0, 0)); 13667 putc ('-', file); 13668 machopic_output_function_base_name (file); 13669 return true; 13670#endif 13671 } 13672 return false; 13673} 13674 13675/* Target hook for assembling integer objects. The PowerPC version has 13676 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP 13677 is defined. It also needs to handle DI-mode objects on 64-bit 13678 targets. */ 13679 13680static bool 13681rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p) 13682{ 13683#ifdef RELOCATABLE_NEEDS_FIXUP 13684 /* Special handling for SI values. */ 13685 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p) 13686 { 13687 static int recurse = 0; 13688 13689 /* For -mrelocatable, we mark all addresses that need to be fixed up in 13690 the .fixup section. Since the TOC section is already relocated, we 13691 don't need to mark it here. We used to skip the text section, but it 13692 should never be valid for relocated addresses to be placed in the text 13693 section. */ 13694 if (DEFAULT_ABI == ABI_V4 13695 && (TARGET_RELOCATABLE || flag_pic > 1) 13696 && in_section != toc_section 13697 && !recurse 13698 && !CONST_SCALAR_INT_P (x) 13699 && CONSTANT_P (x)) 13700 { 13701 char buf[256]; 13702 13703 recurse = 1; 13704 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno); 13705 fixuplabelno++; 13706 ASM_OUTPUT_LABEL (asm_out_file, buf); 13707 fprintf (asm_out_file, "\t.long\t("); 13708 output_addr_const (asm_out_file, x); 13709 fprintf (asm_out_file, ")@fixup\n"); 13710 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n"); 13711 ASM_OUTPUT_ALIGN (asm_out_file, 2); 13712 fprintf (asm_out_file, "\t.long\t"); 13713 assemble_name (asm_out_file, buf); 13714 fprintf (asm_out_file, "\n\t.previous\n"); 13715 recurse = 0; 13716 return true; 13717 } 13718 /* Remove initial .'s to turn a -mcall-aixdesc function 13719 address into the address of the descriptor, not the function 13720 itself. */ 13721 else if (SYMBOL_REF_P (x) 13722 && XSTR (x, 0)[0] == '.' 
13723 && DEFAULT_ABI == ABI_AIX) 13724 { 13725 const char *name = XSTR (x, 0); 13726 while (*name == '.') 13727 name++; 13728 13729 fprintf (asm_out_file, "\t.long\t%s\n", name); 13730 return true; 13731 } 13732 } 13733#endif /* RELOCATABLE_NEEDS_FIXUP */ 13734 return default_assemble_integer (x, size, aligned_p); 13735} 13736 13737/* Return a template string for assembly to emit when making an 13738 external call. FUNOP is the call mem argument operand number. */ 13739 13740static const char * 13741rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall) 13742{ 13743 /* -Wformat-overflow workaround, without which gcc thinks that %u 13744 might produce 10 digits. */ 13745 gcc_assert (funop <= MAX_RECOG_OPERANDS); 13746 13747 char arg[12]; 13748 arg[0] = 0; 13749 if (GET_CODE (operands[funop + 1]) == UNSPEC) 13750 { 13751 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD) 13752 sprintf (arg, "(%%%u@tlsgd)", funop + 1); 13753 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD) 13754 sprintf (arg, "(%%&@tlsld)"); 13755 } 13756 13757 /* The magic 32768 offset here corresponds to the offset of 13758 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */ 13759 char z[11]; 13760 sprintf (z, "%%z%u%s", funop, 13761 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2 13762 ? "+32768" : "")); 13763 13764 static char str[32]; /* 1 spare */ 13765 if (rs6000_pcrel_p (cfun)) 13766 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg); 13767 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 13768 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg, 13769 sibcall ? "" : "\n\tnop"); 13770 else if (DEFAULT_ABI == ABI_V4) 13771 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg, 13772 flag_pic ? "@plt" : ""); 13773#if TARGET_MACHO 13774 /* If/when we remove the mlongcall opt, we can share the AIX/ELGv2 case. */ 13775 else if (DEFAULT_ABI == ABI_DARWIN) 13776 { 13777 /* The cookie is in operand func+2. */ 13778 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT); 13779 int cookie = INTVAL (operands[funop + 2]); 13780 if (cookie & CALL_LONG) 13781 { 13782 tree funname = get_identifier (XSTR (operands[funop], 0)); 13783 tree labelname = get_prev_label (funname); 13784 gcc_checking_assert (labelname && !sibcall); 13785 13786 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl' 13787 instruction will reach 'foo', otherwise link as 'bl L42'". 13788 "L42" should be a 'branch island', that will do a far jump to 13789 'foo'. Branch islands are generated in 13790 macho_branch_islands(). */ 13791 sprintf (str, "jbsr %%z%u,%.10s", funop, 13792 IDENTIFIER_POINTER (labelname)); 13793 } 13794 else 13795 /* Same as AIX or ELFv2, except to keep backwards compat, no nop 13796 after the call. */ 13797 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg); 13798 } 13799#endif 13800 else 13801 gcc_unreachable (); 13802 return str; 13803} 13804 13805const char * 13806rs6000_call_template (rtx *operands, unsigned int funop) 13807{ 13808 return rs6000_call_template_1 (operands, funop, false); 13809} 13810 13811const char * 13812rs6000_sibcall_template (rtx *operands, unsigned int funop) 13813{ 13814 return rs6000_call_template_1 (operands, funop, true); 13815} 13816 13817/* As above, for indirect calls. */ 13818 13819static const char * 13820rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop, 13821 bool sibcall) 13822{ 13823 /* -Wformat-overflow workaround, without which gcc thinks that %u 13824 might produce 10 digits. 
Note that -Wformat-overflow will not 13825 currently warn here for str[], so do not rely on a warning to 13826 ensure str[] is correctly sized. */ 13827 gcc_assert (funop <= MAX_RECOG_OPERANDS); 13828 13829 /* Currently, funop is either 0 or 1. The maximum string is always 13830 a !speculate 64-bit __tls_get_addr call. 13831 13832 ABI_ELFv2, pcrel: 13833 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13834 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t 13835 . 9 crset 2\n\t 13836 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13837 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t 13838 . 8 beq%T1l- 13839 .--- 13840 .142 13841 13842 ABI_AIX: 13843 . 9 ld 2,%3\n\t 13844 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13845 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t 13846 . 9 crset 2\n\t 13847 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13848 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t 13849 . 10 beq%T1l-\n\t 13850 . 10 ld 2,%4(1) 13851 .--- 13852 .151 13853 13854 ABI_ELFv2: 13855 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13856 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t 13857 . 9 crset 2\n\t 13858 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13859 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t 13860 . 10 beq%T1l-\n\t 13861 . 10 ld 2,%3(1) 13862 .--- 13863 .142 13864 13865 ABI_V4: 13866 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13867 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t 13868 . 9 crset 2\n\t 13869 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t 13870 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t 13871 . 8 beq%T1l- 13872 .--- 13873 .141 */ 13874 static char str[160]; /* 8 spare */ 13875 char *s = str; 13876 const char *ptrload = TARGET_64BIT ? "d" : "wz"; 13877 13878 if (DEFAULT_ABI == ABI_AIX) 13879 s += sprintf (s, 13880 "l%s 2,%%%u\n\t", 13881 ptrload, funop + 3); 13882 13883 /* We don't need the extra code to stop indirect call speculation if 13884 calling via LR. */ 13885 bool speculate = (TARGET_MACHO 13886 || rs6000_speculate_indirect_jumps 13887 || (REG_P (operands[funop]) 13888 && REGNO (operands[funop]) == LR_REGNO)); 13889 13890 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC) 13891 { 13892 const char *rel64 = TARGET_64BIT ? "64" : ""; 13893 char tls[29]; 13894 tls[0] = 0; 13895 if (GET_CODE (operands[funop + 1]) == UNSPEC) 13896 { 13897 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD) 13898 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t", 13899 rel64, funop + 1); 13900 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD) 13901 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t", 13902 rel64); 13903 } 13904 13905 const char *notoc = rs6000_pcrel_p (cfun) ? "_NOTOC" : ""; 13906 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT 13907 && flag_pic == 2 ? 
"+32768" : ""); 13908 if (!speculate) 13909 { 13910 s += sprintf (s, 13911 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t", 13912 tls, rel64, notoc, funop, addend); 13913 s += sprintf (s, "crset 2\n\t"); 13914 } 13915 s += sprintf (s, 13916 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t", 13917 tls, rel64, notoc, funop, addend); 13918 } 13919 else if (!speculate) 13920 s += sprintf (s, "crset 2\n\t"); 13921 13922 if (rs6000_pcrel_p (cfun)) 13923 { 13924 if (speculate) 13925 sprintf (s, "b%%T%ul", funop); 13926 else 13927 sprintf (s, "beq%%T%ul-", funop); 13928 } 13929 else if (DEFAULT_ABI == ABI_AIX) 13930 { 13931 if (speculate) 13932 sprintf (s, 13933 "b%%T%ul\n\t" 13934 "l%s 2,%%%u(1)", 13935 funop, ptrload, funop + 4); 13936 else 13937 sprintf (s, 13938 "beq%%T%ul-\n\t" 13939 "l%s 2,%%%u(1)", 13940 funop, ptrload, funop + 4); 13941 } 13942 else if (DEFAULT_ABI == ABI_ELFv2) 13943 { 13944 if (speculate) 13945 sprintf (s, 13946 "b%%T%ul\n\t" 13947 "l%s 2,%%%u(1)", 13948 funop, ptrload, funop + 3); 13949 else 13950 sprintf (s, 13951 "beq%%T%ul-\n\t" 13952 "l%s 2,%%%u(1)", 13953 funop, ptrload, funop + 3); 13954 } 13955 else 13956 { 13957 if (speculate) 13958 sprintf (s, 13959 "b%%T%u%s", 13960 funop, sibcall ? "" : "l"); 13961 else 13962 sprintf (s, 13963 "beq%%T%u%s-%s", 13964 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : ""); 13965 } 13966 return str; 13967} 13968 13969const char * 13970rs6000_indirect_call_template (rtx *operands, unsigned int funop) 13971{ 13972 return rs6000_indirect_call_template_1 (operands, funop, false); 13973} 13974 13975const char * 13976rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop) 13977{ 13978 return rs6000_indirect_call_template_1 (operands, funop, true); 13979} 13980 13981#if HAVE_AS_PLTSEQ 13982/* Output indirect call insns. WHICH identifies the type of sequence. */ 13983const char * 13984rs6000_pltseq_template (rtx *operands, int which) 13985{ 13986 const char *rel64 = TARGET_64BIT ? "64" : ""; 13987 char tls[30]; 13988 tls[0] = 0; 13989 if (GET_CODE (operands[3]) == UNSPEC) 13990 { 13991 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4'; 13992 if (XINT (operands[3], 1) == UNSPEC_TLSGD) 13993 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t", 13994 off, rel64); 13995 else if (XINT (operands[3], 1) == UNSPEC_TLSLD) 13996 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t", 13997 off, rel64); 13998 } 13999 14000 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4); 14001 static char str[96]; /* 10 spare */ 14002 char off = WORDS_BIG_ENDIAN ? '2' : '4'; 14003 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT 14004 && flag_pic == 2 ? "+32768" : ""); 14005 switch (which) 14006 { 14007 case RS6000_PLTSEQ_TOCSAVE: 14008 sprintf (str, 14009 "st%s\n\t" 14010 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2", 14011 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)", 14012 tls, rel64); 14013 break; 14014 case RS6000_PLTSEQ_PLT16_HA: 14015 if (DEFAULT_ABI == ABI_V4 && !flag_pic) 14016 sprintf (str, 14017 "lis %%0,0\n\t" 14018 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2", 14019 tls, off, rel64); 14020 else 14021 sprintf (str, 14022 "addis %%0,%%1,0\n\t" 14023 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s", 14024 tls, off, rel64, addend); 14025 break; 14026 case RS6000_PLTSEQ_PLT16_LO: 14027 sprintf (str, 14028 "l%s %%0,0(%%1)\n\t" 14029 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s", 14030 TARGET_64BIT ? "d" : "wz", 14031 tls, off, rel64, TARGET_64BIT ? 
"_DS" : "", addend); 14032 break; 14033 case RS6000_PLTSEQ_MTCTR: 14034 sprintf (str, 14035 "mtctr %%1\n\t" 14036 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s", 14037 tls, rel64, addend); 14038 break; 14039 case RS6000_PLTSEQ_PLT_PCREL34: 14040 sprintf (str, 14041 "pl%s %%0,0(0),1\n\t" 14042 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2", 14043 TARGET_64BIT ? "d" : "wz", 14044 tls, rel64); 14045 break; 14046 default: 14047 gcc_unreachable (); 14048 } 14049 return str; 14050} 14051#endif 14052 14053#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO 14054/* Emit an assembler directive to set symbol visibility for DECL to 14055 VISIBILITY_TYPE. */ 14056 14057static void 14058rs6000_assemble_visibility (tree decl, int vis) 14059{ 14060 if (TARGET_XCOFF) 14061 return; 14062 14063 /* Functions need to have their entry point symbol visibility set as 14064 well as their descriptor symbol visibility. */ 14065 if (DEFAULT_ABI == ABI_AIX 14066 && DOT_SYMBOLS 14067 && TREE_CODE (decl) == FUNCTION_DECL) 14068 { 14069 static const char * const visibility_types[] = { 14070 NULL, "protected", "hidden", "internal" 14071 }; 14072 14073 const char *name, *type; 14074 14075 name = ((* targetm.strip_name_encoding) 14076 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)))); 14077 type = visibility_types[vis]; 14078 14079 fprintf (asm_out_file, "\t.%s\t%s\n", type, name); 14080 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name); 14081 } 14082 else 14083 default_assemble_visibility (decl, vis); 14084} 14085#endif 14086 14087enum rtx_code 14088rs6000_reverse_condition (machine_mode mode, enum rtx_code code) 14089{ 14090 /* Reversal of FP compares takes care -- an ordered compare 14091 becomes an unordered compare and vice versa. */ 14092 if (mode == CCFPmode 14093 && (!flag_finite_math_only 14094 || code == UNLT || code == UNLE || code == UNGT || code == UNGE 14095 || code == UNEQ || code == LTGT)) 14096 return reverse_condition_maybe_unordered (code); 14097 else 14098 return reverse_condition (code); 14099} 14100 14101/* Generate a compare for CODE. Return a brand-new rtx that 14102 represents the result of the compare. */ 14103 14104static rtx 14105rs6000_generate_compare (rtx cmp, machine_mode mode) 14106{ 14107 machine_mode comp_mode; 14108 rtx compare_result; 14109 enum rtx_code code = GET_CODE (cmp); 14110 rtx op0 = XEXP (cmp, 0); 14111 rtx op1 = XEXP (cmp, 1); 14112 14113 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode)) 14114 comp_mode = CCmode; 14115 else if (FLOAT_MODE_P (mode)) 14116 comp_mode = CCFPmode; 14117 else if (code == GTU || code == LTU 14118 || code == GEU || code == LEU) 14119 comp_mode = CCUNSmode; 14120 else if ((code == EQ || code == NE) 14121 && unsigned_reg_p (op0) 14122 && (unsigned_reg_p (op1) 14123 || (CONST_INT_P (op1) && INTVAL (op1) != 0))) 14124 /* These are unsigned values, perhaps there will be a later 14125 ordering compare that can be shared with this one. */ 14126 comp_mode = CCUNSmode; 14127 else 14128 comp_mode = CCmode; 14129 14130 /* If we have an unsigned compare, make sure we don't have a signed value as 14131 an immediate. */ 14132 if (comp_mode == CCUNSmode && CONST_INT_P (op1) 14133 && INTVAL (op1) < 0) 14134 { 14135 op0 = copy_rtx_if_shared (op0); 14136 op1 = force_reg (GET_MODE (op0), op1); 14137 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1); 14138 } 14139 14140 /* First, the compare. */ 14141 compare_result = gen_reg_rtx (comp_mode); 14142 14143 /* IEEE 128-bit support in VSX registers when we do not have hardware 14144 support. 
     */
14145   if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
14146     {
14147       rtx libfunc = NULL_RTX;
14148       bool check_nan = false;
14149       rtx dest;
14150
14151       switch (code)
14152         {
14153         case EQ:
14154         case NE:
14155           libfunc = optab_libfunc (eq_optab, mode);
14156           break;
14157
14158         case GT:
14159         case GE:
14160           libfunc = optab_libfunc (ge_optab, mode);
14161           break;
14162
14163         case LT:
14164         case LE:
14165           libfunc = optab_libfunc (le_optab, mode);
14166           break;
14167
14168         case UNORDERED:
14169         case ORDERED:
14170           libfunc = optab_libfunc (unord_optab, mode);
14171           code = (code == UNORDERED) ? NE : EQ;
14172           break;
14173
14174         case UNGE:
14175         case UNGT:
14176           check_nan = true;
14177           libfunc = optab_libfunc (ge_optab, mode);
14178           code = (code == UNGE) ? GE : GT;
14179           break;
14180
14181         case UNLE:
14182         case UNLT:
14183           check_nan = true;
14184           libfunc = optab_libfunc (le_optab, mode);
14185           code = (code == UNLE) ? LE : LT;
14186           break;
14187
14188         case UNEQ:
14189         case LTGT:
14190           check_nan = true;
14191           libfunc = optab_libfunc (eq_optab, mode);
14192           code = (code == UNEQ) ? EQ : NE;
14193           break;
14194
14195         default:
14196           gcc_unreachable ();
14197         }
14198
14199       gcc_assert (libfunc);
14200
14201       if (!check_nan)
14202         dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14203                                         SImode, op0, mode, op1, mode);
14204
14205       /* The library signals an exception for signalling NaNs, so we need to
14206          handle isgreater, etc. by first checking isordered.  */
14207       else
14208         {
14209           rtx ne_rtx, normal_dest, unord_dest;
14210           rtx unord_func = optab_libfunc (unord_optab, mode);
14211           rtx join_label = gen_label_rtx ();
14212           rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
14213           rtx unord_cmp = gen_reg_rtx (comp_mode);
14214
14215
14216           /* Test for either value being a NaN.  */
14217           gcc_assert (unord_func);
14218           unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
14219                                                 SImode, op0, mode, op1, mode);
14220
14221           /* Set value (1) if either value is a NaN, and jump to the join
14222              label.  */
14223           dest = gen_reg_rtx (SImode);
14224           emit_move_insn (dest, const1_rtx);
14225           emit_insn (gen_rtx_SET (unord_cmp,
14226                                   gen_rtx_COMPARE (comp_mode, unord_dest,
14227                                                    const0_rtx)));
14228
14229           ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
14230           emit_jump_insn (gen_rtx_SET (pc_rtx,
14231                                        gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
14232                                                              join_ref,
14233                                                              pc_rtx)));
14234
14235           /* Do the normal comparison, knowing that the values are not
14236              NaNs.  */
14237           normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
14238                                                  SImode, op0, mode, op1, mode);
14239
14240           emit_insn (gen_cstoresi4 (dest,
14241                                     gen_rtx_fmt_ee (code, SImode, normal_dest,
14242                                                     const0_rtx),
14243                                     normal_dest, const0_rtx));
14244
14245           /* Join NaN and non-NaN paths.  Compare dest against 0.  */
14246           emit_label (join_label);
14247           code = NE;
14248         }
14249
14250       emit_insn (gen_rtx_SET (compare_result,
14251                               gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
14252     }
14253
14254   else
14255     {
14256       /* Generate XLC-compatible TFmode compare as PARALLEL with extra
14257          CLOBBERs to match cmptf_internal2 pattern.
*/ 14258 if (comp_mode == CCFPmode && TARGET_XL_COMPAT 14259 && FLOAT128_IBM_P (GET_MODE (op0)) 14260 && TARGET_HARD_FLOAT) 14261 emit_insn (gen_rtx_PARALLEL (VOIDmode, 14262 gen_rtvec (10, 14263 gen_rtx_SET (compare_result, 14264 gen_rtx_COMPARE (comp_mode, op0, op1)), 14265 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14266 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14267 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14268 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14269 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14270 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14271 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14272 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), 14273 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode))))); 14274 else if (GET_CODE (op1) == UNSPEC 14275 && XINT (op1, 1) == UNSPEC_SP_TEST) 14276 { 14277 rtx op1b = XVECEXP (op1, 0, 0); 14278 comp_mode = CCEQmode; 14279 compare_result = gen_reg_rtx (CCEQmode); 14280 if (TARGET_64BIT) 14281 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b)); 14282 else 14283 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b)); 14284 } 14285 else 14286 emit_insn (gen_rtx_SET (compare_result, 14287 gen_rtx_COMPARE (comp_mode, op0, op1))); 14288 } 14289 14290 validate_condition_mode (code, GET_MODE (compare_result)); 14291 14292 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx); 14293} 14294 14295 14296/* Return the diagnostic message string if the binary operation OP is 14297 not permitted on TYPE1 and TYPE2, NULL otherwise. */ 14298 14299static const char* 14300rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED, 14301 const_tree type1, 14302 const_tree type2) 14303{ 14304 machine_mode mode1 = TYPE_MODE (type1); 14305 machine_mode mode2 = TYPE_MODE (type2); 14306 14307 /* For complex modes, use the inner type. */ 14308 if (COMPLEX_MODE_P (mode1)) 14309 mode1 = GET_MODE_INNER (mode1); 14310 14311 if (COMPLEX_MODE_P (mode2)) 14312 mode2 = GET_MODE_INNER (mode2); 14313 14314 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended 14315 double to intermix unless -mfloat128-convert. */ 14316 if (mode1 == mode2) 14317 return NULL; 14318 14319 if (!TARGET_FLOAT128_CVT) 14320 { 14321 if ((mode1 == KFmode && mode2 == IFmode) 14322 || (mode1 == IFmode && mode2 == KFmode)) 14323 return N_("__float128 and __ibm128 cannot be used in the same " 14324 "expression"); 14325 14326 if (TARGET_IEEEQUAD 14327 && ((mode1 == IFmode && mode2 == TFmode) 14328 || (mode1 == TFmode && mode2 == IFmode))) 14329 return N_("__ibm128 and long double cannot be used in the same " 14330 "expression"); 14331 14332 if (!TARGET_IEEEQUAD 14333 && ((mode1 == KFmode && mode2 == TFmode) 14334 || (mode1 == TFmode && mode2 == KFmode))) 14335 return N_("__float128 and long double cannot be used in the same " 14336 "expression"); 14337 } 14338 14339 return NULL; 14340} 14341 14342 14343/* Expand floating point conversion to/from __float128 and __ibm128. 
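   Roughly three strategies are used below: conversions between modes that
   share the same underlying representation are emitted as a simple
   FLOAT_EXTEND move, conversions with hardware support (TARGET_FLOAT128_HW)
   go through the *_hw insn generators collected in hw_conversions, and
   everything else is handed to a libgcc routine located via
   convert_optab_libfunc.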
*/ 14344 14345void 14346rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) 14347{ 14348 machine_mode dest_mode = GET_MODE (dest); 14349 machine_mode src_mode = GET_MODE (src); 14350 convert_optab cvt = unknown_optab; 14351 bool do_move = false; 14352 rtx libfunc = NULL_RTX; 14353 rtx dest2; 14354 typedef rtx (*rtx_2func_t) (rtx, rtx); 14355 rtx_2func_t hw_convert = (rtx_2func_t)0; 14356 size_t kf_or_tf; 14357 14358 struct hw_conv_t { 14359 rtx_2func_t from_df; 14360 rtx_2func_t from_sf; 14361 rtx_2func_t from_si_sign; 14362 rtx_2func_t from_si_uns; 14363 rtx_2func_t from_di_sign; 14364 rtx_2func_t from_di_uns; 14365 rtx_2func_t to_df; 14366 rtx_2func_t to_sf; 14367 rtx_2func_t to_si_sign; 14368 rtx_2func_t to_si_uns; 14369 rtx_2func_t to_di_sign; 14370 rtx_2func_t to_di_uns; 14371 } hw_conversions[2] = { 14372 /* convertions to/from KFmode */ 14373 { 14374 gen_extenddfkf2_hw, /* KFmode <- DFmode. */ 14375 gen_extendsfkf2_hw, /* KFmode <- SFmode. */ 14376 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */ 14377 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */ 14378 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */ 14379 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */ 14380 gen_trunckfdf2_hw, /* DFmode <- KFmode. */ 14381 gen_trunckfsf2_hw, /* SFmode <- KFmode. */ 14382 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */ 14383 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */ 14384 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */ 14385 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */ 14386 }, 14387 14388 /* convertions to/from TFmode */ 14389 { 14390 gen_extenddftf2_hw, /* TFmode <- DFmode. */ 14391 gen_extendsftf2_hw, /* TFmode <- SFmode. */ 14392 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */ 14393 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */ 14394 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */ 14395 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */ 14396 gen_trunctfdf2_hw, /* DFmode <- TFmode. */ 14397 gen_trunctfsf2_hw, /* SFmode <- TFmode. */ 14398 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */ 14399 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */ 14400 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */ 14401 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */ 14402 }, 14403 }; 14404 14405 if (dest_mode == src_mode) 14406 gcc_unreachable (); 14407 14408 /* Eliminate memory operations. */ 14409 if (MEM_P (src)) 14410 src = force_reg (src_mode, src); 14411 14412 if (MEM_P (dest)) 14413 { 14414 rtx tmp = gen_reg_rtx (dest_mode); 14415 rs6000_expand_float128_convert (tmp, src, unsigned_p); 14416 rs6000_emit_move (dest, tmp, dest_mode); 14417 return; 14418 } 14419 14420 /* Convert to IEEE 128-bit floating point. 
*/ 14421 if (FLOAT128_IEEE_P (dest_mode)) 14422 { 14423 if (dest_mode == KFmode) 14424 kf_or_tf = 0; 14425 else if (dest_mode == TFmode) 14426 kf_or_tf = 1; 14427 else 14428 gcc_unreachable (); 14429 14430 switch (src_mode) 14431 { 14432 case E_DFmode: 14433 cvt = sext_optab; 14434 hw_convert = hw_conversions[kf_or_tf].from_df; 14435 break; 14436 14437 case E_SFmode: 14438 cvt = sext_optab; 14439 hw_convert = hw_conversions[kf_or_tf].from_sf; 14440 break; 14441 14442 case E_KFmode: 14443 case E_IFmode: 14444 case E_TFmode: 14445 if (FLOAT128_IBM_P (src_mode)) 14446 cvt = sext_optab; 14447 else 14448 do_move = true; 14449 break; 14450 14451 case E_SImode: 14452 if (unsigned_p) 14453 { 14454 cvt = ufloat_optab; 14455 hw_convert = hw_conversions[kf_or_tf].from_si_uns; 14456 } 14457 else 14458 { 14459 cvt = sfloat_optab; 14460 hw_convert = hw_conversions[kf_or_tf].from_si_sign; 14461 } 14462 break; 14463 14464 case E_DImode: 14465 if (unsigned_p) 14466 { 14467 cvt = ufloat_optab; 14468 hw_convert = hw_conversions[kf_or_tf].from_di_uns; 14469 } 14470 else 14471 { 14472 cvt = sfloat_optab; 14473 hw_convert = hw_conversions[kf_or_tf].from_di_sign; 14474 } 14475 break; 14476 14477 default: 14478 gcc_unreachable (); 14479 } 14480 } 14481 14482 /* Convert from IEEE 128-bit floating point. */ 14483 else if (FLOAT128_IEEE_P (src_mode)) 14484 { 14485 if (src_mode == KFmode) 14486 kf_or_tf = 0; 14487 else if (src_mode == TFmode) 14488 kf_or_tf = 1; 14489 else 14490 gcc_unreachable (); 14491 14492 switch (dest_mode) 14493 { 14494 case E_DFmode: 14495 cvt = trunc_optab; 14496 hw_convert = hw_conversions[kf_or_tf].to_df; 14497 break; 14498 14499 case E_SFmode: 14500 cvt = trunc_optab; 14501 hw_convert = hw_conversions[kf_or_tf].to_sf; 14502 break; 14503 14504 case E_KFmode: 14505 case E_IFmode: 14506 case E_TFmode: 14507 if (FLOAT128_IBM_P (dest_mode)) 14508 cvt = trunc_optab; 14509 else 14510 do_move = true; 14511 break; 14512 14513 case E_SImode: 14514 if (unsigned_p) 14515 { 14516 cvt = ufix_optab; 14517 hw_convert = hw_conversions[kf_or_tf].to_si_uns; 14518 } 14519 else 14520 { 14521 cvt = sfix_optab; 14522 hw_convert = hw_conversions[kf_or_tf].to_si_sign; 14523 } 14524 break; 14525 14526 case E_DImode: 14527 if (unsigned_p) 14528 { 14529 cvt = ufix_optab; 14530 hw_convert = hw_conversions[kf_or_tf].to_di_uns; 14531 } 14532 else 14533 { 14534 cvt = sfix_optab; 14535 hw_convert = hw_conversions[kf_or_tf].to_di_sign; 14536 } 14537 break; 14538 14539 default: 14540 gcc_unreachable (); 14541 } 14542 } 14543 14544 /* Both IBM format. */ 14545 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode)) 14546 do_move = true; 14547 14548 else 14549 gcc_unreachable (); 14550 14551 /* Handle conversion between TFmode/KFmode/IFmode. */ 14552 if (do_move) 14553 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src))); 14554 14555 /* Handle conversion if we have hardware support. */ 14556 else if (TARGET_FLOAT128_HW && hw_convert) 14557 emit_insn ((hw_convert) (dest, src)); 14558 14559 /* Call an external function to do the conversion. 
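     The callee is whatever routine libgcc has registered for the conversion
     optab; for example, DFmode to KFmode without hardware support would
     typically resolve to something like __extenddfkf2 (routine name given
     for illustration only).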
*/ 14560 else if (cvt != unknown_optab) 14561 { 14562 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode); 14563 gcc_assert (libfunc != NULL_RTX); 14564 14565 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 14566 src, src_mode); 14567 14568 gcc_assert (dest2 != NULL_RTX); 14569 if (!rtx_equal_p (dest, dest2)) 14570 emit_move_insn (dest, dest2); 14571 } 14572 14573 else 14574 gcc_unreachable (); 14575 14576 return; 14577} 14578 14579 14580/* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH 14581 can be used as that dest register. Return the dest register. */ 14582 14583rtx 14584rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch) 14585{ 14586 if (op2 == const0_rtx) 14587 return op1; 14588 14589 if (GET_CODE (scratch) == SCRATCH) 14590 scratch = gen_reg_rtx (mode); 14591 14592 if (logical_operand (op2, mode)) 14593 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2))); 14594 else 14595 emit_insn (gen_rtx_SET (scratch, 14596 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2)))); 14597 14598 return scratch; 14599} 14600 14601/* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that 14602 requires this. The result is mode MODE. */ 14603rtx 14604rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x) 14605{ 14606 rtx cond[2]; 14607 int n = 0; 14608 if (code == LTGT || code == LE || code == UNLT) 14609 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx); 14610 if (code == LTGT || code == GE || code == UNGT) 14611 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx); 14612 if (code == LE || code == GE || code == UNEQ) 14613 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx); 14614 if (code == UNLT || code == UNGT || code == UNEQ) 14615 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx); 14616 14617 gcc_assert (n == 2); 14618 14619 rtx cc = gen_reg_rtx (CCEQmode); 14620 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]); 14621 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x)); 14622 14623 return cc; 14624} 14625 14626void 14627rs6000_emit_sCOND (machine_mode mode, rtx operands[]) 14628{ 14629 rtx condition_rtx = rs6000_generate_compare (operands[1], mode); 14630 rtx_code cond_code = GET_CODE (condition_rtx); 14631 14632 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode) 14633 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW)) 14634 ; 14635 else if (cond_code == NE 14636 || cond_code == GE || cond_code == LE 14637 || cond_code == GEU || cond_code == LEU 14638 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE) 14639 { 14640 rtx not_result = gen_reg_rtx (CCEQmode); 14641 rtx not_op, rev_cond_rtx; 14642 machine_mode cc_mode; 14643 14644 cc_mode = GET_MODE (XEXP (condition_rtx, 0)); 14645 14646 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code), 14647 SImode, XEXP (condition_rtx, 0), const0_rtx); 14648 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx); 14649 emit_insn (gen_rtx_SET (not_result, not_op)); 14650 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx); 14651 } 14652 14653 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0)); 14654 if (op_mode == VOIDmode) 14655 op_mode = GET_MODE (XEXP (operands[1], 1)); 14656 14657 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode))) 14658 { 14659 PUT_MODE (condition_rtx, DImode); 14660 convert_move (operands[0], condition_rtx, 0); 14661 } 14662 else 14663 { 14664 PUT_MODE (condition_rtx, SImode); 14665 emit_insn (gen_rtx_SET (operands[0], 
condition_rtx)); 14666 } 14667} 14668 14669/* Emit a branch of kind CODE to location LOC. */ 14670 14671void 14672rs6000_emit_cbranch (machine_mode mode, rtx operands[]) 14673{ 14674 rtx condition_rtx = rs6000_generate_compare (operands[0], mode); 14675 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); 14676 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx); 14677 emit_jump_insn (gen_rtx_SET (pc_rtx, ite)); 14678} 14679 14680/* Return the string to output a conditional branch to LABEL, which is 14681 the operand template of the label, or NULL if the branch is really a 14682 conditional return. 14683 14684 OP is the conditional expression. XEXP (OP, 0) is assumed to be a 14685 condition code register and its mode specifies what kind of 14686 comparison we made. 14687 14688 REVERSED is nonzero if we should reverse the sense of the comparison. 14689 14690 INSN is the insn. */ 14691 14692char * 14693output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn) 14694{ 14695 static char string[64]; 14696 enum rtx_code code = GET_CODE (op); 14697 rtx cc_reg = XEXP (op, 0); 14698 machine_mode mode = GET_MODE (cc_reg); 14699 int cc_regno = REGNO (cc_reg) - CR0_REGNO; 14700 int need_longbranch = label != NULL && get_attr_length (insn) == 8; 14701 int really_reversed = reversed ^ need_longbranch; 14702 char *s = string; 14703 const char *ccode; 14704 const char *pred; 14705 rtx note; 14706 14707 validate_condition_mode (code, mode); 14708 14709 /* Work out which way this really branches. We could use 14710 reverse_condition_maybe_unordered here always but this 14711 makes the resulting assembler clearer. */ 14712 if (really_reversed) 14713 { 14714 /* Reversal of FP compares takes care -- an ordered compare 14715 becomes an unordered compare and vice versa. */ 14716 if (mode == CCFPmode) 14717 code = reverse_condition_maybe_unordered (code); 14718 else 14719 code = reverse_condition (code); 14720 } 14721 14722 switch (code) 14723 { 14724 /* Not all of these are actually distinct opcodes, but 14725 we distinguish them for clarity of the resulting assembler. */ 14726 case NE: case LTGT: 14727 ccode = "ne"; break; 14728 case EQ: case UNEQ: 14729 ccode = "eq"; break; 14730 case GE: case GEU: 14731 ccode = "ge"; break; 14732 case GT: case GTU: case UNGT: 14733 ccode = "gt"; break; 14734 case LE: case LEU: 14735 ccode = "le"; break; 14736 case LT: case LTU: case UNLT: 14737 ccode = "lt"; break; 14738 case UNORDERED: ccode = "un"; break; 14739 case ORDERED: ccode = "nu"; break; 14740 case UNGE: ccode = "nl"; break; 14741 case UNLE: ccode = "ng"; break; 14742 default: 14743 gcc_unreachable (); 14744 } 14745 14746 /* Maybe we have a guess as to how likely the branch is. */ 14747 pred = ""; 14748 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX); 14749 if (note != NULL_RTX) 14750 { 14751 /* PROB is the difference from 50%. */ 14752 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0)) 14753 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2; 14754 14755 /* Only hint for highly probable/improbable branches on newer cpus when 14756 we have real profile data, as static prediction overrides processor 14757 dynamic prediction. For older cpus we may as well always hint, but 14758 assume not taken for branches that are very close to 50% as a 14759 mispredicted taken branch is more expensive than a 14760 mispredicted not-taken branch. 
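   For example, a short branch with a strongly-taken probability note comes
   out roughly as "bne+ 0,.L5", while an out-of-range branch is emitted in
   reversed form as "beq- 0,$+8" followed by an unconditional "b .L5"; the
   exact register spelling depends on reg_names, so this is only a sketch of
   the shape of the output.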
*/ 14761 if (rs6000_always_hint 14762 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48 14763 && (profile_status_for_fn (cfun) != PROFILE_GUESSED) 14764 && br_prob_note_reliable_p (note))) 14765 { 14766 if (abs (prob) > REG_BR_PROB_BASE / 20 14767 && ((prob > 0) ^ need_longbranch)) 14768 pred = "+"; 14769 else 14770 pred = "-"; 14771 } 14772 } 14773 14774 if (label == NULL) 14775 s += sprintf (s, "b%slr%s ", ccode, pred); 14776 else 14777 s += sprintf (s, "b%s%s ", ccode, pred); 14778 14779 /* We need to escape any '%' characters in the reg_names string. 14780 Assume they'd only be the first character.... */ 14781 if (reg_names[cc_regno + CR0_REGNO][0] == '%') 14782 *s++ = '%'; 14783 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]); 14784 14785 if (label != NULL) 14786 { 14787 /* If the branch distance was too far, we may have to use an 14788 unconditional branch to go the distance. */ 14789 if (need_longbranch) 14790 s += sprintf (s, ",$+8\n\tb %s", label); 14791 else 14792 s += sprintf (s, ",%s", label); 14793 } 14794 14795 return string; 14796} 14797 14798/* Return insn for VSX or Altivec comparisons. */ 14799 14800static rtx 14801rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1) 14802{ 14803 rtx mask; 14804 machine_mode mode = GET_MODE (op0); 14805 14806 switch (code) 14807 { 14808 default: 14809 break; 14810 14811 case GE: 14812 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 14813 return NULL_RTX; 14814 /* FALLTHRU */ 14815 14816 case EQ: 14817 case GT: 14818 case GTU: 14819 case ORDERED: 14820 case UNORDERED: 14821 case UNEQ: 14822 case LTGT: 14823 mask = gen_reg_rtx (mode); 14824 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1))); 14825 return mask; 14826 } 14827 14828 return NULL_RTX; 14829} 14830 14831/* Emit vector compare for operands OP0 and OP1 using code RCODE. 14832 DMODE is expected destination mode. This is a recursive function. */ 14833 14834static rtx 14835rs6000_emit_vector_compare (enum rtx_code rcode, 14836 rtx op0, rtx op1, 14837 machine_mode dmode) 14838{ 14839 rtx mask; 14840 bool swap_operands = false; 14841 bool try_again = false; 14842 14843 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode)); 14844 gcc_assert (GET_MODE (op0) == GET_MODE (op1)); 14845 14846 /* See if the comparison works as is. */ 14847 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); 14848 if (mask) 14849 return mask; 14850 14851 switch (rcode) 14852 { 14853 case LT: 14854 rcode = GT; 14855 swap_operands = true; 14856 try_again = true; 14857 break; 14858 case LTU: 14859 rcode = GTU; 14860 swap_operands = true; 14861 try_again = true; 14862 break; 14863 case NE: 14864 case UNLE: 14865 case UNLT: 14866 case UNGE: 14867 case UNGT: 14868 /* Invert condition and try again. 14869 e.g., A != B becomes ~(A==B). 
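   (The complement is generated with whatever one_cmpl_optab provides for the
   vector mode; on Altivec/VSX that is typically a vnor of the EQ mask with
   itself, so no extra constant needs to be loaded.)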
*/ 14870 { 14871 enum rtx_code rev_code; 14872 enum insn_code nor_code; 14873 rtx mask2; 14874 14875 rev_code = reverse_condition_maybe_unordered (rcode); 14876 if (rev_code == UNKNOWN) 14877 return NULL_RTX; 14878 14879 nor_code = optab_handler (one_cmpl_optab, dmode); 14880 if (nor_code == CODE_FOR_nothing) 14881 return NULL_RTX; 14882 14883 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode); 14884 if (!mask2) 14885 return NULL_RTX; 14886 14887 mask = gen_reg_rtx (dmode); 14888 emit_insn (GEN_FCN (nor_code) (mask, mask2)); 14889 return mask; 14890 } 14891 break; 14892 case GE: 14893 case GEU: 14894 case LE: 14895 case LEU: 14896 /* Try GT/GTU/LT/LTU OR EQ */ 14897 { 14898 rtx c_rtx, eq_rtx; 14899 enum insn_code ior_code; 14900 enum rtx_code new_code; 14901 14902 switch (rcode) 14903 { 14904 case GE: 14905 new_code = GT; 14906 break; 14907 14908 case GEU: 14909 new_code = GTU; 14910 break; 14911 14912 case LE: 14913 new_code = LT; 14914 break; 14915 14916 case LEU: 14917 new_code = LTU; 14918 break; 14919 14920 default: 14921 gcc_unreachable (); 14922 } 14923 14924 ior_code = optab_handler (ior_optab, dmode); 14925 if (ior_code == CODE_FOR_nothing) 14926 return NULL_RTX; 14927 14928 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode); 14929 if (!c_rtx) 14930 return NULL_RTX; 14931 14932 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode); 14933 if (!eq_rtx) 14934 return NULL_RTX; 14935 14936 mask = gen_reg_rtx (dmode); 14937 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); 14938 return mask; 14939 } 14940 break; 14941 default: 14942 return NULL_RTX; 14943 } 14944 14945 if (try_again) 14946 { 14947 if (swap_operands) 14948 std::swap (op0, op1); 14949 14950 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); 14951 if (mask) 14952 return mask; 14953 } 14954 14955 /* You only get two chances. */ 14956 return NULL_RTX; 14957} 14958 14959/* Emit vector conditional expression. DEST is destination. OP_TRUE and 14960 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two 14961 operands for the relation operation COND. */ 14962 14963int 14964rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, 14965 rtx cond, rtx cc_op0, rtx cc_op1) 14966{ 14967 machine_mode dest_mode = GET_MODE (dest); 14968 machine_mode mask_mode = GET_MODE (cc_op0); 14969 enum rtx_code rcode = GET_CODE (cond); 14970 machine_mode cc_mode = CCmode; 14971 rtx mask; 14972 rtx cond2; 14973 bool invert_move = false; 14974 14975 if (VECTOR_UNIT_NONE_P (dest_mode)) 14976 return 0; 14977 14978 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode) 14979 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode)); 14980 14981 switch (rcode) 14982 { 14983 /* Swap operands if we can, and fall back to doing the operation as 14984 specified, and doing a NOR to invert the test. */ 14985 case NE: 14986 case UNLE: 14987 case UNLT: 14988 case UNGE: 14989 case UNGT: 14990 /* Invert condition and try again. 14991 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */ 14992 invert_move = true; 14993 rcode = reverse_condition_maybe_unordered (rcode); 14994 if (rcode == UNKNOWN) 14995 return 0; 14996 break; 14997 14998 case GE: 14999 case LE: 15000 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT) 15001 { 15002 /* Invert condition to avoid compound test. */ 15003 invert_move = true; 15004 rcode = reverse_condition (rcode); 15005 } 15006 break; 15007 15008 case GTU: 15009 case GEU: 15010 case LTU: 15011 case LEU: 15012 /* Mark unsigned tests with CCUNSmode. 
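   For instance, A = (B >=u C) ? D : E is handled below as
   A = (B <u C) ? E : D, so a single vcmpgtu-style test is all that is
   needed.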
*/ 15013 cc_mode = CCUNSmode; 15014 15015 /* Invert condition to avoid compound test if necessary. */ 15016 if (rcode == GEU || rcode == LEU) 15017 { 15018 invert_move = true; 15019 rcode = reverse_condition (rcode); 15020 } 15021 break; 15022 15023 default: 15024 break; 15025 } 15026 15027 /* Get the vector mask for the given relational operations. */ 15028 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode); 15029 15030 if (!mask) 15031 return 0; 15032 15033 if (invert_move) 15034 std::swap (op_true, op_false); 15035 15036 /* Optimize vec1 == vec2, to know the mask generates -1/0. */ 15037 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT 15038 && (GET_CODE (op_true) == CONST_VECTOR 15039 || GET_CODE (op_false) == CONST_VECTOR)) 15040 { 15041 rtx constant_0 = CONST0_RTX (dest_mode); 15042 rtx constant_m1 = CONSTM1_RTX (dest_mode); 15043 15044 if (op_true == constant_m1 && op_false == constant_0) 15045 { 15046 emit_move_insn (dest, mask); 15047 return 1; 15048 } 15049 15050 else if (op_true == constant_0 && op_false == constant_m1) 15051 { 15052 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask))); 15053 return 1; 15054 } 15055 15056 /* If we can't use the vector comparison directly, perhaps we can use 15057 the mask for the true or false fields, instead of loading up a 15058 constant. */ 15059 if (op_true == constant_m1) 15060 op_true = mask; 15061 15062 if (op_false == constant_0) 15063 op_false = mask; 15064 } 15065 15066 if (!REG_P (op_true) && !SUBREG_P (op_true)) 15067 op_true = force_reg (dest_mode, op_true); 15068 15069 if (!REG_P (op_false) && !SUBREG_P (op_false)) 15070 op_false = force_reg (dest_mode, op_false); 15071 15072 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask), 15073 CONST0_RTX (dest_mode)); 15074 emit_insn (gen_rtx_SET (dest, 15075 gen_rtx_IF_THEN_ELSE (dest_mode, 15076 cond2, 15077 op_true, 15078 op_false))); 15079 return 1; 15080} 15081 15082/* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction 15083 for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last 15084 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the 15085 hardware has no such operation. */ 15086 15087static int 15088rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15089{ 15090 enum rtx_code code = GET_CODE (op); 15091 rtx op0 = XEXP (op, 0); 15092 rtx op1 = XEXP (op, 1); 15093 machine_mode compare_mode = GET_MODE (op0); 15094 machine_mode result_mode = GET_MODE (dest); 15095 bool max_p = false; 15096 15097 if (result_mode != compare_mode) 15098 return 0; 15099 15100 if (code == GE || code == GT) 15101 max_p = true; 15102 else if (code == LE || code == LT) 15103 max_p = false; 15104 else 15105 return 0; 15106 15107 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond)) 15108 ; 15109 15110 /* Only when NaNs and signed-zeros are not in effect, smax could be 15111 used for `op0 < op1 ? op1 : op0`, and smin could be used for 15112 `op0 > op1 ? op1 : op0`. */ 15113 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond) 15114 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode)) 15115 max_p = !max_p; 15116 15117 else 15118 return 0; 15119 15120 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1); 15121 return 1; 15122} 15123 15124/* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and 15125 XXSEL instructions for SF/DF scalars. 
Move TRUE_COND to DEST if OP of the 15126 operands of the last comparison is nonzero/true, FALSE_COND if it is 15127 zero/false. Return 0 if the hardware has no such operation. */ 15128 15129static int 15130rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15131{ 15132 enum rtx_code code = GET_CODE (op); 15133 rtx op0 = XEXP (op, 0); 15134 rtx op1 = XEXP (op, 1); 15135 machine_mode result_mode = GET_MODE (dest); 15136 rtx compare_rtx; 15137 rtx cmove_rtx; 15138 rtx clobber_rtx; 15139 15140 if (!can_create_pseudo_p ()) 15141 return 0; 15142 15143 switch (code) 15144 { 15145 case EQ: 15146 case GE: 15147 case GT: 15148 break; 15149 15150 case NE: 15151 case LT: 15152 case LE: 15153 code = swap_condition (code); 15154 std::swap (op0, op1); 15155 break; 15156 15157 default: 15158 return 0; 15159 } 15160 15161 /* Generate: [(parallel [(set (dest) 15162 (if_then_else (op (cmp1) (cmp2)) 15163 (true) 15164 (false))) 15165 (clobber (scratch))])]. */ 15166 15167 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1); 15168 cmove_rtx = gen_rtx_SET (dest, 15169 gen_rtx_IF_THEN_ELSE (result_mode, 15170 compare_rtx, 15171 true_cond, 15172 false_cond)); 15173 15174 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)); 15175 emit_insn (gen_rtx_PARALLEL (VOIDmode, 15176 gen_rtvec (2, cmove_rtx, clobber_rtx))); 15177 15178 return 1; 15179} 15180 15181/* Emit a conditional move: move TRUE_COND to DEST if OP of the 15182 operands of the last comparison is nonzero/true, FALSE_COND if it 15183 is zero/false. Return 0 if the hardware has no such operation. */ 15184 15185int 15186rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15187{ 15188 enum rtx_code code = GET_CODE (op); 15189 rtx op0 = XEXP (op, 0); 15190 rtx op1 = XEXP (op, 1); 15191 machine_mode compare_mode = GET_MODE (op0); 15192 machine_mode result_mode = GET_MODE (dest); 15193 rtx temp; 15194 bool is_against_zero; 15195 15196 /* These modes should always match. */ 15197 if (GET_MODE (op1) != compare_mode 15198 /* In the isel case however, we can use a compare immediate, so 15199 op1 may be a small constant. */ 15200 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode))) 15201 return 0; 15202 if (GET_MODE (true_cond) != result_mode) 15203 return 0; 15204 if (GET_MODE (false_cond) != result_mode) 15205 return 0; 15206 15207 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */ 15208 if (TARGET_P9_MINMAX 15209 && (compare_mode == SFmode || compare_mode == DFmode) 15210 && (result_mode == SFmode || result_mode == DFmode)) 15211 { 15212 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond)) 15213 return 1; 15214 15215 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond)) 15216 return 1; 15217 } 15218 15219 /* Don't allow using floating point comparisons for integer results for 15220 now. */ 15221 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode)) 15222 return 0; 15223 15224 /* First, work out if the hardware can do this at all, or 15225 if it's too slow.... */ 15226 if (!FLOAT_MODE_P (compare_mode)) 15227 { 15228 if (TARGET_ISEL) 15229 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond); 15230 return 0; 15231 } 15232 15233 is_against_zero = op1 == CONST0_RTX (compare_mode); 15234 15235 /* A floating-point subtract might overflow, underflow, or produce 15236 an inexact result, thus changing the floating-point flags, so it 15237 can't be generated if we care about that. 
It's safe if one side 15238 of the construct is zero, since then no subtract will be 15239 generated. */ 15240 if (SCALAR_FLOAT_MODE_P (compare_mode) 15241 && flag_trapping_math && ! is_against_zero) 15242 return 0; 15243 15244 /* Eliminate half of the comparisons by switching operands, this 15245 makes the remaining code simpler. */ 15246 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE 15247 || code == LTGT || code == LT || code == UNLE) 15248 { 15249 code = reverse_condition_maybe_unordered (code); 15250 temp = true_cond; 15251 true_cond = false_cond; 15252 false_cond = temp; 15253 } 15254 15255 /* UNEQ and LTGT take four instructions for a comparison with zero, 15256 it'll probably be faster to use a branch here too. */ 15257 if (code == UNEQ && HONOR_NANS (compare_mode)) 15258 return 0; 15259 15260 /* We're going to try to implement comparisons by performing 15261 a subtract, then comparing against zero. Unfortunately, 15262 Inf - Inf is NaN which is not zero, and so if we don't 15263 know that the operand is finite and the comparison 15264 would treat EQ different to UNORDERED, we can't do it. */ 15265 if (HONOR_INFINITIES (compare_mode) 15266 && code != GT && code != UNGE 15267 && (!CONST_DOUBLE_P (op1) 15268 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1))) 15269 /* Constructs of the form (a OP b ? a : b) are safe. */ 15270 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond)) 15271 || (! rtx_equal_p (op0, true_cond) 15272 && ! rtx_equal_p (op1, true_cond)))) 15273 return 0; 15274 15275 /* At this point we know we can use fsel. */ 15276 15277 /* Don't allow compare_mode other than SFmode or DFmode, for others there 15278 is no fsel instruction. */ 15279 if (compare_mode != SFmode && compare_mode != DFmode) 15280 return 0; 15281 15282 /* Reduce the comparison to a comparison against zero. */ 15283 if (! is_against_zero) 15284 { 15285 temp = gen_reg_rtx (compare_mode); 15286 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1))); 15287 op0 = temp; 15288 op1 = CONST0_RTX (compare_mode); 15289 } 15290 15291 /* If we don't care about NaNs we can reduce some of the comparisons 15292 down to faster ones. */ 15293 if (! HONOR_NANS (compare_mode)) 15294 switch (code) 15295 { 15296 case GT: 15297 code = LE; 15298 temp = true_cond; 15299 true_cond = false_cond; 15300 false_cond = temp; 15301 break; 15302 case UNGE: 15303 code = GE; 15304 break; 15305 case UNEQ: 15306 code = EQ; 15307 break; 15308 default: 15309 break; 15310 } 15311 15312 /* Now, reduce everything down to a GE. 
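   For example, the LE case below negates op0, so 'a <= 0 ? x : y' is treated
   as '-a >= 0 ? x : y', and the EQ case uses -fabs (a), which is only
   nonnegative when a is zero; the final fsel-style pattern then only ever
   has to test GE.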
*/ 15313 switch (code) 15314 { 15315 case GE: 15316 break; 15317 15318 case LE: 15319 temp = gen_reg_rtx (compare_mode); 15320 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 15321 op0 = temp; 15322 break; 15323 15324 case ORDERED: 15325 temp = gen_reg_rtx (compare_mode); 15326 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0))); 15327 op0 = temp; 15328 break; 15329 15330 case EQ: 15331 temp = gen_reg_rtx (compare_mode); 15332 emit_insn (gen_rtx_SET (temp, 15333 gen_rtx_NEG (compare_mode, 15334 gen_rtx_ABS (compare_mode, op0)))); 15335 op0 = temp; 15336 break; 15337 15338 case UNGE: 15339 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */ 15340 temp = gen_reg_rtx (result_mode); 15341 emit_insn (gen_rtx_SET (temp, 15342 gen_rtx_IF_THEN_ELSE (result_mode, 15343 gen_rtx_GE (VOIDmode, 15344 op0, op1), 15345 true_cond, false_cond))); 15346 false_cond = true_cond; 15347 true_cond = temp; 15348 15349 temp = gen_reg_rtx (compare_mode); 15350 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 15351 op0 = temp; 15352 break; 15353 15354 case GT: 15355 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */ 15356 temp = gen_reg_rtx (result_mode); 15357 emit_insn (gen_rtx_SET (temp, 15358 gen_rtx_IF_THEN_ELSE (result_mode, 15359 gen_rtx_GE (VOIDmode, 15360 op0, op1), 15361 true_cond, false_cond))); 15362 true_cond = false_cond; 15363 false_cond = temp; 15364 15365 temp = gen_reg_rtx (compare_mode); 15366 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0))); 15367 op0 = temp; 15368 break; 15369 15370 default: 15371 gcc_unreachable (); 15372 } 15373 15374 emit_insn (gen_rtx_SET (dest, 15375 gen_rtx_IF_THEN_ELSE (result_mode, 15376 gen_rtx_GE (VOIDmode, 15377 op0, op1), 15378 true_cond, false_cond))); 15379 return 1; 15380} 15381 15382/* Same as above, but for ints (isel). */ 15383 15384int 15385rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) 15386{ 15387 rtx condition_rtx, cr; 15388 machine_mode mode = GET_MODE (dest); 15389 enum rtx_code cond_code; 15390 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx); 15391 bool signedp; 15392 15393 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode)) 15394 return 0; 15395 15396 /* We still have to do the compare, because isel doesn't do a 15397 compare, it just looks at the CRx bits set by a previous compare 15398 instruction. */ 15399 condition_rtx = rs6000_generate_compare (op, mode); 15400 cond_code = GET_CODE (condition_rtx); 15401 cr = XEXP (condition_rtx, 0); 15402 signedp = GET_MODE (cr) == CCmode; 15403 15404 isel_func = (mode == SImode 15405 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si) 15406 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di)); 15407 15408 switch (cond_code) 15409 { 15410 case LT: case GT: case LTU: case GTU: case EQ: 15411 /* isel handles these directly. */ 15412 break; 15413 15414 default: 15415 /* We need to swap the sense of the comparison. */ 15416 { 15417 std::swap (false_cond, true_cond); 15418 PUT_CODE (condition_rtx, reverse_condition (cond_code)); 15419 } 15420 break; 15421 } 15422 15423 false_cond = force_reg (mode, false_cond); 15424 if (true_cond != const0_rtx) 15425 true_cond = force_reg (mode, true_cond); 15426 15427 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr)); 15428 15429 return 1; 15430} 15431 15432void 15433rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) 15434{ 15435 machine_mode mode = GET_MODE (op0); 15436 enum rtx_code c; 15437 rtx target; 15438 15439 /* VSX/altivec have direct min/max insns. 
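   (e.g. xvmaxdp/xvmindp for V2DFmode, vmaxsw/vminsw for V4SImode, and the
   scalar xsmaxdp/xsmindp when SFmode/DFmode values live in VSX registers),
   so a single SET of an SMAX/SMIN rtx is all that is needed here.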
*/ 15440 if ((code == SMAX || code == SMIN) 15441 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode) 15442 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode)))) 15443 { 15444 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1))); 15445 return; 15446 } 15447 15448 if (code == SMAX || code == SMIN) 15449 c = GE; 15450 else 15451 c = GEU; 15452 15453 if (code == SMAX || code == UMAX) 15454 target = emit_conditional_move (dest, c, op0, op1, mode, 15455 op0, op1, mode, 0); 15456 else 15457 target = emit_conditional_move (dest, c, op0, op1, mode, 15458 op1, op0, mode, 0); 15459 gcc_assert (target); 15460 if (target != dest) 15461 emit_move_insn (dest, target); 15462} 15463 15464/* A subroutine of the atomic operation splitters. Jump to LABEL if 15465 COND is true. Mark the jump as unlikely to be taken. */ 15466 15467static void 15468emit_unlikely_jump (rtx cond, rtx label) 15469{ 15470 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); 15471 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); 15472 add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); 15473} 15474 15475/* A subroutine of the atomic operation splitters. Emit a load-locked 15476 instruction in MODE. For QI/HImode, possibly use a pattern than includes 15477 the zero_extend operation. */ 15478 15479static void 15480emit_load_locked (machine_mode mode, rtx reg, rtx mem) 15481{ 15482 rtx (*fn) (rtx, rtx) = NULL; 15483 15484 switch (mode) 15485 { 15486 case E_QImode: 15487 fn = gen_load_lockedqi; 15488 break; 15489 case E_HImode: 15490 fn = gen_load_lockedhi; 15491 break; 15492 case E_SImode: 15493 if (GET_MODE (mem) == QImode) 15494 fn = gen_load_lockedqi_si; 15495 else if (GET_MODE (mem) == HImode) 15496 fn = gen_load_lockedhi_si; 15497 else 15498 fn = gen_load_lockedsi; 15499 break; 15500 case E_DImode: 15501 fn = gen_load_lockeddi; 15502 break; 15503 case E_TImode: 15504 fn = gen_load_lockedti; 15505 break; 15506 default: 15507 gcc_unreachable (); 15508 } 15509 emit_insn (fn (reg, mem)); 15510} 15511 15512/* A subroutine of the atomic operation splitters. Emit a store-conditional 15513 instruction in MODE. */ 15514 15515static void 15516emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val) 15517{ 15518 rtx (*fn) (rtx, rtx, rtx) = NULL; 15519 15520 switch (mode) 15521 { 15522 case E_QImode: 15523 fn = gen_store_conditionalqi; 15524 break; 15525 case E_HImode: 15526 fn = gen_store_conditionalhi; 15527 break; 15528 case E_SImode: 15529 fn = gen_store_conditionalsi; 15530 break; 15531 case E_DImode: 15532 fn = gen_store_conditionaldi; 15533 break; 15534 case E_TImode: 15535 fn = gen_store_conditionalti; 15536 break; 15537 default: 15538 gcc_unreachable (); 15539 } 15540 15541 /* Emit sync before stwcx. to address PPC405 Erratum. */ 15542 if (PPC405_ERRATUM77) 15543 emit_insn (gen_hwsync ()); 15544 15545 emit_insn (fn (res, mem, val)); 15546} 15547 15548/* Expand barriers before and after a load_locked/store_cond sequence. 
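   A minimal sketch of what the surrounding expanders build for a SEQ_CST
   word-sized compare-and-swap, assuming no subword adjustment is needed
   (register names are placeholders):

     sync
   retry:
     lwarx   rTMP,0,rMEM
     cmpw    cr0,rTMP,rOLD
     bne-    cr0,done
     stwcx.  rNEW,0,rMEM
     bne-    cr0,retry
   done:
     isync

   The leading sync comes from rs6000_pre_atomic_barrier and the trailing
   isync from rs6000_post_atomic_barrier.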
*/ 15549 15550static rtx 15551rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) 15552{ 15553 rtx addr = XEXP (mem, 0); 15554 15555 if (!legitimate_indirect_address_p (addr, reload_completed) 15556 && !legitimate_indexed_address_p (addr, reload_completed)) 15557 { 15558 addr = force_reg (Pmode, addr); 15559 mem = replace_equiv_address_nv (mem, addr); 15560 } 15561 15562 switch (model) 15563 { 15564 case MEMMODEL_RELAXED: 15565 case MEMMODEL_CONSUME: 15566 case MEMMODEL_ACQUIRE: 15567 break; 15568 case MEMMODEL_RELEASE: 15569 case MEMMODEL_ACQ_REL: 15570 emit_insn (gen_lwsync ()); 15571 break; 15572 case MEMMODEL_SEQ_CST: 15573 emit_insn (gen_hwsync ()); 15574 break; 15575 default: 15576 gcc_unreachable (); 15577 } 15578 return mem; 15579} 15580 15581static void 15582rs6000_post_atomic_barrier (enum memmodel model) 15583{ 15584 switch (model) 15585 { 15586 case MEMMODEL_RELAXED: 15587 case MEMMODEL_CONSUME: 15588 case MEMMODEL_RELEASE: 15589 break; 15590 case MEMMODEL_ACQUIRE: 15591 case MEMMODEL_ACQ_REL: 15592 case MEMMODEL_SEQ_CST: 15593 emit_insn (gen_isync ()); 15594 break; 15595 default: 15596 gcc_unreachable (); 15597 } 15598} 15599 15600/* A subroutine of the various atomic expanders. For sub-word operations, 15601 we must adjust things to operate on SImode. Given the original MEM, 15602 return a new aligned memory. Also build and return the quantities by 15603 which to shift and mask. */ 15604 15605static rtx 15606rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask) 15607{ 15608 rtx addr, align, shift, mask, mem; 15609 HOST_WIDE_INT shift_mask; 15610 machine_mode mode = GET_MODE (orig_mem); 15611 15612 /* For smaller modes, we have to implement this via SImode. */ 15613 shift_mask = (mode == QImode ? 0x18 : 0x10); 15614 15615 addr = XEXP (orig_mem, 0); 15616 addr = force_reg (GET_MODE (addr), addr); 15617 15618 /* Aligned memory containing subword. Generate a new memory. We 15619 do not want any of the existing MEM_ATTR data, as we're now 15620 accessing memory outside the original object. */ 15621 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4), 15622 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15623 mem = gen_rtx_MEM (SImode, align); 15624 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); 15625 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) 15626 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); 15627 15628 /* Shift amount for subword relative to aligned word. */ 15629 shift = gen_reg_rtx (SImode); 15630 addr = gen_lowpart (SImode, addr); 15631 rtx tmp = gen_reg_rtx (SImode); 15632 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3))); 15633 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask))); 15634 if (BYTES_BIG_ENDIAN) 15635 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask), 15636 shift, 1, OPTAB_LIB_WIDEN); 15637 *pshift = shift; 15638 15639 /* Mask for insertion. */ 15640 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)), 15641 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN); 15642 *pmask = mask; 15643 15644 return mem; 15645} 15646 15647/* A subroutine of the various atomic expanders. For sub-word operands, 15648 combine OLDVAL and NEWVAL via MASK. Returns a new pseduo. 
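   In effect the result is (OLDVAL & ~MASK) | NEWVAL, where NEWVAL is assumed
   to have already been shifted into position (and hence to be zero outside
   the field) by the caller.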
*/ 15649 15650static rtx 15651rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask) 15652{ 15653 rtx x; 15654 15655 x = gen_reg_rtx (SImode); 15656 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode, 15657 gen_rtx_NOT (SImode, mask), 15658 oldval))); 15659 15660 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN); 15661 15662 return x; 15663} 15664 15665/* A subroutine of the various atomic expanders. For sub-word operands, 15666 extract WIDE to NARROW via SHIFT. */ 15667 15668static void 15669rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift) 15670{ 15671 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift, 15672 wide, 1, OPTAB_LIB_WIDEN); 15673 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide)); 15674} 15675 15676/* Expand an atomic compare and swap operation. */ 15677 15678void 15679rs6000_expand_atomic_compare_and_swap (rtx operands[]) 15680{ 15681 rtx boolval, retval, mem, oldval, newval, cond; 15682 rtx label1, label2, x, mask, shift; 15683 machine_mode mode, orig_mode; 15684 enum memmodel mod_s, mod_f; 15685 bool is_weak; 15686 15687 boolval = operands[0]; 15688 retval = operands[1]; 15689 mem = operands[2]; 15690 oldval = operands[3]; 15691 newval = operands[4]; 15692 is_weak = (INTVAL (operands[5]) != 0); 15693 mod_s = memmodel_base (INTVAL (operands[6])); 15694 mod_f = memmodel_base (INTVAL (operands[7])); 15695 orig_mode = mode = GET_MODE (mem); 15696 15697 mask = shift = NULL_RTX; 15698 if (mode == QImode || mode == HImode) 15699 { 15700 /* Before power8, we didn't have access to lbarx/lharx, so generate a 15701 lwarx and shift/mask operations. With power8, we need to do the 15702 comparison in SImode, but the store is still done in QI/HImode. */ 15703 oldval = convert_modes (SImode, mode, oldval, 1); 15704 15705 if (!TARGET_SYNC_HI_QI) 15706 { 15707 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); 15708 15709 /* Shift and mask OLDVAL into position with the word. */ 15710 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift, 15711 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15712 15713 /* Shift and mask NEWVAL into position within the word. */ 15714 newval = convert_modes (SImode, mode, newval, 1); 15715 newval = expand_simple_binop (SImode, ASHIFT, newval, shift, 15716 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15717 } 15718 15719 /* Prepare to adjust the return value. */ 15720 retval = gen_reg_rtx (SImode); 15721 mode = SImode; 15722 } 15723 else if (reg_overlap_mentioned_p (retval, oldval)) 15724 oldval = copy_to_reg (oldval); 15725 15726 if (mode != TImode && !reg_or_short_operand (oldval, mode)) 15727 oldval = copy_to_mode_reg (mode, oldval); 15728 15729 if (reg_overlap_mentioned_p (retval, newval)) 15730 newval = copy_to_reg (newval); 15731 15732 mem = rs6000_pre_atomic_barrier (mem, mod_s); 15733 15734 label1 = NULL_RTX; 15735 if (!is_weak) 15736 { 15737 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 15738 emit_label (XEXP (label1, 0)); 15739 } 15740 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 15741 15742 emit_load_locked (mode, retval, mem); 15743 15744 x = retval; 15745 if (mask) 15746 x = expand_simple_binop (SImode, AND, retval, mask, 15747 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15748 15749 cond = gen_reg_rtx (CCmode); 15750 /* If we have TImode, synthesize a comparison. 
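   There is no 128-bit integer compare instruction, so the equality test is
   built as ((new0 ^ old0) | (new1 ^ old1)) and that result is compared
   against zero, one xor per 64-bit half.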
*/ 15751 if (mode != TImode) 15752 x = gen_rtx_COMPARE (CCmode, x, oldval); 15753 else 15754 { 15755 rtx xor1_result = gen_reg_rtx (DImode); 15756 rtx xor2_result = gen_reg_rtx (DImode); 15757 rtx or_result = gen_reg_rtx (DImode); 15758 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0); 15759 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8); 15760 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0); 15761 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8); 15762 15763 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0)); 15764 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1)); 15765 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result)); 15766 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx); 15767 } 15768 15769 emit_insn (gen_rtx_SET (cond, x)); 15770 15771 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 15772 emit_unlikely_jump (x, label2); 15773 15774 x = newval; 15775 if (mask) 15776 x = rs6000_mask_atomic_subword (retval, newval, mask); 15777 15778 emit_store_conditional (orig_mode, cond, mem, x); 15779 15780 if (!is_weak) 15781 { 15782 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 15783 emit_unlikely_jump (x, label1); 15784 } 15785 15786 if (!is_mm_relaxed (mod_f)) 15787 emit_label (XEXP (label2, 0)); 15788 15789 rs6000_post_atomic_barrier (mod_s); 15790 15791 if (is_mm_relaxed (mod_f)) 15792 emit_label (XEXP (label2, 0)); 15793 15794 if (shift) 15795 rs6000_finish_atomic_subword (operands[1], retval, shift); 15796 else if (mode != GET_MODE (operands[1])) 15797 convert_move (operands[1], retval, 1); 15798 15799 /* In all cases, CR0 contains EQ on success, and NE on failure. */ 15800 x = gen_rtx_EQ (SImode, cond, const0_rtx); 15801 emit_insn (gen_rtx_SET (boolval, x)); 15802} 15803 15804/* Expand an atomic exchange operation. */ 15805 15806void 15807rs6000_expand_atomic_exchange (rtx operands[]) 15808{ 15809 rtx retval, mem, val, cond; 15810 machine_mode mode; 15811 enum memmodel model; 15812 rtx label, x, mask, shift; 15813 15814 retval = operands[0]; 15815 mem = operands[1]; 15816 val = operands[2]; 15817 model = memmodel_base (INTVAL (operands[3])); 15818 mode = GET_MODE (mem); 15819 15820 mask = shift = NULL_RTX; 15821 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode)) 15822 { 15823 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask); 15824 15825 /* Shift and mask VAL into position with the word. */ 15826 val = convert_modes (SImode, mode, val, 1); 15827 val = expand_simple_binop (SImode, ASHIFT, val, shift, 15828 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15829 15830 /* Prepare to adjust the return value. */ 15831 retval = gen_reg_rtx (SImode); 15832 mode = SImode; 15833 } 15834 15835 mem = rs6000_pre_atomic_barrier (mem, model); 15836 15837 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); 15838 emit_label (XEXP (label, 0)); 15839 15840 emit_load_locked (mode, retval, mem); 15841 15842 x = val; 15843 if (mask) 15844 x = rs6000_mask_atomic_subword (retval, val, mask); 15845 15846 cond = gen_reg_rtx (CCmode); 15847 emit_store_conditional (mode, cond, mem, x); 15848 15849 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 15850 emit_unlikely_jump (x, label); 15851 15852 rs6000_post_atomic_barrier (model); 15853 15854 if (shift) 15855 rs6000_finish_atomic_subword (operands[0], retval, shift); 15856} 15857 15858/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation 15859 to perform. MEM is the memory on which to operate. VAL is the second 15860 operand of the binary operator. 
BEFORE and AFTER are optional locations to
15861 return the value of MEM either before or after the operation. MODEL_RTX
15862 is a CONST_INT containing the memory model to use. */
15863
15864void
15865rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
15866 rtx orig_before, rtx orig_after, rtx model_rtx)
15867{
15868 enum memmodel model = memmodel_base (INTVAL (model_rtx));
15869 machine_mode mode = GET_MODE (mem);
15870 machine_mode store_mode = mode;
15871 rtx label, x, cond, mask, shift;
15872 rtx before = orig_before, after = orig_after;
15873
15874 mask = shift = NULL_RTX;
15875 /* On power8, we want to use SImode for the operation. On previous systems,
15876 use the operation in a subword and shift/mask to get the proper byte or
15877 halfword. */
15878 if (mode == QImode || mode == HImode)
15879 {
15880 if (TARGET_SYNC_HI_QI)
15881 {
15882 val = convert_modes (SImode, mode, val, 1);
15883
15884 /* Prepare to adjust the return value. */
15885 before = gen_reg_rtx (SImode);
15886 if (after)
15887 after = gen_reg_rtx (SImode);
15888 mode = SImode;
15889 }
15890 else
15891 {
15892 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
15893
15894 /* Shift and mask VAL into position with the word. */
15895 val = convert_modes (SImode, mode, val, 1);
15896 val = expand_simple_binop (SImode, ASHIFT, val, shift,
15897 NULL_RTX, 1, OPTAB_LIB_WIDEN);
15898
15899 switch (code)
15900 {
15901 case IOR:
15902 case XOR:
15903 /* We've already zero-extended VAL. That is sufficient to
15904 make certain that it does not affect other bits. */
15905 mask = NULL;
15906 break;
15907
15908 case AND:
15909 /* If we make certain that all of the other bits in VAL are
15910 set, that will be sufficient to not affect other bits. */
15911 x = gen_rtx_NOT (SImode, mask);
15912 x = gen_rtx_IOR (SImode, x, val);
15913 emit_insn (gen_rtx_SET (val, x));
15914 mask = NULL;
15915 break;
15916
15917 case NOT:
15918 case PLUS:
15919 case MINUS:
15920 /* These will all affect bits outside the field and need
15921 adjustment via MASK within the loop. */
15922 break;
15923
15924 default:
15925 gcc_unreachable ();
15926 }
15927
15928 /* Prepare to adjust the return value.
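   The loop below operates on the containing SImode word, so BEFORE and AFTER
   are SImode temporaries here; the caller's QImode/HImode results are
   extracted at the end with rs6000_finish_atomic_subword.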
*/ 15929 before = gen_reg_rtx (SImode); 15930 if (after) 15931 after = gen_reg_rtx (SImode); 15932 store_mode = mode = SImode; 15933 } 15934 } 15935 15936 mem = rs6000_pre_atomic_barrier (mem, model); 15937 15938 label = gen_label_rtx (); 15939 emit_label (label); 15940 label = gen_rtx_LABEL_REF (VOIDmode, label); 15941 15942 if (before == NULL_RTX) 15943 before = gen_reg_rtx (mode); 15944 15945 emit_load_locked (mode, before, mem); 15946 15947 if (code == NOT) 15948 { 15949 x = expand_simple_binop (mode, AND, before, val, 15950 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15951 after = expand_simple_unop (mode, NOT, x, after, 1); 15952 } 15953 else 15954 { 15955 after = expand_simple_binop (mode, code, before, val, 15956 after, 1, OPTAB_LIB_WIDEN); 15957 } 15958 15959 x = after; 15960 if (mask) 15961 { 15962 x = expand_simple_binop (SImode, AND, after, mask, 15963 NULL_RTX, 1, OPTAB_LIB_WIDEN); 15964 x = rs6000_mask_atomic_subword (before, x, mask); 15965 } 15966 else if (store_mode != mode) 15967 x = convert_modes (store_mode, mode, x, 1); 15968 15969 cond = gen_reg_rtx (CCmode); 15970 emit_store_conditional (store_mode, cond, mem, x); 15971 15972 x = gen_rtx_NE (VOIDmode, cond, const0_rtx); 15973 emit_unlikely_jump (x, label); 15974 15975 rs6000_post_atomic_barrier (model); 15976 15977 if (shift) 15978 { 15979 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and 15980 then do the calcuations in a SImode register. */ 15981 if (orig_before) 15982 rs6000_finish_atomic_subword (orig_before, before, shift); 15983 if (orig_after) 15984 rs6000_finish_atomic_subword (orig_after, after, shift); 15985 } 15986 else if (store_mode != mode) 15987 { 15988 /* QImode/HImode on machines with lbarx/lharx where we do the native 15989 operation and then do the calcuations in a SImode register. */ 15990 if (orig_before) 15991 convert_move (orig_before, before, 1); 15992 if (orig_after) 15993 convert_move (orig_after, after, 1); 15994 } 15995 else if (orig_after && after != orig_after) 15996 emit_move_insn (orig_after, after); 15997} 15998 15999/* Emit instructions to move SRC to DST. Called by splitters for 16000 multi-register moves. It will emit at most one instruction for 16001 each register that is accessed; that is, it won't emit li/lis pairs 16002 (or equivalent for 64-bit code). One of SRC or DST must be a hard 16003 register. */ 16004 16005void 16006rs6000_split_multireg_move (rtx dst, rtx src) 16007{ 16008 /* The register number of the first register being moved. */ 16009 int reg; 16010 /* The mode that is to be moved. */ 16011 machine_mode mode; 16012 /* The mode that the move is being done in, and its size. */ 16013 machine_mode reg_mode; 16014 int reg_mode_size; 16015 /* The number of registers that will be moved. */ 16016 int nregs; 16017 16018 reg = REG_P (dst) ? REGNO (dst) : REGNO (src); 16019 mode = GET_MODE (dst); 16020 nregs = hard_regno_nregs (reg, mode); 16021 16022 /* If we have a vector quad register for MMA, and this is a load or store, 16023 see if we can use vector paired load/stores. */ 16024 if (mode == PXImode && TARGET_MMA 16025 && (MEM_P (dst) || MEM_P (src))) 16026 { 16027 reg_mode = POImode; 16028 nregs /= 2; 16029 } 16030 /* If we have a vector pair/quad mode, split it into two/four separate 16031 vectors. */ 16032 else if (mode == POImode || mode == PXImode) 16033 reg_mode = V1TImode; 16034 else if (FP_REGNO_P (reg)) 16035 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : 16036 (TARGET_HARD_FLOAT ? 
DFmode : SFmode); 16037 else if (ALTIVEC_REGNO_P (reg)) 16038 reg_mode = V16QImode; 16039 else 16040 reg_mode = word_mode; 16041 reg_mode_size = GET_MODE_SIZE (reg_mode); 16042 16043 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); 16044 16045 /* TDmode residing in FP registers is special, since the ISA requires that 16046 the lower-numbered word of a register pair is always the most significant 16047 word, even in little-endian mode. This does not match the usual subreg 16048 semantics, so we cannnot use simplify_gen_subreg in those cases. Access 16049 the appropriate constituent registers "by hand" in little-endian mode. 16050 16051 Note we do not need to check for destructive overlap here since TDmode 16052 can only reside in even/odd register pairs. */ 16053 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) 16054 { 16055 rtx p_src, p_dst; 16056 int i; 16057 16058 for (i = 0; i < nregs; i++) 16059 { 16060 if (REG_P (src) && FP_REGNO_P (REGNO (src))) 16061 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); 16062 else 16063 p_src = simplify_gen_subreg (reg_mode, src, mode, 16064 i * reg_mode_size); 16065 16066 if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) 16067 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); 16068 else 16069 p_dst = simplify_gen_subreg (reg_mode, dst, mode, 16070 i * reg_mode_size); 16071 16072 emit_insn (gen_rtx_SET (p_dst, p_src)); 16073 } 16074 16075 return; 16076 } 16077 16078 /* The __vector_pair and __vector_quad modes are multi-register modes, 16079 so if have to load or store the registers, we have to be careful to 16080 properly swap them if we're in little endian mode below. This means 16081 the last register gets the first memory location. */ 16082 if (mode == POImode || mode == PXImode) 16083 { 16084 if (MEM_P (dst)) 16085 { 16086 unsigned offset = 0; 16087 unsigned size = GET_MODE_SIZE (reg_mode); 16088 16089 /* If we are reading an accumulator register, we have to 16090 deprime it before we can access it. */ 16091 if (TARGET_MMA 16092 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src))) 16093 emit_insn (gen_mma_xxmfacc (src, src)); 16094 16095 for (int i = 0; i < nregs; i++) 16096 { 16097 unsigned subreg = (WORDS_BIG_ENDIAN) 16098 ? i * size : (nregs - 1 - i) * size; 16099 rtx dst2 = adjust_address (dst, reg_mode, offset); 16100 rtx src2 = simplify_gen_subreg (reg_mode, src, mode, subreg); 16101 offset += size; 16102 emit_insn (gen_rtx_SET (dst2, src2)); 16103 } 16104 16105 return; 16106 } 16107 16108 if (MEM_P (src)) 16109 { 16110 unsigned offset = 0; 16111 unsigned size = GET_MODE_SIZE (reg_mode); 16112 16113 for (int i = 0; i < nregs; i++) 16114 { 16115 unsigned subreg = (WORDS_BIG_ENDIAN) 16116 ? i * size : (nregs - 1 - i) * size; 16117 rtx dst2 = simplify_gen_subreg (reg_mode, dst, mode, subreg); 16118 rtx src2 = adjust_address (src, reg_mode, offset); 16119 offset += size; 16120 emit_insn (gen_rtx_SET (dst2, src2)); 16121 } 16122 16123 /* If we are writing an accumulator register, we have to 16124 prime it after we've written it. 
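   Priming and depriming use the MMA xxmtacc/xxmfacc instructions, which
   transfer the accumulator contents to and from its four overlapping VSX
   registers.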
*/ 16125 if (TARGET_MMA 16126 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst))) 16127 emit_insn (gen_mma_xxmtacc (dst, dst)); 16128 16129 return; 16130 } 16131 16132 if (GET_CODE (src) == UNSPEC) 16133 { 16134 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE 16135 || XINT (src, 1) == UNSPEC_MMA_ASSEMBLE_ACC); 16136 gcc_assert (REG_P (dst)); 16137 if (GET_MODE (src) == PXImode) 16138 gcc_assert (FP_REGNO_P (REGNO (dst))); 16139 if (GET_MODE (src) == POImode) 16140 gcc_assert (VSX_REGNO_P (REGNO (dst))); 16141 16142 reg_mode = GET_MODE (XVECEXP (src, 0, 0)); 16143 int nvecs = XVECLEN (src, 0); 16144 for (int i = 0; i < nvecs; i++) 16145 { 16146 int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; 16147 rtx dst_i = gen_rtx_REG (reg_mode, reg + index); 16148 emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); 16149 } 16150 16151 /* We are writing an accumulator register, so we have to 16152 prime it after we've written it. */ 16153 if (TARGET_MMA 16154 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst))) 16155 emit_insn (gen_mma_xxmtacc (dst, dst)); 16156 16157 return; 16158 } 16159 16160 /* Register -> register moves can use common code. */ 16161 } 16162 16163 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) 16164 { 16165 /* If we are reading an accumulator register, we have to 16166 deprime it before we can access it. */ 16167 if (TARGET_MMA 16168 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src))) 16169 emit_insn (gen_mma_xxmfacc (src, src)); 16170 16171 /* Move register range backwards, if we might have destructive 16172 overlap. */ 16173 int i; 16174 for (i = nregs - 1; i >= 0; i--) 16175 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, 16176 i * reg_mode_size), 16177 simplify_gen_subreg (reg_mode, src, mode, 16178 i * reg_mode_size))); 16179 16180 /* If we are writing an accumulator register, we have to 16181 prime it after we've written it. */ 16182 if (TARGET_MMA 16183 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst))) 16184 emit_insn (gen_mma_xxmtacc (dst, dst)); 16185 } 16186 else 16187 { 16188 int i; 16189 int j = -1; 16190 bool used_update = false; 16191 rtx restore_basereg = NULL_RTX; 16192 16193 if (MEM_P (src) && INT_REGNO_P (reg)) 16194 { 16195 rtx breg; 16196 16197 if (GET_CODE (XEXP (src, 0)) == PRE_INC 16198 || GET_CODE (XEXP (src, 0)) == PRE_DEC) 16199 { 16200 rtx delta_rtx; 16201 breg = XEXP (XEXP (src, 0), 0); 16202 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC 16203 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) 16204 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); 16205 emit_insn (gen_add3_insn (breg, breg, delta_rtx)); 16206 src = replace_equiv_address (src, breg); 16207 } 16208 else if (! 
rs6000_offsettable_memref_p (src, reg_mode, true)) 16209 { 16210 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) 16211 { 16212 rtx basereg = XEXP (XEXP (src, 0), 0); 16213 if (TARGET_UPDATE) 16214 { 16215 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); 16216 emit_insn (gen_rtx_SET (ndst, 16217 gen_rtx_MEM (reg_mode, 16218 XEXP (src, 0)))); 16219 used_update = true; 16220 } 16221 else 16222 emit_insn (gen_rtx_SET (basereg, 16223 XEXP (XEXP (src, 0), 1))); 16224 src = replace_equiv_address (src, basereg); 16225 } 16226 else 16227 { 16228 rtx basereg = gen_rtx_REG (Pmode, reg); 16229 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); 16230 src = replace_equiv_address (src, basereg); 16231 } 16232 } 16233 16234 breg = XEXP (src, 0); 16235 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) 16236 breg = XEXP (breg, 0); 16237 16238 /* If the base register we are using to address memory is 16239 also a destination reg, then change that register last. */ 16240 if (REG_P (breg) 16241 && REGNO (breg) >= REGNO (dst) 16242 && REGNO (breg) < REGNO (dst) + nregs) 16243 j = REGNO (breg) - REGNO (dst); 16244 } 16245 else if (MEM_P (dst) && INT_REGNO_P (reg)) 16246 { 16247 rtx breg; 16248 16249 if (GET_CODE (XEXP (dst, 0)) == PRE_INC 16250 || GET_CODE (XEXP (dst, 0)) == PRE_DEC) 16251 { 16252 rtx delta_rtx; 16253 breg = XEXP (XEXP (dst, 0), 0); 16254 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC 16255 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) 16256 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); 16257 16258 /* We have to update the breg before doing the store. 16259 Use store with update, if available. */ 16260 16261 if (TARGET_UPDATE) 16262 { 16263 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); 16264 emit_insn (TARGET_32BIT 16265 ? (TARGET_POWERPC64 16266 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc) 16267 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) 16268 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); 16269 used_update = true; 16270 } 16271 else 16272 emit_insn (gen_add3_insn (breg, breg, delta_rtx)); 16273 dst = replace_equiv_address (dst, breg); 16274 } 16275 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) 16276 && GET_CODE (XEXP (dst, 0)) != LO_SUM) 16277 { 16278 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) 16279 { 16280 rtx basereg = XEXP (XEXP (dst, 0), 0); 16281 if (TARGET_UPDATE) 16282 { 16283 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); 16284 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, 16285 XEXP (dst, 0)), 16286 nsrc)); 16287 used_update = true; 16288 } 16289 else 16290 emit_insn (gen_rtx_SET (basereg, 16291 XEXP (XEXP (dst, 0), 1))); 16292 dst = replace_equiv_address (dst, basereg); 16293 } 16294 else 16295 { 16296 rtx basereg = XEXP (XEXP (dst, 0), 0); 16297 rtx offsetreg = XEXP (XEXP (dst, 0), 1); 16298 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS 16299 && REG_P (basereg) 16300 && REG_P (offsetreg) 16301 && REGNO (basereg) != REGNO (offsetreg)); 16302 if (REGNO (basereg) == 0) 16303 { 16304 rtx tmp = offsetreg; 16305 offsetreg = basereg; 16306 basereg = tmp; 16307 } 16308 emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); 16309 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); 16310 dst = replace_equiv_address (dst, basereg); 16311 } 16312 } 16313 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) 16314 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); 16315 } 16316 16317 /* If we are reading an accumulator register, we have to 16318 deprime it before we can access it. 
*/ 16319 if (TARGET_MMA && REG_P (src) 16320 && GET_MODE (src) == PXImode && FP_REGNO_P (REGNO (src))) 16321 emit_insn (gen_mma_xxmfacc (src, src)); 16322 16323 for (i = 0; i < nregs; i++) 16324 { 16325 /* Calculate index to next subword. */ 16326 ++j; 16327 if (j == nregs) 16328 j = 0; 16329 16330 /* If compiler already emitted move of first word by 16331 store with update, no need to do anything. */ 16332 if (j == 0 && used_update) 16333 continue; 16334 16335 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, 16336 j * reg_mode_size), 16337 simplify_gen_subreg (reg_mode, src, mode, 16338 j * reg_mode_size))); 16339 } 16340 16341 /* If we are writing an accumulator register, we have to 16342 prime it after we've written it. */ 16343 if (TARGET_MMA && REG_P (dst) 16344 && GET_MODE (dst) == PXImode && FP_REGNO_P (REGNO (dst))) 16345 emit_insn (gen_mma_xxmtacc (dst, dst)); 16346 16347 if (restore_basereg != NULL_RTX) 16348 emit_insn (restore_basereg); 16349 } 16350} 16351 16352static GTY(()) alias_set_type TOC_alias_set = -1; 16353 16354alias_set_type 16355get_TOC_alias_set (void) 16356{ 16357 if (TOC_alias_set == -1) 16358 TOC_alias_set = new_alias_set (); 16359 return TOC_alias_set; 16360} 16361 16362/* The mode the ABI uses for a word. This is not the same as word_mode 16363 for -m32 -mpowerpc64. This is used to implement various target hooks. */ 16364 16365static scalar_int_mode 16366rs6000_abi_word_mode (void) 16367{ 16368 return TARGET_32BIT ? SImode : DImode; 16369} 16370 16371/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ 16372static char * 16373rs6000_offload_options (void) 16374{ 16375 if (TARGET_64BIT) 16376 return xstrdup ("-foffload-abi=lp64"); 16377 else 16378 return xstrdup ("-foffload-abi=ilp32"); 16379} 16380 16381 16382/* A quick summary of the various types of 'constant-pool tables' 16383 under PowerPC: 16384 16385 Target Flags Name One table per 16386 AIX (none) AIX TOC object file 16387 AIX -mfull-toc AIX TOC object file 16388 AIX -mminimal-toc AIX minimal TOC translation unit 16389 SVR4/EABI (none) SVR4 SDATA object file 16390 SVR4/EABI -fpic SVR4 pic object file 16391 SVR4/EABI -fPIC SVR4 PIC translation unit 16392 SVR4/EABI -mrelocatable EABI TOC function 16393 SVR4/EABI -maix AIX TOC object file 16394 SVR4/EABI -maix -mminimal-toc 16395 AIX minimal TOC translation unit 16396 16397 Name Reg. Set by entries contains: 16398 made by addrs? fp? sum? 16399 16400 AIX TOC 2 crt0 as Y option option 16401 AIX minimal TOC 30 prolog gcc Y Y option 16402 SVR4 SDATA 13 crt0 gcc N Y N 16403 SVR4 pic 30 prolog ld Y not yet N 16404 SVR4 PIC 30 prolog gcc Y option option 16405 EABI TOC 30 prolog gcc Y option option 16406 16407*/ 16408 16409/* Hash functions for the hash table. 
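   These back the toc_hash_table used by output_toc below: constants are
   hashed structurally so that repeated uses of the same constant can share a
   single TOC entry, the duplicate label simply being .set equal to the first
   one.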
*/ 16410 16411static unsigned 16412rs6000_hash_constant (rtx k) 16413{ 16414 enum rtx_code code = GET_CODE (k); 16415 machine_mode mode = GET_MODE (k); 16416 unsigned result = (code << 3) ^ mode; 16417 const char *format; 16418 int flen, fidx; 16419 16420 format = GET_RTX_FORMAT (code); 16421 flen = strlen (format); 16422 fidx = 0; 16423 16424 switch (code) 16425 { 16426 case LABEL_REF: 16427 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0)); 16428 16429 case CONST_WIDE_INT: 16430 { 16431 int i; 16432 flen = CONST_WIDE_INT_NUNITS (k); 16433 for (i = 0; i < flen; i++) 16434 result = result * 613 + CONST_WIDE_INT_ELT (k, i); 16435 return result; 16436 } 16437 16438 case CONST_DOUBLE: 16439 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result; 16440 16441 case CODE_LABEL: 16442 fidx = 3; 16443 break; 16444 16445 default: 16446 break; 16447 } 16448 16449 for (; fidx < flen; fidx++) 16450 switch (format[fidx]) 16451 { 16452 case 's': 16453 { 16454 unsigned i, len; 16455 const char *str = XSTR (k, fidx); 16456 len = strlen (str); 16457 result = result * 613 + len; 16458 for (i = 0; i < len; i++) 16459 result = result * 613 + (unsigned) str[i]; 16460 break; 16461 } 16462 case 'u': 16463 case 'e': 16464 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx)); 16465 break; 16466 case 'i': 16467 case 'n': 16468 result = result * 613 + (unsigned) XINT (k, fidx); 16469 break; 16470 case 'w': 16471 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT)) 16472 result = result * 613 + (unsigned) XWINT (k, fidx); 16473 else 16474 { 16475 size_t i; 16476 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++) 16477 result = result * 613 + (unsigned) (XWINT (k, fidx) 16478 >> CHAR_BIT * i); 16479 } 16480 break; 16481 case '0': 16482 break; 16483 default: 16484 gcc_unreachable (); 16485 } 16486 16487 return result; 16488} 16489 16490hashval_t 16491toc_hasher::hash (toc_hash_struct *thc) 16492{ 16493 return rs6000_hash_constant (thc->key) ^ thc->key_mode; 16494} 16495 16496/* Compare H1 and H2 for equivalence. */ 16497 16498bool 16499toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) 16500{ 16501 rtx r1 = h1->key; 16502 rtx r2 = h2->key; 16503 16504 if (h1->key_mode != h2->key_mode) 16505 return 0; 16506 16507 return rtx_equal_p (r1, r2); 16508} 16509 16510/* These are the names given by the C++ front-end to vtables, and 16511 vtable-like objects. Ideally, this logic should not be here; 16512 instead, there should be some programmatic way of inquiring as 16513 to whether or not an object is a vtable. */ 16514 16515#define VTABLE_NAME_P(NAME) \ 16516 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \ 16517 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \ 16518 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \ 16519 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \ 16520 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0) 16521 16522#ifdef NO_DOLLAR_IN_LABEL 16523/* Return a GGC-allocated character string translating dollar signs in 16524 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. 
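   For example, a name like "foo$local" would come back as "foo_local"; a
   name with no '$' at all, or whose first character is '$', is returned
   unchanged.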
*/ 16525 16526const char * 16527rs6000_xcoff_strip_dollar (const char *name) 16528{ 16529 char *strip, *p; 16530 const char *q; 16531 size_t len; 16532 16533 q = (const char *) strchr (name, '$'); 16534 16535 if (q == 0 || q == name) 16536 return name; 16537 16538 len = strlen (name); 16539 strip = XALLOCAVEC (char, len + 1); 16540 strcpy (strip, name); 16541 p = strip + (q - name); 16542 while (p) 16543 { 16544 *p = '_'; 16545 p = strchr (p + 1, '$'); 16546 } 16547 16548 return ggc_alloc_string (strip, len); 16549} 16550#endif 16551 16552void 16553rs6000_output_symbol_ref (FILE *file, rtx x) 16554{ 16555 const char *name = XSTR (x, 0); 16556 16557 /* Currently C++ toc references to vtables can be emitted before it 16558 is decided whether the vtable is public or private. If this is 16559 the case, then the linker will eventually complain that there is 16560 a reference to an unknown section. Thus, for vtables only, 16561 we emit the TOC reference to reference the identifier and not the 16562 symbol. */ 16563 if (VTABLE_NAME_P (name)) 16564 { 16565 RS6000_OUTPUT_BASENAME (file, name); 16566 } 16567 else 16568 assemble_name (file, name); 16569} 16570 16571/* Output a TOC entry. We derive the entry name from what is being 16572 written. */ 16573 16574void 16575output_toc (FILE *file, rtx x, int labelno, machine_mode mode) 16576{ 16577 char buf[256]; 16578 const char *name = buf; 16579 rtx base = x; 16580 HOST_WIDE_INT offset = 0; 16581 16582 gcc_assert (!TARGET_NO_TOC_OR_PCREL); 16583 16584 /* When the linker won't eliminate them, don't output duplicate 16585 TOC entries (this happens on AIX if there is any kind of TOC, 16586 and on SVR4 under -fPIC or -mrelocatable). Don't do this for 16587 CODE_LABELs. */ 16588 if (TARGET_TOC && GET_CODE (x) != LABEL_REF) 16589 { 16590 struct toc_hash_struct *h; 16591 16592 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE 16593 time because GGC is not initialized at that point. */ 16594 if (toc_hash_table == NULL) 16595 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021); 16596 16597 h = ggc_alloc<toc_hash_struct> (); 16598 h->key = x; 16599 h->key_mode = mode; 16600 h->labelno = labelno; 16601 16602 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT); 16603 if (*found == NULL) 16604 *found = h; 16605 else /* This is indeed a duplicate. 16606 Set this label equal to that label. */ 16607 { 16608 fputs ("\t.set ", file); 16609 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); 16610 fprintf (file, "%d,", labelno); 16611 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); 16612 fprintf (file, "%d\n", ((*found)->labelno)); 16613 16614#ifdef HAVE_AS_TLS 16615 if (TARGET_XCOFF && SYMBOL_REF_P (x) 16616 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC 16617 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)) 16618 { 16619 fputs ("\t.set ", file); 16620 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); 16621 fprintf (file, "%d,", labelno); 16622 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); 16623 fprintf (file, "%d\n", ((*found)->labelno)); 16624 } 16625#endif 16626 return; 16627 } 16628 } 16629 16630 /* If we're going to put a double constant in the TOC, make sure it's 16631 aligned properly when strict alignment is on. */ 16632 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x)) 16633 && STRICT_ALIGNMENT 16634 && GET_MODE_BITSIZE (mode) >= 64 16635 && ! (TARGET_NO_FP_IN_TOC && ! 
TARGET_MINIMAL_TOC)) { 16636 ASM_OUTPUT_ALIGN (file, 3); 16637 } 16638 16639 (*targetm.asm_out.internal_label) (file, "LC", labelno); 16640 16641 /* Handle FP constants specially. Note that if we have a minimal 16642 TOC, things we put here aren't actually in the TOC, so we can allow 16643 FP constants. */ 16644 if (CONST_DOUBLE_P (x) 16645 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode 16646 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode)) 16647 { 16648 long k[4]; 16649 16650 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 16651 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k); 16652 else 16653 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x)); 16654 16655 if (TARGET_64BIT) 16656 { 16657 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16658 fputs (DOUBLE_INT_ASM_OP, file); 16659 else 16660 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", 16661 k[0] & 0xffffffff, k[1] & 0xffffffff, 16662 k[2] & 0xffffffff, k[3] & 0xffffffff); 16663 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n", 16664 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, 16665 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff, 16666 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff, 16667 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff); 16668 return; 16669 } 16670 else 16671 { 16672 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16673 fputs ("\t.long ", file); 16674 else 16675 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", 16676 k[0] & 0xffffffff, k[1] & 0xffffffff, 16677 k[2] & 0xffffffff, k[3] & 0xffffffff); 16678 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n", 16679 k[0] & 0xffffffff, k[1] & 0xffffffff, 16680 k[2] & 0xffffffff, k[3] & 0xffffffff); 16681 return; 16682 } 16683 } 16684 else if (CONST_DOUBLE_P (x) 16685 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode)) 16686 { 16687 long k[2]; 16688 16689 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 16690 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k); 16691 else 16692 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); 16693 16694 if (TARGET_64BIT) 16695 { 16696 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16697 fputs (DOUBLE_INT_ASM_OP, file); 16698 else 16699 fprintf (file, "\t.tc FD_%lx_%lx[TC],", 16700 k[0] & 0xffffffff, k[1] & 0xffffffff); 16701 fprintf (file, "0x%lx%08lx\n", 16702 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, 16703 k[WORDS_BIG_ENDIAN ? 
1 : 0] & 0xffffffff); 16704 return; 16705 } 16706 else 16707 { 16708 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16709 fputs ("\t.long ", file); 16710 else 16711 fprintf (file, "\t.tc FD_%lx_%lx[TC],", 16712 k[0] & 0xffffffff, k[1] & 0xffffffff); 16713 fprintf (file, "0x%lx,0x%lx\n", 16714 k[0] & 0xffffffff, k[1] & 0xffffffff); 16715 return; 16716 } 16717 } 16718 else if (CONST_DOUBLE_P (x) 16719 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode)) 16720 { 16721 long l; 16722 16723 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) 16724 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l); 16725 else 16726 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); 16727 16728 if (TARGET_64BIT) 16729 { 16730 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16731 fputs (DOUBLE_INT_ASM_OP, file); 16732 else 16733 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); 16734 if (WORDS_BIG_ENDIAN) 16735 fprintf (file, "0x%lx00000000\n", l & 0xffffffff); 16736 else 16737 fprintf (file, "0x%lx\n", l & 0xffffffff); 16738 return; 16739 } 16740 else 16741 { 16742 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16743 fputs ("\t.long ", file); 16744 else 16745 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); 16746 fprintf (file, "0x%lx\n", l & 0xffffffff); 16747 return; 16748 } 16749 } 16750 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x)) 16751 { 16752 unsigned HOST_WIDE_INT low; 16753 HOST_WIDE_INT high; 16754 16755 low = INTVAL (x) & 0xffffffff; 16756 high = (HOST_WIDE_INT) INTVAL (x) >> 32; 16757 16758 /* TOC entries are always Pmode-sized, so when big-endian 16759 smaller integer constants in the TOC need to be padded. 16760 (This is still a win over putting the constants in 16761 a separate constant pool, because then we'd have 16762 to have both a TOC entry _and_ the actual constant.) 16763 16764 For a 32-bit target, CONST_INT values are loaded and shifted 16765 entirely within `low' and can be stored in one TOC entry. */ 16766 16767 /* It would be easy to make this work, but it doesn't now. 
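      A worked example of the supported padding path below, purely
      illustrative and assuming a 64-bit HOST_WIDE_INT: for an SImode
      CONST_INT of 0x1234 placed in a 64-bit big-endian TOC,

        low  = 0x1234, high = 0
        low |= high << 32          (no change, high is zero)
        low <<= 64 - 32            so low  = 0x123400000000
        high = low >> 32           so high = 0x1234
        low &= 0xffffffff          so low  = 0

      and the entry is emitted as the doubleword 0x0000123400000000,
      which puts the value in the most significant half so that a
      32-bit load from the start of the Pmode-sized slot still reads
      0x1234.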
*/ 16768 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode)); 16769 16770 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode)) 16771 { 16772 low |= high << 32; 16773 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode); 16774 high = (HOST_WIDE_INT) low >> 32; 16775 low &= 0xffffffff; 16776 } 16777 16778 if (TARGET_64BIT) 16779 { 16780 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16781 fputs (DOUBLE_INT_ASM_OP, file); 16782 else 16783 fprintf (file, "\t.tc ID_%lx_%lx[TC],", 16784 (long) high & 0xffffffff, (long) low & 0xffffffff); 16785 fprintf (file, "0x%lx%08lx\n", 16786 (long) high & 0xffffffff, (long) low & 0xffffffff); 16787 return; 16788 } 16789 else 16790 { 16791 if (POINTER_SIZE < GET_MODE_BITSIZE (mode)) 16792 { 16793 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16794 fputs ("\t.long ", file); 16795 else 16796 fprintf (file, "\t.tc ID_%lx_%lx[TC],", 16797 (long) high & 0xffffffff, (long) low & 0xffffffff); 16798 fprintf (file, "0x%lx,0x%lx\n", 16799 (long) high & 0xffffffff, (long) low & 0xffffffff); 16800 } 16801 else 16802 { 16803 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16804 fputs ("\t.long ", file); 16805 else 16806 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff); 16807 fprintf (file, "0x%lx\n", (long) low & 0xffffffff); 16808 } 16809 return; 16810 } 16811 } 16812 16813 if (GET_CODE (x) == CONST) 16814 { 16815 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS 16816 && CONST_INT_P (XEXP (XEXP (x, 0), 1))); 16817 16818 base = XEXP (XEXP (x, 0), 0); 16819 offset = INTVAL (XEXP (XEXP (x, 0), 1)); 16820 } 16821 16822 switch (GET_CODE (base)) 16823 { 16824 case SYMBOL_REF: 16825 name = XSTR (base, 0); 16826 break; 16827 16828 case LABEL_REF: 16829 ASM_GENERATE_INTERNAL_LABEL (buf, "L", 16830 CODE_LABEL_NUMBER (XEXP (base, 0))); 16831 break; 16832 16833 case CODE_LABEL: 16834 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base)); 16835 break; 16836 16837 default: 16838 gcc_unreachable (); 16839 } 16840 16841 if (TARGET_ELF || TARGET_MINIMAL_TOC) 16842 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file); 16843 else 16844 { 16845 fputs ("\t.tc ", file); 16846 RS6000_OUTPUT_BASENAME (file, name); 16847 16848 if (offset < 0) 16849 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset); 16850 else if (offset) 16851 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset); 16852 16853 /* Mark large TOC symbols on AIX with [TE] so they are mapped 16854 after other TOC symbols, reducing overflow of small TOC access 16855 to [TC] symbols. */ 16856 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL 16857 ? "[TE]," : "[TC],", file); 16858 } 16859 16860 /* Currently C++ toc references to vtables can be emitted before it 16861 is decided whether the vtable is public or private. If this is 16862 the case, then the linker will eventually complain that there is 16863 a TOC reference to an unknown section. Thus, for vtables only, 16864 we emit the TOC reference to reference the symbol and not the 16865 section. 
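     For instance, the Itanium C++ ABI vtable for a class Foo is the
     symbol "_ZTV3Foo"; VTABLE_NAME_P above recognizes it by its "_ZTV"
     prefix, so its TOC entry names the symbol directly rather than
     going through output_addr_const.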
*/ 16866 if (VTABLE_NAME_P (name)) 16867 { 16868 RS6000_OUTPUT_BASENAME (file, name); 16869 if (offset < 0) 16870 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset); 16871 else if (offset > 0) 16872 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); 16873 } 16874 else 16875 output_addr_const (file, x); 16876 16877#if HAVE_AS_TLS 16878 if (TARGET_XCOFF && SYMBOL_REF_P (base)) 16879 { 16880 switch (SYMBOL_REF_TLS_MODEL (base)) 16881 { 16882 case 0: 16883 break; 16884 case TLS_MODEL_LOCAL_EXEC: 16885 fputs ("@le", file); 16886 break; 16887 case TLS_MODEL_INITIAL_EXEC: 16888 fputs ("@ie", file); 16889 break; 16890 /* Use global-dynamic for local-dynamic. */ 16891 case TLS_MODEL_GLOBAL_DYNAMIC: 16892 case TLS_MODEL_LOCAL_DYNAMIC: 16893 putc ('\n', file); 16894 (*targetm.asm_out.internal_label) (file, "LCM", labelno); 16895 fputs ("\t.tc .", file); 16896 RS6000_OUTPUT_BASENAME (file, name); 16897 fputs ("[TC],", file); 16898 output_addr_const (file, x); 16899 fputs ("@m", file); 16900 break; 16901 default: 16902 gcc_unreachable (); 16903 } 16904 } 16905#endif 16906 16907 putc ('\n', file); 16908} 16909 16910/* Output an assembler pseudo-op to write an ASCII string of N characters 16911 starting at P to FILE. 16912 16913 On the RS/6000, we have to do this using the .byte operation and 16914 write out special characters outside the quoted string. 16915 Also, the assembler is broken; very long strings are truncated, 16916 so we must artificially break them up early. */ 16917 16918void 16919output_ascii (FILE *file, const char *p, int n) 16920{ 16921 char c; 16922 int i, count_string; 16923 const char *for_string = "\t.byte \""; 16924 const char *for_decimal = "\t.byte "; 16925 const char *to_close = NULL; 16926 16927 count_string = 0; 16928 for (i = 0; i < n; i++) 16929 { 16930 c = *p++; 16931 if (c >= ' ' && c < 0177) 16932 { 16933 if (for_string) 16934 fputs (for_string, file); 16935 putc (c, file); 16936 16937 /* Write two quotes to get one. */ 16938 if (c == '"') 16939 { 16940 putc (c, file); 16941 ++count_string; 16942 } 16943 16944 for_string = NULL; 16945 for_decimal = "\"\n\t.byte "; 16946 to_close = "\"\n"; 16947 ++count_string; 16948 16949 if (count_string >= 512) 16950 { 16951 fputs (to_close, file); 16952 16953 for_string = "\t.byte \""; 16954 for_decimal = "\t.byte "; 16955 to_close = NULL; 16956 count_string = 0; 16957 } 16958 } 16959 else 16960 { 16961 if (for_decimal) 16962 fputs (for_decimal, file); 16963 fprintf (file, "%d", c); 16964 16965 for_string = "\n\t.byte \""; 16966 for_decimal = ", "; 16967 to_close = "\n"; 16968 count_string = 0; 16969 } 16970 } 16971 16972 /* Now close the string if we have written one. Then end the line. */ 16973 if (to_close) 16974 fputs (to_close, file); 16975} 16976 16977/* Generate a unique section name for FILENAME for a section type 16978 represented by SECTION_DESC. Output goes into BUF. 16979 16980 SECTION_DESC can be any string, as long as it is different for each 16981 possible section type. 16982 16983 We name the section in the same manner as xlc. The name begins with an 16984 underscore followed by the filename (after stripping any leading directory 16985 names) with the last period replaced by the string SECTION_DESC. If 16986 FILENAME does not contain a period, SECTION_DESC is appended to the end of 16987 the name. 
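   As an illustration (hypothetical arguments): FILENAME "/src/my-prog.c"
   with SECTION_DESC "data" yields "_myprogdata".  The leading directory
   is stripped, the non-alphanumeric '-' is dropped, and the final '.'
   is where SECTION_DESC is spliced in.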
*/ 16988 16989void 16990rs6000_gen_section_name (char **buf, const char *filename, 16991 const char *section_desc) 16992{ 16993 const char *q, *after_last_slash, *last_period = 0; 16994 char *p; 16995 int len; 16996 16997 after_last_slash = filename; 16998 for (q = filename; *q; q++) 16999 { 17000 if (*q == '/') 17001 after_last_slash = q + 1; 17002 else if (*q == '.') 17003 last_period = q; 17004 } 17005 17006 len = strlen (after_last_slash) + strlen (section_desc) + 2; 17007 *buf = (char *) xmalloc (len); 17008 17009 p = *buf; 17010 *p++ = '_'; 17011 17012 for (q = after_last_slash; *q; q++) 17013 { 17014 if (q == last_period) 17015 { 17016 strcpy (p, section_desc); 17017 p += strlen (section_desc); 17018 break; 17019 } 17020 17021 else if (ISALNUM (*q)) 17022 *p++ = *q; 17023 } 17024 17025 if (last_period == 0) 17026 strcpy (p, section_desc); 17027 else 17028 *p = '\0'; 17029} 17030 17031/* Emit profile function. */ 17032 17033void 17034output_profile_hook (int labelno ATTRIBUTE_UNUSED) 17035{ 17036 /* Non-standard profiling for kernels, which just saves LR then calls 17037 _mcount without worrying about arg saves. The idea is to change 17038 the function prologue as little as possible as it isn't easy to 17039 account for arg save/restore code added just for _mcount. */ 17040 if (TARGET_PROFILE_KERNEL) 17041 return; 17042 17043 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 17044 { 17045#ifndef NO_PROFILE_COUNTERS 17046# define NO_PROFILE_COUNTERS 0 17047#endif 17048 if (NO_PROFILE_COUNTERS) 17049 emit_library_call (init_one_libfunc (RS6000_MCOUNT), 17050 LCT_NORMAL, VOIDmode); 17051 else 17052 { 17053 char buf[30]; 17054 const char *label_name; 17055 rtx fun; 17056 17057 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 17058 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf)); 17059 fun = gen_rtx_SYMBOL_REF (Pmode, label_name); 17060 17061 emit_library_call (init_one_libfunc (RS6000_MCOUNT), 17062 LCT_NORMAL, VOIDmode, fun, Pmode); 17063 } 17064 } 17065 else if (DEFAULT_ABI == ABI_DARWIN) 17066 { 17067 const char *mcount_name = RS6000_MCOUNT; 17068 int caller_addr_regno = LR_REGNO; 17069 17070 /* Be conservative and always set this, at least for now. */ 17071 crtl->uses_pic_offset_table = 1; 17072 17073#if TARGET_MACHO 17074 /* For PIC code, set up a stub and collect the caller's address 17075 from r0, which is where the prologue puts it. */ 17076 if (MACHOPIC_INDIRECT 17077 && crtl->uses_pic_offset_table) 17078 caller_addr_regno = 0; 17079#endif 17080 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name), 17081 LCT_NORMAL, VOIDmode, 17082 gen_rtx_REG (Pmode, caller_addr_regno), Pmode); 17083 } 17084} 17085 17086/* Write function profiler code. 
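   As a rough sketch only (the exact spelling of the label, the register
   names and the mcount symbol all vary by target), the 32-bit ABI_V4
   case below, without PIC and with profile counters enabled, emits a
   sequence along the lines of:

     mflr 0
     lis 12,LP0@ha
     stw 0,4(1)
     la 0,LP0@l(12)
     bl _mcount

   where LP0 stands for the internal label generated from LABELNO and
   _mcount stands for whatever RS6000_MCOUNT expands to.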
*/ 17087 17088void 17089output_function_profiler (FILE *file, int labelno) 17090{ 17091 char buf[100]; 17092 17093 switch (DEFAULT_ABI) 17094 { 17095 default: 17096 gcc_unreachable (); 17097 17098 case ABI_V4: 17099 if (!TARGET_32BIT) 17100 { 17101 warning (0, "no profiling of 64-bit code for this ABI"); 17102 return; 17103 } 17104 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 17105 fprintf (file, "\tmflr %s\n", reg_names[0]); 17106 if (NO_PROFILE_COUNTERS) 17107 { 17108 asm_fprintf (file, "\tstw %s,4(%s)\n", 17109 reg_names[0], reg_names[1]); 17110 } 17111 else if (TARGET_SECURE_PLT && flag_pic) 17112 { 17113 if (TARGET_LINK_STACK) 17114 { 17115 char name[32]; 17116 get_ppc476_thunk_name (name); 17117 asm_fprintf (file, "\tbl %s\n", name); 17118 } 17119 else 17120 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n"); 17121 asm_fprintf (file, "\tstw %s,4(%s)\n", 17122 reg_names[0], reg_names[1]); 17123 asm_fprintf (file, "\tmflr %s\n", reg_names[12]); 17124 asm_fprintf (file, "\taddis %s,%s,", 17125 reg_names[12], reg_names[12]); 17126 assemble_name (file, buf); 17127 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]); 17128 assemble_name (file, buf); 17129 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]); 17130 } 17131 else if (flag_pic == 1) 17132 { 17133 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file); 17134 asm_fprintf (file, "\tstw %s,4(%s)\n", 17135 reg_names[0], reg_names[1]); 17136 asm_fprintf (file, "\tmflr %s\n", reg_names[12]); 17137 asm_fprintf (file, "\tlwz %s,", reg_names[0]); 17138 assemble_name (file, buf); 17139 asm_fprintf (file, "@got(%s)\n", reg_names[12]); 17140 } 17141 else if (flag_pic > 1) 17142 { 17143 asm_fprintf (file, "\tstw %s,4(%s)\n", 17144 reg_names[0], reg_names[1]); 17145 /* Now, we need to get the address of the label. */ 17146 if (TARGET_LINK_STACK) 17147 { 17148 char name[32]; 17149 get_ppc476_thunk_name (name); 17150 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name); 17151 assemble_name (file, buf); 17152 fputs ("-.\n1:", file); 17153 asm_fprintf (file, "\tmflr %s\n", reg_names[11]); 17154 asm_fprintf (file, "\taddi %s,%s,4\n", 17155 reg_names[11], reg_names[11]); 17156 } 17157 else 17158 { 17159 fputs ("\tbcl 20,31,1f\n\t.long ", file); 17160 assemble_name (file, buf); 17161 fputs ("-.\n1:", file); 17162 asm_fprintf (file, "\tmflr %s\n", reg_names[11]); 17163 } 17164 asm_fprintf (file, "\tlwz %s,0(%s)\n", 17165 reg_names[0], reg_names[11]); 17166 asm_fprintf (file, "\tadd %s,%s,%s\n", 17167 reg_names[0], reg_names[0], reg_names[11]); 17168 } 17169 else 17170 { 17171 asm_fprintf (file, "\tlis %s,", reg_names[12]); 17172 assemble_name (file, buf); 17173 fputs ("@ha\n", file); 17174 asm_fprintf (file, "\tstw %s,4(%s)\n", 17175 reg_names[0], reg_names[1]); 17176 asm_fprintf (file, "\tla %s,", reg_names[0]); 17177 assemble_name (file, buf); 17178 asm_fprintf (file, "@l(%s)\n", reg_names[12]); 17179 } 17180 17181 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */ 17182 fprintf (file, "\tbl %s%s\n", 17183 RS6000_MCOUNT, flag_pic ? "@plt" : ""); 17184 break; 17185 17186 case ABI_AIX: 17187 case ABI_ELFv2: 17188 case ABI_DARWIN: 17189 /* Don't do anything, done in output_profile_hook (). */ 17190 break; 17191 } 17192} 17193 17194 17195 17196/* The following variable value is the last issued insn. 
*/ 17197 17198static rtx_insn *last_scheduled_insn; 17199 17200/* The following variable helps to balance issuing of load and 17201 store instructions */ 17202 17203static int load_store_pendulum; 17204 17205/* The following variable helps pair divide insns during scheduling. */ 17206static int divide_cnt; 17207/* The following variable helps pair and alternate vector and vector load 17208 insns during scheduling. */ 17209static int vec_pairing; 17210 17211 17212/* Power4 load update and store update instructions are cracked into a 17213 load or store and an integer insn which are executed in the same cycle. 17214 Branches have their own dispatch slot which does not count against the 17215 GCC issue rate, but it changes the program flow so there are no other 17216 instructions to issue in this cycle. */ 17217 17218static int 17219rs6000_variable_issue_1 (rtx_insn *insn, int more) 17220{ 17221 last_scheduled_insn = insn; 17222 if (GET_CODE (PATTERN (insn)) == USE 17223 || GET_CODE (PATTERN (insn)) == CLOBBER) 17224 { 17225 cached_can_issue_more = more; 17226 return cached_can_issue_more; 17227 } 17228 17229 if (insn_terminates_group_p (insn, current_group)) 17230 { 17231 cached_can_issue_more = 0; 17232 return cached_can_issue_more; 17233 } 17234 17235 /* If no reservation, but reach here */ 17236 if (recog_memoized (insn) < 0) 17237 return more; 17238 17239 if (rs6000_sched_groups) 17240 { 17241 if (is_microcoded_insn (insn)) 17242 cached_can_issue_more = 0; 17243 else if (is_cracked_insn (insn)) 17244 cached_can_issue_more = more > 2 ? more - 2 : 0; 17245 else 17246 cached_can_issue_more = more - 1; 17247 17248 return cached_can_issue_more; 17249 } 17250 17251 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn)) 17252 return 0; 17253 17254 cached_can_issue_more = more - 1; 17255 return cached_can_issue_more; 17256} 17257 17258static int 17259rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more) 17260{ 17261 int r = rs6000_variable_issue_1 (insn, more); 17262 if (verbose) 17263 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r); 17264 return r; 17265} 17266 17267/* Adjust the cost of a scheduling dependency. Return the new cost of 17268 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 17269 17270static int 17271rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, 17272 unsigned int) 17273{ 17274 enum attr_type attr_type; 17275 17276 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 17277 return cost; 17278 17279 switch (dep_type) 17280 { 17281 case REG_DEP_TRUE: 17282 { 17283 /* Data dependency; DEP_INSN writes a register that INSN reads 17284 some cycles later. */ 17285 17286 /* Separate a load from a narrower, dependent store. */ 17287 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9 17288 || rs6000_tune == PROCESSOR_POWER10) 17289 && GET_CODE (PATTERN (insn)) == SET 17290 && GET_CODE (PATTERN (dep_insn)) == SET 17291 && MEM_P (XEXP (PATTERN (insn), 1)) 17292 && MEM_P (XEXP (PATTERN (dep_insn), 0)) 17293 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) 17294 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) 17295 return cost + 14; 17296 17297 attr_type = get_attr_type (insn); 17298 17299 switch (attr_type) 17300 { 17301 case TYPE_JMPREG: 17302 /* Tell the first scheduling pass about the latency between 17303 a mtctr and bctr (and mtlr and br/blr). 
The first 17304 scheduling pass will not know about this latency since 17305 the mtctr instruction, which has the latency associated 17306 to it, will be generated by reload. */ 17307 return 4; 17308 case TYPE_BRANCH: 17309 /* Leave some extra cycles between a compare and its 17310 dependent branch, to inhibit expensive mispredicts. */ 17311 if ((rs6000_tune == PROCESSOR_PPC603 17312 || rs6000_tune == PROCESSOR_PPC604 17313 || rs6000_tune == PROCESSOR_PPC604e 17314 || rs6000_tune == PROCESSOR_PPC620 17315 || rs6000_tune == PROCESSOR_PPC630 17316 || rs6000_tune == PROCESSOR_PPC750 17317 || rs6000_tune == PROCESSOR_PPC7400 17318 || rs6000_tune == PROCESSOR_PPC7450 17319 || rs6000_tune == PROCESSOR_PPCE5500 17320 || rs6000_tune == PROCESSOR_PPCE6500 17321 || rs6000_tune == PROCESSOR_POWER4 17322 || rs6000_tune == PROCESSOR_POWER5 17323 || rs6000_tune == PROCESSOR_POWER7 17324 || rs6000_tune == PROCESSOR_POWER8 17325 || rs6000_tune == PROCESSOR_POWER9 17326 || rs6000_tune == PROCESSOR_POWER10 17327 || rs6000_tune == PROCESSOR_CELL) 17328 && recog_memoized (dep_insn) 17329 && (INSN_CODE (dep_insn) >= 0)) 17330 17331 switch (get_attr_type (dep_insn)) 17332 { 17333 case TYPE_CMP: 17334 case TYPE_FPCOMPARE: 17335 case TYPE_CR_LOGICAL: 17336 return cost + 2; 17337 case TYPE_EXTS: 17338 case TYPE_MUL: 17339 if (get_attr_dot (dep_insn) == DOT_YES) 17340 return cost + 2; 17341 else 17342 break; 17343 case TYPE_SHIFT: 17344 if (get_attr_dot (dep_insn) == DOT_YES 17345 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO) 17346 return cost + 2; 17347 else 17348 break; 17349 default: 17350 break; 17351 } 17352 break; 17353 17354 case TYPE_STORE: 17355 case TYPE_FPSTORE: 17356 if ((rs6000_tune == PROCESSOR_POWER6) 17357 && recog_memoized (dep_insn) 17358 && (INSN_CODE (dep_insn) >= 0)) 17359 { 17360 17361 if (GET_CODE (PATTERN (insn)) != SET) 17362 /* If this happens, we have to extend this to schedule 17363 optimally. Return default for now. */ 17364 return cost; 17365 17366 /* Adjust the cost for the case where the value written 17367 by a fixed point operation is used as the address 17368 gen value on a store. */ 17369 switch (get_attr_type (dep_insn)) 17370 { 17371 case TYPE_LOAD: 17372 case TYPE_CNTLZ: 17373 { 17374 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17375 return get_attr_sign_extend (dep_insn) 17376 == SIGN_EXTEND_YES ? 6 : 4; 17377 break; 17378 } 17379 case TYPE_SHIFT: 17380 { 17381 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17382 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? 17383 6 : 3; 17384 break; 17385 } 17386 case TYPE_INTEGER: 17387 case TYPE_ADD: 17388 case TYPE_LOGICAL: 17389 case TYPE_EXTS: 17390 case TYPE_INSERT: 17391 { 17392 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17393 return 3; 17394 break; 17395 } 17396 case TYPE_STORE: 17397 case TYPE_FPLOAD: 17398 case TYPE_FPSTORE: 17399 { 17400 if (get_attr_update (dep_insn) == UPDATE_YES 17401 && ! rs6000_store_data_bypass_p (dep_insn, insn)) 17402 return 3; 17403 break; 17404 } 17405 case TYPE_MUL: 17406 { 17407 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17408 return 17; 17409 break; 17410 } 17411 case TYPE_DIV: 17412 { 17413 if (! rs6000_store_data_bypass_p (dep_insn, insn)) 17414 return get_attr_size (dep_insn) == SIZE_32 ? 
45 : 57; 17415 break; 17416 } 17417 default: 17418 break; 17419 } 17420 } 17421 break; 17422 17423 case TYPE_LOAD: 17424 if ((rs6000_tune == PROCESSOR_POWER6) 17425 && recog_memoized (dep_insn) 17426 && (INSN_CODE (dep_insn) >= 0)) 17427 { 17428 17429 /* Adjust the cost for the case where the value written 17430 by a fixed point instruction is used within the address 17431 gen portion of a subsequent load(u)(x) */ 17432 switch (get_attr_type (dep_insn)) 17433 { 17434 case TYPE_LOAD: 17435 case TYPE_CNTLZ: 17436 { 17437 if (set_to_load_agen (dep_insn, insn)) 17438 return get_attr_sign_extend (dep_insn) 17439 == SIGN_EXTEND_YES ? 6 : 4; 17440 break; 17441 } 17442 case TYPE_SHIFT: 17443 { 17444 if (set_to_load_agen (dep_insn, insn)) 17445 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? 17446 6 : 3; 17447 break; 17448 } 17449 case TYPE_INTEGER: 17450 case TYPE_ADD: 17451 case TYPE_LOGICAL: 17452 case TYPE_EXTS: 17453 case TYPE_INSERT: 17454 { 17455 if (set_to_load_agen (dep_insn, insn)) 17456 return 3; 17457 break; 17458 } 17459 case TYPE_STORE: 17460 case TYPE_FPLOAD: 17461 case TYPE_FPSTORE: 17462 { 17463 if (get_attr_update (dep_insn) == UPDATE_YES 17464 && set_to_load_agen (dep_insn, insn)) 17465 return 3; 17466 break; 17467 } 17468 case TYPE_MUL: 17469 { 17470 if (set_to_load_agen (dep_insn, insn)) 17471 return 17; 17472 break; 17473 } 17474 case TYPE_DIV: 17475 { 17476 if (set_to_load_agen (dep_insn, insn)) 17477 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; 17478 break; 17479 } 17480 default: 17481 break; 17482 } 17483 } 17484 break; 17485 17486 case TYPE_FPLOAD: 17487 if ((rs6000_tune == PROCESSOR_POWER6) 17488 && get_attr_update (insn) == UPDATE_NO 17489 && recog_memoized (dep_insn) 17490 && (INSN_CODE (dep_insn) >= 0) 17491 && (get_attr_type (dep_insn) == TYPE_MFFGPR)) 17492 return 2; 17493 17494 default: 17495 break; 17496 } 17497 17498 /* Fall out to return default cost. */ 17499 } 17500 break; 17501 17502 case REG_DEP_OUTPUT: 17503 /* Output dependency; DEP_INSN writes a register that INSN writes some 17504 cycles later. */ 17505 if ((rs6000_tune == PROCESSOR_POWER6) 17506 && recog_memoized (dep_insn) 17507 && (INSN_CODE (dep_insn) >= 0)) 17508 { 17509 attr_type = get_attr_type (insn); 17510 17511 switch (attr_type) 17512 { 17513 case TYPE_FP: 17514 case TYPE_FPSIMPLE: 17515 if (get_attr_type (dep_insn) == TYPE_FP 17516 || get_attr_type (dep_insn) == TYPE_FPSIMPLE) 17517 return 1; 17518 break; 17519 case TYPE_FPLOAD: 17520 if (get_attr_update (insn) == UPDATE_NO 17521 && get_attr_type (dep_insn) == TYPE_MFFGPR) 17522 return 2; 17523 break; 17524 default: 17525 break; 17526 } 17527 } 17528 /* Fall through, no cost for output dependency. */ 17529 /* FALLTHRU */ 17530 17531 case REG_DEP_ANTI: 17532 /* Anti dependency; DEP_INSN reads a register that INSN writes some 17533 cycles later. */ 17534 return 0; 17535 17536 default: 17537 gcc_unreachable (); 17538 } 17539 17540 return cost; 17541} 17542 17543/* Debug version of rs6000_adjust_cost. 
*/ 17544 17545static int 17546rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, 17547 int cost, unsigned int dw) 17548{ 17549 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw); 17550 17551 if (ret != cost) 17552 { 17553 const char *dep; 17554 17555 switch (dep_type) 17556 { 17557 default: dep = "unknown depencency"; break; 17558 case REG_DEP_TRUE: dep = "data dependency"; break; 17559 case REG_DEP_OUTPUT: dep = "output dependency"; break; 17560 case REG_DEP_ANTI: dep = "anti depencency"; break; 17561 } 17562 17563 fprintf (stderr, 17564 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, " 17565 "%s, insn:\n", ret, cost, dep); 17566 17567 debug_rtx (insn); 17568 } 17569 17570 return ret; 17571} 17572 17573/* The function returns a true if INSN is microcoded. 17574 Return false otherwise. */ 17575 17576static bool 17577is_microcoded_insn (rtx_insn *insn) 17578{ 17579 if (!insn || !NONDEBUG_INSN_P (insn) 17580 || GET_CODE (PATTERN (insn)) == USE 17581 || GET_CODE (PATTERN (insn)) == CLOBBER) 17582 return false; 17583 17584 if (rs6000_tune == PROCESSOR_CELL) 17585 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; 17586 17587 if (rs6000_sched_groups 17588 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5)) 17589 { 17590 enum attr_type type = get_attr_type (insn); 17591 if ((type == TYPE_LOAD 17592 && get_attr_update (insn) == UPDATE_YES 17593 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) 17594 || ((type == TYPE_LOAD || type == TYPE_STORE) 17595 && get_attr_update (insn) == UPDATE_YES 17596 && get_attr_indexed (insn) == INDEXED_YES) 17597 || type == TYPE_MFCR) 17598 return true; 17599 } 17600 17601 return false; 17602} 17603 17604/* The function returns true if INSN is cracked into 2 instructions 17605 by the processor (and therefore occupies 2 issue slots). */ 17606 17607static bool 17608is_cracked_insn (rtx_insn *insn) 17609{ 17610 if (!insn || !NONDEBUG_INSN_P (insn) 17611 || GET_CODE (PATTERN (insn)) == USE 17612 || GET_CODE (PATTERN (insn)) == CLOBBER) 17613 return false; 17614 17615 if (rs6000_sched_groups 17616 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5)) 17617 { 17618 enum attr_type type = get_attr_type (insn); 17619 if ((type == TYPE_LOAD 17620 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES 17621 && get_attr_update (insn) == UPDATE_NO) 17622 || (type == TYPE_LOAD 17623 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO 17624 && get_attr_update (insn) == UPDATE_YES 17625 && get_attr_indexed (insn) == INDEXED_NO) 17626 || (type == TYPE_STORE 17627 && get_attr_update (insn) == UPDATE_YES 17628 && get_attr_indexed (insn) == INDEXED_NO) 17629 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE) 17630 && get_attr_update (insn) == UPDATE_YES) 17631 || (type == TYPE_CR_LOGICAL 17632 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES) 17633 || (type == TYPE_EXTS 17634 && get_attr_dot (insn) == DOT_YES) 17635 || (type == TYPE_SHIFT 17636 && get_attr_dot (insn) == DOT_YES 17637 && get_attr_var_shift (insn) == VAR_SHIFT_NO) 17638 || (type == TYPE_MUL 17639 && get_attr_dot (insn) == DOT_YES) 17640 || type == TYPE_DIV 17641 || (type == TYPE_INSERT 17642 && get_attr_size (insn) == SIZE_32)) 17643 return true; 17644 } 17645 17646 return false; 17647} 17648 17649/* The function returns true if INSN can be issued only from 17650 the branch slot. 
*/ 17651 17652static bool 17653is_branch_slot_insn (rtx_insn *insn) 17654{ 17655 if (!insn || !NONDEBUG_INSN_P (insn) 17656 || GET_CODE (PATTERN (insn)) == USE 17657 || GET_CODE (PATTERN (insn)) == CLOBBER) 17658 return false; 17659 17660 if (rs6000_sched_groups) 17661 { 17662 enum attr_type type = get_attr_type (insn); 17663 if (type == TYPE_BRANCH || type == TYPE_JMPREG) 17664 return true; 17665 return false; 17666 } 17667 17668 return false; 17669} 17670 17671/* The function returns true if out_inst sets a value that is 17672 used in the address generation computation of in_insn */ 17673static bool 17674set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn) 17675{ 17676 rtx out_set, in_set; 17677 17678 /* For performance reasons, only handle the simple case where 17679 both loads are a single_set. */ 17680 out_set = single_set (out_insn); 17681 if (out_set) 17682 { 17683 in_set = single_set (in_insn); 17684 if (in_set) 17685 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); 17686 } 17687 17688 return false; 17689} 17690 17691/* Try to determine base/offset/size parts of the given MEM. 17692 Return true if successful, false if all the values couldn't 17693 be determined. 17694 17695 This function only looks for REG or REG+CONST address forms. 17696 REG+REG address form will return false. */ 17697 17698static bool 17699get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, 17700 HOST_WIDE_INT *size) 17701{ 17702 rtx addr_rtx; 17703 if MEM_SIZE_KNOWN_P (mem) 17704 *size = MEM_SIZE (mem); 17705 else 17706 return false; 17707 17708 addr_rtx = (XEXP (mem, 0)); 17709 if (GET_CODE (addr_rtx) == PRE_MODIFY) 17710 addr_rtx = XEXP (addr_rtx, 1); 17711 17712 *offset = 0; 17713 while (GET_CODE (addr_rtx) == PLUS 17714 && CONST_INT_P (XEXP (addr_rtx, 1))) 17715 { 17716 *offset += INTVAL (XEXP (addr_rtx, 1)); 17717 addr_rtx = XEXP (addr_rtx, 0); 17718 } 17719 if (!REG_P (addr_rtx)) 17720 return false; 17721 17722 *base = addr_rtx; 17723 return true; 17724} 17725 17726/* The function returns true if the target storage location of 17727 mem1 is adjacent to the target storage location of mem2 */ 17728/* Return 1 if memory locations are adjacent. */ 17729 17730static bool 17731adjacent_mem_locations (rtx mem1, rtx mem2) 17732{ 17733 rtx reg1, reg2; 17734 HOST_WIDE_INT off1, size1, off2, size2; 17735 17736 if (get_memref_parts (mem1, ®1, &off1, &size1) 17737 && get_memref_parts (mem2, ®2, &off2, &size2)) 17738 return ((REGNO (reg1) == REGNO (reg2)) 17739 && ((off1 + size1 == off2) 17740 || (off2 + size2 == off1))); 17741 17742 return false; 17743} 17744 17745/* This function returns true if it can be determined that the two MEM 17746 locations overlap by at least 1 byte based on base reg/offset/size. */ 17747 17748static bool 17749mem_locations_overlap (rtx mem1, rtx mem2) 17750{ 17751 rtx reg1, reg2; 17752 HOST_WIDE_INT off1, size1, off2, size2; 17753 17754 if (get_memref_parts (mem1, ®1, &off1, &size1) 17755 && get_memref_parts (mem2, ®2, &off2, &size2)) 17756 return ((REGNO (reg1) == REGNO (reg2)) 17757 && (((off1 <= off2) && (off1 + size1 > off2)) 17758 || ((off2 <= off1) && (off2 + size2 > off1)))); 17759 17760 return false; 17761} 17762 17763/* A C statement (sans semicolon) to update the integer scheduling 17764 priority INSN_PRIORITY (INSN). Increase the priority to execute the 17765 INSN earlier, reduce the priority to execute INSN later. Do not 17766 define this macro if you do not need to adjust the scheduling 17767 priorities of insns. 
*/ 17768 17769static int 17770rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority) 17771{ 17772 rtx load_mem, str_mem; 17773 /* On machines (like the 750) which have asymmetric integer units, 17774 where one integer unit can do multiply and divides and the other 17775 can't, reduce the priority of multiply/divide so it is scheduled 17776 before other integer operations. */ 17777 17778#if 0 17779 if (! INSN_P (insn)) 17780 return priority; 17781 17782 if (GET_CODE (PATTERN (insn)) == USE) 17783 return priority; 17784 17785 switch (rs6000_tune) { 17786 case PROCESSOR_PPC750: 17787 switch (get_attr_type (insn)) 17788 { 17789 default: 17790 break; 17791 17792 case TYPE_MUL: 17793 case TYPE_DIV: 17794 fprintf (stderr, "priority was %#x (%d) before adjustment\n", 17795 priority, priority); 17796 if (priority >= 0 && priority < 0x01000000) 17797 priority >>= 3; 17798 break; 17799 } 17800 } 17801#endif 17802 17803 if (insn_must_be_first_in_group (insn) 17804 && reload_completed 17805 && current_sched_info->sched_max_insns_priority 17806 && rs6000_sched_restricted_insns_priority) 17807 { 17808 17809 /* Prioritize insns that can be dispatched only in the first 17810 dispatch slot. */ 17811 if (rs6000_sched_restricted_insns_priority == 1) 17812 /* Attach highest priority to insn. This means that in 17813 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations 17814 precede 'priority' (critical path) considerations. */ 17815 return current_sched_info->sched_max_insns_priority; 17816 else if (rs6000_sched_restricted_insns_priority == 2) 17817 /* Increase priority of insn by a minimal amount. This means that in 17818 haifa-sched.c:ready_sort(), only 'priority' (critical path) 17819 considerations precede dispatch-slot restriction considerations. */ 17820 return (priority + 1); 17821 } 17822 17823 if (rs6000_tune == PROCESSOR_POWER6 17824 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem)) 17825 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem)))) 17826 /* Attach highest priority to insn if the scheduler has just issued two 17827 stores and this instruction is a load, or two loads and this instruction 17828 is a store. Power6 wants loads and stores scheduled alternately 17829 when possible */ 17830 return current_sched_info->sched_max_insns_priority; 17831 17832 return priority; 17833} 17834 17835/* Return true if the instruction is nonpipelined on the Cell. */ 17836static bool 17837is_nonpipeline_insn (rtx_insn *insn) 17838{ 17839 enum attr_type type; 17840 if (!insn || !NONDEBUG_INSN_P (insn) 17841 || GET_CODE (PATTERN (insn)) == USE 17842 || GET_CODE (PATTERN (insn)) == CLOBBER) 17843 return false; 17844 17845 type = get_attr_type (insn); 17846 if (type == TYPE_MUL 17847 || type == TYPE_DIV 17848 || type == TYPE_SDIV 17849 || type == TYPE_DDIV 17850 || type == TYPE_SSQRT 17851 || type == TYPE_DSQRT 17852 || type == TYPE_MFCR 17853 || type == TYPE_MFCRF 17854 || type == TYPE_MFJMPR) 17855 { 17856 return true; 17857 } 17858 return false; 17859} 17860 17861 17862/* Return how many instructions the machine can issue per cycle. */ 17863 17864static int 17865rs6000_issue_rate (void) 17866{ 17867 /* Unless scheduling for register pressure, use issue rate of 1 for 17868 first scheduling pass to decrease degradation. */ 17869 if (!reload_completed && !flag_sched_pressure) 17870 return 1; 17871 17872 switch (rs6000_tune) { 17873 case PROCESSOR_RS64A: 17874 case PROCESSOR_PPC601: /* ? 
*/ 17875 case PROCESSOR_PPC7450: 17876 return 3; 17877 case PROCESSOR_PPC440: 17878 case PROCESSOR_PPC603: 17879 case PROCESSOR_PPC750: 17880 case PROCESSOR_PPC7400: 17881 case PROCESSOR_PPC8540: 17882 case PROCESSOR_PPC8548: 17883 case PROCESSOR_CELL: 17884 case PROCESSOR_PPCE300C2: 17885 case PROCESSOR_PPCE300C3: 17886 case PROCESSOR_PPCE500MC: 17887 case PROCESSOR_PPCE500MC64: 17888 case PROCESSOR_PPCE5500: 17889 case PROCESSOR_PPCE6500: 17890 case PROCESSOR_TITAN: 17891 return 2; 17892 case PROCESSOR_PPC476: 17893 case PROCESSOR_PPC604: 17894 case PROCESSOR_PPC604e: 17895 case PROCESSOR_PPC620: 17896 case PROCESSOR_PPC630: 17897 return 4; 17898 case PROCESSOR_POWER4: 17899 case PROCESSOR_POWER5: 17900 case PROCESSOR_POWER6: 17901 case PROCESSOR_POWER7: 17902 return 5; 17903 case PROCESSOR_POWER8: 17904 return 7; 17905 case PROCESSOR_POWER9: 17906 case PROCESSOR_POWER10: 17907 return 6; 17908 default: 17909 return 1; 17910 } 17911} 17912 17913/* Return how many instructions to look ahead for better insn 17914 scheduling. */ 17915 17916static int 17917rs6000_use_sched_lookahead (void) 17918{ 17919 switch (rs6000_tune) 17920 { 17921 case PROCESSOR_PPC8540: 17922 case PROCESSOR_PPC8548: 17923 return 4; 17924 17925 case PROCESSOR_CELL: 17926 return (reload_completed ? 8 : 0); 17927 17928 default: 17929 return 0; 17930 } 17931} 17932 17933/* We are choosing insn from the ready queue. Return zero if INSN can be 17934 chosen. */ 17935static int 17936rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index) 17937{ 17938 if (ready_index == 0) 17939 return 0; 17940 17941 if (rs6000_tune != PROCESSOR_CELL) 17942 return 0; 17943 17944 gcc_assert (insn != NULL_RTX && INSN_P (insn)); 17945 17946 if (!reload_completed 17947 || is_nonpipeline_insn (insn) 17948 || is_microcoded_insn (insn)) 17949 return 1; 17950 17951 return 0; 17952} 17953 17954/* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx 17955 and return true. */ 17956 17957static bool 17958find_mem_ref (rtx pat, rtx *mem_ref) 17959{ 17960 const char * fmt; 17961 int i, j; 17962 17963 /* stack_tie does not produce any real memory traffic. */ 17964 if (tie_operand (pat, VOIDmode)) 17965 return false; 17966 17967 if (MEM_P (pat)) 17968 { 17969 *mem_ref = pat; 17970 return true; 17971 } 17972 17973 /* Recursively process the pattern. */ 17974 fmt = GET_RTX_FORMAT (GET_CODE (pat)); 17975 17976 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--) 17977 { 17978 if (fmt[i] == 'e') 17979 { 17980 if (find_mem_ref (XEXP (pat, i), mem_ref)) 17981 return true; 17982 } 17983 else if (fmt[i] == 'E') 17984 for (j = XVECLEN (pat, i) - 1; j >= 0; j--) 17985 { 17986 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref)) 17987 return true; 17988 } 17989 } 17990 17991 return false; 17992} 17993 17994/* Determine if PAT is a PATTERN of a load insn. */ 17995 17996static bool 17997is_load_insn1 (rtx pat, rtx *load_mem) 17998{ 17999 if (!pat || pat == NULL_RTX) 18000 return false; 18001 18002 if (GET_CODE (pat) == SET) 18003 return find_mem_ref (SET_SRC (pat), load_mem); 18004 18005 if (GET_CODE (pat) == PARALLEL) 18006 { 18007 int i; 18008 18009 for (i = 0; i < XVECLEN (pat, 0); i++) 18010 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem)) 18011 return true; 18012 } 18013 18014 return false; 18015} 18016 18017/* Determine if INSN loads from memory. 
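   Note that CALL insns are rejected up front, so a call that happens to
   read memory does not count as a load here; only MEMs reachable from
   the SET_SRC of the insn's pattern (or of a member of a PARALLEL) do.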
*/ 18018 18019static bool 18020is_load_insn (rtx insn, rtx *load_mem) 18021{ 18022 if (!insn || !INSN_P (insn)) 18023 return false; 18024 18025 if (CALL_P (insn)) 18026 return false; 18027 18028 return is_load_insn1 (PATTERN (insn), load_mem); 18029} 18030 18031/* Determine if PAT is a PATTERN of a store insn. */ 18032 18033static bool 18034is_store_insn1 (rtx pat, rtx *str_mem) 18035{ 18036 if (!pat || pat == NULL_RTX) 18037 return false; 18038 18039 if (GET_CODE (pat) == SET) 18040 return find_mem_ref (SET_DEST (pat), str_mem); 18041 18042 if (GET_CODE (pat) == PARALLEL) 18043 { 18044 int i; 18045 18046 for (i = 0; i < XVECLEN (pat, 0); i++) 18047 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem)) 18048 return true; 18049 } 18050 18051 return false; 18052} 18053 18054/* Determine if INSN stores to memory. */ 18055 18056static bool 18057is_store_insn (rtx insn, rtx *str_mem) 18058{ 18059 if (!insn || !INSN_P (insn)) 18060 return false; 18061 18062 return is_store_insn1 (PATTERN (insn), str_mem); 18063} 18064 18065/* Return whether TYPE is a Power9 pairable vector instruction type. */ 18066 18067static bool 18068is_power9_pairable_vec_type (enum attr_type type) 18069{ 18070 switch (type) 18071 { 18072 case TYPE_VECSIMPLE: 18073 case TYPE_VECCOMPLEX: 18074 case TYPE_VECDIV: 18075 case TYPE_VECCMP: 18076 case TYPE_VECPERM: 18077 case TYPE_VECFLOAT: 18078 case TYPE_VECFDIV: 18079 case TYPE_VECDOUBLE: 18080 return true; 18081 default: 18082 break; 18083 } 18084 return false; 18085} 18086 18087/* Returns whether the dependence between INSN and NEXT is considered 18088 costly by the given target. */ 18089 18090static bool 18091rs6000_is_costly_dependence (dep_t dep, int cost, int distance) 18092{ 18093 rtx insn; 18094 rtx next; 18095 rtx load_mem, str_mem; 18096 18097 /* If the flag is not enabled - no dependence is considered costly; 18098 allow all dependent insns in the same group. 18099 This is the most aggressive option. */ 18100 if (rs6000_sched_costly_dep == no_dep_costly) 18101 return false; 18102 18103 /* If the flag is set to 1 - a dependence is always considered costly; 18104 do not allow dependent instructions in the same group. 18105 This is the most conservative option. */ 18106 if (rs6000_sched_costly_dep == all_deps_costly) 18107 return true; 18108 18109 insn = DEP_PRO (dep); 18110 next = DEP_CON (dep); 18111 18112 if (rs6000_sched_costly_dep == store_to_load_dep_costly 18113 && is_load_insn (next, &load_mem) 18114 && is_store_insn (insn, &str_mem)) 18115 /* Prevent load after store in the same group. */ 18116 return true; 18117 18118 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly 18119 && is_load_insn (next, &load_mem) 18120 && is_store_insn (insn, &str_mem) 18121 && DEP_TYPE (dep) == REG_DEP_TRUE 18122 && mem_locations_overlap(str_mem, load_mem)) 18123 /* Prevent load after store in the same group if it is a true 18124 dependence. */ 18125 return true; 18126 18127 /* The flag is set to X; dependences with latency >= X are considered costly, 18128 and will not be scheduled in the same group. */ 18129 if (rs6000_sched_costly_dep <= max_dep_latency 18130 && ((cost - distance) >= (int)rs6000_sched_costly_dep)) 18131 return true; 18132 18133 return false; 18134} 18135 18136/* Return the next insn after INSN that is found before TAIL is reached, 18137 skipping any "non-active" insns - insns that will not actually occupy 18138 an issue slot. Return NULL_RTX if such an insn is not found. 
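   "Non-active" insns here are things such as notes, code labels, debug
   insns, USE and CLOBBER patterns, and the stack_tie pattern.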
*/ 18139 18140static rtx_insn * 18141get_next_active_insn (rtx_insn *insn, rtx_insn *tail) 18142{ 18143 if (insn == NULL_RTX || insn == tail) 18144 return NULL; 18145 18146 while (1) 18147 { 18148 insn = NEXT_INSN (insn); 18149 if (insn == NULL_RTX || insn == tail) 18150 return NULL; 18151 18152 if (CALL_P (insn) 18153 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) 18154 || (NONJUMP_INSN_P (insn) 18155 && GET_CODE (PATTERN (insn)) != USE 18156 && GET_CODE (PATTERN (insn)) != CLOBBER 18157 && INSN_CODE (insn) != CODE_FOR_stack_tie)) 18158 break; 18159 } 18160 return insn; 18161} 18162 18163/* Move instruction at POS to the end of the READY list. */ 18164 18165static void 18166move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos) 18167{ 18168 rtx_insn *tmp; 18169 int i; 18170 18171 tmp = ready[pos]; 18172 for (i = pos; i < lastpos; i++) 18173 ready[i] = ready[i + 1]; 18174 ready[lastpos] = tmp; 18175} 18176 18177/* Do Power6 specific sched_reorder2 reordering of ready list. */ 18178 18179static int 18180power6_sched_reorder2 (rtx_insn **ready, int lastpos) 18181{ 18182 /* For Power6, we need to handle some special cases to try and keep the 18183 store queue from overflowing and triggering expensive flushes. 18184 18185 This code monitors how load and store instructions are being issued 18186 and skews the ready list one way or the other to increase the likelihood 18187 that a desired instruction is issued at the proper time. 18188 18189 A couple of things are done. First, we maintain a "load_store_pendulum" 18190 to track the current state of load/store issue. 18191 18192 - If the pendulum is at zero, then no loads or stores have been 18193 issued in the current cycle so we do nothing. 18194 18195 - If the pendulum is 1, then a single load has been issued in this 18196 cycle and we attempt to locate another load in the ready list to 18197 issue with it. 18198 18199 - If the pendulum is -2, then two stores have already been 18200 issued in this cycle, so we increase the priority of the first load 18201 in the ready list to increase it's likelihood of being chosen first 18202 in the next cycle. 18203 18204 - If the pendulum is -1, then a single store has been issued in this 18205 cycle and we attempt to locate another store in the ready list to 18206 issue with it, preferring a store to an adjacent memory location to 18207 facilitate store pairing in the store queue. 18208 18209 - If the pendulum is 2, then two loads have already been 18210 issued in this cycle, so we increase the priority of the first store 18211 in the ready list to increase it's likelihood of being chosen first 18212 in the next cycle. 18213 18214 - If the pendulum < -2 or > 2, then do nothing. 18215 18216 Note: This code covers the most common scenarios. There exist non 18217 load/store instructions which make use of the LSU and which 18218 would need to be accounted for to strictly model the behavior 18219 of the machine. Those instructions are currently unaccounted 18220 for to help minimize compile time overhead of this code. 
18221 */ 18222 int pos; 18223 rtx load_mem, str_mem; 18224 18225 if (is_store_insn (last_scheduled_insn, &str_mem)) 18226 /* Issuing a store, swing the load_store_pendulum to the left */ 18227 load_store_pendulum--; 18228 else if (is_load_insn (last_scheduled_insn, &load_mem)) 18229 /* Issuing a load, swing the load_store_pendulum to the right */ 18230 load_store_pendulum++; 18231 else 18232 return cached_can_issue_more; 18233 18234 /* If the pendulum is balanced, or there is only one instruction on 18235 the ready list, then all is well, so return. */ 18236 if ((load_store_pendulum == 0) || (lastpos <= 0)) 18237 return cached_can_issue_more; 18238 18239 if (load_store_pendulum == 1) 18240 { 18241 /* A load has been issued in this cycle. Scan the ready list 18242 for another load to issue with it */ 18243 pos = lastpos; 18244 18245 while (pos >= 0) 18246 { 18247 if (is_load_insn (ready[pos], &load_mem)) 18248 { 18249 /* Found a load. Move it to the head of the ready list, 18250 and adjust it's priority so that it is more likely to 18251 stay there */ 18252 move_to_end_of_ready (ready, pos, lastpos); 18253 18254 if (!sel_sched_p () 18255 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18256 INSN_PRIORITY (ready[lastpos])++; 18257 break; 18258 } 18259 pos--; 18260 } 18261 } 18262 else if (load_store_pendulum == -2) 18263 { 18264 /* Two stores have been issued in this cycle. Increase the 18265 priority of the first load in the ready list to favor it for 18266 issuing in the next cycle. */ 18267 pos = lastpos; 18268 18269 while (pos >= 0) 18270 { 18271 if (is_load_insn (ready[pos], &load_mem) 18272 && !sel_sched_p () 18273 && INSN_PRIORITY_KNOWN (ready[pos])) 18274 { 18275 INSN_PRIORITY (ready[pos])++; 18276 18277 /* Adjust the pendulum to account for the fact that a load 18278 was found and increased in priority. This is to prevent 18279 increasing the priority of multiple loads */ 18280 load_store_pendulum--; 18281 18282 break; 18283 } 18284 pos--; 18285 } 18286 } 18287 else if (load_store_pendulum == -1) 18288 { 18289 /* A store has been issued in this cycle. Scan the ready list for 18290 another store to issue with it, preferring a store to an adjacent 18291 memory location */ 18292 int first_store_pos = -1; 18293 18294 pos = lastpos; 18295 18296 while (pos >= 0) 18297 { 18298 if (is_store_insn (ready[pos], &str_mem)) 18299 { 18300 rtx str_mem2; 18301 /* Maintain the index of the first store found on the 18302 list */ 18303 if (first_store_pos == -1) 18304 first_store_pos = pos; 18305 18306 if (is_store_insn (last_scheduled_insn, &str_mem2) 18307 && adjacent_mem_locations (str_mem, str_mem2)) 18308 { 18309 /* Found an adjacent store. Move it to the head of the 18310 ready list, and adjust it's priority so that it is 18311 more likely to stay there */ 18312 move_to_end_of_ready (ready, pos, lastpos); 18313 18314 if (!sel_sched_p () 18315 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18316 INSN_PRIORITY (ready[lastpos])++; 18317 18318 first_store_pos = -1; 18319 18320 break; 18321 }; 18322 } 18323 pos--; 18324 } 18325 18326 if (first_store_pos >= 0) 18327 { 18328 /* An adjacent store wasn't found, but a non-adjacent store was, 18329 so move the non-adjacent store to the front of the ready 18330 list, and adjust its priority so that it is more likely to 18331 stay there. 
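	 (Two stores are considered adjacent by adjacent_mem_locations when
	 get_memref_parts finds the same base register and the accesses
	 abut, e.g. doubleword stores to 0(r9) and 8(r9); the base register
	 r9 here is just an arbitrary example.)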
*/ 18332 move_to_end_of_ready (ready, first_store_pos, lastpos); 18333 if (!sel_sched_p () 18334 && INSN_PRIORITY_KNOWN (ready[lastpos])) 18335 INSN_PRIORITY (ready[lastpos])++; 18336 } 18337 } 18338 else if (load_store_pendulum == 2) 18339 { 18340 /* Two loads have been issued in this cycle. Increase the priority 18341 of the first store in the ready list to favor it for issuing in 18342 the next cycle. */ 18343 pos = lastpos; 18344 18345 while (pos >= 0) 18346 { 18347 if (is_store_insn (ready[pos], &str_mem) 18348 && !sel_sched_p () 18349 && INSN_PRIORITY_KNOWN (ready[pos])) 18350 { 18351 INSN_PRIORITY (ready[pos])++; 18352 18353 /* Adjust the pendulum to account for the fact that a store 18354 was found and increased in priority. This is to prevent 18355 increasing the priority of multiple stores */ 18356 load_store_pendulum++; 18357 18358 break; 18359 } 18360 pos--; 18361 } 18362 } 18363 18364 return cached_can_issue_more; 18365} 18366 18367/* Do Power9 specific sched_reorder2 reordering of ready list. */ 18368 18369static int 18370power9_sched_reorder2 (rtx_insn **ready, int lastpos) 18371{ 18372 int pos; 18373 enum attr_type type, type2; 18374 18375 type = get_attr_type (last_scheduled_insn); 18376 18377 /* Try to issue fixed point divides back-to-back in pairs so they will be 18378 routed to separate execution units and execute in parallel. */ 18379 if (type == TYPE_DIV && divide_cnt == 0) 18380 { 18381 /* First divide has been scheduled. */ 18382 divide_cnt = 1; 18383 18384 /* Scan the ready list looking for another divide, if found move it 18385 to the end of the list so it is chosen next. */ 18386 pos = lastpos; 18387 while (pos >= 0) 18388 { 18389 if (recog_memoized (ready[pos]) >= 0 18390 && get_attr_type (ready[pos]) == TYPE_DIV) 18391 { 18392 move_to_end_of_ready (ready, pos, lastpos); 18393 break; 18394 } 18395 pos--; 18396 } 18397 } 18398 else 18399 { 18400 /* Last insn was the 2nd divide or not a divide, reset the counter. */ 18401 divide_cnt = 0; 18402 18403 /* The best dispatch throughput for vector and vector load insns can be 18404 achieved by interleaving a vector and vector load such that they'll 18405 dispatch to the same superslice. If this pairing cannot be achieved 18406 then it is best to pair vector insns together and vector load insns 18407 together. 18408 18409 To aid in this pairing, vec_pairing maintains the current state with 18410 the following values: 18411 18412 0 : Initial state, no vecload/vector pairing has been started. 18413 18414 1 : A vecload or vector insn has been issued and a candidate for 18415 pairing has been found and moved to the end of the ready 18416 list. */ 18417 if (type == TYPE_VECLOAD) 18418 { 18419 /* Issued a vecload. */ 18420 if (vec_pairing == 0) 18421 { 18422 int vecload_pos = -1; 18423 /* We issued a single vecload, look for a vector insn to pair it 18424 with. If one isn't found, try to pair another vecload. */ 18425 pos = lastpos; 18426 while (pos >= 0) 18427 { 18428 if (recog_memoized (ready[pos]) >= 0) 18429 { 18430 type2 = get_attr_type (ready[pos]); 18431 if (is_power9_pairable_vec_type (type2)) 18432 { 18433 /* Found a vector insn to pair with, move it to the 18434 end of the ready list so it is scheduled next. */ 18435 move_to_end_of_ready (ready, pos, lastpos); 18436 vec_pairing = 1; 18437 return cached_can_issue_more; 18438 } 18439 else if (type2 == TYPE_VECLOAD && vecload_pos == -1) 18440 /* Remember position of first vecload seen. 
*/ 18441 vecload_pos = pos; 18442 } 18443 pos--; 18444 } 18445 if (vecload_pos >= 0) 18446 { 18447 /* Didn't find a vector to pair with but did find a vecload, 18448 move it to the end of the ready list. */ 18449 move_to_end_of_ready (ready, vecload_pos, lastpos); 18450 vec_pairing = 1; 18451 return cached_can_issue_more; 18452 } 18453 } 18454 } 18455 else if (is_power9_pairable_vec_type (type)) 18456 { 18457 /* Issued a vector operation. */ 18458 if (vec_pairing == 0) 18459 { 18460 int vec_pos = -1; 18461 /* We issued a single vector insn, look for a vecload to pair it 18462 with. If one isn't found, try to pair another vector. */ 18463 pos = lastpos; 18464 while (pos >= 0) 18465 { 18466 if (recog_memoized (ready[pos]) >= 0) 18467 { 18468 type2 = get_attr_type (ready[pos]); 18469 if (type2 == TYPE_VECLOAD) 18470 { 18471 /* Found a vecload insn to pair with, move it to the 18472 end of the ready list so it is scheduled next. */ 18473 move_to_end_of_ready (ready, pos, lastpos); 18474 vec_pairing = 1; 18475 return cached_can_issue_more; 18476 } 18477 else if (is_power9_pairable_vec_type (type2) 18478 && vec_pos == -1) 18479 /* Remember position of first vector insn seen. */ 18480 vec_pos = pos; 18481 } 18482 pos--; 18483 } 18484 if (vec_pos >= 0) 18485 { 18486 /* Didn't find a vecload to pair with but did find a vector 18487 insn, move it to the end of the ready list. */ 18488 move_to_end_of_ready (ready, vec_pos, lastpos); 18489 vec_pairing = 1; 18490 return cached_can_issue_more; 18491 } 18492 } 18493 } 18494 18495 /* We've either finished a vec/vecload pair, couldn't find an insn to 18496 continue the current pair, or the last insn had nothing to do with 18497 with pairing. In any case, reset the state. */ 18498 vec_pairing = 0; 18499 } 18500 18501 return cached_can_issue_more; 18502} 18503 18504/* We are about to begin issuing insns for this clock cycle. */ 18505 18506static int 18507rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose, 18508 rtx_insn **ready ATTRIBUTE_UNUSED, 18509 int *pn_ready ATTRIBUTE_UNUSED, 18510 int clock_var ATTRIBUTE_UNUSED) 18511{ 18512 int n_ready = *pn_ready; 18513 18514 if (sched_verbose) 18515 fprintf (dump, "// rs6000_sched_reorder :\n"); 18516 18517 /* Reorder the ready list, if the second to last ready insn 18518 is a nonepipeline insn. */ 18519 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1) 18520 { 18521 if (is_nonpipeline_insn (ready[n_ready - 1]) 18522 && (recog_memoized (ready[n_ready - 2]) > 0)) 18523 /* Simply swap first two insns. */ 18524 std::swap (ready[n_ready - 1], ready[n_ready - 2]); 18525 } 18526 18527 if (rs6000_tune == PROCESSOR_POWER6) 18528 load_store_pendulum = 0; 18529 18530 return rs6000_issue_rate (); 18531} 18532 18533/* Like rs6000_sched_reorder, but called after issuing each insn. */ 18534 18535static int 18536rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready, 18537 int *pn_ready, int clock_var ATTRIBUTE_UNUSED) 18538{ 18539 if (sched_verbose) 18540 fprintf (dump, "// rs6000_sched_reorder2 :\n"); 18541 18542 /* Do Power6 dependent reordering if necessary. */ 18543 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn) 18544 return power6_sched_reorder2 (ready, *pn_ready - 1); 18545 18546 /* Do Power9 dependent reordering if necessary. 
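     For example, if the insn just issued was a vecload and no pairing is
     in progress, power9_sched_reorder2 moves the first pairable vector
     insn it finds (or, failing that, another vecload) to the end of the
     ready list so the pair can dispatch to the same superslice.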
*/ 18547 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn 18548 && recog_memoized (last_scheduled_insn) >= 0) 18549 return power9_sched_reorder2 (ready, *pn_ready - 1); 18550 18551 return cached_can_issue_more; 18552} 18553 18554/* Return whether the presence of INSN causes a dispatch group termination 18555 of group WHICH_GROUP. 18556 18557 If WHICH_GROUP == current_group, this function will return true if INSN 18558 causes the termination of the current group (i.e, the dispatch group to 18559 which INSN belongs). This means that INSN will be the last insn in the 18560 group it belongs to. 18561 18562 If WHICH_GROUP == previous_group, this function will return true if INSN 18563 causes the termination of the previous group (i.e, the dispatch group that 18564 precedes the group to which INSN belongs). This means that INSN will be 18565 the first insn in the group it belongs to). */ 18566 18567static bool 18568insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group) 18569{ 18570 bool first, last; 18571 18572 if (! insn) 18573 return false; 18574 18575 first = insn_must_be_first_in_group (insn); 18576 last = insn_must_be_last_in_group (insn); 18577 18578 if (first && last) 18579 return true; 18580 18581 if (which_group == current_group) 18582 return last; 18583 else if (which_group == previous_group) 18584 return first; 18585 18586 return false; 18587} 18588 18589 18590static bool 18591insn_must_be_first_in_group (rtx_insn *insn) 18592{ 18593 enum attr_type type; 18594 18595 if (!insn 18596 || NOTE_P (insn) 18597 || DEBUG_INSN_P (insn) 18598 || GET_CODE (PATTERN (insn)) == USE 18599 || GET_CODE (PATTERN (insn)) == CLOBBER) 18600 return false; 18601 18602 switch (rs6000_tune) 18603 { 18604 case PROCESSOR_POWER5: 18605 if (is_cracked_insn (insn)) 18606 return true; 18607 /* FALLTHRU */ 18608 case PROCESSOR_POWER4: 18609 if (is_microcoded_insn (insn)) 18610 return true; 18611 18612 if (!rs6000_sched_groups) 18613 return false; 18614 18615 type = get_attr_type (insn); 18616 18617 switch (type) 18618 { 18619 case TYPE_MFCR: 18620 case TYPE_MFCRF: 18621 case TYPE_MTCR: 18622 case TYPE_CR_LOGICAL: 18623 case TYPE_MTJMPR: 18624 case TYPE_MFJMPR: 18625 case TYPE_DIV: 18626 case TYPE_LOAD_L: 18627 case TYPE_STORE_C: 18628 case TYPE_ISYNC: 18629 case TYPE_SYNC: 18630 return true; 18631 default: 18632 break; 18633 } 18634 break; 18635 case PROCESSOR_POWER6: 18636 type = get_attr_type (insn); 18637 18638 switch (type) 18639 { 18640 case TYPE_EXTS: 18641 case TYPE_CNTLZ: 18642 case TYPE_TRAP: 18643 case TYPE_MUL: 18644 case TYPE_INSERT: 18645 case TYPE_FPCOMPARE: 18646 case TYPE_MFCR: 18647 case TYPE_MTCR: 18648 case TYPE_MFJMPR: 18649 case TYPE_MTJMPR: 18650 case TYPE_ISYNC: 18651 case TYPE_SYNC: 18652 case TYPE_LOAD_L: 18653 case TYPE_STORE_C: 18654 return true; 18655 case TYPE_SHIFT: 18656 if (get_attr_dot (insn) == DOT_NO 18657 || get_attr_var_shift (insn) == VAR_SHIFT_NO) 18658 return true; 18659 else 18660 break; 18661 case TYPE_DIV: 18662 if (get_attr_size (insn) == SIZE_32) 18663 return true; 18664 else 18665 break; 18666 case TYPE_LOAD: 18667 case TYPE_STORE: 18668 case TYPE_FPLOAD: 18669 case TYPE_FPSTORE: 18670 if (get_attr_update (insn) == UPDATE_YES) 18671 return true; 18672 else 18673 break; 18674 default: 18675 break; 18676 } 18677 break; 18678 case PROCESSOR_POWER7: 18679 type = get_attr_type (insn); 18680 18681 switch (type) 18682 { 18683 case TYPE_CR_LOGICAL: 18684 case TYPE_MFCR: 18685 case TYPE_MFCRF: 18686 case TYPE_MTCR: 18687 case TYPE_DIV: 18688 case 
TYPE_ISYNC: 18689 case TYPE_LOAD_L: 18690 case TYPE_STORE_C: 18691 case TYPE_MFJMPR: 18692 case TYPE_MTJMPR: 18693 return true; 18694 case TYPE_MUL: 18695 case TYPE_SHIFT: 18696 case TYPE_EXTS: 18697 if (get_attr_dot (insn) == DOT_YES) 18698 return true; 18699 else 18700 break; 18701 case TYPE_LOAD: 18702 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 18703 || get_attr_update (insn) == UPDATE_YES) 18704 return true; 18705 else 18706 break; 18707 case TYPE_STORE: 18708 case TYPE_FPLOAD: 18709 case TYPE_FPSTORE: 18710 if (get_attr_update (insn) == UPDATE_YES) 18711 return true; 18712 else 18713 break; 18714 default: 18715 break; 18716 } 18717 break; 18718 case PROCESSOR_POWER8: 18719 type = get_attr_type (insn); 18720 18721 switch (type) 18722 { 18723 case TYPE_CR_LOGICAL: 18724 case TYPE_MFCR: 18725 case TYPE_MFCRF: 18726 case TYPE_MTCR: 18727 case TYPE_SYNC: 18728 case TYPE_ISYNC: 18729 case TYPE_LOAD_L: 18730 case TYPE_STORE_C: 18731 case TYPE_VECSTORE: 18732 case TYPE_MFJMPR: 18733 case TYPE_MTJMPR: 18734 return true; 18735 case TYPE_SHIFT: 18736 case TYPE_EXTS: 18737 case TYPE_MUL: 18738 if (get_attr_dot (insn) == DOT_YES) 18739 return true; 18740 else 18741 break; 18742 case TYPE_LOAD: 18743 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 18744 || get_attr_update (insn) == UPDATE_YES) 18745 return true; 18746 else 18747 break; 18748 case TYPE_STORE: 18749 if (get_attr_update (insn) == UPDATE_YES 18750 && get_attr_indexed (insn) == INDEXED_YES) 18751 return true; 18752 else 18753 break; 18754 default: 18755 break; 18756 } 18757 break; 18758 default: 18759 break; 18760 } 18761 18762 return false; 18763} 18764 18765static bool 18766insn_must_be_last_in_group (rtx_insn *insn) 18767{ 18768 enum attr_type type; 18769 18770 if (!insn 18771 || NOTE_P (insn) 18772 || DEBUG_INSN_P (insn) 18773 || GET_CODE (PATTERN (insn)) == USE 18774 || GET_CODE (PATTERN (insn)) == CLOBBER) 18775 return false; 18776 18777 switch (rs6000_tune) { 18778 case PROCESSOR_POWER4: 18779 case PROCESSOR_POWER5: 18780 if (is_microcoded_insn (insn)) 18781 return true; 18782 18783 if (is_branch_slot_insn (insn)) 18784 return true; 18785 18786 break; 18787 case PROCESSOR_POWER6: 18788 type = get_attr_type (insn); 18789 18790 switch (type) 18791 { 18792 case TYPE_EXTS: 18793 case TYPE_CNTLZ: 18794 case TYPE_TRAP: 18795 case TYPE_MUL: 18796 case TYPE_FPCOMPARE: 18797 case TYPE_MFCR: 18798 case TYPE_MTCR: 18799 case TYPE_MFJMPR: 18800 case TYPE_MTJMPR: 18801 case TYPE_ISYNC: 18802 case TYPE_SYNC: 18803 case TYPE_LOAD_L: 18804 case TYPE_STORE_C: 18805 return true; 18806 case TYPE_SHIFT: 18807 if (get_attr_dot (insn) == DOT_NO 18808 || get_attr_var_shift (insn) == VAR_SHIFT_NO) 18809 return true; 18810 else 18811 break; 18812 case TYPE_DIV: 18813 if (get_attr_size (insn) == SIZE_32) 18814 return true; 18815 else 18816 break; 18817 default: 18818 break; 18819 } 18820 break; 18821 case PROCESSOR_POWER7: 18822 type = get_attr_type (insn); 18823 18824 switch (type) 18825 { 18826 case TYPE_ISYNC: 18827 case TYPE_SYNC: 18828 case TYPE_LOAD_L: 18829 case TYPE_STORE_C: 18830 return true; 18831 case TYPE_LOAD: 18832 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 18833 && get_attr_update (insn) == UPDATE_YES) 18834 return true; 18835 else 18836 break; 18837 case TYPE_STORE: 18838 if (get_attr_update (insn) == UPDATE_YES 18839 && get_attr_indexed (insn) == INDEXED_YES) 18840 return true; 18841 else 18842 break; 18843 default: 18844 break; 18845 } 18846 break; 18847 case PROCESSOR_POWER8: 18848 type = get_attr_type (insn); 18849 
18850 switch (type) 18851 { 18852 case TYPE_MFCR: 18853 case TYPE_MTCR: 18854 case TYPE_ISYNC: 18855 case TYPE_SYNC: 18856 case TYPE_LOAD_L: 18857 case TYPE_STORE_C: 18858 return true; 18859 case TYPE_LOAD: 18860 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES 18861 && get_attr_update (insn) == UPDATE_YES) 18862 return true; 18863 else 18864 break; 18865 case TYPE_STORE: 18866 if (get_attr_update (insn) == UPDATE_YES 18867 && get_attr_indexed (insn) == INDEXED_YES) 18868 return true; 18869 else 18870 break; 18871 default: 18872 break; 18873 } 18874 break; 18875 default: 18876 break; 18877 } 18878 18879 return false; 18880} 18881 18882/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate 18883 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */ 18884 18885static bool 18886is_costly_group (rtx *group_insns, rtx next_insn) 18887{ 18888 int i; 18889 int issue_rate = rs6000_issue_rate (); 18890 18891 for (i = 0; i < issue_rate; i++) 18892 { 18893 sd_iterator_def sd_it; 18894 dep_t dep; 18895 rtx insn = group_insns[i]; 18896 18897 if (!insn) 18898 continue; 18899 18900 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep) 18901 { 18902 rtx next = DEP_CON (dep); 18903 18904 if (next == next_insn 18905 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0)) 18906 return true; 18907 } 18908 } 18909 18910 return false; 18911} 18912 18913/* Utility of the function redefine_groups. 18914 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS 18915 in the same dispatch group. If so, insert nops before NEXT_INSN, in order 18916 to keep it "far" (in a separate group) from GROUP_INSNS, following 18917 one of the following schemes, depending on the value of the flag 18918 -minsert_sched_nops = X: 18919 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed 18920 in order to force NEXT_INSN into a separate group. 18921 (2) X < sched_finish_regroup_exact: insert exactly X nops. 18922 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop 18923 insertion (has a group just ended, how many vacant issue slots remain in the 18924 last group, and how many dispatch groups were encountered so far). */ 18925 18926static int 18927force_new_group (int sched_verbose, FILE *dump, rtx *group_insns, 18928 rtx_insn *next_insn, bool *group_end, int can_issue_more, 18929 int *group_count) 18930{ 18931 rtx nop; 18932 bool force; 18933 int issue_rate = rs6000_issue_rate (); 18934 bool end = *group_end; 18935 int i; 18936 18937 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn)) 18938 return can_issue_more; 18939 18940 if (rs6000_sched_insert_nops > sched_finish_regroup_exact) 18941 return can_issue_more; 18942 18943 force = is_costly_group (group_insns, next_insn); 18944 if (!force) 18945 return can_issue_more; 18946 18947 if (sched_verbose > 6) 18948 fprintf (dump,"force: group count = %d, can_issue_more = %d\n", 18949 *group_count ,can_issue_more); 18950 18951 if (rs6000_sched_insert_nops == sched_finish_regroup_exact) 18952 { 18953 if (*group_end) 18954 can_issue_more = 0; 18955 18956 /* Since only a branch can be issued in the last issue_slot, it is 18957 sufficient to insert 'can_issue_more - 1' nops if next_insn is not 18958 a branch. If next_insn is a branch, we insert 'can_issue_more' nops; 18959 in this case the last nop will start a new group and the branch 18960 will be forced to the new group. 
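   For example, with two vacant slots remaining: a non-branch
   next_insn only needs one nop, because the remaining branch slot
   cannot hold it anyway; a branch next_insn needs two nops, the last
   of which opens a new group that the branch is then forced into.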
*/ 18961 if (can_issue_more && !is_branch_slot_insn (next_insn)) 18962 can_issue_more--; 18963 18964 /* Do we have a special group ending nop? */ 18965 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7 18966 || rs6000_tune == PROCESSOR_POWER8) 18967 { 18968 nop = gen_group_ending_nop (); 18969 emit_insn_before (nop, next_insn); 18970 can_issue_more = 0; 18971 } 18972 else 18973 while (can_issue_more > 0) 18974 { 18975 nop = gen_nop (); 18976 emit_insn_before (nop, next_insn); 18977 can_issue_more--; 18978 } 18979 18980 *group_end = true; 18981 return 0; 18982 } 18983 18984 if (rs6000_sched_insert_nops < sched_finish_regroup_exact) 18985 { 18986 int n_nops = rs6000_sched_insert_nops; 18987 18988 /* Nops can't be issued from the branch slot, so the effective 18989 issue_rate for nops is 'issue_rate - 1'. */ 18990 if (can_issue_more == 0) 18991 can_issue_more = issue_rate; 18992 can_issue_more--; 18993 if (can_issue_more == 0) 18994 { 18995 can_issue_more = issue_rate - 1; 18996 (*group_count)++; 18997 end = true; 18998 for (i = 0; i < issue_rate; i++) 18999 { 19000 group_insns[i] = 0; 19001 } 19002 } 19003 19004 while (n_nops > 0) 19005 { 19006 nop = gen_nop (); 19007 emit_insn_before (nop, next_insn); 19008 if (can_issue_more == issue_rate - 1) /* new group begins */ 19009 end = false; 19010 can_issue_more--; 19011 if (can_issue_more == 0) 19012 { 19013 can_issue_more = issue_rate - 1; 19014 (*group_count)++; 19015 end = true; 19016 for (i = 0; i < issue_rate; i++) 19017 { 19018 group_insns[i] = 0; 19019 } 19020 } 19021 n_nops--; 19022 } 19023 19024 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */ 19025 can_issue_more++; 19026 19027 /* Is next_insn going to start a new group? */ 19028 *group_end 19029 = (end 19030 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) 19031 || (can_issue_more <= 2 && is_cracked_insn (next_insn)) 19032 || (can_issue_more < issue_rate && 19033 insn_terminates_group_p (next_insn, previous_group))); 19034 if (*group_end && end) 19035 (*group_count)--; 19036 19037 if (sched_verbose > 6) 19038 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n", 19039 *group_count, can_issue_more); 19040 return can_issue_more; 19041 } 19042 19043 return can_issue_more; 19044} 19045 19046/* This function tries to synch the dispatch groups that the compiler "sees" 19047 with the dispatch groups that the processor dispatcher is expected to 19048 form in practice. It tries to achieve this synchronization by forcing the 19049 estimated processor grouping on the compiler (as opposed to the function 19050 'pad_goups' which tries to force the scheduler's grouping on the processor). 19051 19052 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and 19053 examines the (estimated) dispatch groups that will be formed by the processor 19054 dispatcher. It marks these group boundaries to reflect the estimated 19055 processor grouping, overriding the grouping that the scheduler had marked. 19056 Depending on the value of the flag '-minsert-sched-nops' this function can 19057 force certain insns into separate groups or force a certain distance between 19058 them by inserting nops, for example, if there exists a "costly dependence" 19059 between the insns. 19060 19061 The function estimates the group boundaries that the processor will form as 19062 follows: It keeps track of how many vacant issue slots are available after 19063 each insn. 
A subsequent insn will start a new group if one of the following 19064 4 cases applies: 19065 - no more vacant issue slots remain in the current dispatch group. 19066 - only the last issue slot, which is the branch slot, is vacant, but the next 19067 insn is not a branch. 19068 - only the last 2 or less issue slots, including the branch slot, are vacant, 19069 which means that a cracked insn (which occupies two issue slots) can't be 19070 issued in this group. 19071 - less than 'issue_rate' slots are vacant, and the next insn always needs to 19072 start a new group. */ 19073 19074static int 19075redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, 19076 rtx_insn *tail) 19077{ 19078 rtx_insn *insn, *next_insn; 19079 int issue_rate; 19080 int can_issue_more; 19081 int slot, i; 19082 bool group_end; 19083 int group_count = 0; 19084 rtx *group_insns; 19085 19086 /* Initialize. */ 19087 issue_rate = rs6000_issue_rate (); 19088 group_insns = XALLOCAVEC (rtx, issue_rate); 19089 for (i = 0; i < issue_rate; i++) 19090 { 19091 group_insns[i] = 0; 19092 } 19093 can_issue_more = issue_rate; 19094 slot = 0; 19095 insn = get_next_active_insn (prev_head_insn, tail); 19096 group_end = false; 19097 19098 while (insn != NULL_RTX) 19099 { 19100 slot = (issue_rate - can_issue_more); 19101 group_insns[slot] = insn; 19102 can_issue_more = 19103 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); 19104 if (insn_terminates_group_p (insn, current_group)) 19105 can_issue_more = 0; 19106 19107 next_insn = get_next_active_insn (insn, tail); 19108 if (next_insn == NULL_RTX) 19109 return group_count + 1; 19110 19111 /* Is next_insn going to start a new group? */ 19112 group_end 19113 = (can_issue_more == 0 19114 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn)) 19115 || (can_issue_more <= 2 && is_cracked_insn (next_insn)) 19116 || (can_issue_more < issue_rate && 19117 insn_terminates_group_p (next_insn, previous_group))); 19118 19119 can_issue_more = force_new_group (sched_verbose, dump, group_insns, 19120 next_insn, &group_end, can_issue_more, 19121 &group_count); 19122 19123 if (group_end) 19124 { 19125 group_count++; 19126 can_issue_more = 0; 19127 for (i = 0; i < issue_rate; i++) 19128 { 19129 group_insns[i] = 0; 19130 } 19131 } 19132 19133 if (GET_MODE (next_insn) == TImode && can_issue_more) 19134 PUT_MODE (next_insn, VOIDmode); 19135 else if (!can_issue_more && GET_MODE (next_insn) != TImode) 19136 PUT_MODE (next_insn, TImode); 19137 19138 insn = next_insn; 19139 if (can_issue_more == 0) 19140 can_issue_more = issue_rate; 19141 } /* while */ 19142 19143 return group_count; 19144} 19145 19146/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the 19147 dispatch group boundaries that the scheduler had marked. Pad with nops 19148 any dispatch groups which have vacant issue slots, in order to force the 19149 scheduler's grouping on the processor dispatcher. The function 19150 returns the number of dispatch groups found. */ 19151 19152static int 19153pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn, 19154 rtx_insn *tail) 19155{ 19156 rtx_insn *insn, *next_insn; 19157 rtx nop; 19158 int issue_rate; 19159 int can_issue_more; 19160 int group_end; 19161 int group_count = 0; 19162 19163 /* Initialize issue_rate. 
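   can_issue_more starts at issue_rate and counts the vacant slots
   left in the current dispatch group; it is reset to issue_rate each
   time a group boundary is reached.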
*/ 19164 issue_rate = rs6000_issue_rate (); 19165 can_issue_more = issue_rate; 19166 19167 insn = get_next_active_insn (prev_head_insn, tail); 19168 next_insn = get_next_active_insn (insn, tail); 19169 19170 while (insn != NULL_RTX) 19171 { 19172 can_issue_more = 19173 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more); 19174 19175 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode); 19176 19177 if (next_insn == NULL_RTX) 19178 break; 19179 19180 if (group_end) 19181 { 19182 /* If the scheduler had marked group termination at this location 19183 (between insn and next_insn), and neither insn nor next_insn will 19184 force group termination, pad the group with nops to force group 19185 termination. */ 19186 if (can_issue_more 19187 && (rs6000_sched_insert_nops == sched_finish_pad_groups) 19188 && !insn_terminates_group_p (insn, current_group) 19189 && !insn_terminates_group_p (next_insn, previous_group)) 19190 { 19191 if (!is_branch_slot_insn (next_insn)) 19192 can_issue_more--; 19193 19194 while (can_issue_more) 19195 { 19196 nop = gen_nop (); 19197 emit_insn_before (nop, next_insn); 19198 can_issue_more--; 19199 } 19200 } 19201 19202 can_issue_more = issue_rate; 19203 group_count++; 19204 } 19205 19206 insn = next_insn; 19207 next_insn = get_next_active_insn (insn, tail); 19208 } 19209 19210 return group_count; 19211} 19212 19213/* We're beginning a new block. Initialize data structures as necessary. */ 19214 19215static void 19216rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, 19217 int sched_verbose ATTRIBUTE_UNUSED, 19218 int max_ready ATTRIBUTE_UNUSED) 19219{ 19220 last_scheduled_insn = NULL; 19221 load_store_pendulum = 0; 19222 divide_cnt = 0; 19223 vec_pairing = 0; 19224} 19225 19226/* The following function is called at the end of scheduling BB. 19227 After reload, it inserts nops at insn group bundling. */ 19228 19229static void 19230rs6000_sched_finish (FILE *dump, int sched_verbose) 19231{ 19232 int n_groups; 19233 19234 if (sched_verbose) 19235 fprintf (dump, "=== Finishing schedule.\n"); 19236 19237 if (reload_completed && rs6000_sched_groups) 19238 { 19239 /* Do not run sched_finish hook when selective scheduling enabled. */ 19240 if (sel_sched_p ()) 19241 return; 19242 19243 if (rs6000_sched_insert_nops == sched_finish_none) 19244 return; 19245 19246 if (rs6000_sched_insert_nops == sched_finish_pad_groups) 19247 n_groups = pad_groups (dump, sched_verbose, 19248 current_sched_info->prev_head, 19249 current_sched_info->next_tail); 19250 else 19251 n_groups = redefine_groups (dump, sched_verbose, 19252 current_sched_info->prev_head, 19253 current_sched_info->next_tail); 19254 19255 if (sched_verbose >= 6) 19256 { 19257 fprintf (dump, "ngroups = %d\n", n_groups); 19258 print_rtl (dump, current_sched_info->prev_head); 19259 fprintf (dump, "Done finish_sched\n"); 19260 } 19261 } 19262} 19263 19264struct rs6000_sched_context 19265{ 19266 short cached_can_issue_more; 19267 rtx_insn *last_scheduled_insn; 19268 int load_store_pendulum; 19269 int divide_cnt; 19270 int vec_pairing; 19271}; 19272 19273typedef struct rs6000_sched_context rs6000_sched_context_def; 19274typedef rs6000_sched_context_def *rs6000_sched_context_t; 19275 19276/* Allocate store for new scheduling context. */ 19277static void * 19278rs6000_alloc_sched_context (void) 19279{ 19280 return xmalloc (sizeof (rs6000_sched_context_def)); 19281} 19282 19283/* If CLEAN_P is true then initializes _SC with clean data, 19284 and from the global context otherwise. 
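   Together with rs6000_alloc_sched_context, rs6000_set_sched_context
   and rs6000_free_sched_context below, this lets the scheduler save
   and restore the backend state (cached_can_issue_more,
   last_scheduled_insn, load_store_pendulum, divide_cnt and
   vec_pairing) when it switches scheduling contexts.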
*/ 19285static void 19286rs6000_init_sched_context (void *_sc, bool clean_p) 19287{ 19288 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; 19289 19290 if (clean_p) 19291 { 19292 sc->cached_can_issue_more = 0; 19293 sc->last_scheduled_insn = NULL; 19294 sc->load_store_pendulum = 0; 19295 sc->divide_cnt = 0; 19296 sc->vec_pairing = 0; 19297 } 19298 else 19299 { 19300 sc->cached_can_issue_more = cached_can_issue_more; 19301 sc->last_scheduled_insn = last_scheduled_insn; 19302 sc->load_store_pendulum = load_store_pendulum; 19303 sc->divide_cnt = divide_cnt; 19304 sc->vec_pairing = vec_pairing; 19305 } 19306} 19307 19308/* Sets the global scheduling context to the one pointed to by _SC. */ 19309static void 19310rs6000_set_sched_context (void *_sc) 19311{ 19312 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc; 19313 19314 gcc_assert (sc != NULL); 19315 19316 cached_can_issue_more = sc->cached_can_issue_more; 19317 last_scheduled_insn = sc->last_scheduled_insn; 19318 load_store_pendulum = sc->load_store_pendulum; 19319 divide_cnt = sc->divide_cnt; 19320 vec_pairing = sc->vec_pairing; 19321} 19322 19323/* Free _SC. */ 19324static void 19325rs6000_free_sched_context (void *_sc) 19326{ 19327 gcc_assert (_sc != NULL); 19328 19329 free (_sc); 19330} 19331 19332static bool 19333rs6000_sched_can_speculate_insn (rtx_insn *insn) 19334{ 19335 switch (get_attr_type (insn)) 19336 { 19337 case TYPE_DIV: 19338 case TYPE_SDIV: 19339 case TYPE_DDIV: 19340 case TYPE_VECDIV: 19341 case TYPE_SSQRT: 19342 case TYPE_DSQRT: 19343 return false; 19344 19345 default: 19346 return true; 19347 } 19348} 19349 19350/* Length in units of the trampoline for entering a nested function. */ 19351 19352int 19353rs6000_trampoline_size (void) 19354{ 19355 int ret = 0; 19356 19357 switch (DEFAULT_ABI) 19358 { 19359 default: 19360 gcc_unreachable (); 19361 19362 case ABI_AIX: 19363 ret = (TARGET_32BIT) ? 12 : 24; 19364 break; 19365 19366 case ABI_ELFv2: 19367 gcc_assert (!TARGET_32BIT); 19368 ret = 32; 19369 break; 19370 19371 case ABI_DARWIN: 19372 case ABI_V4: 19373 ret = (TARGET_32BIT) ? 40 : 48; 19374 break; 19375 } 19376 19377 return ret; 19378} 19379 19380/* Emit RTL insns to initialize the variable parts of a trampoline. 19381 FNADDR is an RTX for the address of the function's pure code. 19382 CXT is an RTX for the static chain value for the function. */ 19383 19384static void 19385rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 19386{ 19387 int regsize = (TARGET_32BIT) ? 4 : 8; 19388 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); 19389 rtx ctx_reg = force_reg (Pmode, cxt); 19390 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0)); 19391 19392 switch (DEFAULT_ABI) 19393 { 19394 default: 19395 gcc_unreachable (); 19396 19397 /* Under AIX, just build the 3 word function descriptor */ 19398 case ABI_AIX: 19399 { 19400 rtx fnmem, fn_reg, toc_reg; 19401 19402 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) 19403 error ("you cannot take the address of a nested function if you use " 19404 "the %qs option", "-mno-pointers-to-nested-functions"); 19405 19406 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); 19407 fn_reg = gen_reg_rtx (Pmode); 19408 toc_reg = gen_reg_rtx (Pmode); 19409 19410 /* Macro to shorten the code expansions below. 
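   The moves below copy the code address and TOC pointer out of
   FNADDR's function descriptor and build a new three-word descriptor
   in the trampoline itself:

     offset 0*regsize   code address (from FNADDR's descriptor)
     offset 1*regsize   TOC pointer  (from FNADDR's descriptor)
     offset 2*regsize   static chain (CXT)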
*/ 19411# define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET) 19412 19413 m_tramp = replace_equiv_address (m_tramp, addr); 19414 19415 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0)); 19416 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize)); 19417 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg); 19418 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg); 19419 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg); 19420 19421# undef MEM_PLUS 19422 } 19423 break; 19424 19425 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */ 19426 case ABI_ELFv2: 19427 case ABI_DARWIN: 19428 case ABI_V4: 19429 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), 19430 LCT_NORMAL, VOIDmode, 19431 addr, Pmode, 19432 GEN_INT (rs6000_trampoline_size ()), SImode, 19433 fnaddr, Pmode, 19434 ctx_reg, Pmode); 19435 break; 19436 } 19437} 19438 19439 19440/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain 19441 identifier as an argument, so the front end shouldn't look it up. */ 19442 19443static bool 19444rs6000_attribute_takes_identifier_p (const_tree attr_id) 19445{ 19446 return is_attribute_p ("altivec", attr_id); 19447} 19448 19449/* Handle the "altivec" attribute. The attribute may have 19450 arguments as follows: 19451 19452 __attribute__((altivec(vector__))) 19453 __attribute__((altivec(pixel__))) (always followed by 'unsigned short') 19454 __attribute__((altivec(bool__))) (always followed by 'unsigned') 19455 19456 and may appear more than once (e.g., 'vector bool char') in a 19457 given declaration. */ 19458 19459static tree 19460rs6000_handle_altivec_attribute (tree *node, 19461 tree name ATTRIBUTE_UNUSED, 19462 tree args, 19463 int flags ATTRIBUTE_UNUSED, 19464 bool *no_add_attrs) 19465{ 19466 tree type = *node, result = NULL_TREE; 19467 machine_mode mode; 19468 int unsigned_p; 19469 char altivec_type 19470 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args) 19471 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE) 19472 ? *IDENTIFIER_POINTER (TREE_VALUE (args)) 19473 : '?'); 19474 19475 while (POINTER_TYPE_P (type) 19476 || TREE_CODE (type) == FUNCTION_TYPE 19477 || TREE_CODE (type) == METHOD_TYPE 19478 || TREE_CODE (type) == ARRAY_TYPE) 19479 type = TREE_TYPE (type); 19480 19481 mode = TYPE_MODE (type); 19482 19483 /* Check for invalid AltiVec type qualifiers. 
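   long double, bool, _Complex and decimal float element types are
   always rejected; without VSX, long long and double are rejected as
   well, and long is rejected for 64-bit code (and merely deprecated
   for 32-bit).  For example, 'vector long double' is an error, while
   'vector unsigned int' selects unsigned_V4SI_type_node below.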
*/ 19484 if (type == long_double_type_node) 19485 error ("use of %<long double%> in AltiVec types is invalid"); 19486 else if (type == boolean_type_node) 19487 error ("use of boolean types in AltiVec types is invalid"); 19488 else if (TREE_CODE (type) == COMPLEX_TYPE) 19489 error ("use of %<complex%> in AltiVec types is invalid"); 19490 else if (DECIMAL_FLOAT_MODE_P (mode)) 19491 error ("use of decimal floating point types in AltiVec types is invalid"); 19492 else if (!TARGET_VSX) 19493 { 19494 if (type == long_unsigned_type_node || type == long_integer_type_node) 19495 { 19496 if (TARGET_64BIT) 19497 error ("use of %<long%> in AltiVec types is invalid for " 19498 "64-bit code without %qs", "-mvsx"); 19499 else if (rs6000_warn_altivec_long) 19500 warning (0, "use of %<long%> in AltiVec types is deprecated; " 19501 "use %<int%>"); 19502 } 19503 else if (type == long_long_unsigned_type_node 19504 || type == long_long_integer_type_node) 19505 error ("use of %<long long%> in AltiVec types is invalid without %qs", 19506 "-mvsx"); 19507 else if (type == double_type_node) 19508 error ("use of %<double%> in AltiVec types is invalid without %qs", 19509 "-mvsx"); 19510 } 19511 19512 switch (altivec_type) 19513 { 19514 case 'v': 19515 unsigned_p = TYPE_UNSIGNED (type); 19516 switch (mode) 19517 { 19518 case E_TImode: 19519 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); 19520 break; 19521 case E_DImode: 19522 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); 19523 break; 19524 case E_SImode: 19525 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); 19526 break; 19527 case E_HImode: 19528 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); 19529 break; 19530 case E_QImode: 19531 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); 19532 break; 19533 case E_SFmode: result = V4SF_type_node; break; 19534 case E_DFmode: result = V2DF_type_node; break; 19535 /* If the user says 'vector int bool', we may be handed the 'bool' 19536 attribute _before_ the 'vector' attribute, and so select the 19537 proper type in the 'b' case below. */ 19538 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode: 19539 case E_V2DImode: case E_V2DFmode: 19540 result = type; 19541 default: break; 19542 } 19543 break; 19544 case 'b': 19545 switch (mode) 19546 { 19547 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break; 19548 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break; 19549 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break; 19550 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node; 19551 default: break; 19552 } 19553 break; 19554 case 'p': 19555 switch (mode) 19556 { 19557 case E_V8HImode: result = pixel_V8HI_type_node; 19558 default: break; 19559 } 19560 default: break; 19561 } 19562 19563 /* Propagate qualifiers attached to the element type 19564 onto the vector type. */ 19565 if (result && result != type && TYPE_QUALS (type)) 19566 result = build_qualified_type (result, TYPE_QUALS (type)); 19567 19568 *no_add_attrs = true; /* No need to hang on to the attribute. */ 19569 19570 if (result) 19571 *node = lang_hooks.types.reconstruct_complex_type (*node, result); 19572 19573 return NULL_TREE; 19574} 19575 19576/* AltiVec defines five built-in scalar types that serve as vector 19577 elements; we must teach the compiler how to mangle them. The 128-bit 19578 floating point mangling is target-specific as well. 
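   (IBM extended-double 128-bit floats mangle as "g"; IEEE 128-bit
   floats mangle as "u9__ieee128", or as the old "U10__float128" when
   the GCC 8.1 compatibility mangling is being generated.)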
MMA defines 19579 two built-in types to be used as opaque vector types. */ 19580 19581static const char * 19582rs6000_mangle_type (const_tree type) 19583{ 19584 type = TYPE_MAIN_VARIANT (type); 19585 19586 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 19587 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 19588 return NULL; 19589 19590 if (type == bool_char_type_node) return "U6__boolc"; 19591 if (type == bool_short_type_node) return "U6__bools"; 19592 if (type == pixel_type_node) return "u7__pixel"; 19593 if (type == bool_int_type_node) return "U6__booli"; 19594 if (type == bool_long_long_type_node) return "U6__boolx"; 19595 19596 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type))) 19597 return "g"; 19598 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type))) 19599 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128"; 19600 19601 if (type == vector_pair_type_node) 19602 return "u13__vector_pair"; 19603 if (type == vector_quad_type_node) 19604 return "u13__vector_quad"; 19605 19606 /* For all other types, use the default mangling. */ 19607 return NULL; 19608} 19609 19610/* Handle a "longcall" or "shortcall" attribute; arguments as in 19611 struct attribute_spec.handler. */ 19612 19613static tree 19614rs6000_handle_longcall_attribute (tree *node, tree name, 19615 tree args ATTRIBUTE_UNUSED, 19616 int flags ATTRIBUTE_UNUSED, 19617 bool *no_add_attrs) 19618{ 19619 if (TREE_CODE (*node) != FUNCTION_TYPE 19620 && TREE_CODE (*node) != FIELD_DECL 19621 && TREE_CODE (*node) != TYPE_DECL) 19622 { 19623 warning (OPT_Wattributes, "%qE attribute only applies to functions", 19624 name); 19625 *no_add_attrs = true; 19626 } 19627 19628 return NULL_TREE; 19629} 19630 19631/* Set longcall attributes on all functions declared when 19632 rs6000_default_long_calls is true. */ 19633static void 19634rs6000_set_default_type_attributes (tree type) 19635{ 19636 if (rs6000_default_long_calls 19637 && (TREE_CODE (type) == FUNCTION_TYPE 19638 || TREE_CODE (type) == METHOD_TYPE)) 19639 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"), 19640 NULL_TREE, 19641 TYPE_ATTRIBUTES (type)); 19642 19643#if TARGET_MACHO 19644 darwin_set_default_type_attributes (type); 19645#endif 19646} 19647 19648/* Return a reference suitable for calling a function with the 19649 longcall attribute. */ 19650 19651static rtx 19652rs6000_longcall_ref (rtx call_ref, rtx arg) 19653{ 19654 /* System V adds '.' to the internal name, so skip them. */ 19655 const char *call_name = XSTR (call_ref, 0); 19656 if (*call_name == '.') 19657 { 19658 while (*call_name == '.') 19659 call_name++; 19660 19661 tree node = get_identifier (call_name); 19662 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node)); 19663 } 19664 19665 if (TARGET_PLTSEQ) 19666 { 19667 rtx base = const0_rtx; 19668 int regno = 12; 19669 if (rs6000_pcrel_p (cfun)) 19670 { 19671 rtx reg = gen_rtx_REG (Pmode, regno); 19672 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode, 19673 gen_rtvec (3, base, call_ref, arg), 19674 UNSPECV_PLT_PCREL); 19675 emit_insn (gen_rtx_SET (reg, u)); 19676 return reg; 19677 } 19678 19679 if (DEFAULT_ABI == ABI_ELFv2) 19680 base = gen_rtx_REG (Pmode, TOC_REGISTER); 19681 else 19682 { 19683 if (flag_pic) 19684 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 19685 regno = 11; 19686 } 19687 /* Reg must match that used by linker PLT stubs. For ELFv2, r12 19688 may be used by a function global entry point. 
For SysV4, r11 19689 is used by __glink_PLTresolve lazy resolver entry. */ 19690 rtx reg = gen_rtx_REG (Pmode, regno); 19691 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg), 19692 UNSPEC_PLT16_HA); 19693 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode, 19694 gen_rtvec (3, reg, call_ref, arg), 19695 UNSPECV_PLT16_LO); 19696 emit_insn (gen_rtx_SET (reg, hi)); 19697 emit_insn (gen_rtx_SET (reg, lo)); 19698 return reg; 19699 } 19700 19701 return force_reg (Pmode, call_ref); 19702} 19703 19704#ifndef TARGET_USE_MS_BITFIELD_LAYOUT 19705#define TARGET_USE_MS_BITFIELD_LAYOUT 0 19706#endif 19707 19708/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 19709 struct attribute_spec.handler. */ 19710static tree 19711rs6000_handle_struct_attribute (tree *node, tree name, 19712 tree args ATTRIBUTE_UNUSED, 19713 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 19714{ 19715 tree *type = NULL; 19716 if (DECL_P (*node)) 19717 { 19718 if (TREE_CODE (*node) == TYPE_DECL) 19719 type = &TREE_TYPE (*node); 19720 } 19721 else 19722 type = node; 19723 19724 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 19725 || TREE_CODE (*type) == UNION_TYPE))) 19726 { 19727 warning (OPT_Wattributes, "%qE attribute ignored", name); 19728 *no_add_attrs = true; 19729 } 19730 19731 else if ((is_attribute_p ("ms_struct", name) 19732 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 19733 || ((is_attribute_p ("gcc_struct", name) 19734 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 19735 { 19736 warning (OPT_Wattributes, "%qE incompatible attribute ignored", 19737 name); 19738 *no_add_attrs = true; 19739 } 19740 19741 return NULL_TREE; 19742} 19743 19744static bool 19745rs6000_ms_bitfield_layout_p (const_tree record_type) 19746{ 19747 return (TARGET_USE_MS_BITFIELD_LAYOUT && 19748 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 19749 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 19750} 19751 19752#ifdef USING_ELFOS_H 19753 19754/* A get_unnamed_section callback, used for switching to toc_section. */ 19755 19756static void 19757rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) 19758{ 19759 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 19760 && TARGET_MINIMAL_TOC) 19761 { 19762 if (!toc_initialized) 19763 { 19764 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); 19765 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 19766 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0); 19767 fprintf (asm_out_file, "\t.tc "); 19768 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],"); 19769 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 19770 fprintf (asm_out_file, "\n"); 19771 19772 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 19773 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 19774 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 19775 fprintf (asm_out_file, " = .+32768\n"); 19776 toc_initialized = 1; 19777 } 19778 else 19779 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 19780 } 19781 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 19782 { 19783 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP); 19784 if (!toc_initialized) 19785 { 19786 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2); 19787 toc_initialized = 1; 19788 } 19789 } 19790 else 19791 { 19792 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 19793 if (!toc_initialized) 19794 { 19795 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 
3 : 2); 19796 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1"); 19797 fprintf (asm_out_file, " = .+32768\n"); 19798 toc_initialized = 1; 19799 } 19800 } 19801} 19802 19803/* Implement TARGET_ASM_INIT_SECTIONS. */ 19804 19805static void 19806rs6000_elf_asm_init_sections (void) 19807{ 19808 toc_section 19809 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL); 19810 19811 sdata2_section 19812 = get_unnamed_section (SECTION_WRITE, output_section_asm_op, 19813 SDATA2_SECTION_ASM_OP); 19814} 19815 19816/* Implement TARGET_SELECT_RTX_SECTION. */ 19817 19818static section * 19819rs6000_elf_select_rtx_section (machine_mode mode, rtx x, 19820 unsigned HOST_WIDE_INT align) 19821{ 19822 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) 19823 return toc_section; 19824 else 19825 return default_elf_select_rtx_section (mode, x, align); 19826} 19827 19828/* For a SYMBOL_REF, set generic flags and then perform some 19829 target-specific processing. 19830 19831 When the AIX ABI is requested on a non-AIX system, replace the 19832 function name with the real name (with a leading .) rather than the 19833 function descriptor name. This saves a lot of overriding code to 19834 read the prefixes. */ 19835 19836static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED; 19837static void 19838rs6000_elf_encode_section_info (tree decl, rtx rtl, int first) 19839{ 19840 default_encode_section_info (decl, rtl, first); 19841 19842 if (first 19843 && TREE_CODE (decl) == FUNCTION_DECL 19844 && !TARGET_AIX 19845 && DEFAULT_ABI == ABI_AIX) 19846 { 19847 rtx sym_ref = XEXP (rtl, 0); 19848 size_t len = strlen (XSTR (sym_ref, 0)); 19849 char *str = XALLOCAVEC (char, len + 2); 19850 str[0] = '.'; 19851 memcpy (str + 1, XSTR (sym_ref, 0), len + 1); 19852 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1); 19853 } 19854} 19855 19856static inline bool 19857compare_section_name (const char *section, const char *templ) 19858{ 19859 int len; 19860 19861 len = strlen (templ); 19862 return (strncmp (section, templ, len) == 0 19863 && (section[len] == 0 || section[len] == '.')); 19864} 19865 19866bool 19867rs6000_elf_in_small_data_p (const_tree decl) 19868{ 19869 if (rs6000_sdata == SDATA_NONE) 19870 return false; 19871 19872 /* We want to merge strings, so we never consider them small data. */ 19873 if (TREE_CODE (decl) == STRING_CST) 19874 return false; 19875 19876 /* Functions are never in the small data area. */ 19877 if (TREE_CODE (decl) == FUNCTION_DECL) 19878 return false; 19879 19880 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl)) 19881 { 19882 const char *section = DECL_SECTION_NAME (decl); 19883 if (compare_section_name (section, ".sdata") 19884 || compare_section_name (section, ".sdata2") 19885 || compare_section_name (section, ".gnu.linkonce.s") 19886 || compare_section_name (section, ".sbss") 19887 || compare_section_name (section, ".sbss2") 19888 || compare_section_name (section, ".gnu.linkonce.sb") 19889 || strcmp (section, ".PPC.EMB.sdata0") == 0 19890 || strcmp (section, ".PPC.EMB.sbss0") == 0) 19891 return true; 19892 } 19893 else 19894 { 19895 /* If we are told not to put readonly data in sdata, then don't. 
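   Otherwise a variable is placed in small data when its size is
   positive and no larger than the -G threshold (g_switch_value),
   unless -msdata=data is in effect and the variable is not public.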
*/ 19896 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI 19897 && !rs6000_readonly_in_sdata) 19898 return false; 19899 19900 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl)); 19901 19902 if (size > 0 19903 && size <= g_switch_value 19904 /* If it's not public, and we're not going to reference it there, 19905 there's no need to put it in the small data section. */ 19906 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl))) 19907 return true; 19908 } 19909 19910 return false; 19911} 19912 19913#endif /* USING_ELFOS_H */ 19914 19915/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */ 19916 19917static bool 19918rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x) 19919{ 19920 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode); 19921} 19922 19923/* Do not place thread-local symbols refs in the object blocks. */ 19924 19925static bool 19926rs6000_use_blocks_for_decl_p (const_tree decl) 19927{ 19928 return !DECL_THREAD_LOCAL_P (decl); 19929} 19930 19931/* Return a REG that occurs in ADDR with coefficient 1. 19932 ADDR can be effectively incremented by incrementing REG. 19933 19934 r0 is special and we must not select it as an address 19935 register by this routine since our caller will try to 19936 increment the returned register via an "la" instruction. */ 19937 19938rtx 19939find_addr_reg (rtx addr) 19940{ 19941 while (GET_CODE (addr) == PLUS) 19942 { 19943 if (REG_P (XEXP (addr, 0)) 19944 && REGNO (XEXP (addr, 0)) != 0) 19945 addr = XEXP (addr, 0); 19946 else if (REG_P (XEXP (addr, 1)) 19947 && REGNO (XEXP (addr, 1)) != 0) 19948 addr = XEXP (addr, 1); 19949 else if (CONSTANT_P (XEXP (addr, 0))) 19950 addr = XEXP (addr, 1); 19951 else if (CONSTANT_P (XEXP (addr, 1))) 19952 addr = XEXP (addr, 0); 19953 else 19954 gcc_unreachable (); 19955 } 19956 gcc_assert (REG_P (addr) && REGNO (addr) != 0); 19957 return addr; 19958} 19959 19960void 19961rs6000_fatal_bad_address (rtx op) 19962{ 19963 fatal_insn ("bad address", op); 19964} 19965 19966#if TARGET_MACHO 19967 19968vec<branch_island, va_gc> *branch_islands; 19969 19970/* Remember to generate a branch island for far calls to the given 19971 function. */ 19972 19973static void 19974add_compiler_branch_island (tree label_name, tree function_name, 19975 int line_number) 19976{ 19977 branch_island bi = {function_name, label_name, line_number}; 19978 vec_safe_push (branch_islands, bi); 19979} 19980 19981/* NO_PREVIOUS_DEF checks in the link list whether the function name is 19982 already there or not. */ 19983 19984static int 19985no_previous_def (tree function_name) 19986{ 19987 branch_island *bi; 19988 unsigned ix; 19989 19990 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) 19991 if (function_name == bi->function_name) 19992 return 0; 19993 return 1; 19994} 19995 19996/* GET_PREV_LABEL gets the label name from the previous definition of 19997 the function. */ 19998 19999static tree 20000get_prev_label (tree function_name) 20001{ 20002 branch_island *bi; 20003 unsigned ix; 20004 20005 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) 20006 if (function_name == bi->function_name) 20007 return bi->label_name; 20008 return NULL_TREE; 20009} 20010 20011/* Generate external symbol indirection stubs (PIC and non-PIC). */ 20012 20013void 20014machopic_output_stub (FILE *file, const char *symb, const char *stub) 20015{ 20016 unsigned int length; 20017 char *symbol_name, *lazy_ptr_name; 20018 char *local_label_0; 20019 static unsigned label = 0; 20020 20021 /* Lose our funky encoding stuff so it doesn't contaminate the stub. 
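   The PIC stub emitted below computes its own address with a bcl/mflr
   sequence, loads the target address from the lazy pointer into r12
   and branches through the count register; the non-PIC variant
   addresses the lazy pointer directly with lis/ha16 and lo16.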
*/ 20022 symb = (*targetm.strip_name_encoding) (symb); 20023 20024 length = strlen (symb); 20025 symbol_name = XALLOCAVEC (char, length + 32); 20026 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 20027 20028 lazy_ptr_name = XALLOCAVEC (char, length + 32); 20029 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length); 20030 20031 if (MACHOPIC_PURE) 20032 { 20033 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]); 20034 fprintf (file, "\t.align 5\n"); 20035 20036 fprintf (file, "%s:\n", stub); 20037 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20038 20039 label++; 20040 local_label_0 = XALLOCAVEC (char, 16); 20041 sprintf (local_label_0, "L%u$spb", label); 20042 20043 fprintf (file, "\tmflr r0\n"); 20044 fprintf (file, "\tbcl 20,31,%s\n", local_label_0); 20045 fprintf (file, "%s:\n\tmflr r11\n", local_label_0); 20046 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n", 20047 lazy_ptr_name, local_label_0); 20048 fprintf (file, "\tmtlr r0\n"); 20049 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n", 20050 (TARGET_64BIT ? "ldu" : "lwzu"), 20051 lazy_ptr_name, local_label_0); 20052 fprintf (file, "\tmtctr r12\n"); 20053 fprintf (file, "\tbctr\n"); 20054 } 20055 else /* mdynamic-no-pic or mkernel. */ 20056 { 20057 switch_to_section (darwin_sections[machopic_symbol_stub1_section]); 20058 fprintf (file, "\t.align 4\n"); 20059 20060 fprintf (file, "%s:\n", stub); 20061 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20062 20063 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name); 20064 fprintf (file, "\t%s r12,lo16(%s)(r11)\n", 20065 (TARGET_64BIT ? "ldu" : "lwzu"), 20066 lazy_ptr_name); 20067 fprintf (file, "\tmtctr r12\n"); 20068 fprintf (file, "\tbctr\n"); 20069 } 20070 20071 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 20072 fprintf (file, "%s:\n", lazy_ptr_name); 20073 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 20074 fprintf (file, "%sdyld_stub_binding_helper\n", 20075 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t")); 20076} 20077 20078/* Legitimize PIC addresses. If the address is already 20079 position-independent, we return ORIG. Newly generated 20080 position-independent addresses go into a reg. This is REG if non 20081 zero, otherwise we allocate register(s) as necessary. */ 20082 20083#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000) 20084 20085rtx 20086rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode, 20087 rtx reg) 20088{ 20089 rtx base, offset; 20090 20091 if (reg == NULL && !reload_completed) 20092 reg = gen_reg_rtx (Pmode); 20093 20094 if (GET_CODE (orig) == CONST) 20095 { 20096 rtx reg_temp; 20097 20098 if (GET_CODE (XEXP (orig, 0)) == PLUS 20099 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) 20100 return orig; 20101 20102 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 20103 20104 /* Use a different reg for the intermediate value, as 20105 it will be marked UNCHANGING. */ 20106 reg_temp = !can_create_pseudo_p () ? 
reg : gen_reg_rtx (Pmode); 20107 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), 20108 Pmode, reg_temp); 20109 offset = 20110 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), 20111 Pmode, reg); 20112 20113 if (CONST_INT_P (offset)) 20114 { 20115 if (SMALL_INT (offset)) 20116 return plus_constant (Pmode, base, INTVAL (offset)); 20117 else if (!reload_completed) 20118 offset = force_reg (Pmode, offset); 20119 else 20120 { 20121 rtx mem = force_const_mem (Pmode, orig); 20122 return machopic_legitimize_pic_address (mem, Pmode, reg); 20123 } 20124 } 20125 return gen_rtx_PLUS (Pmode, base, offset); 20126 } 20127 20128 /* Fall back on generic machopic code. */ 20129 return machopic_legitimize_pic_address (orig, mode, reg); 20130} 20131 20132/* Output a .machine directive for the Darwin assembler, and call 20133 the generic start_file routine. */ 20134 20135static void 20136rs6000_darwin_file_start (void) 20137{ 20138 static const struct 20139 { 20140 const char *arg; 20141 const char *name; 20142 HOST_WIDE_INT if_set; 20143 } mapping[] = { 20144 { "ppc64", "ppc64", MASK_64BIT }, 20145 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 }, 20146 { "power4", "ppc970", 0 }, 20147 { "G5", "ppc970", 0 }, 20148 { "7450", "ppc7450", 0 }, 20149 { "7400", "ppc7400", MASK_ALTIVEC }, 20150 { "G4", "ppc7400", 0 }, 20151 { "750", "ppc750", 0 }, 20152 { "740", "ppc750", 0 }, 20153 { "G3", "ppc750", 0 }, 20154 { "604e", "ppc604e", 0 }, 20155 { "604", "ppc604", 0 }, 20156 { "603e", "ppc603", 0 }, 20157 { "603", "ppc603", 0 }, 20158 { "601", "ppc601", 0 }, 20159 { NULL, "ppc", 0 } }; 20160 const char *cpu_id = ""; 20161 size_t i; 20162 20163 rs6000_file_start (); 20164 darwin_file_start (); 20165 20166 /* Determine the argument to -mcpu=. Default to G3 if not specified. */ 20167 20168 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') 20169 cpu_id = rs6000_default_cpu; 20170 20171 if (global_options_set.x_rs6000_cpu_index) 20172 cpu_id = processor_target_table[rs6000_cpu_index].name; 20173 20174 /* Look through the mapping array. Pick the first name that either 20175 matches the argument, has a bit set in IF_SET that is also set 20176 in the target flags, or has a NULL name. */ 20177 20178 i = 0; 20179 while (mapping[i].arg != NULL 20180 && strcmp (mapping[i].arg, cpu_id) != 0 20181 && (mapping[i].if_set & rs6000_isa_flags) == 0) 20182 i++; 20183 20184 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name); 20185} 20186 20187#endif /* TARGET_MACHO */ 20188 20189#if TARGET_ELF 20190static int 20191rs6000_elf_reloc_rw_mask (void) 20192{ 20193 if (flag_pic) 20194 return 3; 20195 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) 20196#if defined (POWERPC_NETBSD) 20197 return 3; 20198#else 20199 return 2; 20200#endif 20201 else 20202 return 0; 20203} 20204 20205/* Record an element in the table of global constructors. SYMBOL is 20206 a SYMBOL_REF of the function to be called; PRIORITY is a number 20207 between 0 and MAX_INIT_PRIORITY. 20208 20209 This differs from default_named_section_asm_out_constructor in 20210 that we have special handling for -mrelocatable. 
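   For a non-default priority the section name encodes the inverted
   priority so that the linker's lexical sort runs constructors from
   right to left; e.g. (assuming MAX_INIT_PRIORITY is 65535) priority
   101 lands in ".ctors.65434".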
*/ 20211 20212static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED; 20213static void 20214rs6000_elf_asm_out_constructor (rtx symbol, int priority) 20215{ 20216 const char *section = ".ctors"; 20217 char buf[18]; 20218 20219 if (priority != DEFAULT_INIT_PRIORITY) 20220 { 20221 sprintf (buf, ".ctors.%.5u", 20222 /* Invert the numbering so the linker puts us in the proper 20223 order; constructors are run from right to left, and the 20224 linker sorts in increasing order. */ 20225 MAX_INIT_PRIORITY - priority); 20226 section = buf; 20227 } 20228 20229 switch_to_section (get_section (section, SECTION_WRITE, NULL)); 20230 assemble_align (POINTER_SIZE); 20231 20232 if (DEFAULT_ABI == ABI_V4 20233 && (TARGET_RELOCATABLE || flag_pic > 1)) 20234 { 20235 fputs ("\t.long (", asm_out_file); 20236 output_addr_const (asm_out_file, symbol); 20237 fputs (")@fixup\n", asm_out_file); 20238 } 20239 else 20240 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); 20241} 20242 20243static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED; 20244static void 20245rs6000_elf_asm_out_destructor (rtx symbol, int priority) 20246{ 20247 const char *section = ".dtors"; 20248 char buf[18]; 20249 20250 if (priority != DEFAULT_INIT_PRIORITY) 20251 { 20252 sprintf (buf, ".dtors.%.5u", 20253 /* Invert the numbering so the linker puts us in the proper 20254 order; constructors are run from right to left, and the 20255 linker sorts in increasing order. */ 20256 MAX_INIT_PRIORITY - priority); 20257 section = buf; 20258 } 20259 20260 switch_to_section (get_section (section, SECTION_WRITE, NULL)); 20261 assemble_align (POINTER_SIZE); 20262 20263 if (DEFAULT_ABI == ABI_V4 20264 && (TARGET_RELOCATABLE || flag_pic > 1)) 20265 { 20266 fputs ("\t.long (", asm_out_file); 20267 output_addr_const (asm_out_file, symbol); 20268 fputs (")@fixup\n", asm_out_file); 20269 } 20270 else 20271 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); 20272} 20273 20274void 20275rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl) 20276{ 20277 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2) 20278 { 20279 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file); 20280 ASM_OUTPUT_LABEL (file, name); 20281 fputs (DOUBLE_INT_ASM_OP, file); 20282 rs6000_output_function_entry (file, name); 20283 fputs (",.TOC.@tocbase,0\n\t.previous\n", file); 20284 if (DOT_SYMBOLS) 20285 { 20286 fputs ("\t.size\t", file); 20287 assemble_name (file, name); 20288 fputs (",24\n\t.type\t.", file); 20289 assemble_name (file, name); 20290 fputs (",@function\n", file); 20291 if (TREE_PUBLIC (decl) && ! 
DECL_WEAK (decl)) 20292 { 20293 fputs ("\t.globl\t.", file); 20294 assemble_name (file, name); 20295 putc ('\n', file); 20296 } 20297 } 20298 else 20299 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 20300 ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); 20301 rs6000_output_function_entry (file, name); 20302 fputs (":\n", file); 20303 return; 20304 } 20305 20306 int uses_toc; 20307 if (DEFAULT_ABI == ABI_V4 20308 && (TARGET_RELOCATABLE || flag_pic > 1) 20309 && !TARGET_SECURE_PLT 20310 && (!constant_pool_empty_p () || crtl->profile) 20311 && (uses_toc = uses_TOC ())) 20312 { 20313 char buf[256]; 20314 20315 if (uses_toc == 2) 20316 switch_to_other_text_partition (); 20317 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); 20318 20319 fprintf (file, "\t.long "); 20320 assemble_name (file, toc_label_name); 20321 need_toc_init = 1; 20322 putc ('-', file); 20323 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); 20324 assemble_name (file, buf); 20325 putc ('\n', file); 20326 if (uses_toc == 2) 20327 switch_to_other_text_partition (); 20328 } 20329 20330 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); 20331 ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); 20332 20333 if (TARGET_CMODEL == CMODEL_LARGE 20334 && rs6000_global_entry_point_prologue_needed_p ()) 20335 { 20336 char buf[256]; 20337 20338 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno); 20339 20340 fprintf (file, "\t.quad .TOC.-"); 20341 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno); 20342 assemble_name (file, buf); 20343 putc ('\n', file); 20344 } 20345 20346 if (DEFAULT_ABI == ABI_AIX) 20347 { 20348 const char *desc_name, *orig_name; 20349 20350 orig_name = (*targetm.strip_name_encoding) (name); 20351 desc_name = orig_name; 20352 while (*desc_name == '.') 20353 desc_name++; 20354 20355 if (TREE_PUBLIC (decl)) 20356 fprintf (file, "\t.globl %s\n", desc_name); 20357 20358 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP); 20359 fprintf (file, "%s:\n", desc_name); 20360 fprintf (file, "\t.long %s\n", orig_name); 20361 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file); 20362 fputs ("\t.long 0\n", file); 20363 fprintf (file, "\t.previous\n"); 20364 } 20365 ASM_OUTPUT_LABEL (file, name); 20366} 20367 20368static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; 20369static void 20370rs6000_elf_file_end (void) 20371{ 20372#ifdef HAVE_AS_GNU_ATTRIBUTE 20373 /* ??? The value emitted depends on options active at file end. 20374 Assume anyone using #pragma or attributes that might change 20375 options knows what they are doing. */ 20376 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4) 20377 && rs6000_passes_float) 20378 { 20379 int fp; 20380 20381 if (TARGET_HARD_FLOAT) 20382 fp = 1; 20383 else 20384 fp = 2; 20385 if (rs6000_passes_long_double) 20386 { 20387 if (!TARGET_LONG_DOUBLE_128) 20388 fp |= 2 * 4; 20389 else if (TARGET_IEEEQUAD) 20390 fp |= 3 * 4; 20391 else 20392 fp |= 1 * 4; 20393 } 20394 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp); 20395 } 20396 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4) 20397 { 20398 if (rs6000_passes_vector) 20399 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n", 20400 (TARGET_ALTIVEC_ABI ? 2 : 1)); 20401 if (rs6000_returns_struct) 20402 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n", 20403 aix_struct_return ? 
2 : 1); 20404 } 20405#endif 20406#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD) 20407 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2) 20408 file_end_indicate_exec_stack (); 20409#endif 20410 20411 if (flag_split_stack) 20412 file_end_indicate_split_stack (); 20413 20414 if (cpu_builtin_p) 20415 { 20416 /* We have expanded a CPU builtin, so we need to emit a reference to 20417 the special symbol that LIBC uses to declare it supports the 20418 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */ 20419 switch_to_section (data_section); 20420 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3); 20421 fprintf (asm_out_file, "\t%s %s\n", 20422 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol); 20423 } 20424} 20425#endif 20426 20427#if TARGET_XCOFF 20428 20429#ifndef HAVE_XCOFF_DWARF_EXTRAS 20430#define HAVE_XCOFF_DWARF_EXTRAS 0 20431#endif 20432 20433static enum unwind_info_type 20434rs6000_xcoff_debug_unwind_info (void) 20435{ 20436 return UI_NONE; 20437} 20438 20439static void 20440rs6000_xcoff_asm_output_anchor (rtx symbol) 20441{ 20442 char buffer[100]; 20443 20444 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC, 20445 SYMBOL_REF_BLOCK_OFFSET (symbol)); 20446 fprintf (asm_out_file, "%s", SET_ASM_OP); 20447 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0)); 20448 fprintf (asm_out_file, ","); 20449 RS6000_OUTPUT_BASENAME (asm_out_file, buffer); 20450 fprintf (asm_out_file, "\n"); 20451} 20452 20453static void 20454rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) 20455{ 20456 fputs (GLOBAL_ASM_OP, stream); 20457 RS6000_OUTPUT_BASENAME (stream, name); 20458 putc ('\n', stream); 20459} 20460 20461/* A get_unnamed_decl callback, used for read-only sections. PTR 20462 points to the section string variable. */ 20463 20464static void 20465rs6000_xcoff_output_readonly_section_asm_op (const void *directive) 20466{ 20467 fprintf (asm_out_file, "\t.csect %s[RO],%s\n", 20468 *(const char *const *) directive, 20469 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 20470} 20471 20472/* Likewise for read-write sections. */ 20473 20474static void 20475rs6000_xcoff_output_readwrite_section_asm_op (const void *directive) 20476{ 20477 fprintf (asm_out_file, "\t.csect %s[RW],%s\n", 20478 *(const char *const *) directive, 20479 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 20480} 20481 20482static void 20483rs6000_xcoff_output_tls_section_asm_op (const void *directive) 20484{ 20485 fprintf (asm_out_file, "\t.csect %s[TL],%s\n", 20486 *(const char *const *) directive, 20487 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR); 20488} 20489 20490/* A get_unnamed_section callback, used for switching to toc_section. */ 20491 20492static void 20493rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED) 20494{ 20495 if (TARGET_MINIMAL_TOC) 20496 { 20497 /* toc_section is always selected at least once from 20498 rs6000_xcoff_file_start, so this is guaranteed to 20499 always be defined once and only once in each file. */ 20500 if (!toc_initialized) 20501 { 20502 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file); 20503 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file); 20504 toc_initialized = 1; 20505 } 20506 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n", 20507 (TARGET_32BIT ? "" : ",3")); 20508 } 20509 else 20510 fputs ("\t.toc\n", asm_out_file); 20511} 20512 20513/* Implement TARGET_ASM_INIT_SECTIONS. 
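   This creates the unnamed XCOFF sections (read-only data, private
   data, read-only private data, TLS data and the TOC) that the
   section selection routines below hand out.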
*/ 20514 20515static void 20516rs6000_xcoff_asm_init_sections (void) 20517{ 20518 read_only_data_section 20519 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, 20520 &xcoff_read_only_section_name); 20521 20522 private_data_section 20523 = get_unnamed_section (SECTION_WRITE, 20524 rs6000_xcoff_output_readwrite_section_asm_op, 20525 &xcoff_private_data_section_name); 20526 20527 read_only_private_data_section 20528 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op, 20529 &xcoff_private_rodata_section_name); 20530 20531 tls_data_section 20532 = get_unnamed_section (SECTION_TLS, 20533 rs6000_xcoff_output_tls_section_asm_op, 20534 &xcoff_tls_data_section_name); 20535 20536 tls_private_data_section 20537 = get_unnamed_section (SECTION_TLS, 20538 rs6000_xcoff_output_tls_section_asm_op, 20539 &xcoff_private_data_section_name); 20540 20541 toc_section 20542 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL); 20543 20544 readonly_data_section = read_only_data_section; 20545} 20546 20547static int 20548rs6000_xcoff_reloc_rw_mask (void) 20549{ 20550 return 3; 20551} 20552 20553static void 20554rs6000_xcoff_asm_named_section (const char *name, unsigned int flags, 20555 tree decl ATTRIBUTE_UNUSED) 20556{ 20557 int smclass; 20558 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" }; 20559 20560 if (flags & SECTION_EXCLUDE) 20561 smclass = 4; 20562 else if (flags & SECTION_DEBUG) 20563 { 20564 fprintf (asm_out_file, "\t.dwsect %s\n", name); 20565 return; 20566 } 20567 else if (flags & SECTION_CODE) 20568 smclass = 0; 20569 else if (flags & SECTION_TLS) 20570 smclass = 3; 20571 else if (flags & SECTION_WRITE) 20572 smclass = 2; 20573 else 20574 smclass = 1; 20575 20576 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n", 20577 (flags & SECTION_CODE) ? "." : "", 20578 name, suffix[smclass], flags & SECTION_ENTSIZE); 20579} 20580 20581#define IN_NAMED_SECTION(DECL) \ 20582 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \ 20583 && DECL_SECTION_NAME (DECL) != NULL) 20584 20585static section * 20586rs6000_xcoff_select_section (tree decl, int reloc, 20587 unsigned HOST_WIDE_INT align) 20588{ 20589 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into 20590 named section. */ 20591 if (align > BIGGEST_ALIGNMENT) 20592 { 20593 resolve_unique_section (decl, reloc, true); 20594 if (IN_NAMED_SECTION (decl)) 20595 return get_named_section (decl, NULL, reloc); 20596 } 20597 20598 if (decl_readonly_section (decl, reloc)) 20599 { 20600 if (TREE_PUBLIC (decl)) 20601 return read_only_data_section; 20602 else 20603 return read_only_private_data_section; 20604 } 20605 else 20606 { 20607#if HAVE_AS_TLS 20608 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) 20609 { 20610 if (TREE_PUBLIC (decl)) 20611 return tls_data_section; 20612 else if (bss_initializer_p (decl)) 20613 { 20614 /* Convert to COMMON to emit in BSS. */ 20615 DECL_COMMON (decl) = 1; 20616 return tls_comm_section; 20617 } 20618 else 20619 return tls_private_data_section; 20620 } 20621 else 20622#endif 20623 if (TREE_PUBLIC (decl)) 20624 return data_section; 20625 else 20626 return private_data_section; 20627 } 20628} 20629 20630static void 20631rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED) 20632{ 20633 const char *name; 20634 20635 /* Use select_section for private data and uninitialized data with 20636 alignment <= BIGGEST_ALIGNMENT. 
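   Decls that fall through the early returns below get a per-decl
   section whose name is the decl's assembler name with any encoding
   stripped.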
*/ 20637 if (!TREE_PUBLIC (decl) 20638 || DECL_COMMON (decl) 20639 || (DECL_INITIAL (decl) == NULL_TREE 20640 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT) 20641 || DECL_INITIAL (decl) == error_mark_node 20642 || (flag_zero_initialized_in_bss 20643 && initializer_zerop (DECL_INITIAL (decl)))) 20644 return; 20645 20646 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 20647 name = (*targetm.strip_name_encoding) (name); 20648 set_decl_section_name (decl, name); 20649} 20650 20651/* Select section for constant in constant pool. 20652 20653 On RS/6000, all constants are in the private read-only data area. 20654 However, if this is being placed in the TOC it must be output as a 20655 toc entry. */ 20656 20657static section * 20658rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x, 20659 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) 20660{ 20661 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) 20662 return toc_section; 20663 else 20664 return read_only_private_data_section; 20665} 20666 20667/* Remove any trailing [DS] or the like from the symbol name. */ 20668 20669static const char * 20670rs6000_xcoff_strip_name_encoding (const char *name) 20671{ 20672 size_t len; 20673 if (*name == '*') 20674 name++; 20675 len = strlen (name); 20676 if (name[len - 1] == ']') 20677 return ggc_alloc_string (name, len - 4); 20678 else 20679 return name; 20680} 20681 20682/* Section attributes. AIX is always PIC. */ 20683 20684static unsigned int 20685rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc) 20686{ 20687 unsigned int align; 20688 unsigned int flags = default_section_type_flags (decl, name, reloc); 20689 20690 /* Align to at least UNIT size. */ 20691 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl)) 20692 align = MIN_UNITS_PER_WORD; 20693 else 20694 /* Increase alignment of large objects if not already stricter. */ 20695 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 20696 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD 20697 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD); 20698 20699 return flags | (exact_log2 (align) & SECTION_ENTSIZE); 20700} 20701 20702/* Output at beginning of assembler file. 20703 20704 Initialize the section names for the RS/6000 at this point. 20705 20706 Specify filename, including full path, to assembler. 20707 20708 We want to go into the TOC section so at least one .toc will be emitted. 20709 Also, in order to output proper .bs/.es pairs, we need at least one static 20710 [RW] section emitted. 20711 20712 Finally, declare mcount when profiling to make the assembler happy. 
*/ 20713 20714static void 20715rs6000_xcoff_file_start (void) 20716{ 20717 rs6000_gen_section_name (&xcoff_bss_section_name, 20718 main_input_filename, ".bss_"); 20719 rs6000_gen_section_name (&xcoff_private_data_section_name, 20720 main_input_filename, ".rw_"); 20721 rs6000_gen_section_name (&xcoff_private_rodata_section_name, 20722 main_input_filename, ".rop_"); 20723 rs6000_gen_section_name (&xcoff_read_only_section_name, 20724 main_input_filename, ".ro_"); 20725 rs6000_gen_section_name (&xcoff_tls_data_section_name, 20726 main_input_filename, ".tls_"); 20727 rs6000_gen_section_name (&xcoff_tbss_section_name, 20728 main_input_filename, ".tbss_[UL]"); 20729 20730 fputs ("\t.file\t", asm_out_file); 20731 output_quoted_string (asm_out_file, main_input_filename); 20732 fputc ('\n', asm_out_file); 20733 if (write_symbols != NO_DEBUG) 20734 switch_to_section (private_data_section); 20735 switch_to_section (toc_section); 20736 switch_to_section (text_section); 20737 if (profile_flag) 20738 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT); 20739 rs6000_file_start (); 20740} 20741 20742/* Output at end of assembler file. 20743 On the RS/6000, referencing data should automatically pull in text. */ 20744 20745static void 20746rs6000_xcoff_file_end (void) 20747{ 20748 switch_to_section (text_section); 20749 fputs ("_section_.text:\n", asm_out_file); 20750 switch_to_section (data_section); 20751 fputs (TARGET_32BIT 20752 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n", 20753 asm_out_file); 20754} 20755 20756struct declare_alias_data 20757{ 20758 FILE *file; 20759 bool function_descriptor; 20760}; 20761 20762/* Declare alias N. A helper function for for_node_and_aliases. */ 20763 20764static bool 20765rs6000_declare_alias (struct symtab_node *n, void *d) 20766{ 20767 struct declare_alias_data *data = (struct declare_alias_data *)d; 20768 /* Main symbol is output specially, because varasm machinery does part of 20769 the job for us - we do not need to declare .globl/lglobs and such. */ 20770 if (!n->alias || n->weakref) 20771 return false; 20772 20773 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl))) 20774 return false; 20775 20776 /* Prevent assemble_alias from trying to use .set pseudo operation 20777 that does not behave as expected by the middle-end. 
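   Marking the decl as already written below achieves that; the alias
   labels themselves are printed by hand further down.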
*/ 20778 TREE_ASM_WRITTEN (n->decl) = true; 20779 20780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)); 20781 char *buffer = (char *) alloca (strlen (name) + 2); 20782 char *p; 20783 int dollar_inside = 0; 20784 20785 strcpy (buffer, name); 20786 p = strchr (buffer, '$'); 20787 while (p) { 20788 *p = '_'; 20789 dollar_inside++; 20790 p = strchr (p + 1, '$'); 20791 } 20792 if (TREE_PUBLIC (n->decl)) 20793 { 20794 if (!RS6000_WEAK || !DECL_WEAK (n->decl)) 20795 { 20796 if (dollar_inside) { 20797 if (data->function_descriptor) 20798 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); 20799 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); 20800 } 20801 if (data->function_descriptor) 20802 { 20803 fputs ("\t.globl .", data->file); 20804 RS6000_OUTPUT_BASENAME (data->file, buffer); 20805 putc ('\n', data->file); 20806 } 20807 fputs ("\t.globl ", data->file); 20808 RS6000_OUTPUT_BASENAME (data->file, buffer); 20809 putc ('\n', data->file); 20810 } 20811#ifdef ASM_WEAKEN_DECL 20812 else if (DECL_WEAK (n->decl) && !data->function_descriptor) 20813 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL); 20814#endif 20815 } 20816 else 20817 { 20818 if (dollar_inside) 20819 { 20820 if (data->function_descriptor) 20821 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name); 20822 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name); 20823 } 20824 if (data->function_descriptor) 20825 { 20826 fputs ("\t.lglobl .", data->file); 20827 RS6000_OUTPUT_BASENAME (data->file, buffer); 20828 putc ('\n', data->file); 20829 } 20830 fputs ("\t.lglobl ", data->file); 20831 RS6000_OUTPUT_BASENAME (data->file, buffer); 20832 putc ('\n', data->file); 20833 } 20834 if (data->function_descriptor) 20835 fputs (".", data->file); 20836 RS6000_OUTPUT_BASENAME (data->file, buffer); 20837 fputs (":\n", data->file); 20838 return false; 20839} 20840 20841 20842#ifdef HAVE_GAS_HIDDEN 20843/* Helper function to calculate visibility of a DECL 20844 and return the value as a const string. */ 20845 20846static const char * 20847rs6000_xcoff_visibility (tree decl) 20848{ 20849 static const char * const visibility_types[] = { 20850 "", ",protected", ",hidden", ",internal" 20851 }; 20852 20853 enum symbol_visibility vis = DECL_VISIBILITY (decl); 20854 return visibility_types[vis]; 20855} 20856#endif 20857 20858 20859/* This macro produces the initial definition of a function name. 20860 On the RS/6000, we need to place an extra '.' in the function name and 20861 output the function descriptor. 20862 Dollar signs are converted to underscores. 20863 20864 The csect for the function will have already been created when 20865 text_section was selected. We do have to go back to that csect, however. 20866 20867 The third and fourth parameters to the .function pseudo-op (16 and 044) 20868 are placeholders which no longer have any use. 20869 20870 Because AIX assembler's .set command has unexpected semantics, we output 20871 all aliases as alternative labels in front of the definition. 
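   For example, a public, non-weak function `foo' carrying an alias `bar'
   comes out roughly as

	.globl .foo
	.csect foo[DS]
   foo:
	.globl bar
   bar:
	.long .foo, TOC[tc0], 0

   followed by the `.foo:' and `.bar:' code labels in front of the
   function body itself (the exact directives vary with weakness,
   visibility and 32- vs 64-bit mode).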
*/ 20872 20873void 20874rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl) 20875{ 20876 char *buffer = (char *) alloca (strlen (name) + 1); 20877 char *p; 20878 int dollar_inside = 0; 20879 struct declare_alias_data data = {file, false}; 20880 20881 strcpy (buffer, name); 20882 p = strchr (buffer, '$'); 20883 while (p) { 20884 *p = '_'; 20885 dollar_inside++; 20886 p = strchr (p + 1, '$'); 20887 } 20888 if (TREE_PUBLIC (decl)) 20889 { 20890 if (!RS6000_WEAK || !DECL_WEAK (decl)) 20891 { 20892 if (dollar_inside) { 20893 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); 20894 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); 20895 } 20896 fputs ("\t.globl .", file); 20897 RS6000_OUTPUT_BASENAME (file, buffer); 20898#ifdef HAVE_GAS_HIDDEN 20899 fputs (rs6000_xcoff_visibility (decl), file); 20900#endif 20901 putc ('\n', file); 20902 } 20903 } 20904 else 20905 { 20906 if (dollar_inside) { 20907 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name); 20908 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name); 20909 } 20910 fputs ("\t.lglobl .", file); 20911 RS6000_OUTPUT_BASENAME (file, buffer); 20912 putc ('\n', file); 20913 } 20914 fputs ("\t.csect ", file); 20915 RS6000_OUTPUT_BASENAME (file, buffer); 20916 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file); 20917 RS6000_OUTPUT_BASENAME (file, buffer); 20918 fputs (":\n", file); 20919 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, 20920 &data, true); 20921 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file); 20922 RS6000_OUTPUT_BASENAME (file, buffer); 20923 fputs (", TOC[tc0], 0\n", file); 20924 in_section = NULL; 20925 switch_to_section (function_section (decl)); 20926 putc ('.', file); 20927 RS6000_OUTPUT_BASENAME (file, buffer); 20928 fputs (":\n", file); 20929 data.function_descriptor = true; 20930 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, 20931 &data, true); 20932 if (!DECL_IGNORED_P (decl)) 20933 { 20934 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG) 20935 xcoffout_declare_function (file, decl, buffer); 20936 else if (write_symbols == DWARF2_DEBUG) 20937 { 20938 name = (*targetm.strip_name_encoding) (name); 20939 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name); 20940 } 20941 } 20942 return; 20943} 20944 20945 20946/* Output assembly language to globalize a symbol from a DECL, 20947 possibly with visibility. */ 20948 20949void 20950rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl) 20951{ 20952 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 20953 fputs (GLOBAL_ASM_OP, stream); 20954 RS6000_OUTPUT_BASENAME (stream, name); 20955#ifdef HAVE_GAS_HIDDEN 20956 fputs (rs6000_xcoff_visibility (decl), stream); 20957#endif 20958 putc ('\n', stream); 20959} 20960 20961/* Output assembly language to define a symbol as COMMON from a DECL, 20962 possibly with visibility. 
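   The alignment operand emitted is the log2 of the byte alignment:
   4-byte alignment by default, 8-byte for objects larger than 4 bytes,
   and the requested power of two for anything with a stricter declared
   alignment.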
*/

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl ATTRIBUTE_UNUSED,
					     const char *name,
					     unsigned HOST_WIDE_INT size,
					     unsigned HOST_WIDE_INT align)
{
  unsigned HOST_WIDE_INT align2 = 2;

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  fputs (COMMON_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);

  fprintf (stream,
	   "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
	   size, align2);

#ifdef HAVE_GAS_HIDDEN
  if (decl != NULL)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}

/* This macro produces the initial definition of an object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}

/* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'.  */

void
rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-$", file);
}

/* Output a symbol offset relative to the dbase for the current object.
   We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
   signed offsets.

   __gcc_unwind_dbase is embedded in all executables/libraries through
   libgcc/config/rs6000/crtdbase.S.  */

void
rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
{
  fputs (integer_asm_op (size, FALSE), file);
  assemble_name (file, label);
  fputs ("-__gcc_unwind_dbase", file);
}

#ifdef HAVE_AS_TLS
static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (!SYMBOL_REF_P (symbol))
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls.  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			?
"[DS]" : "[UA]")); 21067 XSTR (symbol, 0) = ggc_strdup (newname); 21068 } 21069} 21070#endif /* HAVE_AS_TLS */ 21071#endif /* TARGET_XCOFF */ 21072 21073void 21074rs6000_asm_weaken_decl (FILE *stream, tree decl, 21075 const char *name, const char *val) 21076{ 21077 fputs ("\t.weak\t", stream); 21078 RS6000_OUTPUT_BASENAME (stream, name); 21079 if (decl && TREE_CODE (decl) == FUNCTION_DECL 21080 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) 21081 { 21082 if (TARGET_XCOFF) 21083 fputs ("[DS]", stream); 21084#if TARGET_XCOFF && HAVE_GAS_HIDDEN 21085 if (TARGET_XCOFF) 21086 fputs (rs6000_xcoff_visibility (decl), stream); 21087#endif 21088 fputs ("\n\t.weak\t.", stream); 21089 RS6000_OUTPUT_BASENAME (stream, name); 21090 } 21091#if TARGET_XCOFF && HAVE_GAS_HIDDEN 21092 if (TARGET_XCOFF) 21093 fputs (rs6000_xcoff_visibility (decl), stream); 21094#endif 21095 fputc ('\n', stream); 21096 if (val) 21097 { 21098#ifdef ASM_OUTPUT_DEF 21099 ASM_OUTPUT_DEF (stream, name, val); 21100#endif 21101 if (decl && TREE_CODE (decl) == FUNCTION_DECL 21102 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS) 21103 { 21104 fputs ("\t.set\t.", stream); 21105 RS6000_OUTPUT_BASENAME (stream, name); 21106 fputs (",.", stream); 21107 RS6000_OUTPUT_BASENAME (stream, val); 21108 fputc ('\n', stream); 21109 } 21110 } 21111} 21112 21113 21114/* Return true if INSN should not be copied. */ 21115 21116static bool 21117rs6000_cannot_copy_insn_p (rtx_insn *insn) 21118{ 21119 return recog_memoized (insn) >= 0 21120 && get_attr_cannot_copy (insn); 21121} 21122 21123/* Compute a (partial) cost for rtx X. Return true if the complete 21124 cost has been computed, and false if subexpressions should be 21125 scanned. In either case, *TOTAL contains the cost result. */ 21126 21127static bool 21128rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, 21129 int opno ATTRIBUTE_UNUSED, int *total, bool speed) 21130{ 21131 int code = GET_CODE (x); 21132 21133 switch (code) 21134 { 21135 /* On the RS/6000, if it is valid in the insn, it is free. */ 21136 case CONST_INT: 21137 if (((outer_code == SET 21138 || outer_code == PLUS 21139 || outer_code == MINUS) 21140 && (satisfies_constraint_I (x) 21141 || satisfies_constraint_L (x))) 21142 || (outer_code == AND 21143 && (satisfies_constraint_K (x) 21144 || (mode == SImode 21145 ? satisfies_constraint_L (x) 21146 : satisfies_constraint_J (x)))) 21147 || ((outer_code == IOR || outer_code == XOR) 21148 && (satisfies_constraint_K (x) 21149 || (mode == SImode 21150 ? satisfies_constraint_L (x) 21151 : satisfies_constraint_J (x)))) 21152 || outer_code == ASHIFT 21153 || outer_code == ASHIFTRT 21154 || outer_code == LSHIFTRT 21155 || outer_code == ROTATE 21156 || outer_code == ROTATERT 21157 || outer_code == ZERO_EXTRACT 21158 || (outer_code == MULT 21159 && satisfies_constraint_I (x)) 21160 || ((outer_code == DIV || outer_code == UDIV 21161 || outer_code == MOD || outer_code == UMOD) 21162 && exact_log2 (INTVAL (x)) >= 0) 21163 || (outer_code == COMPARE 21164 && (satisfies_constraint_I (x) 21165 || satisfies_constraint_K (x))) 21166 || ((outer_code == EQ || outer_code == NE) 21167 && (satisfies_constraint_I (x) 21168 || satisfies_constraint_K (x) 21169 || (mode == SImode 21170 ? 
satisfies_constraint_L (x) 21171 : satisfies_constraint_J (x)))) 21172 || (outer_code == GTU 21173 && satisfies_constraint_I (x)) 21174 || (outer_code == LTU 21175 && satisfies_constraint_P (x))) 21176 { 21177 *total = 0; 21178 return true; 21179 } 21180 else if ((outer_code == PLUS 21181 && reg_or_add_cint_operand (x, VOIDmode)) 21182 || (outer_code == MINUS 21183 && reg_or_sub_cint_operand (x, VOIDmode)) 21184 || ((outer_code == SET 21185 || outer_code == IOR 21186 || outer_code == XOR) 21187 && (INTVAL (x) 21188 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0)) 21189 { 21190 *total = COSTS_N_INSNS (1); 21191 return true; 21192 } 21193 /* FALLTHRU */ 21194 21195 case CONST_DOUBLE: 21196 case CONST_WIDE_INT: 21197 case CONST: 21198 case HIGH: 21199 case SYMBOL_REF: 21200 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); 21201 return true; 21202 21203 case MEM: 21204 /* When optimizing for size, MEM should be slightly more expensive 21205 than generating address, e.g., (plus (reg) (const)). 21206 L1 cache latency is about two instructions. */ 21207 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2); 21208 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x))) 21209 *total += COSTS_N_INSNS (100); 21210 return true; 21211 21212 case LABEL_REF: 21213 *total = 0; 21214 return true; 21215 21216 case PLUS: 21217 case MINUS: 21218 if (FLOAT_MODE_P (mode)) 21219 *total = rs6000_cost->fp; 21220 else 21221 *total = COSTS_N_INSNS (1); 21222 return false; 21223 21224 case MULT: 21225 if (CONST_INT_P (XEXP (x, 1)) 21226 && satisfies_constraint_I (XEXP (x, 1))) 21227 { 21228 if (INTVAL (XEXP (x, 1)) >= -256 21229 && INTVAL (XEXP (x, 1)) <= 255) 21230 *total = rs6000_cost->mulsi_const9; 21231 else 21232 *total = rs6000_cost->mulsi_const; 21233 } 21234 else if (mode == SFmode) 21235 *total = rs6000_cost->fp; 21236 else if (FLOAT_MODE_P (mode)) 21237 *total = rs6000_cost->dmul; 21238 else if (mode == DImode) 21239 *total = rs6000_cost->muldi; 21240 else 21241 *total = rs6000_cost->mulsi; 21242 return false; 21243 21244 case FMA: 21245 if (mode == SFmode) 21246 *total = rs6000_cost->fp; 21247 else 21248 *total = rs6000_cost->dmul; 21249 break; 21250 21251 case DIV: 21252 case MOD: 21253 if (FLOAT_MODE_P (mode)) 21254 { 21255 *total = mode == DFmode ? rs6000_cost->ddiv 21256 : rs6000_cost->sdiv; 21257 return false; 21258 } 21259 /* FALLTHRU */ 21260 21261 case UDIV: 21262 case UMOD: 21263 if (CONST_INT_P (XEXP (x, 1)) 21264 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0) 21265 { 21266 if (code == DIV || code == MOD) 21267 /* Shift, addze */ 21268 *total = COSTS_N_INSNS (2); 21269 else 21270 /* Shift */ 21271 *total = COSTS_N_INSNS (1); 21272 } 21273 else 21274 { 21275 if (GET_MODE (XEXP (x, 1)) == DImode) 21276 *total = rs6000_cost->divdi; 21277 else 21278 *total = rs6000_cost->divsi; 21279 } 21280 /* Add in shift and subtract for MOD unless we have a mod instruction. */ 21281 if (!TARGET_MODULO && (code == MOD || code == UMOD)) 21282 *total += COSTS_N_INSNS (2); 21283 return false; 21284 21285 case CTZ: 21286 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4); 21287 return false; 21288 21289 case FFS: 21290 *total = COSTS_N_INSNS (4); 21291 return false; 21292 21293 case POPCOUNT: 21294 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6); 21295 return false; 21296 21297 case PARITY: 21298 *total = COSTS_N_INSNS (TARGET_CMPB ? 
2 : 6); 21299 return false; 21300 21301 case NOT: 21302 if (outer_code == AND || outer_code == IOR || outer_code == XOR) 21303 *total = 0; 21304 else 21305 *total = COSTS_N_INSNS (1); 21306 return false; 21307 21308 case AND: 21309 if (CONST_INT_P (XEXP (x, 1))) 21310 { 21311 rtx left = XEXP (x, 0); 21312 rtx_code left_code = GET_CODE (left); 21313 21314 /* rotate-and-mask: 1 insn. */ 21315 if ((left_code == ROTATE 21316 || left_code == ASHIFT 21317 || left_code == LSHIFTRT) 21318 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode)) 21319 { 21320 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed); 21321 if (!CONST_INT_P (XEXP (left, 1))) 21322 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed); 21323 *total += COSTS_N_INSNS (1); 21324 return true; 21325 } 21326 21327 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */ 21328 HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); 21329 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode) 21330 || (val & 0xffff) == val 21331 || (val & 0xffff0000) == val 21332 || ((val & 0xffff) == 0 && mode == SImode)) 21333 { 21334 *total = rtx_cost (left, mode, AND, 0, speed); 21335 *total += COSTS_N_INSNS (1); 21336 return true; 21337 } 21338 21339 /* 2 insns. */ 21340 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode)) 21341 { 21342 *total = rtx_cost (left, mode, AND, 0, speed); 21343 *total += COSTS_N_INSNS (2); 21344 return true; 21345 } 21346 } 21347 21348 *total = COSTS_N_INSNS (1); 21349 return false; 21350 21351 case IOR: 21352 /* FIXME */ 21353 *total = COSTS_N_INSNS (1); 21354 return true; 21355 21356 case CLZ: 21357 case XOR: 21358 case ZERO_EXTRACT: 21359 *total = COSTS_N_INSNS (1); 21360 return false; 21361 21362 case ASHIFT: 21363 /* The EXTSWSLI instruction is a combined instruction. Don't count both 21364 the sign extend and shift separately within the insn. */ 21365 if (TARGET_EXTSWSLI && mode == DImode 21366 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND 21367 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode) 21368 { 21369 *total = 0; 21370 return false; 21371 } 21372 /* fall through */ 21373 21374 case ASHIFTRT: 21375 case LSHIFTRT: 21376 case ROTATE: 21377 case ROTATERT: 21378 /* Handle mul_highpart. 
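   A shift of a MULT underneath a TRUNCATE is really the high-part
   multiply pattern (mulhw/mulhd style), so charge the full multiply
   cost rather than a shift.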
*/ 21379 if (outer_code == TRUNCATE 21380 && GET_CODE (XEXP (x, 0)) == MULT) 21381 { 21382 if (mode == DImode) 21383 *total = rs6000_cost->muldi; 21384 else 21385 *total = rs6000_cost->mulsi; 21386 return true; 21387 } 21388 else if (outer_code == AND) 21389 *total = 0; 21390 else 21391 *total = COSTS_N_INSNS (1); 21392 return false; 21393 21394 case SIGN_EXTEND: 21395 case ZERO_EXTEND: 21396 if (MEM_P (XEXP (x, 0))) 21397 *total = 0; 21398 else 21399 *total = COSTS_N_INSNS (1); 21400 return false; 21401 21402 case COMPARE: 21403 case NEG: 21404 case ABS: 21405 if (!FLOAT_MODE_P (mode)) 21406 { 21407 *total = COSTS_N_INSNS (1); 21408 return false; 21409 } 21410 /* FALLTHRU */ 21411 21412 case FLOAT: 21413 case UNSIGNED_FLOAT: 21414 case FIX: 21415 case UNSIGNED_FIX: 21416 case FLOAT_TRUNCATE: 21417 *total = rs6000_cost->fp; 21418 return false; 21419 21420 case FLOAT_EXTEND: 21421 if (mode == DFmode) 21422 *total = rs6000_cost->sfdf_convert; 21423 else 21424 *total = rs6000_cost->fp; 21425 return false; 21426 21427 case CALL: 21428 case IF_THEN_ELSE: 21429 if (!speed) 21430 { 21431 *total = COSTS_N_INSNS (1); 21432 return true; 21433 } 21434 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT) 21435 { 21436 *total = rs6000_cost->fp; 21437 return false; 21438 } 21439 break; 21440 21441 case NE: 21442 case EQ: 21443 case GTU: 21444 case LTU: 21445 /* Carry bit requires mode == Pmode. 21446 NEG or PLUS already counted so only add one. */ 21447 if (mode == Pmode 21448 && (outer_code == NEG || outer_code == PLUS)) 21449 { 21450 *total = COSTS_N_INSNS (1); 21451 return true; 21452 } 21453 /* FALLTHRU */ 21454 21455 case GT: 21456 case LT: 21457 case UNORDERED: 21458 if (outer_code == SET) 21459 { 21460 if (XEXP (x, 1) == const0_rtx) 21461 { 21462 *total = COSTS_N_INSNS (2); 21463 return true; 21464 } 21465 else 21466 { 21467 *total = COSTS_N_INSNS (3); 21468 return false; 21469 } 21470 } 21471 /* CC COMPARE. */ 21472 if (outer_code == COMPARE) 21473 { 21474 *total = 0; 21475 return true; 21476 } 21477 break; 21478 21479 case UNSPEC_VOLATILE: 21480 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ) 21481 { 21482 *total = 0; 21483 return true; 21484 } 21485 break; 21486 21487 default: 21488 break; 21489 } 21490 21491 return false; 21492} 21493 21494/* Debug form of r6000_rtx_costs that is selected if -mdebug=cost. */ 21495 21496static bool 21497rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code, 21498 int opno, int *total, bool speed) 21499{ 21500 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed); 21501 21502 fprintf (stderr, 21503 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, " 21504 "opno = %d, total = %d, speed = %s, x:\n", 21505 ret ? "complete" : "scan inner", 21506 GET_MODE_NAME (mode), 21507 GET_RTX_NAME (outer_code), 21508 opno, 21509 *total, 21510 speed ? "true" : "false"); 21511 21512 debug_rtx (x); 21513 21514 return ret; 21515} 21516 21517static int 21518rs6000_insn_cost (rtx_insn *insn, bool speed) 21519{ 21520 if (recog_memoized (insn) < 0) 21521 return 0; 21522 21523 /* If we are optimizing for size, just use the length. */ 21524 if (!speed) 21525 return get_attr_length (insn); 21526 21527 /* Use the cost if provided. */ 21528 int cost = get_attr_cost (insn); 21529 if (cost > 0) 21530 return cost; 21531 21532 /* If the insn tells us how many insns there are, use that. Otherwise use 21533 the length/4. Adjust the insn length to remove the extra size that 21534 prefixed instructions take. 
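   (A prefixed instruction is still a single instruction even though it
   occupies more than four bytes, so the raw length would over-count it.)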
*/ 21535 int n = get_attr_num_insns (insn); 21536 if (n == 0) 21537 { 21538 int length = get_attr_length (insn); 21539 if (get_attr_prefixed (insn) == PREFIXED_YES) 21540 { 21541 int adjust = 0; 21542 ADJUST_INSN_LENGTH (insn, adjust); 21543 length -= adjust; 21544 } 21545 21546 n = length / 4; 21547 } 21548 21549 enum attr_type type = get_attr_type (insn); 21550 21551 switch (type) 21552 { 21553 case TYPE_LOAD: 21554 case TYPE_FPLOAD: 21555 case TYPE_VECLOAD: 21556 cost = COSTS_N_INSNS (n + 1); 21557 break; 21558 21559 case TYPE_MUL: 21560 switch (get_attr_size (insn)) 21561 { 21562 case SIZE_8: 21563 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9; 21564 break; 21565 case SIZE_16: 21566 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const; 21567 break; 21568 case SIZE_32: 21569 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi; 21570 break; 21571 case SIZE_64: 21572 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi; 21573 break; 21574 default: 21575 gcc_unreachable (); 21576 } 21577 break; 21578 case TYPE_DIV: 21579 switch (get_attr_size (insn)) 21580 { 21581 case SIZE_32: 21582 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi; 21583 break; 21584 case SIZE_64: 21585 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi; 21586 break; 21587 default: 21588 gcc_unreachable (); 21589 } 21590 break; 21591 21592 case TYPE_FP: 21593 cost = n * rs6000_cost->fp; 21594 break; 21595 case TYPE_DMUL: 21596 cost = n * rs6000_cost->dmul; 21597 break; 21598 case TYPE_SDIV: 21599 cost = n * rs6000_cost->sdiv; 21600 break; 21601 case TYPE_DDIV: 21602 cost = n * rs6000_cost->ddiv; 21603 break; 21604 21605 case TYPE_SYNC: 21606 case TYPE_LOAD_L: 21607 case TYPE_MFCR: 21608 case TYPE_MFCRF: 21609 cost = COSTS_N_INSNS (n + 2); 21610 break; 21611 21612 default: 21613 cost = COSTS_N_INSNS (n); 21614 } 21615 21616 return cost; 21617} 21618 21619/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */ 21620 21621static int 21622rs6000_debug_address_cost (rtx x, machine_mode mode, 21623 addr_space_t as, bool speed) 21624{ 21625 int ret = TARGET_ADDRESS_COST (x, mode, as, speed); 21626 21627 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n", 21628 ret, speed ? "true" : "false"); 21629 debug_rtx (x); 21630 21631 return ret; 21632} 21633 21634 21635/* A C expression returning the cost of moving data from a register of class 21636 CLASS1 to one of CLASS2. */ 21637 21638static int 21639rs6000_register_move_cost (machine_mode mode, 21640 reg_class_t from, reg_class_t to) 21641{ 21642 int ret; 21643 reg_class_t rclass; 21644 21645 if (TARGET_DEBUG_COST) 21646 dbg_cost_ctrl++; 21647 21648 /* If we have VSX, we can easily move between FPR or Altivec registers, 21649 otherwise we can only easily move within classes. 21650 Do this first so we give best-case answers for union classes 21651 containing both gprs and vsx regs. */ 21652 HARD_REG_SET to_vsx, from_vsx; 21653 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS]; 21654 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS]; 21655 if (!hard_reg_set_empty_p (to_vsx) 21656 && !hard_reg_set_empty_p (from_vsx) 21657 && (TARGET_VSX 21658 || hard_reg_set_intersect_p (to_vsx, from_vsx))) 21659 { 21660 int reg = FIRST_FPR_REGNO; 21661 if (TARGET_VSX 21662 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO) 21663 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO))) 21664 reg = FIRST_ALTIVEC_REGNO; 21665 ret = 2 * hard_regno_nregs (reg, mode); 21666 } 21667 21668 /* Moves from/to GENERAL_REGS. 
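   The comma expressions below set RCLASS to whichever side of the move
   is not the GPR class, so the code that follows can cost the non-GPR
   end of the transfer.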
*/
  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
    {
      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	{
	  if (TARGET_DIRECT_MOVE)
	    {
	      /* Keep the cost for direct moves above that for within
		 a register class even if the actual processor cost is
		 comparable.  We do this because a direct move insn
		 can't be a nop, whereas with ideal register
		 allocation a move within the same class might turn
		 out to be a nop.  */
	      if (rs6000_tune == PROCESSOR_POWER9
		  || rs6000_tune == PROCESSOR_POWER10)
		ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      else
		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
	      /* SFmode requires a conversion when moving between gprs
		 and vsx.  */
	      if (mode == SFmode)
		ret += 2;
	    }
	  else
	    ret = (rs6000_memory_move_cost (mode, rclass, false)
		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
	}

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory.  */
      else if ((rs6000_tune == PROCESSOR_POWER6
		|| rs6000_tune == PROCESSOR_POWER7
		|| rs6000_tune == PROCESSOR_POWER8
		|| rs6000_tune == PROCESSOR_POWER9)
	       && reg_class_subset_p (rclass, SPECIAL_REGS))
	ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
    }

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}

/* A C expression returning the cost of moving data of MODE from a register
   to or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}

/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
21769 21770 The register allocator chooses GEN_OR_VSX_REGS for the allocno 21771 class if GENERAL_REGS and VSX_REGS cost is lower than the memory 21772 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register 21773 move cost between GENERAL_REGS and VSX_REGS low. 21774 21775 It might seem reasonable to use a union class. After all, if usage 21776 of vsr is low and gpr high, it might make sense to spill gpr to vsr 21777 rather than memory. However, in cases where register pressure of 21778 both is high, like the cactus_adm spec test, allowing 21779 GEN_OR_VSX_REGS as the allocno class results in bad decisions in 21780 the first scheduling pass. This is partly due to an allocno of 21781 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure 21782 class, which gives too high a pressure for GENERAL_REGS and too low 21783 for VSX_REGS. So, force a choice of the subclass here. 21784 21785 The best class is also the union if GENERAL_REGS and VSX_REGS have 21786 the same cost. In that case we do use GEN_OR_VSX_REGS as the 21787 allocno class, since trying to narrow down the class by regno mode 21788 is prone to error. For example, SImode is allowed in VSX regs and 21789 in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect) 21790 it would be wrong to choose an allocno of GENERAL_REGS based on 21791 SImode. */ 21792 21793static reg_class_t 21794rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, 21795 reg_class_t allocno_class, 21796 reg_class_t best_class) 21797{ 21798 switch (allocno_class) 21799 { 21800 case GEN_OR_VSX_REGS: 21801 /* best_class must be a subset of allocno_class. */ 21802 gcc_checking_assert (best_class == GEN_OR_VSX_REGS 21803 || best_class == GEN_OR_FLOAT_REGS 21804 || best_class == VSX_REGS 21805 || best_class == ALTIVEC_REGS 21806 || best_class == FLOAT_REGS 21807 || best_class == GENERAL_REGS 21808 || best_class == BASE_REGS); 21809 /* Use best_class but choose wider classes when copying from the 21810 wider class to best_class is cheap. This mimics IRA choice 21811 of allocno class. */ 21812 if (best_class == BASE_REGS) 21813 return GENERAL_REGS; 21814 if (TARGET_VSX 21815 && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS)) 21816 return VSX_REGS; 21817 return best_class; 21818 21819 default: 21820 break; 21821 } 21822 21823 return allocno_class; 21824} 21825 21826/* Returns a code for a target-specific builtin that implements 21827 reciprocal of the function, or NULL_TREE if not available. */ 21828 21829static tree 21830rs6000_builtin_reciprocal (tree fndecl) 21831{ 21832 switch (DECL_MD_FUNCTION_CODE (fndecl)) 21833 { 21834 case VSX_BUILTIN_XVSQRTDP: 21835 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) 21836 return NULL_TREE; 21837 21838 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; 21839 21840 case VSX_BUILTIN_XVSQRTSP: 21841 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) 21842 return NULL_TREE; 21843 21844 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF]; 21845 21846 default: 21847 return NULL_TREE; 21848 } 21849} 21850 21851/* Load up a constant. If the mode is a vector mode, splat the value across 21852 all of the vector elements. 
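   For example, the software divide and square root expanders below use
   this to materialize 1.0 and 0.5 in whatever scalar or vector mode is
   being computed.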
*/ 21853 21854static rtx 21855rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) 21856{ 21857 rtx reg; 21858 21859 if (mode == SFmode || mode == DFmode) 21860 { 21861 rtx d = const_double_from_real_value (dconst, mode); 21862 reg = force_reg (mode, d); 21863 } 21864 else if (mode == V4SFmode) 21865 { 21866 rtx d = const_double_from_real_value (dconst, SFmode); 21867 rtvec v = gen_rtvec (4, d, d, d, d); 21868 reg = gen_reg_rtx (mode); 21869 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); 21870 } 21871 else if (mode == V2DFmode) 21872 { 21873 rtx d = const_double_from_real_value (dconst, DFmode); 21874 rtvec v = gen_rtvec (2, d, d); 21875 reg = gen_reg_rtx (mode); 21876 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); 21877 } 21878 else 21879 gcc_unreachable (); 21880 21881 return reg; 21882} 21883 21884/* Generate an FMA instruction. */ 21885 21886static void 21887rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) 21888{ 21889 machine_mode mode = GET_MODE (target); 21890 rtx dst; 21891 21892 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); 21893 gcc_assert (dst != NULL); 21894 21895 if (dst != target) 21896 emit_move_insn (target, dst); 21897} 21898 21899/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ 21900 21901static void 21902rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) 21903{ 21904 machine_mode mode = GET_MODE (dst); 21905 rtx r; 21906 21907 /* This is a tad more complicated, since the fnma_optab is for 21908 a different expression: fma(-m1, m2, a), which is the same 21909 thing except in the case of signed zeros. 21910 21911 Fortunately we know that if FMA is supported that FNMSUB is 21912 also supported in the ISA. Just expand it directly. */ 21913 21914 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); 21915 21916 r = gen_rtx_NEG (mode, a); 21917 r = gen_rtx_FMA (mode, m1, m2, r); 21918 r = gen_rtx_NEG (mode, r); 21919 emit_insn (gen_rtx_SET (dst, r)); 21920} 21921 21922/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, 21923 add a reg_note saying that this was a division. Support both scalar and 21924 vector divide. Assumes no trapping math and finite arguments. */ 21925 21926void 21927rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) 21928{ 21929 machine_mode mode = GET_MODE (dst); 21930 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v; 21931 int i; 21932 21933 /* Low precision estimates guarantee 5 bits of accuracy. High 21934 precision estimates guarantee 14 bits of accuracy. SFmode 21935 requires 23 bits of accuracy. DFmode requires 52 bits of 21936 accuracy. Each pass at least doubles the accuracy, leading 21937 to the following. */ 21938 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; 21939 if (mode == DFmode || mode == V2DFmode) 21940 passes++; 21941 21942 enum insn_code code = optab_handler (smul_optab, mode); 21943 insn_gen_fn gen_mul = GEN_FCN (code); 21944 21945 gcc_assert (code != CODE_FOR_nothing); 21946 21947 one = rs6000_load_constant_and_splat (mode, dconst1); 21948 21949 /* x0 = 1./d estimate */ 21950 x0 = gen_reg_rtx (mode); 21951 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), 21952 UNSPEC_FRES))); 21953 21954 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ 21955 if (passes > 1) { 21956 21957 /* e0 = 1. 
- d * x0 */ 21958 e0 = gen_reg_rtx (mode); 21959 rs6000_emit_nmsub (e0, d, x0, one); 21960 21961 /* x1 = x0 + e0 * x0 */ 21962 x1 = gen_reg_rtx (mode); 21963 rs6000_emit_madd (x1, e0, x0, x0); 21964 21965 for (i = 0, xprev = x1, eprev = e0; i < passes - 2; 21966 ++i, xprev = xnext, eprev = enext) { 21967 21968 /* enext = eprev * eprev */ 21969 enext = gen_reg_rtx (mode); 21970 emit_insn (gen_mul (enext, eprev, eprev)); 21971 21972 /* xnext = xprev + enext * xprev */ 21973 xnext = gen_reg_rtx (mode); 21974 rs6000_emit_madd (xnext, enext, xprev, xprev); 21975 } 21976 21977 } else 21978 xprev = x0; 21979 21980 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */ 21981 21982 /* u = n * xprev */ 21983 u = gen_reg_rtx (mode); 21984 emit_insn (gen_mul (u, n, xprev)); 21985 21986 /* v = n - (d * u) */ 21987 v = gen_reg_rtx (mode); 21988 rs6000_emit_nmsub (v, d, u, n); 21989 21990 /* dst = (v * xprev) + u */ 21991 rs6000_emit_madd (dst, v, xprev, u); 21992 21993 if (note_p) 21994 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d)); 21995} 21996 21997/* Goldschmidt's Algorithm for single/double-precision floating point 21998 sqrt and rsqrt. Assumes no trapping math and finite arguments. */ 21999 22000void 22001rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) 22002{ 22003 machine_mode mode = GET_MODE (src); 22004 rtx e = gen_reg_rtx (mode); 22005 rtx g = gen_reg_rtx (mode); 22006 rtx h = gen_reg_rtx (mode); 22007 22008 /* Low precision estimates guarantee 5 bits of accuracy. High 22009 precision estimates guarantee 14 bits of accuracy. SFmode 22010 requires 23 bits of accuracy. DFmode requires 52 bits of 22011 accuracy. Each pass at least doubles the accuracy, leading 22012 to the following. */ 22013 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3; 22014 if (mode == DFmode || mode == V2DFmode) 22015 passes++; 22016 22017 int i; 22018 rtx mhalf; 22019 enum insn_code code = optab_handler (smul_optab, mode); 22020 insn_gen_fn gen_mul = GEN_FCN (code); 22021 22022 gcc_assert (code != CODE_FOR_nothing); 22023 22024 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf); 22025 22026 /* e = rsqrt estimate */ 22027 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src), 22028 UNSPEC_RSQRT))); 22029 22030 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */ 22031 if (!recip) 22032 { 22033 rtx zero = force_reg (mode, CONST0_RTX (mode)); 22034 22035 if (mode == SFmode) 22036 { 22037 rtx target = emit_conditional_move (e, GT, src, zero, mode, 22038 e, zero, mode, 0); 22039 if (target != e) 22040 emit_move_insn (e, target); 22041 } 22042 else 22043 { 22044 rtx cond = gen_rtx_GT (VOIDmode, e, zero); 22045 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero); 22046 } 22047 } 22048 22049 /* g = sqrt estimate. */ 22050 emit_insn (gen_mul (g, e, src)); 22051 /* h = 1/(2*sqrt) estimate. */ 22052 emit_insn (gen_mul (h, e, mhalf)); 22053 22054 if (recip) 22055 { 22056 if (passes == 1) 22057 { 22058 rtx t = gen_reg_rtx (mode); 22059 rs6000_emit_nmsub (t, g, h, mhalf); 22060 /* Apply correction directly to 1/rsqrt estimate. */ 22061 rs6000_emit_madd (dst, e, t, e); 22062 } 22063 else 22064 { 22065 for (i = 0; i < passes; i++) 22066 { 22067 rtx t1 = gen_reg_rtx (mode); 22068 rtx g1 = gen_reg_rtx (mode); 22069 rtx h1 = gen_reg_rtx (mode); 22070 22071 rs6000_emit_nmsub (t1, g, h, mhalf); 22072 rs6000_emit_madd (g1, g, t1, g); 22073 rs6000_emit_madd (h1, h, t1, h); 22074 22075 g = g1; 22076 h = h1; 22077 } 22078 /* Multiply by 2 for 1/rsqrt. 
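   (h has converged to an estimate of 1/(2*sqrt(src)), so h + h gives the
   reciprocal square root.)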
*/ 22079 emit_insn (gen_add3_insn (dst, h, h)); 22080 } 22081 } 22082 else 22083 { 22084 rtx t = gen_reg_rtx (mode); 22085 rs6000_emit_nmsub (t, g, h, mhalf); 22086 rs6000_emit_madd (dst, g, t, g); 22087 } 22088 22089 return; 22090} 22091 22092/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD 22093 (Power7) targets. DST is the target, and SRC is the argument operand. */ 22094 22095void 22096rs6000_emit_popcount (rtx dst, rtx src) 22097{ 22098 machine_mode mode = GET_MODE (dst); 22099 rtx tmp1, tmp2; 22100 22101 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */ 22102 if (TARGET_POPCNTD) 22103 { 22104 if (mode == SImode) 22105 emit_insn (gen_popcntdsi2 (dst, src)); 22106 else 22107 emit_insn (gen_popcntddi2 (dst, src)); 22108 return; 22109 } 22110 22111 tmp1 = gen_reg_rtx (mode); 22112 22113 if (mode == SImode) 22114 { 22115 emit_insn (gen_popcntbsi2 (tmp1, src)); 22116 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101), 22117 NULL_RTX, 0); 22118 tmp2 = force_reg (SImode, tmp2); 22119 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24))); 22120 } 22121 else 22122 { 22123 emit_insn (gen_popcntbdi2 (tmp1, src)); 22124 tmp2 = expand_mult (DImode, tmp1, 22125 GEN_INT ((HOST_WIDE_INT) 22126 0x01010101 << 32 | 0x01010101), 22127 NULL_RTX, 0); 22128 tmp2 = force_reg (DImode, tmp2); 22129 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56))); 22130 } 22131} 22132 22133 22134/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the 22135 target, and SRC is the argument operand. */ 22136 22137void 22138rs6000_emit_parity (rtx dst, rtx src) 22139{ 22140 machine_mode mode = GET_MODE (dst); 22141 rtx tmp; 22142 22143 tmp = gen_reg_rtx (mode); 22144 22145 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */ 22146 if (TARGET_CMPB) 22147 { 22148 if (mode == SImode) 22149 { 22150 emit_insn (gen_popcntbsi2 (tmp, src)); 22151 emit_insn (gen_paritysi2_cmpb (dst, tmp)); 22152 } 22153 else 22154 { 22155 emit_insn (gen_popcntbdi2 (tmp, src)); 22156 emit_insn (gen_paritydi2_cmpb (dst, tmp)); 22157 } 22158 return; 22159 } 22160 22161 if (mode == SImode) 22162 { 22163 /* Is mult+shift >= shift+xor+shift+xor? */ 22164 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) 22165 { 22166 rtx tmp1, tmp2, tmp3, tmp4; 22167 22168 tmp1 = gen_reg_rtx (SImode); 22169 emit_insn (gen_popcntbsi2 (tmp1, src)); 22170 22171 tmp2 = gen_reg_rtx (SImode); 22172 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16))); 22173 tmp3 = gen_reg_rtx (SImode); 22174 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2)); 22175 22176 tmp4 = gen_reg_rtx (SImode); 22177 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8))); 22178 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4)); 22179 } 22180 else 22181 rs6000_emit_popcount (tmp, src); 22182 emit_insn (gen_andsi3 (dst, tmp, const1_rtx)); 22183 } 22184 else 22185 { 22186 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? 
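   That is, if a 64-bit multiply costs at least five insns, the explicit
   shift/xor reduction below is no more expensive than the multiply-based
   rs6000_emit_popcount sequence, so use it instead.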
*/ 22187 if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) 22188 { 22189 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; 22190 22191 tmp1 = gen_reg_rtx (DImode); 22192 emit_insn (gen_popcntbdi2 (tmp1, src)); 22193 22194 tmp2 = gen_reg_rtx (DImode); 22195 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32))); 22196 tmp3 = gen_reg_rtx (DImode); 22197 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2)); 22198 22199 tmp4 = gen_reg_rtx (DImode); 22200 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16))); 22201 tmp5 = gen_reg_rtx (DImode); 22202 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4)); 22203 22204 tmp6 = gen_reg_rtx (DImode); 22205 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8))); 22206 emit_insn (gen_xordi3 (tmp, tmp5, tmp6)); 22207 } 22208 else 22209 rs6000_emit_popcount (tmp, src); 22210 emit_insn (gen_anddi3 (dst, tmp, const1_rtx)); 22211 } 22212} 22213 22214/* Expand an Altivec constant permutation for little endian mode. 22215 OP0 and OP1 are the input vectors and TARGET is the output vector. 22216 SEL specifies the constant permutation vector. 22217 22218 There are two issues: First, the two input operands must be 22219 swapped so that together they form a double-wide array in LE 22220 order. Second, the vperm instruction has surprising behavior 22221 in LE mode: it interprets the elements of the source vectors 22222 in BE mode ("left to right") and interprets the elements of 22223 the destination vector in LE mode ("right to left"). To 22224 correct for this, we must subtract each element of the permute 22225 control vector from 31. 22226 22227 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3} 22228 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm. 22229 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to 22230 serve as the permute control vector. Then, in BE mode, 22231 22232 vperm 9,10,11,12 22233 22234 places the desired result in vr9. However, in LE mode the 22235 vector contents will be 22236 22237 vr10 = 00000003 00000002 00000001 00000000 22238 vr11 = 00000007 00000006 00000005 00000004 22239 22240 The result of the vperm using the same permute control vector is 22241 22242 vr9 = 05000000 07000000 01000000 03000000 22243 22244 That is, the leftmost 4 bytes of vr10 are interpreted as the 22245 source for the rightmost 4 bytes of vr9, and so on. 22246 22247 If we change the permute control vector to 22248 22249 vr12 = {31,20,29,28,23,22,21,20,15,14,13,12,7,6,5,4} 22250 22251 and issue 22252 22253 vperm 9,11,10,12 22254 22255 we get the desired 22256 22257 vr9 = 00000006 00000004 00000002 00000000. */ 22258 22259static void 22260altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1, 22261 const vec_perm_indices &sel) 22262{ 22263 unsigned int i; 22264 rtx perm[16]; 22265 rtx constv, unspec; 22266 22267 /* Unpack and adjust the constant selector. */ 22268 for (i = 0; i < 16; ++i) 22269 { 22270 unsigned int elt = 31 - (sel[i] & 31); 22271 perm[i] = GEN_INT (elt); 22272 } 22273 22274 /* Expand to a permute, swapping the inputs and using the 22275 adjusted selector. 
*/ 22276 if (!REG_P (op0)) 22277 op0 = force_reg (V16QImode, op0); 22278 if (!REG_P (op1)) 22279 op1 = force_reg (V16QImode, op1); 22280 22281 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)); 22282 constv = force_reg (V16QImode, constv); 22283 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv), 22284 UNSPEC_VPERM); 22285 if (!REG_P (target)) 22286 { 22287 rtx tmp = gen_reg_rtx (V16QImode); 22288 emit_move_insn (tmp, unspec); 22289 unspec = tmp; 22290 } 22291 22292 emit_move_insn (target, unspec); 22293} 22294 22295/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the 22296 permute control vector. But here it's not a constant, so we must 22297 generate a vector NAND or NOR to do the adjustment. */ 22298 22299void 22300altivec_expand_vec_perm_le (rtx operands[4]) 22301{ 22302 rtx notx, iorx, unspec; 22303 rtx target = operands[0]; 22304 rtx op0 = operands[1]; 22305 rtx op1 = operands[2]; 22306 rtx sel = operands[3]; 22307 rtx tmp = target; 22308 rtx norreg = gen_reg_rtx (V16QImode); 22309 machine_mode mode = GET_MODE (target); 22310 22311 /* Get everything in regs so the pattern matches. */ 22312 if (!REG_P (op0)) 22313 op0 = force_reg (mode, op0); 22314 if (!REG_P (op1)) 22315 op1 = force_reg (mode, op1); 22316 if (!REG_P (sel)) 22317 sel = force_reg (V16QImode, sel); 22318 if (!REG_P (target)) 22319 tmp = gen_reg_rtx (mode); 22320 22321 if (TARGET_P9_VECTOR) 22322 { 22323 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel), 22324 UNSPEC_VPERMR); 22325 } 22326 else 22327 { 22328 /* Invert the selector with a VNAND if available, else a VNOR. 22329 The VNAND is preferred for future fusion opportunities. */ 22330 notx = gen_rtx_NOT (V16QImode, sel); 22331 iorx = (TARGET_P8_VECTOR 22332 ? gen_rtx_IOR (V16QImode, notx, notx) 22333 : gen_rtx_AND (V16QImode, notx, notx)); 22334 emit_insn (gen_rtx_SET (norreg, iorx)); 22335 22336 /* Permute with operands reversed and adjusted selector. */ 22337 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg), 22338 UNSPEC_VPERM); 22339 } 22340 22341 /* Copy into target, possibly by way of a register. */ 22342 if (!REG_P (target)) 22343 { 22344 emit_move_insn (tmp, unspec); 22345 unspec = tmp; 22346 } 22347 22348 emit_move_insn (target, unspec); 22349} 22350 22351/* Expand an Altivec constant permutation. Return true if we match 22352 an efficient implementation; false to fall back to VPERM. 22353 22354 OP0 and OP1 are the input vectors and TARGET is the output vector. 22355 SEL specifies the constant permutation vector. */ 22356 22357static bool 22358altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, 22359 const vec_perm_indices &sel) 22360{ 22361 struct altivec_perm_insn { 22362 HOST_WIDE_INT mask; 22363 enum insn_code impl; 22364 unsigned char perm[16]; 22365 }; 22366 static const struct altivec_perm_insn patterns[] = { 22367 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct, 22368 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, 22369 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct, 22370 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, 22371 { OPTION_MASK_ALTIVEC, 22372 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct 22373 : CODE_FOR_altivec_vmrglb_direct), 22374 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, 22375 { OPTION_MASK_ALTIVEC, 22376 (BYTES_BIG_ENDIAN ? 
CODE_FOR_altivec_vmrghh_direct 22377 : CODE_FOR_altivec_vmrglh_direct), 22378 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, 22379 { OPTION_MASK_ALTIVEC, 22380 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct 22381 : CODE_FOR_altivec_vmrglw_direct), 22382 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, 22383 { OPTION_MASK_ALTIVEC, 22384 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct 22385 : CODE_FOR_altivec_vmrghb_direct), 22386 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, 22387 { OPTION_MASK_ALTIVEC, 22388 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct 22389 : CODE_FOR_altivec_vmrghh_direct), 22390 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, 22391 { OPTION_MASK_ALTIVEC, 22392 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct 22393 : CODE_FOR_altivec_vmrghw_direct), 22394 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, 22395 { OPTION_MASK_P8_VECTOR, 22396 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct 22397 : CODE_FOR_p8_vmrgow_v4sf_direct), 22398 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } }, 22399 { OPTION_MASK_P8_VECTOR, 22400 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct 22401 : CODE_FOR_p8_vmrgew_v4sf_direct), 22402 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } } 22403 }; 22404 22405 unsigned int i, j, elt, which; 22406 unsigned char perm[16]; 22407 rtx x; 22408 bool one_vec; 22409 22410 /* Unpack the constant selector. */ 22411 for (i = which = 0; i < 16; ++i) 22412 { 22413 elt = sel[i] & 31; 22414 which |= (elt < 16 ? 1 : 2); 22415 perm[i] = elt; 22416 } 22417 22418 /* Simplify the constant selector based on operands. */ 22419 switch (which) 22420 { 22421 default: 22422 gcc_unreachable (); 22423 22424 case 3: 22425 one_vec = false; 22426 if (!rtx_equal_p (op0, op1)) 22427 break; 22428 /* FALLTHRU */ 22429 22430 case 2: 22431 for (i = 0; i < 16; ++i) 22432 perm[i] &= 15; 22433 op0 = op1; 22434 one_vec = true; 22435 break; 22436 22437 case 1: 22438 op1 = op0; 22439 one_vec = true; 22440 break; 22441 } 22442 22443 /* Look for splat patterns. */ 22444 if (one_vec) 22445 { 22446 elt = perm[0]; 22447 22448 for (i = 0; i < 16; ++i) 22449 if (perm[i] != elt) 22450 break; 22451 if (i == 16) 22452 { 22453 if (!BYTES_BIG_ENDIAN) 22454 elt = 15 - elt; 22455 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt))); 22456 return true; 22457 } 22458 22459 if (elt % 2 == 0) 22460 { 22461 for (i = 0; i < 16; i += 2) 22462 if (perm[i] != elt || perm[i + 1] != elt + 1) 22463 break; 22464 if (i == 16) 22465 { 22466 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2; 22467 x = gen_reg_rtx (V8HImode); 22468 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0), 22469 GEN_INT (field))); 22470 emit_move_insn (target, gen_lowpart (V16QImode, x)); 22471 return true; 22472 } 22473 } 22474 22475 if (elt % 4 == 0) 22476 { 22477 for (i = 0; i < 16; i += 4) 22478 if (perm[i] != elt 22479 || perm[i + 1] != elt + 1 22480 || perm[i + 2] != elt + 2 22481 || perm[i + 3] != elt + 3) 22482 break; 22483 if (i == 16) 22484 { 22485 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4; 22486 x = gen_reg_rtx (V4SImode); 22487 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0), 22488 GEN_INT (field))); 22489 emit_move_insn (target, gen_lowpart (V16QImode, x)); 22490 return true; 22491 } 22492 } 22493 } 22494 22495 /* Look for merge and pack patterns. 
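   Each entry in the patterns table above lists the byte selector that one
   of the vmrg{h,l}{b,h,w}, vmrgew/vmrgow or vpku*um instructions
   implements; if the requested selector matches an entry, possibly with
   the two inputs swapped, that single instruction is used instead of a
   vperm.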
*/ 22496 for (j = 0; j < ARRAY_SIZE (patterns); ++j) 22497 { 22498 bool swapped; 22499 22500 if ((patterns[j].mask & rs6000_isa_flags) == 0) 22501 continue; 22502 22503 elt = patterns[j].perm[0]; 22504 if (perm[0] == elt) 22505 swapped = false; 22506 else if (perm[0] == elt + 16) 22507 swapped = true; 22508 else 22509 continue; 22510 for (i = 1; i < 16; ++i) 22511 { 22512 elt = patterns[j].perm[i]; 22513 if (swapped) 22514 elt = (elt >= 16 ? elt - 16 : elt + 16); 22515 else if (one_vec && elt >= 16) 22516 elt -= 16; 22517 if (perm[i] != elt) 22518 break; 22519 } 22520 if (i == 16) 22521 { 22522 enum insn_code icode = patterns[j].impl; 22523 machine_mode omode = insn_data[icode].operand[0].mode; 22524 machine_mode imode = insn_data[icode].operand[1].mode; 22525 22526 /* For little-endian, don't use vpkuwum and vpkuhum if the 22527 underlying vector type is not V4SI and V8HI, respectively. 22528 For example, using vpkuwum with a V8HI picks up the even 22529 halfwords (BE numbering) when the even halfwords (LE 22530 numbering) are what we need. */ 22531 if (!BYTES_BIG_ENDIAN 22532 && icode == CODE_FOR_altivec_vpkuwum_direct 22533 && ((REG_P (op0) 22534 && GET_MODE (op0) != V4SImode) 22535 || (SUBREG_P (op0) 22536 && GET_MODE (XEXP (op0, 0)) != V4SImode))) 22537 continue; 22538 if (!BYTES_BIG_ENDIAN 22539 && icode == CODE_FOR_altivec_vpkuhum_direct 22540 && ((REG_P (op0) 22541 && GET_MODE (op0) != V8HImode) 22542 || (SUBREG_P (op0) 22543 && GET_MODE (XEXP (op0, 0)) != V8HImode))) 22544 continue; 22545 22546 /* For little-endian, the two input operands must be swapped 22547 (or swapped back) to ensure proper right-to-left numbering 22548 from 0 to 2N-1. */ 22549 if (swapped ^ !BYTES_BIG_ENDIAN) 22550 std::swap (op0, op1); 22551 if (imode != V16QImode) 22552 { 22553 op0 = gen_lowpart (imode, op0); 22554 op1 = gen_lowpart (imode, op1); 22555 } 22556 if (omode == V16QImode) 22557 x = target; 22558 else 22559 x = gen_reg_rtx (omode); 22560 emit_insn (GEN_FCN (icode) (x, op0, op1)); 22561 if (omode != V16QImode) 22562 emit_move_insn (target, gen_lowpart (V16QImode, x)); 22563 return true; 22564 } 22565 } 22566 22567 if (!BYTES_BIG_ENDIAN) 22568 { 22569 altivec_expand_vec_perm_const_le (target, op0, op1, sel); 22570 return true; 22571 } 22572 22573 return false; 22574} 22575 22576/* Expand a VSX Permute Doubleword constant permutation. 22577 Return true if we match an efficient implementation. */ 22578 22579static bool 22580rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1, 22581 unsigned char perm0, unsigned char perm1) 22582{ 22583 rtx x; 22584 22585 /* If both selectors come from the same operand, fold to single op. */ 22586 if ((perm0 & 2) == (perm1 & 2)) 22587 { 22588 if (perm0 & 2) 22589 op0 = op1; 22590 else 22591 op1 = op0; 22592 } 22593 /* If both operands are equal, fold to simpler permutation. */ 22594 if (rtx_equal_p (op0, op1)) 22595 { 22596 perm0 = perm0 & 1; 22597 perm1 = (perm1 & 1) + 2; 22598 } 22599 /* If the first selector comes from the second operand, swap. */ 22600 else if (perm0 & 2) 22601 { 22602 if (perm1 & 2) 22603 return false; 22604 perm0 -= 2; 22605 perm1 += 2; 22606 std::swap (op0, op1); 22607 } 22608 /* If the second selector does not come from the second operand, fail. */ 22609 else if ((perm1 & 2) == 0) 22610 return false; 22611 22612 /* Success! 
*/ 22613 if (target != NULL) 22614 { 22615 machine_mode vmode, dmode; 22616 rtvec v; 22617 22618 vmode = GET_MODE (target); 22619 gcc_assert (GET_MODE_NUNITS (vmode) == 2); 22620 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require (); 22621 x = gen_rtx_VEC_CONCAT (dmode, op0, op1); 22622 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1)); 22623 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v)); 22624 emit_insn (gen_rtx_SET (target, x)); 22625 } 22626 return true; 22627} 22628 22629/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ 22630 22631static bool 22632rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, 22633 rtx op1, const vec_perm_indices &sel) 22634{ 22635 bool testing_p = !target; 22636 22637 /* AltiVec (and thus VSX) can handle arbitrary permutations. */ 22638 if (TARGET_ALTIVEC && testing_p) 22639 return true; 22640 22641 /* Check for ps_merge* or xxpermdi insns. */ 22642 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode)) 22643 { 22644 if (testing_p) 22645 { 22646 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1); 22647 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2); 22648 } 22649 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1])) 22650 return true; 22651 } 22652 22653 if (TARGET_ALTIVEC) 22654 { 22655 /* Force the target-independent code to lower to V16QImode. */ 22656 if (vmode != V16QImode) 22657 return false; 22658 if (altivec_expand_vec_perm_const (target, op0, op1, sel)) 22659 return true; 22660 } 22661 22662 return false; 22663} 22664 22665/* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. 22666 OP0 and OP1 are the input vectors and TARGET is the output vector. 22667 PERM specifies the constant permutation vector. */ 22668 22669static void 22670rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, 22671 machine_mode vmode, const vec_perm_builder &perm) 22672{ 22673 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); 22674 if (x != target) 22675 emit_move_insn (target, x); 22676} 22677 22678/* Expand an extract even operation. */ 22679 22680void 22681rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) 22682{ 22683 machine_mode vmode = GET_MODE (target); 22684 unsigned i, nelt = GET_MODE_NUNITS (vmode); 22685 vec_perm_builder perm (nelt, nelt, 1); 22686 22687 for (i = 0; i < nelt; i++) 22688 perm.quick_push (i * 2); 22689 22690 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); 22691} 22692 22693/* Expand a vector interleave operation. */ 22694 22695void 22696rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) 22697{ 22698 machine_mode vmode = GET_MODE (target); 22699 unsigned i, high, nelt = GET_MODE_NUNITS (vmode); 22700 vec_perm_builder perm (nelt, nelt, 1); 22701 22702 high = (highp ? 0 : nelt / 2); 22703 for (i = 0; i < nelt / 2; i++) 22704 { 22705 perm.quick_push (i + high); 22706 perm.quick_push (i + nelt + high); 22707 } 22708 22709 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); 22710} 22711 22712/* Scale a V2DF vector SRC by two to the SCALE and place in TGT. 
*/ 22713void 22714rs6000_scale_v2df (rtx tgt, rtx src, int scale) 22715{ 22716 HOST_WIDE_INT hwi_scale (scale); 22717 REAL_VALUE_TYPE r_pow; 22718 rtvec v = rtvec_alloc (2); 22719 rtx elt; 22720 rtx scale_vec = gen_reg_rtx (V2DFmode); 22721 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale); 22722 elt = const_double_from_real_value (r_pow, DFmode); 22723 RTVEC_ELT (v, 0) = elt; 22724 RTVEC_ELT (v, 1) = elt; 22725 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v)); 22726 emit_insn (gen_mulv2df3 (tgt, src, scale_vec)); 22727} 22728 22729/* Return an RTX representing where to find the function value of a 22730 function returning MODE. */ 22731static rtx 22732rs6000_complex_function_value (machine_mode mode) 22733{ 22734 unsigned int regno; 22735 rtx r1, r2; 22736 machine_mode inner = GET_MODE_INNER (mode); 22737 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode); 22738 22739 if (TARGET_FLOAT128_TYPE 22740 && (mode == KCmode 22741 || (mode == TCmode && TARGET_IEEEQUAD))) 22742 regno = ALTIVEC_ARG_RETURN; 22743 22744 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 22745 regno = FP_ARG_RETURN; 22746 22747 else 22748 { 22749 regno = GP_ARG_RETURN; 22750 22751 /* 32-bit is OK since it'll go in r3/r4. */ 22752 if (TARGET_32BIT && inner_bytes >= 4) 22753 return gen_rtx_REG (mode, regno); 22754 } 22755 22756 if (inner_bytes >= 8) 22757 return gen_rtx_REG (mode, regno); 22758 22759 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno), 22760 const0_rtx); 22761 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1), 22762 GEN_INT (inner_bytes)); 22763 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2)); 22764} 22765 22766/* Return an rtx describing a return value of MODE as a PARALLEL 22767 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO, 22768 stride REG_STRIDE. */ 22769 22770static rtx 22771rs6000_parallel_return (machine_mode mode, 22772 int n_elts, machine_mode elt_mode, 22773 unsigned int regno, unsigned int reg_stride) 22774{ 22775 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); 22776 22777 int i; 22778 for (i = 0; i < n_elts; i++) 22779 { 22780 rtx r = gen_rtx_REG (elt_mode, regno); 22781 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); 22782 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); 22783 regno += reg_stride; 22784 } 22785 22786 return par; 22787} 22788 22789/* Target hook for TARGET_FUNCTION_VALUE. 22790 22791 An integer value is in r3 and a floating-point value is in fp1, 22792 unless -msoft-float. */ 22793 22794static rtx 22795rs6000_function_value (const_tree valtype, 22796 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 22797 bool outgoing ATTRIBUTE_UNUSED) 22798{ 22799 machine_mode mode; 22800 unsigned int regno; 22801 machine_mode elt_mode; 22802 int n_elts; 22803 22804 /* Special handling for structs in darwin64. */ 22805 if (TARGET_MACHO 22806 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype)) 22807 { 22808 CUMULATIVE_ARGS valcum; 22809 rtx valret; 22810 22811 valcum.words = 0; 22812 valcum.fregno = FP_ARG_MIN_REG; 22813 valcum.vregno = ALTIVEC_ARG_MIN_REG; 22814 /* Do a trial code generation as if this were going to be passed as 22815 an argument; if any part goes in memory, we return NULL. */ 22816 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true); 22817 if (valret) 22818 return valret; 22819 /* Otherwise fall through to standard ABI rules. 
*/ 22820 } 22821 22822 mode = TYPE_MODE (valtype); 22823 22824 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */ 22825 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts)) 22826 { 22827 int first_reg, n_regs; 22828 22829 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode)) 22830 { 22831 /* _Decimal128 must use even/odd register pairs. */ 22832 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 22833 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; 22834 } 22835 else 22836 { 22837 first_reg = ALTIVEC_ARG_RETURN; 22838 n_regs = 1; 22839 } 22840 22841 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs); 22842 } 22843 22844 /* Some return value types need be split in -mpowerpc64, 32bit ABI. */ 22845 if (TARGET_32BIT && TARGET_POWERPC64) 22846 switch (mode) 22847 { 22848 default: 22849 break; 22850 case E_DImode: 22851 case E_SCmode: 22852 case E_DCmode: 22853 case E_TCmode: 22854 int count = GET_MODE_SIZE (mode) / 4; 22855 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1); 22856 } 22857 22858 if ((INTEGRAL_TYPE_P (valtype) 22859 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64)) 22860 || POINTER_TYPE_P (valtype)) 22861 mode = TARGET_32BIT ? SImode : DImode; 22862 22863 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 22864 /* _Decimal128 must use an even/odd register pair. */ 22865 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 22866 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT 22867 && !FLOAT128_VECTOR_P (mode)) 22868 regno = FP_ARG_RETURN; 22869 else if (TREE_CODE (valtype) == COMPLEX_TYPE 22870 && targetm.calls.split_complex_arg) 22871 return rs6000_complex_function_value (mode); 22872 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same 22873 return register is used in both cases, and we won't see V2DImode/V2DFmode 22874 for pure altivec, combine the two cases. */ 22875 else if ((TREE_CODE (valtype) == VECTOR_TYPE || VECTOR_ALIGNMENT_P (mode)) 22876 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI 22877 && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) 22878 regno = ALTIVEC_ARG_RETURN; 22879 else 22880 regno = GP_ARG_RETURN; 22881 22882 return gen_rtx_REG (mode, regno); 22883} 22884 22885/* Define how to find the value returned by a library function 22886 assuming the value has mode MODE. */ 22887rtx 22888rs6000_libcall_value (machine_mode mode) 22889{ 22890 unsigned int regno; 22891 22892 /* Long long return value need be split in -mpowerpc64, 32bit ABI. */ 22893 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode) 22894 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1); 22895 22896 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) 22897 /* _Decimal128 must use an even/odd register pair. */ 22898 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; 22899 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT) 22900 regno = FP_ARG_RETURN; 22901 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same 22902 return register is used in both cases, and we won't see V2DImode/V2DFmode 22903 for pure altivec, combine the two cases. 
*/
22904 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
22905 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
22906 regno = ALTIVEC_ARG_RETURN;
22907 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
22908 return rs6000_complex_function_value (mode);
22909 else
22910 regno = GP_ARG_RETURN;
22911
22912 return gen_rtx_REG (mode, regno);
22913}
22914
22915/* Compute register pressure classes. We implement the target hook to avoid
22916 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
22917 lead to incorrect estimates of number of available registers and therefore
22918 increased register pressure/spill. */
22919static int
22920rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
22921{
22922 int n;
22923
22924 n = 0;
22925 pressure_classes[n++] = GENERAL_REGS;
22926 if (TARGET_VSX)
22927 pressure_classes[n++] = VSX_REGS;
22928 else
22929 {
22930 if (TARGET_ALTIVEC)
22931 pressure_classes[n++] = ALTIVEC_REGS;
22932 if (TARGET_HARD_FLOAT)
22933 pressure_classes[n++] = FLOAT_REGS;
22934 }
22935 pressure_classes[n++] = CR_REGS;
22936 pressure_classes[n++] = SPECIAL_REGS;
22937
22938 return n;
22939}
22940
22941/* Given FROM and TO register numbers, say whether this elimination is allowed.
22942 Frame pointer elimination is automatically handled.
22943
22944 For the RS/6000, if frame pointer elimination is being done, we would like
22945 to convert ap into fp, not sp.
22946
22947 We need r30 if -mminimal-toc was specified, and there are constant pool
22948 references. */
22949
22950static bool
22951rs6000_can_eliminate (const int from, const int to)
22952{
22953 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
22954 ? ! frame_pointer_needed
22955 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
22956 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
22957 || constant_pool_empty_p ()
22958 : true);
22959}
22960
22961/* Define the offset between two registers, FROM to be eliminated and its
22962 replacement TO, at the start of a routine. */
22963HOST_WIDE_INT
22964rs6000_initial_elimination_offset (int from, int to)
22965{
22966 rs6000_stack_t *info = rs6000_stack_info ();
22967 HOST_WIDE_INT offset;
22968
22969 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22970 offset = info->push_p ? 0 : -info->total_size;
22971 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22972 {
22973 offset = info->push_p ? 0 : -info->total_size;
22974 if (FRAME_GROWS_DOWNWARD)
22975 offset += info->fixed_size + info->vars_size + info->parm_size;
22976 }
22977 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22978 offset = FRAME_GROWS_DOWNWARD
22979 ? info->fixed_size + info->vars_size + info->parm_size
22980 : 0;
22981 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
22982 offset = info->total_size;
22983 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
22984 offset = info->push_p ? info->total_size : 0;
22985 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
22986 offset = 0;
22987 else
22988 gcc_unreachable ();
22989
22990 return offset;
22991}
22992
22993/* Fill in sizes of registers used by unwinder. */
22994
22995static void
22996rs6000_init_dwarf_reg_sizes_extra (tree address)
22997{
22998 if (TARGET_MACHO && !
TARGET_ALTIVEC) 22999 { 23000 int i; 23001 machine_mode mode = TYPE_MODE (char_type_node); 23002 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); 23003 rtx mem = gen_rtx_MEM (BLKmode, addr); 23004 rtx value = gen_int_mode (16, mode); 23005 23006 /* On Darwin, libgcc may be built to run on both G3 and G4/5. 23007 The unwinder still needs to know the size of Altivec registers. */ 23008 23009 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) 23010 { 23011 int column = DWARF_REG_TO_UNWIND_COLUMN 23012 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); 23013 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); 23014 23015 emit_move_insn (adjust_address (mem, mode, offset), value); 23016 } 23017 } 23018} 23019 23020/* Map internal gcc register numbers to debug format register numbers. 23021 FORMAT specifies the type of debug register number to use: 23022 0 -- debug information, except for frame-related sections 23023 1 -- DWARF .debug_frame section 23024 2 -- DWARF .eh_frame section */ 23025 23026unsigned int 23027rs6000_dbx_register_number (unsigned int regno, unsigned int format) 23028{ 23029 /* On some platforms, we use the standard DWARF register 23030 numbering for .debug_info and .debug_frame. */ 23031 if ((format == 0 && write_symbols == DWARF2_DEBUG) || format == 1) 23032 { 23033#ifdef RS6000_USE_DWARF_NUMBERING 23034 if (regno <= 31) 23035 return regno; 23036 if (FP_REGNO_P (regno)) 23037 return regno - FIRST_FPR_REGNO + 32; 23038 if (ALTIVEC_REGNO_P (regno)) 23039 return regno - FIRST_ALTIVEC_REGNO + 1124; 23040 if (regno == LR_REGNO) 23041 return 108; 23042 if (regno == CTR_REGNO) 23043 return 109; 23044 if (regno == CA_REGNO) 23045 return 101; /* XER */ 23046 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has 23047 translated any combination of CR2, CR3, CR4 saves to a save of CR2. 23048 The actual code emitted saves the whole of CR, so we map CR2_REGNO 23049 to the DWARF reg for CR. */ 23050 if (format == 1 && regno == CR2_REGNO) 23051 return 64; 23052 if (CR_REGNO_P (regno)) 23053 return regno - CR0_REGNO + 86; 23054 if (regno == VRSAVE_REGNO) 23055 return 356; 23056 if (regno == VSCR_REGNO) 23057 return 67; 23058 23059 /* These do not make much sense. */ 23060 if (regno == FRAME_POINTER_REGNUM) 23061 return 111; 23062 if (regno == ARG_POINTER_REGNUM) 23063 return 67; 23064 if (regno == 64) 23065 return 100; 23066 23067 gcc_unreachable (); 23068#endif 23069 } 23070 23071 /* We use the GCC 7 (and before) internal number for non-DWARF debug 23072 information, and also for .eh_frame. */ 23073 /* Translate the regnos to their numbers in GCC 7 (and before). */ 23074 if (regno <= 31) 23075 return regno; 23076 if (FP_REGNO_P (regno)) 23077 return regno - FIRST_FPR_REGNO + 32; 23078 if (ALTIVEC_REGNO_P (regno)) 23079 return regno - FIRST_ALTIVEC_REGNO + 77; 23080 if (regno == LR_REGNO) 23081 return 65; 23082 if (regno == CTR_REGNO) 23083 return 66; 23084 if (regno == CA_REGNO) 23085 return 76; /* XER */ 23086 if (CR_REGNO_P (regno)) 23087 return regno - CR0_REGNO + 68; 23088 if (regno == VRSAVE_REGNO) 23089 return 109; 23090 if (regno == VSCR_REGNO) 23091 return 110; 23092 23093 if (regno == FRAME_POINTER_REGNUM) 23094 return 111; 23095 if (regno == ARG_POINTER_REGNUM) 23096 return 67; 23097 if (regno == 64) 23098 return 64; 23099 23100 gcc_unreachable (); 23101} 23102 23103/* target hook eh_return_filter_mode */ 23104static scalar_int_mode 23105rs6000_eh_return_filter_mode (void) 23106{ 23107 return TARGET_32BIT ? 
SImode : word_mode;
23108}
23109
23110/* Target hook for translate_mode_attribute. */
23111static machine_mode
23112rs6000_translate_mode_attribute (machine_mode mode)
23113{
23114 if ((FLOAT128_IEEE_P (mode)
23115 && ieee128_float_type_node == long_double_type_node)
23116 || (FLOAT128_IBM_P (mode)
23117 && ibm128_float_type_node == long_double_type_node))
23118 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
23119 return mode;
23120}
23121
23122/* Target hook for scalar_mode_supported_p. */
23123static bool
23124rs6000_scalar_mode_supported_p (scalar_mode mode)
23125{
23126 /* -m32 does not support TImode. This is the default, from
23127 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
23128 same ABI as for -m32. But default_scalar_mode_supported_p allows
23129 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
23130 for -mpowerpc64. */
23131 if (TARGET_32BIT && mode == TImode)
23132 return false;
23133
23134 if (DECIMAL_FLOAT_MODE_P (mode))
23135 return default_decimal_float_supported_p ();
23136 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
23137 return true;
23138 else
23139 return default_scalar_mode_supported_p (mode);
23140}
23141
23142/* Target hook for vector_mode_supported_p. */
23143static bool
23144rs6000_vector_mode_supported_p (machine_mode mode)
23145{
23146 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
23147 128-bit, the compiler might try to widen IEEE 128-bit to IBM
23148 double-double. */
23149 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
23150 return true;
23151
23152 else
23153 return false;
23154}
23155
23156/* Target hook for floatn_mode. */
23157static opt_scalar_float_mode
23158rs6000_floatn_mode (int n, bool extended)
23159{
23160 if (extended)
23161 {
23162 switch (n)
23163 {
23164 case 32:
23165 return DFmode;
23166
23167 case 64:
23168 if (TARGET_FLOAT128_TYPE)
23169 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23170 else
23171 return opt_scalar_float_mode ();
23172
23173 case 128:
23174 return opt_scalar_float_mode ();
23175
23176 default:
23177 /* Those are the only valid _FloatNx types. */
23178 gcc_unreachable ();
23179 }
23180 }
23181 else
23182 {
23183 switch (n)
23184 {
23185 case 32:
23186 return SFmode;
23187
23188 case 64:
23189 return DFmode;
23190
23191 case 128:
23192 if (TARGET_FLOAT128_TYPE)
23193 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23194 else
23195 return opt_scalar_float_mode ();
23196
23197 default:
23198 return opt_scalar_float_mode ();
23199 }
23200 }
23201
23202}
23203
23204/* Target hook for c_mode_for_suffix. */
23205static machine_mode
23206rs6000_c_mode_for_suffix (char suffix)
23207{
23208 if (TARGET_FLOAT128_TYPE)
23209 {
23210 if (suffix == 'q' || suffix == 'Q')
23211 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
23212
23213 /* At the moment, we are not defining a suffix for IBM extended double.
23214 If/when the default for -mabi=ieeelongdouble is changed, and we want
23215 to support __ibm128 constants in legacy library code, we may need to
23216 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
23217 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
23218 __float80 constants. */
23219 }
23220
23221 return VOIDmode;
23222}
23223
23224/* Target hook for invalid_arg_for_unprototyped_fn.
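   For example (outside the Darwin64 ABI), passing a vector int argument to a
   function that was declared without a prototype yields the diagnostic string
   below; otherwise NULL is returned and no diagnostic is issued.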
*/ 23225static const char * 23226invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) 23227{ 23228 return (!rs6000_darwin64_abi 23229 && typelist == 0 23230 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE 23231 && (funcdecl == NULL_TREE 23232 || (TREE_CODE (funcdecl) == FUNCTION_DECL 23233 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) 23234 ? N_("AltiVec argument passed to unprototyped function") 23235 : NULL; 23236} 23237 23238/* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register 23239 setup by using __stack_chk_fail_local hidden function instead of 23240 calling __stack_chk_fail directly. Otherwise it is better to call 23241 __stack_chk_fail directly. */ 23242 23243static tree ATTRIBUTE_UNUSED 23244rs6000_stack_protect_fail (void) 23245{ 23246 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) 23247 ? default_hidden_stack_protect_fail () 23248 : default_external_stack_protect_fail (); 23249} 23250 23251/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ 23252 23253#if TARGET_ELF 23254static unsigned HOST_WIDE_INT 23255rs6000_asan_shadow_offset (void) 23256{ 23257 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29); 23258} 23259#endif 23260 23261/* Mask options that we want to support inside of attribute((target)) and 23262 #pragma GCC target operations. Note, we do not include things like 23263 64/32-bit, endianness, hard/soft floating point, etc. that would have 23264 different calling sequences. */ 23265 23266struct rs6000_opt_mask { 23267 const char *name; /* option name */ 23268 HOST_WIDE_INT mask; /* mask to set */ 23269 bool invert; /* invert sense of mask */ 23270 bool valid_target; /* option is a target option */ 23271}; 23272 23273static struct rs6000_opt_mask const rs6000_opt_masks[] = 23274{ 23275 { "altivec", OPTION_MASK_ALTIVEC, false, true }, 23276 { "cmpb", OPTION_MASK_CMPB, false, true }, 23277 { "crypto", OPTION_MASK_CRYPTO, false, true }, 23278 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, 23279 { "dlmzb", OPTION_MASK_DLMZB, false, true }, 23280 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, 23281 false, true }, 23282 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true }, 23283 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true }, 23284 { "fprnd", OPTION_MASK_FPRND, false, true }, 23285 { "power10", OPTION_MASK_POWER10, false, true }, 23286 { "hard-dfp", OPTION_MASK_DFP, false, true }, 23287 { "htm", OPTION_MASK_HTM, false, true }, 23288 { "isel", OPTION_MASK_ISEL, false, true }, 23289 { "mfcrf", OPTION_MASK_MFCRF, false, true }, 23290 { "mfpgpr", 0, false, true }, 23291 { "mma", OPTION_MASK_MMA, false, true }, 23292 { "modulo", OPTION_MASK_MODULO, false, true }, 23293 { "mulhw", OPTION_MASK_MULHW, false, true }, 23294 { "multiple", OPTION_MASK_MULTIPLE, false, true }, 23295 { "pcrel", OPTION_MASK_PCREL, false, true }, 23296 { "popcntb", OPTION_MASK_POPCNTB, false, true }, 23297 { "popcntd", OPTION_MASK_POPCNTD, false, true }, 23298 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, 23299 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, 23300 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, 23301 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, 23302 { "power9-misc", OPTION_MASK_P9_MISC, false, true }, 23303 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, 23304 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, 23305 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, 23306 { "prefixed", 
OPTION_MASK_PREFIXED, false, true }, 23307 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, 23308 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, 23309 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, 23310 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, 23311 { "string", 0, false, true }, 23312 { "update", OPTION_MASK_NO_UPDATE, true , true }, 23313 { "vsx", OPTION_MASK_VSX, false, true }, 23314#ifdef OPTION_MASK_64BIT 23315#if TARGET_AIX_OS 23316 { "aix64", OPTION_MASK_64BIT, false, false }, 23317 { "aix32", OPTION_MASK_64BIT, true, false }, 23318#else 23319 { "64", OPTION_MASK_64BIT, false, false }, 23320 { "32", OPTION_MASK_64BIT, true, false }, 23321#endif 23322#endif 23323#ifdef OPTION_MASK_EABI 23324 { "eabi", OPTION_MASK_EABI, false, false }, 23325#endif 23326#ifdef OPTION_MASK_LITTLE_ENDIAN 23327 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false }, 23328 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false }, 23329#endif 23330#ifdef OPTION_MASK_RELOCATABLE 23331 { "relocatable", OPTION_MASK_RELOCATABLE, false, false }, 23332#endif 23333#ifdef OPTION_MASK_STRICT_ALIGN 23334 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false }, 23335#endif 23336 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false }, 23337 { "string", 0, false, false }, 23338}; 23339 23340/* Builtin mask mapping for printing the flags. */ 23341static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = 23342{ 23343 { "altivec", RS6000_BTM_ALTIVEC, false, false }, 23344 { "vsx", RS6000_BTM_VSX, false, false }, 23345 { "fre", RS6000_BTM_FRE, false, false }, 23346 { "fres", RS6000_BTM_FRES, false, false }, 23347 { "frsqrte", RS6000_BTM_FRSQRTE, false, false }, 23348 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, 23349 { "popcntd", RS6000_BTM_POPCNTD, false, false }, 23350 { "cell", RS6000_BTM_CELL, false, false }, 23351 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, 23352 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false }, 23353 { "power9-misc", RS6000_BTM_P9_MISC, false, false }, 23354 { "crypto", RS6000_BTM_CRYPTO, false, false }, 23355 { "htm", RS6000_BTM_HTM, false, false }, 23356 { "hard-dfp", RS6000_BTM_DFP, false, false }, 23357 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, 23358 { "long-double-128", RS6000_BTM_LDBL128, false, false }, 23359 { "powerpc64", RS6000_BTM_POWERPC64, false, false }, 23360 { "float128", RS6000_BTM_FLOAT128, false, false }, 23361 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false }, 23362 { "mma", RS6000_BTM_MMA, false, false }, 23363 { "power10", RS6000_BTM_P10, false, false }, 23364}; 23365 23366/* Option variables that we want to support inside attribute((target)) and 23367 #pragma GCC target operations. */ 23368 23369struct rs6000_opt_var { 23370 const char *name; /* option name */ 23371 size_t global_offset; /* offset of the option in global_options. */ 23372 size_t target_offset; /* offset of the option in target options. 
*/ 23373}; 23374 23375static struct rs6000_opt_var const rs6000_opt_vars[] = 23376{ 23377 { "friz", 23378 offsetof (struct gcc_options, x_TARGET_FRIZ), 23379 offsetof (struct cl_target_option, x_TARGET_FRIZ), }, 23380 { "avoid-indexed-addresses", 23381 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM), 23382 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) }, 23383 { "longcall", 23384 offsetof (struct gcc_options, x_rs6000_default_long_calls), 23385 offsetof (struct cl_target_option, x_rs6000_default_long_calls), }, 23386 { "optimize-swaps", 23387 offsetof (struct gcc_options, x_rs6000_optimize_swaps), 23388 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), }, 23389 { "allow-movmisalign", 23390 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), 23391 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, 23392 { "sched-groups", 23393 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), 23394 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, 23395 { "always-hint", 23396 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT), 23397 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), }, 23398 { "align-branch-targets", 23399 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS), 23400 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), }, 23401 { "sched-prolog", 23402 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), 23403 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, 23404 { "sched-epilog", 23405 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), 23406 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, 23407 { "speculate-indirect-jumps", 23408 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps), 23409 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), }, 23410}; 23411 23412/* Inner function to handle attribute((target("..."))) and #pragma GCC target 23413 parsing. Return true if there were no errors. 
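   The string argument is a comma separated list, e.g. "cpu=power9,htm" or
   "no-vsx"; each element is either cpu=<name>, tune=<name>, one of the
   rs6000_opt_masks entries (optionally prefixed with "no-"), or one of the
   rs6000_opt_vars entries.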
*/ 23414 23415static bool 23416rs6000_inner_target_options (tree args, bool attr_p) 23417{ 23418 bool ret = true; 23419 23420 if (args == NULL_TREE) 23421 ; 23422 23423 else if (TREE_CODE (args) == STRING_CST) 23424 { 23425 char *p = ASTRDUP (TREE_STRING_POINTER (args)); 23426 char *q; 23427 23428 while ((q = strtok (p, ",")) != NULL) 23429 { 23430 bool error_p = false; 23431 bool not_valid_p = false; 23432 const char *cpu_opt = NULL; 23433 23434 p = NULL; 23435 if (strncmp (q, "cpu=", 4) == 0) 23436 { 23437 int cpu_index = rs6000_cpu_name_lookup (q+4); 23438 if (cpu_index >= 0) 23439 rs6000_cpu_index = cpu_index; 23440 else 23441 { 23442 error_p = true; 23443 cpu_opt = q+4; 23444 } 23445 } 23446 else if (strncmp (q, "tune=", 5) == 0) 23447 { 23448 int tune_index = rs6000_cpu_name_lookup (q+5); 23449 if (tune_index >= 0) 23450 rs6000_tune_index = tune_index; 23451 else 23452 { 23453 error_p = true; 23454 cpu_opt = q+5; 23455 } 23456 } 23457 else 23458 { 23459 size_t i; 23460 bool invert = false; 23461 char *r = q; 23462 23463 error_p = true; 23464 if (strncmp (r, "no-", 3) == 0) 23465 { 23466 invert = true; 23467 r += 3; 23468 } 23469 23470 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++) 23471 if (strcmp (r, rs6000_opt_masks[i].name) == 0) 23472 { 23473 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask; 23474 23475 if (!rs6000_opt_masks[i].valid_target) 23476 not_valid_p = true; 23477 else 23478 { 23479 error_p = false; 23480 rs6000_isa_flags_explicit |= mask; 23481 23482 /* VSX needs altivec, so -mvsx automagically sets 23483 altivec and disables -mavoid-indexed-addresses. */ 23484 if (!invert) 23485 { 23486 if (mask == OPTION_MASK_VSX) 23487 { 23488 mask |= OPTION_MASK_ALTIVEC; 23489 TARGET_AVOID_XFORM = 0; 23490 } 23491 } 23492 23493 if (rs6000_opt_masks[i].invert) 23494 invert = !invert; 23495 23496 if (invert) 23497 rs6000_isa_flags &= ~mask; 23498 else 23499 rs6000_isa_flags |= mask; 23500 } 23501 break; 23502 } 23503 23504 if (error_p && !not_valid_p) 23505 { 23506 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++) 23507 if (strcmp (r, rs6000_opt_vars[i].name) == 0) 23508 { 23509 size_t j = rs6000_opt_vars[i].global_offset; 23510 *((int *) ((char *)&global_options + j)) = !invert; 23511 error_p = false; 23512 not_valid_p = false; 23513 break; 23514 } 23515 } 23516 } 23517 23518 if (error_p) 23519 { 23520 const char *eprefix, *esuffix; 23521 23522 ret = false; 23523 if (attr_p) 23524 { 23525 eprefix = "__attribute__((__target__("; 23526 esuffix = ")))"; 23527 } 23528 else 23529 { 23530 eprefix = "#pragma GCC target "; 23531 esuffix = ""; 23532 } 23533 23534 if (cpu_opt) 23535 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix, 23536 q, esuffix); 23537 else if (not_valid_p) 23538 error ("%s%qs%s is not allowed", eprefix, q, esuffix); 23539 else 23540 error ("%s%qs%s is invalid", eprefix, q, esuffix); 23541 } 23542 } 23543 } 23544 23545 else if (TREE_CODE (args) == TREE_LIST) 23546 { 23547 do 23548 { 23549 tree value = TREE_VALUE (args); 23550 if (value) 23551 { 23552 bool ret2 = rs6000_inner_target_options (value, attr_p); 23553 if (!ret2) 23554 ret = false; 23555 } 23556 args = TREE_CHAIN (args); 23557 } 23558 while (args != NULL_TREE); 23559 } 23560 23561 else 23562 { 23563 error ("attribute %<target%> argument not a string"); 23564 return false; 23565 } 23566 23567 return ret; 23568} 23569 23570/* Print out the target options as a list for -mdebug=target. 
*/ 23571 23572static void 23573rs6000_debug_target_options (tree args, const char *prefix) 23574{ 23575 if (args == NULL_TREE) 23576 fprintf (stderr, "%s<NULL>", prefix); 23577 23578 else if (TREE_CODE (args) == STRING_CST) 23579 { 23580 char *p = ASTRDUP (TREE_STRING_POINTER (args)); 23581 char *q; 23582 23583 while ((q = strtok (p, ",")) != NULL) 23584 { 23585 p = NULL; 23586 fprintf (stderr, "%s\"%s\"", prefix, q); 23587 prefix = ", "; 23588 } 23589 } 23590 23591 else if (TREE_CODE (args) == TREE_LIST) 23592 { 23593 do 23594 { 23595 tree value = TREE_VALUE (args); 23596 if (value) 23597 { 23598 rs6000_debug_target_options (value, prefix); 23599 prefix = ", "; 23600 } 23601 args = TREE_CHAIN (args); 23602 } 23603 while (args != NULL_TREE); 23604 } 23605 23606 else 23607 gcc_unreachable (); 23608 23609 return; 23610} 23611 23612 23613/* Hook to validate attribute((target("..."))). */ 23614 23615static bool 23616rs6000_valid_attribute_p (tree fndecl, 23617 tree ARG_UNUSED (name), 23618 tree args, 23619 int flags) 23620{ 23621 struct cl_target_option cur_target; 23622 bool ret; 23623 tree old_optimize; 23624 tree new_target, new_optimize; 23625 tree func_optimize; 23626 23627 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); 23628 23629 if (TARGET_DEBUG_TARGET) 23630 { 23631 tree tname = DECL_NAME (fndecl); 23632 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n"); 23633 if (tname) 23634 fprintf (stderr, "function: %.*s\n", 23635 (int) IDENTIFIER_LENGTH (tname), 23636 IDENTIFIER_POINTER (tname)); 23637 else 23638 fprintf (stderr, "function: unknown\n"); 23639 23640 fprintf (stderr, "args:"); 23641 rs6000_debug_target_options (args, " "); 23642 fprintf (stderr, "\n"); 23643 23644 if (flags) 23645 fprintf (stderr, "flags: 0x%x\n", flags); 23646 23647 fprintf (stderr, "--------------------\n"); 23648 } 23649 23650 /* attribute((target("default"))) does nothing, beyond 23651 affecting multi-versioning. */ 23652 if (TREE_VALUE (args) 23653 && TREE_CODE (TREE_VALUE (args)) == STRING_CST 23654 && TREE_CHAIN (args) == NULL_TREE 23655 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) 23656 return true; 23657 23658 old_optimize = build_optimization_node (&global_options); 23659 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); 23660 23661 /* If the function changed the optimization levels as well as setting target 23662 options, start with the optimizations specified. */ 23663 if (func_optimize && func_optimize != old_optimize) 23664 cl_optimization_restore (&global_options, 23665 TREE_OPTIMIZATION (func_optimize)); 23666 23667 /* The target attributes may also change some optimization flags, so update 23668 the optimization options if necessary. */ 23669 cl_target_option_save (&cur_target, &global_options); 23670 rs6000_cpu_index = rs6000_tune_index = -1; 23671 ret = rs6000_inner_target_options (args, true); 23672 23673 /* Set up any additional state. 
*/ 23674 if (ret) 23675 { 23676 ret = rs6000_option_override_internal (false); 23677 new_target = build_target_option_node (&global_options); 23678 } 23679 else 23680 new_target = NULL; 23681 23682 new_optimize = build_optimization_node (&global_options); 23683 23684 if (!new_target) 23685 ret = false; 23686 23687 else if (fndecl) 23688 { 23689 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; 23690 23691 if (old_optimize != new_optimize) 23692 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; 23693 } 23694 23695 cl_target_option_restore (&global_options, &cur_target); 23696 23697 if (old_optimize != new_optimize) 23698 cl_optimization_restore (&global_options, 23699 TREE_OPTIMIZATION (old_optimize)); 23700 23701 return ret; 23702} 23703 23704 23705/* Hook to validate the current #pragma GCC target and set the state, and 23706 update the macros based on what was changed. If ARGS is NULL, then 23707 POP_TARGET is used to reset the options. */ 23708 23709bool 23710rs6000_pragma_target_parse (tree args, tree pop_target) 23711{ 23712 tree prev_tree = build_target_option_node (&global_options); 23713 tree cur_tree; 23714 struct cl_target_option *prev_opt, *cur_opt; 23715 HOST_WIDE_INT prev_flags, cur_flags, diff_flags; 23716 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask; 23717 23718 if (TARGET_DEBUG_TARGET) 23719 { 23720 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n"); 23721 fprintf (stderr, "args:"); 23722 rs6000_debug_target_options (args, " "); 23723 fprintf (stderr, "\n"); 23724 23725 if (pop_target) 23726 { 23727 fprintf (stderr, "pop_target:\n"); 23728 debug_tree (pop_target); 23729 } 23730 else 23731 fprintf (stderr, "pop_target: <NULL>\n"); 23732 23733 fprintf (stderr, "--------------------\n"); 23734 } 23735 23736 if (! args) 23737 { 23738 cur_tree = ((pop_target) 23739 ? pop_target 23740 : target_option_default_node); 23741 cl_target_option_restore (&global_options, 23742 TREE_TARGET_OPTION (cur_tree)); 23743 } 23744 else 23745 { 23746 rs6000_cpu_index = rs6000_tune_index = -1; 23747 if (!rs6000_inner_target_options (args, false) 23748 || !rs6000_option_override_internal (false) 23749 || (cur_tree = build_target_option_node (&global_options)) 23750 == NULL_TREE) 23751 { 23752 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) 23753 fprintf (stderr, "invalid pragma\n"); 23754 23755 return false; 23756 } 23757 } 23758 23759 target_option_current_node = cur_tree; 23760 rs6000_activate_target_options (target_option_current_node); 23761 23762 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly 23763 change the macros that are defined. */ 23764 if (rs6000_target_modify_macros_ptr) 23765 { 23766 prev_opt = TREE_TARGET_OPTION (prev_tree); 23767 prev_bumask = prev_opt->x_rs6000_builtin_mask; 23768 prev_flags = prev_opt->x_rs6000_isa_flags; 23769 23770 cur_opt = TREE_TARGET_OPTION (cur_tree); 23771 cur_flags = cur_opt->x_rs6000_isa_flags; 23772 cur_bumask = cur_opt->x_rs6000_builtin_mask; 23773 23774 diff_bumask = (prev_bumask ^ cur_bumask); 23775 diff_flags = (prev_flags ^ cur_flags); 23776 23777 if ((diff_flags != 0) || (diff_bumask != 0)) 23778 { 23779 /* Delete old macros. */ 23780 rs6000_target_modify_macros_ptr (false, 23781 prev_flags & diff_flags, 23782 prev_bumask & diff_bumask); 23783 23784 /* Define new macros. 
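   For example, turning on VSX via the pragma should cause __VSX__ to be
   defined here, while the delete pass above removes the macros for any bits
   that were turned off.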
*/ 23785 rs6000_target_modify_macros_ptr (true, 23786 cur_flags & diff_flags, 23787 cur_bumask & diff_bumask); 23788 } 23789 } 23790 23791 return true; 23792} 23793 23794 23795/* Remember the last target of rs6000_set_current_function. */ 23796static GTY(()) tree rs6000_previous_fndecl; 23797 23798/* Restore target's globals from NEW_TREE and invalidate the 23799 rs6000_previous_fndecl cache. */ 23800 23801void 23802rs6000_activate_target_options (tree new_tree) 23803{ 23804 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); 23805 if (TREE_TARGET_GLOBALS (new_tree)) 23806 restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); 23807 else if (new_tree == target_option_default_node) 23808 restore_target_globals (&default_target_globals); 23809 else 23810 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); 23811 rs6000_previous_fndecl = NULL_TREE; 23812} 23813 23814/* Establish appropriate back-end context for processing the function 23815 FNDECL. The argument might be NULL to indicate processing at top 23816 level, outside of any function scope. */ 23817static void 23818rs6000_set_current_function (tree fndecl) 23819{ 23820 if (TARGET_DEBUG_TARGET) 23821 { 23822 fprintf (stderr, "\n==================== rs6000_set_current_function"); 23823 23824 if (fndecl) 23825 fprintf (stderr, ", fndecl %s (%p)", 23826 (DECL_NAME (fndecl) 23827 ? IDENTIFIER_POINTER (DECL_NAME (fndecl)) 23828 : "<unknown>"), (void *)fndecl); 23829 23830 if (rs6000_previous_fndecl) 23831 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl); 23832 23833 fprintf (stderr, "\n"); 23834 } 23835 23836 /* Only change the context if the function changes. This hook is called 23837 several times in the course of compiling a function, and we don't want to 23838 slow things down too much or call target_reinit when it isn't safe. 
*/ 23839 if (fndecl == rs6000_previous_fndecl) 23840 return; 23841 23842 tree old_tree; 23843 if (rs6000_previous_fndecl == NULL_TREE) 23844 old_tree = target_option_current_node; 23845 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)) 23846 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl); 23847 else 23848 old_tree = target_option_default_node; 23849 23850 tree new_tree; 23851 if (fndecl == NULL_TREE) 23852 { 23853 if (old_tree != target_option_current_node) 23854 new_tree = target_option_current_node; 23855 else 23856 new_tree = NULL_TREE; 23857 } 23858 else 23859 { 23860 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); 23861 if (new_tree == NULL_TREE) 23862 new_tree = target_option_default_node; 23863 } 23864 23865 if (TARGET_DEBUG_TARGET) 23866 { 23867 if (new_tree) 23868 { 23869 fprintf (stderr, "\nnew fndecl target specific options:\n"); 23870 debug_tree (new_tree); 23871 } 23872 23873 if (old_tree) 23874 { 23875 fprintf (stderr, "\nold fndecl target specific options:\n"); 23876 debug_tree (old_tree); 23877 } 23878 23879 if (old_tree != NULL_TREE || new_tree != NULL_TREE) 23880 fprintf (stderr, "--------------------\n"); 23881 } 23882 23883 if (new_tree && old_tree != new_tree) 23884 rs6000_activate_target_options (new_tree); 23885 23886 if (fndecl) 23887 rs6000_previous_fndecl = fndecl; 23888} 23889 23890 23891/* Save the current options */ 23892 23893static void 23894rs6000_function_specific_save (struct cl_target_option *ptr, 23895 struct gcc_options *opts) 23896{ 23897 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; 23898 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; 23899} 23900 23901/* Restore the current options */ 23902 23903static void 23904rs6000_function_specific_restore (struct gcc_options *opts, 23905 struct cl_target_option *ptr) 23906 23907{ 23908 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; 23909 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; 23910 (void) rs6000_option_override_internal (false); 23911} 23912 23913/* Print the current options */ 23914 23915static void 23916rs6000_function_specific_print (FILE *file, int indent, 23917 struct cl_target_option *ptr) 23918{ 23919 rs6000_print_isa_options (file, indent, "Isa options set", 23920 ptr->x_rs6000_isa_flags); 23921 23922 rs6000_print_isa_options (file, indent, "Isa options explicit", 23923 ptr->x_rs6000_isa_flags_explicit); 23924} 23925 23926/* Helper function to print the current isa or misc options on a line. */ 23927 23928static void 23929rs6000_print_options_internal (FILE *file, 23930 int indent, 23931 const char *string, 23932 HOST_WIDE_INT flags, 23933 const char *prefix, 23934 const struct rs6000_opt_mask *opts, 23935 size_t num_elements) 23936{ 23937 size_t i; 23938 size_t start_column = 0; 23939 size_t cur_column; 23940 size_t max_column = 120; 23941 size_t prefix_len = strlen (prefix); 23942 size_t comma_len = 0; 23943 const char *comma = ""; 23944 23945 if (indent) 23946 start_column += fprintf (file, "%*s", indent, ""); 23947 23948 if (!flags) 23949 { 23950 fprintf (stderr, DEBUG_FMT_S, string, "<none>"); 23951 return; 23952 } 23953 23954 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags); 23955 23956 /* Print the various mask options. 
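   Each mask is printed as PREFIX plus the option name, with "no-" inserted
   when the bit is clear (or set, for inverted options), e.g.
   "-maltivec, -mno-vsx" for the ISA flags; a backslash-newline is emitted
   whenever the line would pass max_column.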
*/ 23957 cur_column = start_column; 23958 for (i = 0; i < num_elements; i++) 23959 { 23960 bool invert = opts[i].invert; 23961 const char *name = opts[i].name; 23962 const char *no_str = ""; 23963 HOST_WIDE_INT mask = opts[i].mask; 23964 size_t len = comma_len + prefix_len + strlen (name); 23965 23966 if (!invert) 23967 { 23968 if ((flags & mask) == 0) 23969 { 23970 no_str = "no-"; 23971 len += strlen ("no-"); 23972 } 23973 23974 flags &= ~mask; 23975 } 23976 23977 else 23978 { 23979 if ((flags & mask) != 0) 23980 { 23981 no_str = "no-"; 23982 len += strlen ("no-"); 23983 } 23984 23985 flags |= mask; 23986 } 23987 23988 cur_column += len; 23989 if (cur_column > max_column) 23990 { 23991 fprintf (stderr, ", \\\n%*s", (int)start_column, ""); 23992 cur_column = start_column + len; 23993 comma = ""; 23994 } 23995 23996 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name); 23997 comma = ", "; 23998 comma_len = strlen (", "); 23999 } 24000 24001 fputs ("\n", file); 24002} 24003 24004/* Helper function to print the current isa options on a line. */ 24005 24006static void 24007rs6000_print_isa_options (FILE *file, int indent, const char *string, 24008 HOST_WIDE_INT flags) 24009{ 24010 rs6000_print_options_internal (file, indent, string, flags, "-m", 24011 &rs6000_opt_masks[0], 24012 ARRAY_SIZE (rs6000_opt_masks)); 24013} 24014 24015static void 24016rs6000_print_builtin_options (FILE *file, int indent, const char *string, 24017 HOST_WIDE_INT flags) 24018{ 24019 rs6000_print_options_internal (file, indent, string, flags, "", 24020 &rs6000_builtin_mask_names[0], 24021 ARRAY_SIZE (rs6000_builtin_mask_names)); 24022} 24023 24024/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06, 24025 2.07, and 3.0 options that relate to the vector unit (-mdirect-move, 24026 -mupper-regs-df, etc.). 24027 24028 If the user used -mno-power8-vector, we need to turn off all of the implicit 24029 ISA 2.07 and 3.0 options that relate to the vector unit. 24030 24031 If the user used -mno-power9-vector, we need to turn off all of the implicit 24032 ISA 3.0 options that relate to the vector unit. 24033 24034 This function does not handle explicit options such as the user specifying 24035 -mdirect-move. These are handled in rs6000_option_override_internal, and 24036 the appropriate error is given if needed. 24037 24038 We return a mask of all of the implicit options that should not be enabled 24039 by default. */ 24040 24041static HOST_WIDE_INT 24042rs6000_disable_incompatible_switches (void) 24043{ 24044 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit; 24045 size_t i, j; 24046 24047 static const struct { 24048 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */ 24049 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */ 24050 const char *const name; /* name of the switch. 
*/ 24051 } flags[] = { 24052 { OPTION_MASK_POWER10, OTHER_POWER10_MASKS, "power10" }, 24053 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" }, 24054 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" }, 24055 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" }, 24056 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" }, 24057 }; 24058 24059 for (i = 0; i < ARRAY_SIZE (flags); i++) 24060 { 24061 HOST_WIDE_INT no_flag = flags[i].no_flag; 24062 24063 if ((rs6000_isa_flags & no_flag) == 0 24064 && (rs6000_isa_flags_explicit & no_flag) != 0) 24065 { 24066 HOST_WIDE_INT dep_flags = flags[i].dep_flags; 24067 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit 24068 & rs6000_isa_flags 24069 & dep_flags); 24070 24071 if (set_flags) 24072 { 24073 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++) 24074 if ((set_flags & rs6000_opt_masks[j].mask) != 0) 24075 { 24076 set_flags &= ~rs6000_opt_masks[j].mask; 24077 error ("%<-mno-%s%> turns off %<-m%s%>", 24078 flags[i].name, 24079 rs6000_opt_masks[j].name); 24080 } 24081 24082 gcc_assert (!set_flags); 24083 } 24084 24085 rs6000_isa_flags &= ~dep_flags; 24086 ignore_masks |= no_flag | dep_flags; 24087 } 24088 } 24089 24090 return ignore_masks; 24091} 24092 24093 24094/* Helper function for printing the function name when debugging. */ 24095 24096static const char * 24097get_decl_name (tree fn) 24098{ 24099 tree name; 24100 24101 if (!fn) 24102 return "<null>"; 24103 24104 name = DECL_NAME (fn); 24105 if (!name) 24106 return "<no-name>"; 24107 24108 return IDENTIFIER_POINTER (name); 24109} 24110 24111/* Return the clone id of the target we are compiling code for in a target 24112 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives 24113 the priority list for the target clones (ordered from lowest to 24114 highest). */ 24115 24116static int 24117rs6000_clone_priority (tree fndecl) 24118{ 24119 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); 24120 HOST_WIDE_INT isa_masks; 24121 int ret = CLONE_DEFAULT; 24122 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl)); 24123 const char *attrs_str = NULL; 24124 24125 attrs = TREE_VALUE (TREE_VALUE (attrs)); 24126 attrs_str = TREE_STRING_POINTER (attrs); 24127 24128 /* Return priority zero for default function. Return the ISA needed for the 24129 function if it is not the default. */ 24130 if (strcmp (attrs_str, "default") != 0) 24131 { 24132 if (fn_opts == NULL_TREE) 24133 fn_opts = target_option_default_node; 24134 24135 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts)) 24136 isa_masks = rs6000_isa_flags; 24137 else 24138 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags; 24139 24140 for (ret = CLONE_MAX - 1; ret != 0; ret--) 24141 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0) 24142 break; 24143 } 24144 24145 if (TARGET_DEBUG_TARGET) 24146 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n", 24147 get_decl_name (fndecl), ret); 24148 24149 return ret; 24150} 24151 24152/* This compares the priority of target features in function DECL1 and DECL2. 24153 It returns positive value if DECL1 is higher priority, negative value if 24154 DECL2 is higher priority and 0 if they are the same. Note, priorities are 24155 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). 
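   For example, if DECL1 is a clone targeting ISA 3.0 (power9) and DECL2 is
   the default clone, the result is positive.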
*/ 24156 24157static int 24158rs6000_compare_version_priority (tree decl1, tree decl2) 24159{ 24160 int priority1 = rs6000_clone_priority (decl1); 24161 int priority2 = rs6000_clone_priority (decl2); 24162 int ret = priority1 - priority2; 24163 24164 if (TARGET_DEBUG_TARGET) 24165 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n", 24166 get_decl_name (decl1), get_decl_name (decl2), ret); 24167 24168 return ret; 24169} 24170 24171/* Make a dispatcher declaration for the multi-versioned function DECL. 24172 Calls to DECL function will be replaced with calls to the dispatcher 24173 by the front-end. Returns the decl of the dispatcher function. */ 24174 24175static tree 24176rs6000_get_function_versions_dispatcher (void *decl) 24177{ 24178 tree fn = (tree) decl; 24179 struct cgraph_node *node = NULL; 24180 struct cgraph_node *default_node = NULL; 24181 struct cgraph_function_version_info *node_v = NULL; 24182 struct cgraph_function_version_info *first_v = NULL; 24183 24184 tree dispatch_decl = NULL; 24185 24186 struct cgraph_function_version_info *default_version_info = NULL; 24187 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); 24188 24189 if (TARGET_DEBUG_TARGET) 24190 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n", 24191 get_decl_name (fn)); 24192 24193 node = cgraph_node::get (fn); 24194 gcc_assert (node != NULL); 24195 24196 node_v = node->function_version (); 24197 gcc_assert (node_v != NULL); 24198 24199 if (node_v->dispatcher_resolver != NULL) 24200 return node_v->dispatcher_resolver; 24201 24202 /* Find the default version and make it the first node. */ 24203 first_v = node_v; 24204 /* Go to the beginning of the chain. */ 24205 while (first_v->prev != NULL) 24206 first_v = first_v->prev; 24207 24208 default_version_info = first_v; 24209 while (default_version_info != NULL) 24210 { 24211 const tree decl2 = default_version_info->this_node->decl; 24212 if (is_function_default_version (decl2)) 24213 break; 24214 default_version_info = default_version_info->next; 24215 } 24216 24217 /* If there is no default node, just return NULL. */ 24218 if (default_version_info == NULL) 24219 return NULL; 24220 24221 /* Make default info the first node. */ 24222 if (first_v != default_version_info) 24223 { 24224 default_version_info->prev->next = default_version_info->next; 24225 if (default_version_info->next) 24226 default_version_info->next->prev = default_version_info->prev; 24227 first_v->prev = default_version_info; 24228 default_version_info->next = first_v; 24229 default_version_info->prev = NULL; 24230 } 24231 24232 default_node = default_version_info->this_node; 24233 24234#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB 24235 error_at (DECL_SOURCE_LOCATION (default_node->decl), 24236 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that " 24237 "exports hardware capability bits"); 24238#else 24239 24240 if (targetm.has_ifunc_p ()) 24241 { 24242 struct cgraph_function_version_info *it_v = NULL; 24243 struct cgraph_node *dispatcher_node = NULL; 24244 struct cgraph_function_version_info *dispatcher_version_info = NULL; 24245 24246 /* Right now, the dispatching is done via ifunc. 
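   In other words, dispatch_decl becomes an ifunc whose resolver (built later
   by make_resolver_func) selects one of the clones at load time via
   __builtin_cpu_supports checks; if the target has no ifunc support we emit
   the error below instead.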
*/ 24247 dispatch_decl = make_dispatcher_decl (default_node->decl); 24248 24249 dispatcher_node = cgraph_node::get_create (dispatch_decl); 24250 gcc_assert (dispatcher_node != NULL); 24251 dispatcher_node->dispatcher_function = 1; 24252 dispatcher_version_info 24253 = dispatcher_node->insert_new_function_version (); 24254 dispatcher_version_info->next = default_version_info; 24255 dispatcher_node->definition = 1; 24256 24257 /* Set the dispatcher for all the versions. */ 24258 it_v = default_version_info; 24259 while (it_v != NULL) 24260 { 24261 it_v->dispatcher_resolver = dispatch_decl; 24262 it_v = it_v->next; 24263 } 24264 } 24265 else 24266 { 24267 error_at (DECL_SOURCE_LOCATION (default_node->decl), 24268 "multiversioning needs ifunc which is not supported " 24269 "on this target"); 24270 } 24271#endif 24272 24273 return dispatch_decl; 24274} 24275 24276/* Make the resolver function decl to dispatch the versions of a multi- 24277 versioned function, DEFAULT_DECL. Create an empty basic block in the 24278 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver 24279 function. */ 24280 24281static tree 24282make_resolver_func (const tree default_decl, 24283 const tree dispatch_decl, 24284 basic_block *empty_bb) 24285{ 24286 /* Make the resolver function static. The resolver function returns 24287 void *. */ 24288 tree decl_name = clone_function_name (default_decl, "resolver"); 24289 const char *resolver_name = IDENTIFIER_POINTER (decl_name); 24290 tree type = build_function_type_list (ptr_type_node, NULL_TREE); 24291 tree decl = build_fn_decl (resolver_name, type); 24292 SET_DECL_ASSEMBLER_NAME (decl, decl_name); 24293 24294 DECL_NAME (decl) = decl_name; 24295 TREE_USED (decl) = 1; 24296 DECL_ARTIFICIAL (decl) = 1; 24297 DECL_IGNORED_P (decl) = 0; 24298 TREE_PUBLIC (decl) = 0; 24299 DECL_UNINLINABLE (decl) = 1; 24300 24301 /* Resolver is not external, body is generated. */ 24302 DECL_EXTERNAL (decl) = 0; 24303 DECL_EXTERNAL (dispatch_decl) = 0; 24304 24305 DECL_CONTEXT (decl) = NULL_TREE; 24306 DECL_INITIAL (decl) = make_node (BLOCK); 24307 DECL_STATIC_CONSTRUCTOR (decl) = 0; 24308 24309 if (DECL_COMDAT_GROUP (default_decl) 24310 || TREE_PUBLIC (default_decl)) 24311 { 24312 /* In this case, each translation unit with a call to this 24313 versioned function will put out a resolver. Ensure it 24314 is comdat to keep just one copy. */ 24315 DECL_COMDAT (decl) = 1; 24316 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); 24317 } 24318 else 24319 TREE_PUBLIC (dispatch_decl) = 0; 24320 24321 /* Build result decl and add to function_decl. */ 24322 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); 24323 DECL_CONTEXT (t) = decl; 24324 DECL_ARTIFICIAL (t) = 1; 24325 DECL_IGNORED_P (t) = 1; 24326 DECL_RESULT (decl) = t; 24327 24328 gimplify_function_tree (decl); 24329 push_cfun (DECL_STRUCT_FUNCTION (decl)); 24330 *empty_bb = init_lowered_empty_function (decl, false, 24331 profile_count::uninitialized ()); 24332 24333 cgraph_node::add_new_function (decl, true); 24334 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); 24335 24336 pop_cfun (); 24337 24338 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. 
*/ 24339 DECL_ATTRIBUTES (dispatch_decl) 24340 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl)); 24341 24342 cgraph_node::create_same_body_alias (dispatch_decl, decl); 24343 24344 return decl; 24345} 24346 24347/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to 24348 return a pointer to VERSION_DECL if we are running on a machine that 24349 supports the index CLONE_ISA hardware architecture bits. This function will 24350 be called during version dispatch to decide which function version to 24351 execute. It returns the basic block at the end, to which more conditions 24352 can be added. */ 24353 24354static basic_block 24355add_condition_to_bb (tree function_decl, tree version_decl, 24356 int clone_isa, basic_block new_bb) 24357{ 24358 push_cfun (DECL_STRUCT_FUNCTION (function_decl)); 24359 24360 gcc_assert (new_bb != NULL); 24361 gimple_seq gseq = bb_seq (new_bb); 24362 24363 24364 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node, 24365 build_fold_addr_expr (version_decl)); 24366 tree result_var = create_tmp_var (ptr_type_node); 24367 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr); 24368 gimple *return_stmt = gimple_build_return (result_var); 24369 24370 if (clone_isa == CLONE_DEFAULT) 24371 { 24372 gimple_seq_add_stmt (&gseq, convert_stmt); 24373 gimple_seq_add_stmt (&gseq, return_stmt); 24374 set_bb_seq (new_bb, gseq); 24375 gimple_set_bb (convert_stmt, new_bb); 24376 gimple_set_bb (return_stmt, new_bb); 24377 pop_cfun (); 24378 return new_bb; 24379 } 24380 24381 tree bool_zero = build_int_cst (bool_int_type_node, 0); 24382 tree cond_var = create_tmp_var (bool_int_type_node); 24383 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS]; 24384 const char *arg_str = rs6000_clone_map[clone_isa].name; 24385 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); 24386 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); 24387 gimple_call_set_lhs (call_cond_stmt, cond_var); 24388 24389 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); 24390 gimple_set_bb (call_cond_stmt, new_bb); 24391 gimple_seq_add_stmt (&gseq, call_cond_stmt); 24392 24393 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero, 24394 NULL_TREE, NULL_TREE); 24395 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); 24396 gimple_set_bb (if_else_stmt, new_bb); 24397 gimple_seq_add_stmt (&gseq, if_else_stmt); 24398 24399 gimple_seq_add_stmt (&gseq, convert_stmt); 24400 gimple_seq_add_stmt (&gseq, return_stmt); 24401 set_bb_seq (new_bb, gseq); 24402 24403 basic_block bb1 = new_bb; 24404 edge e12 = split_block (bb1, if_else_stmt); 24405 basic_block bb2 = e12->dest; 24406 e12->flags &= ~EDGE_FALLTHRU; 24407 e12->flags |= EDGE_TRUE_VALUE; 24408 24409 edge e23 = split_block (bb2, return_stmt); 24410 gimple_set_bb (convert_stmt, bb2); 24411 gimple_set_bb (return_stmt, bb2); 24412 24413 basic_block bb3 = e23->dest; 24414 make_edge (bb1, bb3, EDGE_FALSE_VALUE); 24415 24416 remove_edge (e23); 24417 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); 24418 24419 pop_cfun (); 24420 return bb3; 24421} 24422 24423/* This function generates the dispatch function for multi-versioned functions. 24424 DISPATCH_DECL is the function which will contain the dispatch logic. 24425 FNDECLS are the function choices for dispatch, and is a tree chain. 24426 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch 24427 code is generated. 
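   Roughly, the body that ends up in the resolver looks like this (a sketch;
   the actual predicate strings come from rs6000_clone_map and the tests are
   emitted from the highest-priority clone down to the default):

     if (__builtin_cpu_supports ("<isa of clone N>"))
       return clone_N;
     ...
     return default_clone;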
*/ 24428 24429static int 24430dispatch_function_versions (tree dispatch_decl, 24431 void *fndecls_p, 24432 basic_block *empty_bb) 24433{ 24434 int ix; 24435 tree ele; 24436 vec<tree> *fndecls; 24437 tree clones[CLONE_MAX]; 24438 24439 if (TARGET_DEBUG_TARGET) 24440 fputs ("dispatch_function_versions, top\n", stderr); 24441 24442 gcc_assert (dispatch_decl != NULL 24443 && fndecls_p != NULL 24444 && empty_bb != NULL); 24445 24446 /* fndecls_p is actually a vector. */ 24447 fndecls = static_cast<vec<tree> *> (fndecls_p); 24448 24449 /* At least one more version other than the default. */ 24450 gcc_assert (fndecls->length () >= 2); 24451 24452 /* The first version in the vector is the default decl. */ 24453 memset ((void *) clones, '\0', sizeof (clones)); 24454 clones[CLONE_DEFAULT] = (*fndecls)[0]; 24455 24456 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP 24457 on the PowerPC (on the x86_64, it is not a NOP). The builtin function 24458 __builtin_cpu_support ensures that the TOC fields are setup by requiring a 24459 recent glibc. If we ever need to call __builtin_cpu_init, we would need 24460 to insert the code here to do the call. */ 24461 24462 for (ix = 1; fndecls->iterate (ix, &ele); ++ix) 24463 { 24464 int priority = rs6000_clone_priority (ele); 24465 if (!clones[priority]) 24466 clones[priority] = ele; 24467 } 24468 24469 for (ix = CLONE_MAX - 1; ix >= 0; ix--) 24470 if (clones[ix]) 24471 { 24472 if (TARGET_DEBUG_TARGET) 24473 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n", 24474 ix, get_decl_name (clones[ix])); 24475 24476 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix, 24477 *empty_bb); 24478 } 24479 24480 return 0; 24481} 24482 24483/* Generate the dispatching code body to dispatch multi-versioned function 24484 DECL. The target hook is called to process the "target" attributes and 24485 provide the code to dispatch the right function at run-time. NODE points 24486 to the dispatcher decl whose body will be created. */ 24487 24488static tree 24489rs6000_generate_version_dispatcher_body (void *node_p) 24490{ 24491 tree resolver; 24492 basic_block empty_bb; 24493 struct cgraph_node *node = (cgraph_node *) node_p; 24494 struct cgraph_function_version_info *ninfo = node->function_version (); 24495 24496 if (ninfo->dispatcher_resolver) 24497 return ninfo->dispatcher_resolver; 24498 24499 /* node is going to be an alias, so remove the finalized bit. */ 24500 node->definition = false; 24501 24502 /* The first version in the chain corresponds to the default version. */ 24503 ninfo->dispatcher_resolver = resolver 24504 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb); 24505 24506 if (TARGET_DEBUG_TARGET) 24507 fprintf (stderr, "rs6000_get_function_versions_dispatcher, %s\n", 24508 get_decl_name (resolver)); 24509 24510 push_cfun (DECL_STRUCT_FUNCTION (resolver)); 24511 auto_vec<tree, 2> fn_ver_vec; 24512 24513 for (struct cgraph_function_version_info *vinfo = ninfo->next; 24514 vinfo; 24515 vinfo = vinfo->next) 24516 { 24517 struct cgraph_node *version = vinfo->this_node; 24518 /* Check for virtual functions here again, as by this time it should 24519 have been determined if this function needs a vtable index or 24520 not. This happens for methods in derived classes that override 24521 virtual methods in base classes but are not explicitly marked as 24522 virtual. 
*/ 24523 if (DECL_VINDEX (version->decl)) 24524 sorry ("Virtual function multiversioning not supported"); 24525 24526 fn_ver_vec.safe_push (version->decl); 24527 } 24528 24529 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb); 24530 cgraph_edge::rebuild_edges (); 24531 pop_cfun (); 24532 return resolver; 24533 } 24534 24535 24536 /* Hook to determine if one function can safely inline another. */ 24537 24538 static bool 24539 rs6000_can_inline_p (tree caller, tree callee) 24540 { 24541 bool ret = false; 24542 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); 24543 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); 24544 24545 /* If the callee has no option attributes, then it is ok to inline. */ 24546 if (!callee_tree) 24547 ret = true; 24548 24549 else 24550 { 24551 HOST_WIDE_INT caller_isa; 24552 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); 24553 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags; 24554 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit; 24555 24556 /* If the caller has option attributes, then use them. 24557 Otherwise, use the command line options. */ 24558 if (caller_tree) 24559 caller_isa = TREE_TARGET_OPTION (caller_tree)->x_rs6000_isa_flags; 24560 else 24561 caller_isa = rs6000_isa_flags; 24562 24563 /* Ignore the -mpower8-fusion option for inlining purposes. */ 24564 callee_isa &= ~OPTION_MASK_P8_FUSION; 24565 explicit_isa &= ~OPTION_MASK_P8_FUSION; 24566 24567 /* The callee's options must be a subset of the caller's options, i.e. 24568 a vsx function may inline an altivec function, but a no-vsx function 24569 must not inline a vsx function. However, for those options that the 24570 callee has explicitly enabled or disabled, then we must enforce that 24571 the callee's and caller's options match exactly; see PR70010. */ 24572 if (((caller_isa & callee_isa) == callee_isa) 24573 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa)) 24574 ret = true; 24575 } 24576 24577 if (TARGET_DEBUG_TARGET) 24578 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n", 24579 get_decl_name (caller), get_decl_name (callee), 24580 (ret ? "can" : "cannot")); 24581 24582 return ret; 24583 } 24584 24585 /* Allocate a stack temp and fix up the address so it meets the particular 24586 memory requirements (either offsettable or REG+REG addressing). */ 24587 24588 rtx 24589 rs6000_allocate_stack_temp (machine_mode mode, 24590 bool offsettable_p, 24591 bool reg_reg_p) 24592 { 24593 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 24594 rtx addr = XEXP (stack, 0); 24595 int strict_p = reload_completed; 24596 24597 if (!legitimate_indirect_address_p (addr, strict_p)) 24598 { 24599 if (offsettable_p 24600 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true)) 24601 stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); 24602 24603 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) 24604 stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); 24605 } 24606 24607 return stack; 24608 } 24609 24610 /* Given a memory reference, if it is not a reg or reg+reg addressing, 24611 convert to such a form to deal with memory reference instructions 24612 like STFIWX and LDBRX that only take reg+reg addressing.
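   For example (purely illustrative), a reference such as
     (mem:DI (plus:DI (reg:DI 3) (const_int 16)))
   has the constant forced into a fresh pseudo so that the access becomes
     (mem:DI (plus:DI (reg:DI 3) (reg:DI <tmp>)))
   which the reg+reg-only instructions can handle.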
*/ 24613 24614rtx 24615rs6000_force_indexed_or_indirect_mem (rtx x) 24616{ 24617 machine_mode mode = GET_MODE (x); 24618 24619 gcc_assert (MEM_P (x)); 24620 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode)) 24621 { 24622 rtx addr = XEXP (x, 0); 24623 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) 24624 { 24625 rtx reg = XEXP (addr, 0); 24626 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x)); 24627 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size); 24628 gcc_assert (REG_P (reg)); 24629 emit_insn (gen_add3_insn (reg, reg, size_rtx)); 24630 addr = reg; 24631 } 24632 else if (GET_CODE (addr) == PRE_MODIFY) 24633 { 24634 rtx reg = XEXP (addr, 0); 24635 rtx expr = XEXP (addr, 1); 24636 gcc_assert (REG_P (reg)); 24637 gcc_assert (GET_CODE (expr) == PLUS); 24638 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1))); 24639 addr = reg; 24640 } 24641 24642 if (GET_CODE (addr) == PLUS) 24643 { 24644 rtx op0 = XEXP (addr, 0); 24645 rtx op1 = XEXP (addr, 1); 24646 op0 = force_reg (Pmode, op0); 24647 op1 = force_reg (Pmode, op1); 24648 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1)); 24649 } 24650 else 24651 x = replace_equiv_address (x, force_reg (Pmode, addr)); 24652 } 24653 24654 return x; 24655} 24656 24657/* Implement TARGET_LEGITIMATE_CONSTANT_P. 24658 24659 On the RS/6000, all integer constants are acceptable, most won't be valid 24660 for particular insns, though. Only easy FP constants are acceptable. */ 24661 24662static bool 24663rs6000_legitimate_constant_p (machine_mode mode, rtx x) 24664{ 24665 if (TARGET_ELF && tls_referenced_p (x)) 24666 return false; 24667 24668 if (CONST_DOUBLE_P (x)) 24669 return easy_fp_constant (x, mode); 24670 24671 if (GET_CODE (x) == CONST_VECTOR) 24672 return easy_vector_constant (x, mode); 24673 24674 return true; 24675} 24676 24677 24678/* Return TRUE iff the sequence ending in LAST sets the static chain. */ 24679 24680static bool 24681chain_already_loaded (rtx_insn *last) 24682{ 24683 for (; last != NULL; last = PREV_INSN (last)) 24684 { 24685 if (NONJUMP_INSN_P (last)) 24686 { 24687 rtx patt = PATTERN (last); 24688 24689 if (GET_CODE (patt) == SET) 24690 { 24691 rtx lhs = XEXP (patt, 0); 24692 24693 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM) 24694 return true; 24695 } 24696 } 24697 } 24698 return false; 24699} 24700 24701/* Expand code to perform a call under the AIX or ELFv2 ABI. */ 24702 24703void 24704rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 24705{ 24706 rtx func = func_desc; 24707 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM); 24708 rtx toc_load = NULL_RTX; 24709 rtx toc_restore = NULL_RTX; 24710 rtx func_addr; 24711 rtx abi_reg = NULL_RTX; 24712 rtx call[5]; 24713 int n_call; 24714 rtx insn; 24715 bool is_pltseq_longcall; 24716 24717 if (global_tlsarg) 24718 tlsarg = global_tlsarg; 24719 24720 /* Handle longcall attributes. */ 24721 is_pltseq_longcall = false; 24722 if ((INTVAL (cookie) & CALL_LONG) != 0 24723 && GET_CODE (func_desc) == SYMBOL_REF) 24724 { 24725 func = rs6000_longcall_ref (func_desc, tlsarg); 24726 if (TARGET_PLTSEQ) 24727 is_pltseq_longcall = true; 24728 } 24729 24730 /* Handle indirect calls. */ 24731 if (!SYMBOL_REF_P (func) 24732 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func))) 24733 { 24734 if (!rs6000_pcrel_p (cfun)) 24735 { 24736 /* Save the TOC into its reserved slot before the call, 24737 and prepare to restore it after the call. 
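   For a typical ELFv2 indirect call this works out to a sequence roughly
   like (the slot offset differs between the AIX and ELFv2 ABIs):
     std 2,24(1)	save the TOC pointer in its reserved stack slot
     mtctr 12		indirect target in r12, moved to CTR
     bctrl
     ld 2,24(1)		restore the TOC pointer after the call returns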
*/ 24738 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT); 24739 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode, 24740 gen_rtvec (1, stack_toc_offset), 24741 UNSPEC_TOCSLOT); 24742 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec); 24743 24744 /* Can we optimize saving the TOC in the prologue or 24745 do we need to do it at every call? */ 24746 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) 24747 cfun->machine->save_toc_in_prologue = true; 24748 else 24749 { 24750 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 24751 rtx stack_toc_mem = gen_frame_mem (Pmode, 24752 gen_rtx_PLUS (Pmode, stack_ptr, 24753 stack_toc_offset)); 24754 MEM_VOLATILE_P (stack_toc_mem) = 1; 24755 if (is_pltseq_longcall) 24756 { 24757 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg); 24758 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 24759 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg)); 24760 } 24761 else 24762 emit_move_insn (stack_toc_mem, toc_reg); 24763 } 24764 } 24765 24766 if (DEFAULT_ABI == ABI_ELFv2) 24767 { 24768 /* A function pointer in the ELFv2 ABI is just a plain address, but 24769 the ABI requires it to be loaded into r12 before the call. */ 24770 func_addr = gen_rtx_REG (Pmode, 12); 24771 if (!rtx_equal_p (func_addr, func)) 24772 emit_move_insn (func_addr, func); 24773 abi_reg = func_addr; 24774 /* Indirect calls via CTR are strongly preferred over indirect 24775 calls via LR, so move the address there. Needed to mark 24776 this insn for linker plt sequence editing too. */ 24777 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 24778 if (is_pltseq_longcall) 24779 { 24780 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg); 24781 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 24782 emit_insn (gen_rtx_SET (func_addr, mark_func)); 24783 v = gen_rtvec (2, func_addr, func_desc); 24784 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 24785 } 24786 else 24787 emit_move_insn (func_addr, abi_reg); 24788 } 24789 else 24790 { 24791 /* A function pointer under AIX is a pointer to a data area whose 24792 first word contains the actual address of the function, whose 24793 second word contains a pointer to its TOC, and whose third word 24794 contains a value to place in the static chain register (r11). 24795 Note that if we load the static chain, our "trampoline" need 24796 not have any executable code. */ 24797 24798 /* Load up address of the actual function. */ 24799 func = force_reg (Pmode, func); 24800 func_addr = gen_reg_rtx (Pmode); 24801 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func)); 24802 24803 /* Indirect calls via CTR are strongly preferred over indirect 24804 calls via LR, so move the address there. */ 24805 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO); 24806 emit_move_insn (ctr_reg, func_addr); 24807 func_addr = ctr_reg; 24808 24809 /* Prepare to load the TOC of the called function. Note that the 24810 TOC load must happen immediately before the actual call so 24811 that unwinding the TOC registers works correctly. See the 24812 comment in frob_update_context. */ 24813 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode)); 24814 rtx func_toc_mem = gen_rtx_MEM (Pmode, 24815 gen_rtx_PLUS (Pmode, func, 24816 func_toc_offset)); 24817 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem); 24818 24819 /* If we have a static chain, load it up. But, if the call was 24820 originally direct, the 3rd word has not been written since no 24821 trampoline has been built, so we ought not to load it, lest we 24822 override a static chain value. 
*/ 24823 if (!(GET_CODE (func_desc) == SYMBOL_REF 24824 && SYMBOL_REF_FUNCTION_P (func_desc)) 24825 && TARGET_POINTERS_TO_NESTED_FUNCTIONS 24826 && !chain_already_loaded (get_current_sequence ()->next->last)) 24827 { 24828 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM); 24829 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode)); 24830 rtx func_sc_mem = gen_rtx_MEM (Pmode, 24831 gen_rtx_PLUS (Pmode, func, 24832 func_sc_offset)); 24833 emit_move_insn (sc_reg, func_sc_mem); 24834 abi_reg = sc_reg; 24835 } 24836 } 24837 } 24838 else 24839 { 24840 /* No TOC register needed for calls from PC-relative callers. */ 24841 if (!rs6000_pcrel_p (cfun)) 24842 /* Direct calls use the TOC: for local calls, the callee will 24843 assume the TOC register is set; for non-local calls, the 24844 PLT stub needs the TOC register. */ 24845 abi_reg = toc_reg; 24846 func_addr = func; 24847 } 24848 24849 /* Create the call. */ 24850 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 24851 if (value != NULL_RTX) 24852 call[0] = gen_rtx_SET (value, call[0]); 24853 call[1] = gen_rtx_USE (VOIDmode, cookie); 24854 n_call = 2; 24855 24856 if (toc_load) 24857 call[n_call++] = toc_load; 24858 if (toc_restore) 24859 call[n_call++] = toc_restore; 24860 24861 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO); 24862 24863 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call)); 24864 insn = emit_call_insn (insn); 24865 24866 /* Mention all registers defined by the ABI to hold information 24867 as uses in CALL_INSN_FUNCTION_USAGE. */ 24868 if (abi_reg) 24869 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 24870} 24871 24872/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ 24873 24874void 24875rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 24876{ 24877 rtx call[2]; 24878 rtx insn; 24879 24880 gcc_assert (INTVAL (cookie) == 0); 24881 24882 if (global_tlsarg) 24883 tlsarg = global_tlsarg; 24884 24885 /* Create the call. */ 24886 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), tlsarg); 24887 if (value != NULL_RTX) 24888 call[0] = gen_rtx_SET (value, call[0]); 24889 24890 call[1] = simple_return_rtx; 24891 24892 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); 24893 insn = emit_call_insn (insn); 24894 24895 /* Note use of the TOC register. */ 24896 if (!rs6000_pcrel_p (cfun)) 24897 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), 24898 gen_rtx_REG (Pmode, TOC_REGNUM)); 24899} 24900 24901/* Expand code to perform a call under the SYSV4 ABI. */ 24902 24903void 24904rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 24905{ 24906 rtx func = func_desc; 24907 rtx func_addr; 24908 rtx call[4]; 24909 rtx insn; 24910 rtx abi_reg = NULL_RTX; 24911 int n; 24912 24913 if (global_tlsarg) 24914 tlsarg = global_tlsarg; 24915 24916 /* Handle longcall attributes. */ 24917 if ((INTVAL (cookie) & CALL_LONG) != 0 24918 && GET_CODE (func_desc) == SYMBOL_REF) 24919 { 24920 func = rs6000_longcall_ref (func_desc, tlsarg); 24921 /* If the longcall was implemented as an inline PLT call using 24922 PLT unspecs then func will be REG:r11. If not, func will be 24923 a pseudo reg. The inline PLT call sequence supports lazy 24924 linking (and longcalls to functions in dlopen'd libraries). 24925 The other style of longcalls don't. The lazy linking entry 24926 to the dynamic symbol resolver requires r11 be the function 24927 address (as it is for linker generated PLT stubs). 
Ensure 24928 r11 stays valid to the bctrl by marking r11 used by the call. */ 24929 if (TARGET_PLTSEQ) 24930 abi_reg = func; 24931 } 24932 24933 /* Handle indirect calls. */ 24934 if (GET_CODE (func) != SYMBOL_REF) 24935 { 24936 func = force_reg (Pmode, func); 24937 24938 /* Indirect calls via CTR are strongly preferred over indirect 24939 calls via LR, so move the address there. That can't be left 24940 to reload because we want to mark every instruction in an 24941 inline PLT call sequence with a reloc, enabling the linker to 24942 edit the sequence back to a direct call when that makes sense. */ 24943 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 24944 if (abi_reg) 24945 { 24946 rtvec v = gen_rtvec (3, func, func_desc, tlsarg); 24947 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 24948 emit_insn (gen_rtx_SET (func_addr, mark_func)); 24949 v = gen_rtvec (2, func_addr, func_desc); 24950 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 24951 } 24952 else 24953 emit_move_insn (func_addr, func); 24954 } 24955 else 24956 func_addr = func; 24957 24958 /* Create the call. */ 24959 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 24960 if (value != NULL_RTX) 24961 call[0] = gen_rtx_SET (value, call[0]); 24962 24963 call[1] = gen_rtx_USE (VOIDmode, cookie); 24964 n = 2; 24965 if (TARGET_SECURE_PLT 24966 && flag_pic 24967 && GET_CODE (func_addr) == SYMBOL_REF 24968 && !SYMBOL_REF_LOCAL_P (func_addr)) 24969 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx); 24970 24971 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO); 24972 24973 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call)); 24974 insn = emit_call_insn (insn); 24975 if (abi_reg) 24976 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 24977} 24978 24979/* Expand code to perform a sibling call under the SysV4 ABI. */ 24980 24981void 24982rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie) 24983{ 24984 rtx func = func_desc; 24985 rtx func_addr; 24986 rtx call[3]; 24987 rtx insn; 24988 rtx abi_reg = NULL_RTX; 24989 24990 if (global_tlsarg) 24991 tlsarg = global_tlsarg; 24992 24993 /* Handle longcall attributes. */ 24994 if ((INTVAL (cookie) & CALL_LONG) != 0 24995 && GET_CODE (func_desc) == SYMBOL_REF) 24996 { 24997 func = rs6000_longcall_ref (func_desc, tlsarg); 24998 /* If the longcall was implemented as an inline PLT call using 24999 PLT unspecs then func will be REG:r11. If not, func will be 25000 a pseudo reg. The inline PLT call sequence supports lazy 25001 linking (and longcalls to functions in dlopen'd libraries). 25002 The other style of longcalls don't. The lazy linking entry 25003 to the dynamic symbol resolver requires r11 be the function 25004 address (as it is for linker generated PLT stubs). Ensure 25005 r11 stays valid to the bctr by marking r11 used by the call. */ 25006 if (TARGET_PLTSEQ) 25007 abi_reg = func; 25008 } 25009 25010 /* Handle indirect calls. */ 25011 if (GET_CODE (func) != SYMBOL_REF) 25012 { 25013 func = force_reg (Pmode, func); 25014 25015 /* Indirect sibcalls must go via CTR. That can't be left to 25016 reload because we want to mark every instruction in an inline 25017 PLT call sequence with a reloc, enabling the linker to edit 25018 the sequence back to a direct call when that makes sense. 
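   (For orientation, and only as a schematic: the marked sequence is along
   the lines of an addis/load pair that picks up the function's PLT entry,
   followed by mtctr and the branch, with each instruction carrying a
   PLT-related relocation; a linker that sees the target is local can rewrite
   the whole group into a direct bl plus a nop.)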
*/ 25019 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25020 if (abi_reg) 25021 { 25022 rtvec v = gen_rtvec (3, func, func_desc, tlsarg); 25023 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25024 emit_insn (gen_rtx_SET (func_addr, mark_func)); 25025 v = gen_rtvec (2, func_addr, func_desc); 25026 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ); 25027 } 25028 else 25029 emit_move_insn (func_addr, func); 25030 } 25031 else 25032 func_addr = func; 25033 25034 /* Create the call. */ 25035 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25036 if (value != NULL_RTX) 25037 call[0] = gen_rtx_SET (value, call[0]); 25038 25039 call[1] = gen_rtx_USE (VOIDmode, cookie); 25040 call[2] = simple_return_rtx; 25041 25042 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call)); 25043 insn = emit_call_insn (insn); 25044 if (abi_reg) 25045 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg); 25046} 25047 25048#if TARGET_MACHO 25049 25050/* Expand code to perform a call under the Darwin ABI. 25051 Modulo handling of mlongcall, this is much the same as sysv. 25052 if/when the longcall optimisation is removed, we could drop this 25053 code and use the sysv case (taking care to avoid the tls stuff). 25054 25055 We can use this for sibcalls too, if needed. */ 25056 25057void 25058rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg, 25059 rtx cookie, bool sibcall) 25060{ 25061 rtx func = func_desc; 25062 rtx func_addr; 25063 rtx call[3]; 25064 rtx insn; 25065 int cookie_val = INTVAL (cookie); 25066 bool make_island = false; 25067 25068 /* Handle longcall attributes, there are two cases for Darwin: 25069 1) Newer linkers are capable of synthesising any branch islands needed. 25070 2) We need a helper branch island synthesised by the compiler. 25071 The second case has mostly been retired and we don't use it for m64. 25072 In fact, it's is an optimisation, we could just indirect as sysv does.. 25073 ... however, backwards compatibility for now. 25074 If we're going to use this, then we need to keep the CALL_LONG bit set, 25075 so that we can pick up the special insn form later. */ 25076 if ((cookie_val & CALL_LONG) != 0 25077 && GET_CODE (func_desc) == SYMBOL_REF) 25078 { 25079 /* FIXME: the longcall opt should not hang off this flag, it is most 25080 likely incorrect for kernel-mode code-generation. */ 25081 if (darwin_symbol_stubs && TARGET_32BIT) 25082 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */ 25083 else 25084 { 25085 /* The linker is capable of doing this, but the user explicitly 25086 asked for -mlongcall, so we'll do the 'normal' version. */ 25087 func = rs6000_longcall_ref (func_desc, NULL_RTX); 25088 cookie_val &= ~CALL_LONG; /* Handled, zap it. */ 25089 } 25090 } 25091 25092 /* Handle indirect calls. */ 25093 if (GET_CODE (func) != SYMBOL_REF) 25094 { 25095 func = force_reg (Pmode, func); 25096 25097 /* Indirect calls via CTR are strongly preferred over indirect 25098 calls via LR, and are required for indirect sibcalls, so move 25099 the address there. */ 25100 func_addr = gen_rtx_REG (Pmode, CTR_REGNO); 25101 emit_move_insn (func_addr, func); 25102 } 25103 else 25104 func_addr = func; 25105 25106 /* Create the call. 
*/ 25107 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg); 25108 if (value != NULL_RTX) 25109 call[0] = gen_rtx_SET (value, call[0]); 25110 25111 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val)); 25112 25113 if (sibcall) 25114 call[2] = simple_return_rtx; 25115 else 25116 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO); 25117 25118 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call)); 25119 insn = emit_call_insn (insn); 25120 /* Now we have the debug info in the insn, we can set up the branch island 25121 if we're using one. */ 25122 if (make_island) 25123 { 25124 tree funname = get_identifier (XSTR (func_desc, 0)); 25125 25126 if (no_previous_def (funname)) 25127 { 25128 rtx label_rtx = gen_label_rtx (); 25129 char *label_buf, temp_buf[256]; 25130 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L", 25131 CODE_LABEL_NUMBER (label_rtx)); 25132 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf; 25133 tree labelname = get_identifier (label_buf); 25134 add_compiler_branch_island (labelname, funname, 25135 insn_line ((const rtx_insn*)insn)); 25136 } 25137 } 25138} 25139#endif 25140 25141void 25142rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED, 25143 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED) 25144{ 25145#if TARGET_MACHO 25146 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false); 25147#else 25148 gcc_unreachable(); 25149#endif 25150} 25151 25152 25153void 25154rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED, 25155 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED) 25156{ 25157#if TARGET_MACHO 25158 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true); 25159#else 25160 gcc_unreachable(); 25161#endif 25162} 25163 25164/* Return whether we should generate PC-relative code for FNDECL. */ 25165bool 25166rs6000_fndecl_pcrel_p (const_tree fndecl) 25167{ 25168 if (DEFAULT_ABI != ABI_ELFv2) 25169 return false; 25170 25171 struct cl_target_option *opts = target_opts_for_fn (fndecl); 25172 25173 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0 25174 && TARGET_CMODEL == CMODEL_MEDIUM); 25175} 25176 25177/* Return whether we should generate PC-relative code for *FN. */ 25178bool 25179rs6000_pcrel_p (struct function *fn) 25180{ 25181 if (DEFAULT_ABI != ABI_ELFv2) 25182 return false; 25183 25184 /* Optimize usual case. */ 25185 if (fn == cfun) 25186 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0 25187 && TARGET_CMODEL == CMODEL_MEDIUM); 25188 25189 return rs6000_fndecl_pcrel_p (fn->decl); 25190} 25191 25192 25193/* Given an address (ADDR), a mode (MODE), and what the format of the 25194 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format 25195 for the address. */ 25196 25197enum insn_form 25198address_to_insn_form (rtx addr, 25199 machine_mode mode, 25200 enum non_prefixed_form non_prefixed_format) 25201{ 25202 /* Single register is easy. */ 25203 if (REG_P (addr) || SUBREG_P (addr)) 25204 return INSN_FORM_BASE_REG; 25205 25206 /* If the non prefixed instruction format doesn't support offset addressing, 25207 make sure only indexed addressing is allowed. 25208 25209 We special case SDmode so that the register allocator does not try to move 25210 SDmode through GPR registers, but instead uses the 32-bit integer load and 25211 store instructions for the floating point registers. 
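   For example, an SDmode reference of the form (plus (reg) (const_int 4)) is
   rejected here as INSN_FORM_BAD, while (plus (reg) (reg)) is accepted as
   INSN_FORM_X, matching the indexed-only lfiwzx/stfiwx style accesses used
   for SDmode values in the floating point registers.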
*/ 25212 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP)) 25213 { 25214 if (GET_CODE (addr) != PLUS) 25215 return INSN_FORM_BAD; 25216 25217 rtx op0 = XEXP (addr, 0); 25218 rtx op1 = XEXP (addr, 1); 25219 if (!REG_P (op0) && !SUBREG_P (op0)) 25220 return INSN_FORM_BAD; 25221 25222 if (!REG_P (op1) && !SUBREG_P (op1)) 25223 return INSN_FORM_BAD; 25224 25225 return INSN_FORM_X; 25226 } 25227 25228 /* Deal with update forms. */ 25229 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC) 25230 return INSN_FORM_UPDATE; 25231 25232 /* Handle PC-relative symbols and labels. Check for both local and 25233 external symbols. Assume labels are always local. TLS symbols 25234 are not PC-relative for rs6000. */ 25235 if (TARGET_PCREL) 25236 { 25237 if (LABEL_REF_P (addr)) 25238 return INSN_FORM_PCREL_LOCAL; 25239 25240 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr)) 25241 { 25242 if (!SYMBOL_REF_LOCAL_P (addr)) 25243 return INSN_FORM_PCREL_EXTERNAL; 25244 else 25245 return INSN_FORM_PCREL_LOCAL; 25246 } 25247 } 25248 25249 if (GET_CODE (addr) == CONST) 25250 addr = XEXP (addr, 0); 25251 25252 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */ 25253 if (GET_CODE (addr) == LO_SUM) 25254 return INSN_FORM_LO_SUM; 25255 25256 /* Everything below must be an offset address of some form. */ 25257 if (GET_CODE (addr) != PLUS) 25258 return INSN_FORM_BAD; 25259 25260 rtx op0 = XEXP (addr, 0); 25261 rtx op1 = XEXP (addr, 1); 25262 25263 /* Check for indexed addresses. */ 25264 if (REG_P (op1) || SUBREG_P (op1)) 25265 { 25266 if (REG_P (op0) || SUBREG_P (op0)) 25267 return INSN_FORM_X; 25268 25269 return INSN_FORM_BAD; 25270 } 25271 25272 if (!CONST_INT_P (op1)) 25273 return INSN_FORM_BAD; 25274 25275 HOST_WIDE_INT offset = INTVAL (op1); 25276 if (!SIGNED_INTEGER_34BIT_P (offset)) 25277 return INSN_FORM_BAD; 25278 25279 /* Check for local and external PC-relative addresses. Labels are always 25280 local. TLS symbols are not PC-relative for rs6000. */ 25281 if (TARGET_PCREL) 25282 { 25283 if (LABEL_REF_P (op0)) 25284 return INSN_FORM_PCREL_LOCAL; 25285 25286 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0)) 25287 { 25288 if (!SYMBOL_REF_LOCAL_P (op0)) 25289 return INSN_FORM_PCREL_EXTERNAL; 25290 else 25291 return INSN_FORM_PCREL_LOCAL; 25292 } 25293 } 25294 25295 /* If it isn't PC-relative, the address must use a base register. */ 25296 if (!REG_P (op0) && !SUBREG_P (op0)) 25297 return INSN_FORM_BAD; 25298 25299 /* Large offsets must be prefixed. */ 25300 if (!SIGNED_INTEGER_16BIT_P (offset)) 25301 { 25302 if (TARGET_PREFIXED) 25303 return INSN_FORM_PREFIXED_NUMERIC; 25304 25305 return INSN_FORM_BAD; 25306 } 25307 25308 /* We have a 16-bit offset, see what default instruction format to use. */ 25309 if (non_prefixed_format == NON_PREFIXED_DEFAULT) 25310 { 25311 unsigned size = GET_MODE_SIZE (mode); 25312 25313 /* On 64-bit systems, assume 64-bit integers need to use DS form 25314 addresses (for LD/STD). VSX vectors need to use DQ form addresses 25315 (for LXV and STXV). TImode is problematical in that its normal usage 25316 is expected to be GPRs where it wants a DS instruction format, but if 25317 it goes into the vector registers, it wants a DQ instruction 25318 format. 
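   As a concrete example, a DImode load from (plus (reg) (const_int 10))
   cannot use the DS form because the low two bits of a DS offset must be
   zero; it is classified as INSN_FORM_PREFIXED_NUMERIC when prefixed
   instructions are available and INSN_FORM_BAD otherwise, while the same
   offset is fine for a D-form word access.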
*/ 25319 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT) 25320 non_prefixed_format = NON_PREFIXED_DS; 25321 25322 else if (TARGET_VSX && size >= 16 25323 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))) 25324 non_prefixed_format = NON_PREFIXED_DQ; 25325 25326 else 25327 non_prefixed_format = NON_PREFIXED_D; 25328 } 25329 25330 /* Classify the D/DS/DQ-form addresses. */ 25331 switch (non_prefixed_format) 25332 { 25333 /* Instruction format D, all 16 bits are valid. */ 25334 case NON_PREFIXED_D: 25335 return INSN_FORM_D; 25336 25337 /* Instruction format DS, bottom 2 bits must be 0. */ 25338 case NON_PREFIXED_DS: 25339 if ((offset & 3) == 0) 25340 return INSN_FORM_DS; 25341 25342 else if (TARGET_PREFIXED) 25343 return INSN_FORM_PREFIXED_NUMERIC; 25344 25345 else 25346 return INSN_FORM_BAD; 25347 25348 /* Instruction format DQ, bottom 4 bits must be 0. */ 25349 case NON_PREFIXED_DQ: 25350 if ((offset & 15) == 0) 25351 return INSN_FORM_DQ; 25352 25353 else if (TARGET_PREFIXED) 25354 return INSN_FORM_PREFIXED_NUMERIC; 25355 25356 else 25357 return INSN_FORM_BAD; 25358 25359 default: 25360 break; 25361 } 25362 25363 return INSN_FORM_BAD; 25364} 25365 25366/* Helper function to see if we're potentially looking at lfs/stfs. 25367 - PARALLEL containing a SET and a CLOBBER 25368 - stfs: 25369 - SET is from UNSPEC_SI_FROM_SF to MEM:SI 25370 - CLOBBER is a V4SF 25371 - lfs: 25372 - SET is from UNSPEC_SF_FROM_SI to REG:SF 25373 - CLOBBER is a DI 25374 */ 25375 25376static bool 25377is_lfs_stfs_insn (rtx_insn *insn) 25378{ 25379 rtx pattern = PATTERN (insn); 25380 if (GET_CODE (pattern) != PARALLEL) 25381 return false; 25382 25383 /* This should be a parallel with exactly one set and one clobber. */ 25384 if (XVECLEN (pattern, 0) != 2) 25385 return false; 25386 25387 rtx set = XVECEXP (pattern, 0, 0); 25388 if (GET_CODE (set) != SET) 25389 return false; 25390 25391 rtx clobber = XVECEXP (pattern, 0, 1); 25392 if (GET_CODE (clobber) != CLOBBER) 25393 return false; 25394 25395 /* All we care is that the destination of the SET is a mem:SI, 25396 the source should be an UNSPEC_SI_FROM_SF, and the clobber 25397 should be a scratch:V4SF. */ 25398 25399 rtx dest = SET_DEST (set); 25400 rtx src = SET_SRC (set); 25401 rtx scratch = SET_DEST (clobber); 25402 25403 if (GET_CODE (src) != UNSPEC) 25404 return false; 25405 25406 /* stfs case. */ 25407 if (XINT (src, 1) == UNSPEC_SI_FROM_SF 25408 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode 25409 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode) 25410 return true; 25411 25412 /* lfs case. */ 25413 if (XINT (src, 1) == UNSPEC_SF_FROM_SI 25414 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode 25415 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode) 25416 return true; 25417 25418 return false; 25419} 25420 25421/* Helper function to take a REG and a MODE and turn it into the non-prefixed 25422 instruction format (D/DS/DQ) used for offset memory. */ 25423 25424static enum non_prefixed_form 25425reg_to_non_prefixed (rtx reg, machine_mode mode) 25426{ 25427 /* If it isn't a register, use the defaults. */ 25428 if (!REG_P (reg) && !SUBREG_P (reg)) 25429 return NON_PREFIXED_DEFAULT; 25430 25431 unsigned int r = reg_or_subregno (reg); 25432 25433 /* If we have a pseudo, use the default instruction format. 
*/ 25434 if (!HARD_REGISTER_NUM_P (r)) 25435 return NON_PREFIXED_DEFAULT; 25436 25437 unsigned size = GET_MODE_SIZE (mode); 25438 25439 /* FPR registers use D-mode for scalars, and DQ-mode for vectors, IEEE 25440 128-bit floating point, and 128-bit integers. Before power9, only indexed 25441 addressing was available for vectors. */ 25442 if (FP_REGNO_P (r)) 25443 { 25444 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode)) 25445 return NON_PREFIXED_D; 25446 25447 else if (size < 8) 25448 return NON_PREFIXED_X; 25449 25450 else if (TARGET_VSX && size >= 16 25451 && (VECTOR_MODE_P (mode) 25452 || VECTOR_ALIGNMENT_P (mode) 25453 || mode == TImode || mode == CTImode)) 25454 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X; 25455 25456 else 25457 return NON_PREFIXED_DEFAULT; 25458 } 25459 25460 /* Altivec registers use DS-mode for scalars, and DQ-mode for vectors, IEEE 25461 128-bit floating point, and 128-bit integers. Before power9, only indexed 25462 addressing was available. */ 25463 else if (ALTIVEC_REGNO_P (r)) 25464 { 25465 if (!TARGET_P9_VECTOR) 25466 return NON_PREFIXED_X; 25467 25468 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode)) 25469 return NON_PREFIXED_DS; 25470 25471 else if (size < 8) 25472 return NON_PREFIXED_X; 25473 25474 else if (TARGET_VSX && size >= 16 25475 && (VECTOR_MODE_P (mode) 25476 || VECTOR_ALIGNMENT_P (mode) 25477 || mode == TImode || mode == CTImode)) 25478 return NON_PREFIXED_DQ; 25479 25480 else 25481 return NON_PREFIXED_DEFAULT; 25482 } 25483 25484 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode 25485 otherwise. Assume that any other register, such as LR, CRs, etc. will go 25486 through the GPR registers for memory operations. */ 25487 else if (TARGET_POWERPC64 && size >= 8) 25488 return NON_PREFIXED_DS; 25489 25490 return NON_PREFIXED_D; 25491} 25492 25493 25494/* Whether a load instruction is a prefixed instruction. This is called from 25495 the prefixed attribute processing. */ 25496 25497bool 25498prefixed_load_p (rtx_insn *insn) 25499{ 25500 /* Validate the insn to make sure it is a normal load insn. */ 25501 extract_insn_cached (insn); 25502 if (recog_data.n_operands < 2) 25503 return false; 25504 25505 rtx reg = recog_data.operand[0]; 25506 rtx mem = recog_data.operand[1]; 25507 25508 if (!REG_P (reg) && !SUBREG_P (reg)) 25509 return false; 25510 25511 if (!MEM_P (mem)) 25512 return false; 25513 25514 /* Prefixed load instructions do not support update or indexed forms. */ 25515 if (get_attr_indexed (insn) == INDEXED_YES 25516 || get_attr_update (insn) == UPDATE_YES) 25517 return false; 25518 25519 /* LWA uses the DS format instead of the D format that LWZ uses. */ 25520 enum non_prefixed_form non_prefixed; 25521 machine_mode reg_mode = GET_MODE (reg); 25522 machine_mode mem_mode = GET_MODE (mem); 25523 25524 if (mem_mode == SImode && reg_mode == DImode 25525 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) 25526 non_prefixed = NON_PREFIXED_DS; 25527 25528 else 25529 non_prefixed = reg_to_non_prefixed (reg, mem_mode); 25530 25531 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) 25532 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT); 25533 else 25534 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed); 25535} 25536 25537/* Whether a store instruction is a prefixed instruction. This is called from 25538 the prefixed attribute processing. 
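   For example (offsets illustrative), a word store at offset 70000 from a
   base register does not fit the 16-bit D form, so it is emitted as the
   prefixed pstw and this predicate returns true; the same store at offset 16
   stays a plain stw and the predicate returns false.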
*/ 25539 25540bool 25541prefixed_store_p (rtx_insn *insn) 25542{ 25543 /* Validate the insn to make sure it is a normal store insn. */ 25544 extract_insn_cached (insn); 25545 if (recog_data.n_operands < 2) 25546 return false; 25547 25548 rtx mem = recog_data.operand[0]; 25549 rtx reg = recog_data.operand[1]; 25550 25551 if (!REG_P (reg) && !SUBREG_P (reg)) 25552 return false; 25553 25554 if (!MEM_P (mem)) 25555 return false; 25556 25557 /* Prefixed store instructions do not support update or indexed forms. */ 25558 if (get_attr_indexed (insn) == INDEXED_YES 25559 || get_attr_update (insn) == UPDATE_YES) 25560 return false; 25561 25562 machine_mode mem_mode = GET_MODE (mem); 25563 rtx addr = XEXP (mem, 0); 25564 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode); 25565 25566 /* Need to make sure we aren't looking at a stfs which doesn't look 25567 like the other things reg_to_non_prefixed/address_is_prefixed 25568 looks for. */ 25569 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn)) 25570 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT); 25571 else 25572 return address_is_prefixed (addr, mem_mode, non_prefixed); 25573} 25574 25575/* Whether a load immediate or add instruction is a prefixed instruction. This 25576 is called from the prefixed attribute processing. */ 25577 25578bool 25579prefixed_paddi_p (rtx_insn *insn) 25580{ 25581 rtx set = single_set (insn); 25582 if (!set) 25583 return false; 25584 25585 rtx dest = SET_DEST (set); 25586 rtx src = SET_SRC (set); 25587 25588 if (!REG_P (dest) && !SUBREG_P (dest)) 25589 return false; 25590 25591 /* Is this a load immediate that can't be done with a simple ADDI or 25592 ADDIS? */ 25593 if (CONST_INT_P (src)) 25594 return (satisfies_constraint_eI (src) 25595 && !satisfies_constraint_I (src) 25596 && !satisfies_constraint_L (src)); 25597 25598 /* Is this a PADDI instruction that can't be done with a simple ADDI or 25599 ADDIS? */ 25600 if (GET_CODE (src) == PLUS) 25601 { 25602 rtx op1 = XEXP (src, 1); 25603 25604 return (CONST_INT_P (op1) 25605 && satisfies_constraint_eI (op1) 25606 && !satisfies_constraint_I (op1) 25607 && !satisfies_constraint_L (op1)); 25608 } 25609 25610 /* If not, is it a load of a PC-relative address? */ 25611 if (!TARGET_PCREL || GET_MODE (dest) != Pmode) 25612 return false; 25613 25614 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST) 25615 return false; 25616 25617 enum insn_form iform = address_to_insn_form (src, Pmode, 25618 NON_PREFIXED_DEFAULT); 25619 25620 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL); 25621} 25622 25623/* Whether the next instruction needs a 'p' prefix issued before the 25624 instruction is printed out. */ 25625static bool next_insn_prefixed_p; 25626 25627/* Define FINAL_PRESCAN_INSN if some processing needs to be done before 25628 outputting the assembler code. On the PowerPC, we remember if the current 25629 insn is a prefixed insn where we need to emit a 'p' before the insn. 25630 25631 In addition, if the insn is part of a PC-relative reference to an external 25632 label optimization, this is recorded also. */ 25633void 25634rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int) 25635{ 25636 next_insn_prefixed_p = (get_attr_prefixed (insn) != PREFIXED_NO); 25637 return; 25638} 25639 25640/* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode. 25641 We use it to emit a 'p' for prefixed insns that is set in 25642 FINAL_PRESCAN_INSN. 
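   For example, an insn that would normally print as "lwz 9,100000(3)" is
   printed as "plwz 9,100000(3)" when the preceding FINAL_PRESCAN_INSN call
   saw the prefixed attribute set; only the leading 'p' is added here, the
   rest of the output template is unchanged.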
*/ 25643 void 25644 rs6000_asm_output_opcode (FILE *stream) 25645 { 25646 if (next_insn_prefixed_p) 25647 fprintf (stream, "p"); 25648 25649 return; 25650 } 25651 25652 /* Adjust the length of an INSN. LENGTH is the currently-computed length and 25653 should be adjusted to reflect any required changes. This macro is used when 25654 there is some systematic length adjustment required that would be difficult 25655 to express in the length attribute. 25656 25657 On the PowerPC, we use this to adjust the length of an instruction if one or 25658 more prefixed instructions are generated, using the attribute 25659 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the 25660 hardware requires that a prefixed instruction does not cross a 64-byte 25661 boundary. This means the compiler has to assume the length of the first 25662 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is 25663 already set for the non-prefixed instruction, we just need to update for the 25664 difference. */ 25665 25666 int 25667 rs6000_adjust_insn_length (rtx_insn *insn, int length) 25668 { 25669 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn)) 25670 { 25671 rtx pattern = PATTERN (insn); 25672 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER 25673 && get_attr_prefixed (insn) == PREFIXED_YES) 25674 { 25675 int num_prefixed = get_attr_max_prefixed_insns (insn); 25676 length += 4 * (num_prefixed + 1); 25677 } 25678 } 25679 25680 return length; 25681 } 25682 25683 25684 #ifdef HAVE_GAS_HIDDEN 25685 # define USE_HIDDEN_LINKONCE 1 25686 #else 25687 # define USE_HIDDEN_LINKONCE 0 25688 #endif 25689 25690 /* Fills in the label name that should be used for a 476 link stack thunk. */ 25691 25692 void 25693 get_ppc476_thunk_name (char name[32]) 25694 { 25695 gcc_assert (TARGET_LINK_STACK); 25696 25697 if (USE_HIDDEN_LINKONCE) 25698 sprintf (name, "__ppc476.get_thunk"); 25699 else 25700 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); 25701 } 25702 25703 /* This function emits the simple thunk routine that is used to preserve 25704 the link stack on the 476 cpu.
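   The thunk body is a single blr; callers use "bl __ppc476.get_thunk"
   immediately followed by mflr, so the bl that obtains the return address is
   matched by a real return and the 476's branch-prediction link stack stays
   balanced, instead of relying on the usual unmatched "bcl 20,31,$+4" trick.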
*/ 25705 25706static void rs6000_code_end (void) ATTRIBUTE_UNUSED; 25707static void 25708rs6000_code_end (void) 25709{ 25710 char name[32]; 25711 tree decl; 25712 25713 if (!TARGET_LINK_STACK) 25714 return; 25715 25716 get_ppc476_thunk_name (name); 25717 25718 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name), 25719 build_function_type_list (void_type_node, NULL_TREE)); 25720 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, 25721 NULL_TREE, void_type_node); 25722 TREE_PUBLIC (decl) = 1; 25723 TREE_STATIC (decl) = 1; 25724 25725#if RS6000_WEAK 25726 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF) 25727 { 25728 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); 25729 targetm.asm_out.unique_section (decl, 0); 25730 switch_to_section (get_named_section (decl, NULL, 0)); 25731 DECL_WEAK (decl) = 1; 25732 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0); 25733 targetm.asm_out.globalize_label (asm_out_file, name); 25734 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN); 25735 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 25736 } 25737 else 25738#endif 25739 { 25740 switch_to_section (text_section); 25741 ASM_OUTPUT_LABEL (asm_out_file, name); 25742 } 25743 25744 DECL_INITIAL (decl) = make_node (BLOCK); 25745 current_function_decl = decl; 25746 allocate_struct_function (decl, false); 25747 init_function_start (decl); 25748 first_function_block_is_cold = false; 25749 /* Make sure unwind info is emitted for the thunk if needed. */ 25750 final_start_function (emit_barrier (), asm_out_file, 1); 25751 25752 fputs ("\tblr\n", asm_out_file); 25753 25754 final_end_function (); 25755 init_insn_lengths (); 25756 free_after_compilation (cfun); 25757 set_cfun (NULL); 25758 current_function_decl = NULL; 25759} 25760 25761/* Add r30 to hard reg set if the prologue sets it up and it is not 25762 pic_offset_table_rtx. */ 25763 25764static void 25765rs6000_set_up_by_prologue (struct hard_reg_set_container *set) 25766{ 25767 if (!TARGET_SINGLE_PIC_BASE 25768 && TARGET_TOC 25769 && TARGET_MINIMAL_TOC 25770 && !constant_pool_empty_p ()) 25771 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); 25772 if (cfun->machine->split_stack_argp_used) 25773 add_to_hard_reg_set (&set->set, Pmode, 12); 25774 25775 /* Make sure the hard reg set doesn't include r2, which was possibly added 25776 via PIC_OFFSET_TABLE_REGNUM. */ 25777 if (TARGET_TOC) 25778 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM); 25779} 25780 25781 25782/* Helper function for rs6000_split_logical to emit a logical instruction after 25783 spliting the operation to single GPR registers. 25784 25785 DEST is the destination register. 25786 OP1 and OP2 are the input source registers. 25787 CODE is the base operation (AND, IOR, XOR, NOT). 25788 MODE is the machine mode. 25789 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 25790 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 25791 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */ 25792 25793static void 25794rs6000_split_logical_inner (rtx dest, 25795 rtx op1, 25796 rtx op2, 25797 enum rtx_code code, 25798 machine_mode mode, 25799 bool complement_final_p, 25800 bool complement_op1_p, 25801 bool complement_op2_p) 25802{ 25803 rtx bool_rtx; 25804 25805 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. 
*/ 25806 if (op2 && CONST_INT_P (op2) 25807 && (mode == SImode || (mode == DImode && TARGET_POWERPC64)) 25808 && !complement_final_p && !complement_op1_p && !complement_op2_p) 25809 { 25810 HOST_WIDE_INT mask = GET_MODE_MASK (mode); 25811 HOST_WIDE_INT value = INTVAL (op2) & mask; 25812 25813 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */ 25814 if (code == AND) 25815 { 25816 if (value == 0) 25817 { 25818 emit_insn (gen_rtx_SET (dest, const0_rtx)); 25819 return; 25820 } 25821 25822 else if (value == mask) 25823 { 25824 if (!rtx_equal_p (dest, op1)) 25825 emit_insn (gen_rtx_SET (dest, op1)); 25826 return; 25827 } 25828 } 25829 25830 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations 25831 into separate ORI/ORIS or XORI/XORIS instructions. */ 25832 else if (code == IOR || code == XOR) 25833 { 25834 if (value == 0) 25835 { 25836 if (!rtx_equal_p (dest, op1)) 25837 emit_insn (gen_rtx_SET (dest, op1)); 25838 return; 25839 } 25840 } 25841 } 25842 25843 if (code == AND && mode == SImode 25844 && !complement_final_p && !complement_op1_p && !complement_op2_p) 25845 { 25846 emit_insn (gen_andsi3 (dest, op1, op2)); 25847 return; 25848 } 25849 25850 if (complement_op1_p) 25851 op1 = gen_rtx_NOT (mode, op1); 25852 25853 if (complement_op2_p) 25854 op2 = gen_rtx_NOT (mode, op2); 25855 25856 /* For canonical RTL, if only one arm is inverted it is the first. */ 25857 if (!complement_op1_p && complement_op2_p) 25858 std::swap (op1, op2); 25859 25860 bool_rtx = ((code == NOT) 25861 ? gen_rtx_NOT (mode, op1) 25862 : gen_rtx_fmt_ee (code, mode, op1, op2)); 25863 25864 if (complement_final_p) 25865 bool_rtx = gen_rtx_NOT (mode, bool_rtx); 25866 25867 emit_insn (gen_rtx_SET (dest, bool_rtx)); 25868 } 25869 25870 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These 25871 operations are split immediately during RTL generation to allow for more 25872 optimizations of the AND/IOR/XOR. 25873 25874 OPERANDS is an array containing the destination and two input operands. 25875 CODE is the base operation (AND, IOR, XOR, NOT). 25876 MODE is the machine mode. 25877 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 25878 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 25879 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. 25880 CLOBBER_REG is either NULL or a scratch register of type CC to allow 25881 formation of the AND instructions.
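   As an illustration (constant chosen arbitrarily), a DImode IOR with
   0x12345678 on a 32-bit target becomes an SImode IOR of 0 on the high word,
   which degenerates into a simple move, and an SImode IOR of 0x12345678 on
   the low word, which is further split into an oris/ori pair because the
   constant does not fit a single logical immediate.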
*/ 25882 25883static void 25884rs6000_split_logical_di (rtx operands[3], 25885 enum rtx_code code, 25886 bool complement_final_p, 25887 bool complement_op1_p, 25888 bool complement_op2_p) 25889{ 25890 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff); 25891 const HOST_WIDE_INT upper_32bits = ~ lower_32bits; 25892 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000); 25893 enum hi_lo { hi = 0, lo = 1 }; 25894 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2]; 25895 size_t i; 25896 25897 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]); 25898 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]); 25899 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]); 25900 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]); 25901 25902 if (code == NOT) 25903 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX; 25904 else 25905 { 25906 if (!CONST_INT_P (operands[2])) 25907 { 25908 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]); 25909 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]); 25910 } 25911 else 25912 { 25913 HOST_WIDE_INT value = INTVAL (operands[2]); 25914 HOST_WIDE_INT value_hi_lo[2]; 25915 25916 gcc_assert (!complement_final_p); 25917 gcc_assert (!complement_op1_p); 25918 gcc_assert (!complement_op2_p); 25919 25920 value_hi_lo[hi] = value >> 32; 25921 value_hi_lo[lo] = value & lower_32bits; 25922 25923 for (i = 0; i < 2; i++) 25924 { 25925 HOST_WIDE_INT sub_value = value_hi_lo[i]; 25926 25927 if (sub_value & sign_bit) 25928 sub_value |= upper_32bits; 25929 25930 op2_hi_lo[i] = GEN_INT (sub_value); 25931 25932 /* If this is an AND instruction, check to see if we need to load 25933 the value in a register. */ 25934 if (code == AND && sub_value != -1 && sub_value != 0 25935 && !and_operand (op2_hi_lo[i], SImode)) 25936 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]); 25937 } 25938 } 25939 } 25940 25941 for (i = 0; i < 2; i++) 25942 { 25943 /* Split large IOR/XOR operations. */ 25944 if ((code == IOR || code == XOR) 25945 && CONST_INT_P (op2_hi_lo[i]) 25946 && !complement_final_p 25947 && !complement_op1_p 25948 && !complement_op2_p 25949 && !logical_const_operand (op2_hi_lo[i], SImode)) 25950 { 25951 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]); 25952 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000); 25953 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff); 25954 rtx tmp = gen_reg_rtx (SImode); 25955 25956 /* Make sure the constant is sign extended. */ 25957 if ((hi_16bits & sign_bit) != 0) 25958 hi_16bits |= upper_32bits; 25959 25960 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits), 25961 code, SImode, false, false, false); 25962 25963 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits), 25964 code, SImode, false, false, false); 25965 } 25966 else 25967 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i], 25968 code, SImode, complement_final_p, 25969 complement_op1_p, complement_op2_p); 25970 } 25971 25972 return; 25973} 25974 25975/* Split the insns that make up boolean operations operating on multiple GPR 25976 registers. The boolean MD patterns ensure that the inputs either are 25977 exactly the same as the output registers, or there is no overlap. 25978 25979 OPERANDS is an array containing the destination and two input operands. 25980 CODE is the base operation (AND, IOR, XOR, NOT). 25981 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT. 25982 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT. 25983 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. 
*/ 25984 25985void 25986rs6000_split_logical (rtx operands[3], 25987 enum rtx_code code, 25988 bool complement_final_p, 25989 bool complement_op1_p, 25990 bool complement_op2_p) 25991{ 25992 machine_mode mode = GET_MODE (operands[0]); 25993 machine_mode sub_mode; 25994 rtx op0, op1, op2; 25995 int sub_size, regno0, regno1, nregs, i; 25996 25997 /* If this is DImode, use the specialized version that can run before 25998 register allocation. */ 25999 if (mode == DImode && !TARGET_POWERPC64) 26000 { 26001 rs6000_split_logical_di (operands, code, complement_final_p, 26002 complement_op1_p, complement_op2_p); 26003 return; 26004 } 26005 26006 op0 = operands[0]; 26007 op1 = operands[1]; 26008 op2 = (code == NOT) ? NULL_RTX : operands[2]; 26009 sub_mode = (TARGET_POWERPC64) ? DImode : SImode; 26010 sub_size = GET_MODE_SIZE (sub_mode); 26011 regno0 = REGNO (op0); 26012 regno1 = REGNO (op1); 26013 26014 gcc_assert (reload_completed); 26015 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26016 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26017 26018 nregs = rs6000_hard_regno_nregs[(int)mode][regno0]; 26019 gcc_assert (nregs > 1); 26020 26021 if (op2 && REG_P (op2)) 26022 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO)); 26023 26024 for (i = 0; i < nregs; i++) 26025 { 26026 int offset = i * sub_size; 26027 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset); 26028 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset); 26029 rtx sub_op2 = ((code == NOT) 26030 ? NULL_RTX 26031 : simplify_subreg (sub_mode, op2, mode, offset)); 26032 26033 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode, 26034 complement_final_p, complement_op1_p, 26035 complement_op2_p); 26036 } 26037 26038 return; 26039} 26040 26041 26042/* Return true if the peephole2 can combine a load involving a combination of 26043 an addis instruction and a load with an offset that can be fused together on 26044 a power8. */ 26045 26046bool 26047fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */ 26048 rtx addis_value, /* addis value. */ 26049 rtx target, /* target register that is loaded. */ 26050 rtx mem) /* bottom part of the memory addr. */ 26051{ 26052 rtx addr; 26053 rtx base_reg; 26054 26055 /* Validate arguments. */ 26056 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) 26057 return false; 26058 26059 if (!base_reg_operand (target, GET_MODE (target))) 26060 return false; 26061 26062 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) 26063 return false; 26064 26065 /* Allow sign/zero extension. */ 26066 if (GET_CODE (mem) == ZERO_EXTEND 26067 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) 26068 mem = XEXP (mem, 0); 26069 26070 if (!MEM_P (mem)) 26071 return false; 26072 26073 if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) 26074 return false; 26075 26076 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */ 26077 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) 26078 return false; 26079 26080 /* Validate that the register used to load the high value is either the 26081 register being loaded, or we can safely replace its use. 26082 26083 This function is only called from the peephole2 pass and we assume that 26084 there are 2 instructions in the peephole (addis and load), so we want to 26085 check if the target register was not used in the memory address and the 26086 register to hold the addis result is dead after the peephole. 
*/ 26087 if (REGNO (addis_reg) != REGNO (target)) 26088 { 26089 if (reg_mentioned_p (target, mem)) 26090 return false; 26091 26092 if (!peep2_reg_dead_p (2, addis_reg)) 26093 return false; 26094 26095 /* If the target register being loaded is the stack pointer, we must 26096 avoid loading any other value into it, even temporarily. */ 26097 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM) 26098 return false; 26099 } 26100 26101 base_reg = XEXP (addr, 0); 26102 return REGNO (addis_reg) == REGNO (base_reg); 26103} 26104 26105/* During the peephole2 pass, adjust and expand the insns for a load fusion 26106 sequence. We adjust the addis register to use the target register. If the 26107 load sign extends, we adjust the code to do the zero extending load, and an 26108 explicit sign extension later since the fusion only covers zero extending 26109 loads. 26110 26111 The operands are: 26112 operands[0] register set with addis (to be replaced with target) 26113 operands[1] value set via addis 26114 operands[2] target register being loaded 26115 operands[3] D-form memory reference using operands[0]. */ 26116 26117void 26118expand_fusion_gpr_load (rtx *operands) 26119{ 26120 rtx addis_value = operands[1]; 26121 rtx target = operands[2]; 26122 rtx orig_mem = operands[3]; 26123 rtx new_addr, new_mem, orig_addr, offset; 26124 enum rtx_code plus_or_lo_sum; 26125 machine_mode target_mode = GET_MODE (target); 26126 machine_mode extend_mode = target_mode; 26127 machine_mode ptr_mode = Pmode; 26128 enum rtx_code extend = UNKNOWN; 26129 26130 if (GET_CODE (orig_mem) == ZERO_EXTEND 26131 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) 26132 { 26133 extend = GET_CODE (orig_mem); 26134 orig_mem = XEXP (orig_mem, 0); 26135 target_mode = GET_MODE (orig_mem); 26136 } 26137 26138 gcc_assert (MEM_P (orig_mem)); 26139 26140 orig_addr = XEXP (orig_mem, 0); 26141 plus_or_lo_sum = GET_CODE (orig_addr); 26142 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM); 26143 26144 offset = XEXP (orig_addr, 1); 26145 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset); 26146 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false); 26147 26148 if (extend != UNKNOWN) 26149 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem); 26150 26151 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem), 26152 UNSPEC_FUSION_GPR); 26153 emit_insn (gen_rtx_SET (target, new_mem)); 26154 26155 if (extend == SIGN_EXTEND) 26156 { 26157 int sub_off = ((BYTES_BIG_ENDIAN) 26158 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode) 26159 : 0); 26160 rtx sign_reg 26161 = simplify_subreg (target_mode, target, extend_mode, sub_off); 26162 26163 emit_insn (gen_rtx_SET (target, 26164 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg))); 26165 } 26166 26167 return; 26168} 26169 26170/* Emit the addis instruction that will be part of a fused instruction 26171 sequence. */ 26172 26173void 26174emit_fusion_addis (rtx target, rtx addis_value) 26175{ 26176 rtx fuse_ops[10]; 26177 const char *addis_str = NULL; 26178 26179 /* Emit the addis instruction. 
*/ 26180 fuse_ops[0] = target; 26181 if (satisfies_constraint_L (addis_value)) 26182 { 26183 fuse_ops[1] = addis_value; 26184 addis_str = "lis %0,%v1"; 26185 } 26186 26187 else if (GET_CODE (addis_value) == PLUS) 26188 { 26189 rtx op0 = XEXP (addis_value, 0); 26190 rtx op1 = XEXP (addis_value, 1); 26191 26192 if (REG_P (op0) && CONST_INT_P (op1) 26193 && satisfies_constraint_L (op1)) 26194 { 26195 fuse_ops[1] = op0; 26196 fuse_ops[2] = op1; 26197 addis_str = "addis %0,%1,%v2"; 26198 } 26199 } 26200 26201 else if (GET_CODE (addis_value) == HIGH) 26202 { 26203 rtx value = XEXP (addis_value, 0); 26204 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) 26205 { 26206 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */ 26207 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */ 26208 if (TARGET_ELF) 26209 addis_str = "addis %0,%2,%1@toc@ha"; 26210 26211 else if (TARGET_XCOFF) 26212 addis_str = "addis %0,%1@u(%2)"; 26213 26214 else 26215 gcc_unreachable (); 26216 } 26217 26218 else if (GET_CODE (value) == PLUS) 26219 { 26220 rtx op0 = XEXP (value, 0); 26221 rtx op1 = XEXP (value, 1); 26222 26223 if (GET_CODE (op0) == UNSPEC 26224 && XINT (op0, 1) == UNSPEC_TOCREL 26225 && CONST_INT_P (op1)) 26226 { 26227 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */ 26228 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */ 26229 fuse_ops[3] = op1; 26230 if (TARGET_ELF) 26231 addis_str = "addis %0,%2,%1+%3@toc@ha"; 26232 26233 else if (TARGET_XCOFF) 26234 addis_str = "addis %0,%1+%3@u(%2)"; 26235 26236 else 26237 gcc_unreachable (); 26238 } 26239 } 26240 26241 else if (satisfies_constraint_L (value)) 26242 { 26243 fuse_ops[1] = value; 26244 addis_str = "lis %0,%v1"; 26245 } 26246 26247 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) 26248 { 26249 fuse_ops[1] = value; 26250 addis_str = "lis %0,%1@ha"; 26251 } 26252 } 26253 26254 if (!addis_str) 26255 fatal_insn ("Could not generate addis value for fusion", addis_value); 26256 26257 output_asm_insn (addis_str, fuse_ops); 26258} 26259 26260/* Emit a D-form load or store instruction that is the second instruction 26261 of a fusion sequence. 
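
   For example (illustrative only), a small constant offset produces
   "lwz 3,8(9)", while a TOC-relative offset produces "lwz 3,sym@toc@l(9)"
   on ELF targets.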
*/ 26262 26263static void 26264emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str) 26265{ 26266 rtx fuse_ops[10]; 26267 char insn_template[80]; 26268 26269 fuse_ops[0] = load_reg; 26270 fuse_ops[1] = addis_reg; 26271 26272 if (CONST_INT_P (offset) && satisfies_constraint_I (offset)) 26273 { 26274 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str); 26275 fuse_ops[2] = offset; 26276 output_asm_insn (insn_template, fuse_ops); 26277 } 26278 26279 else if (GET_CODE (offset) == UNSPEC 26280 && XINT (offset, 1) == UNSPEC_TOCREL) 26281 { 26282 if (TARGET_ELF) 26283 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str); 26284 26285 else if (TARGET_XCOFF) 26286 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); 26287 26288 else 26289 gcc_unreachable (); 26290 26291 fuse_ops[2] = XVECEXP (offset, 0, 0); 26292 output_asm_insn (insn_template, fuse_ops); 26293 } 26294 26295 else if (GET_CODE (offset) == PLUS 26296 && GET_CODE (XEXP (offset, 0)) == UNSPEC 26297 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL 26298 && CONST_INT_P (XEXP (offset, 1))) 26299 { 26300 rtx tocrel_unspec = XEXP (offset, 0); 26301 if (TARGET_ELF) 26302 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str); 26303 26304 else if (TARGET_XCOFF) 26305 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str); 26306 26307 else 26308 gcc_unreachable (); 26309 26310 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0); 26311 fuse_ops[3] = XEXP (offset, 1); 26312 output_asm_insn (insn_template, fuse_ops); 26313 } 26314 26315 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset)) 26316 { 26317 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str); 26318 26319 fuse_ops[2] = offset; 26320 output_asm_insn (insn_template, fuse_ops); 26321 } 26322 26323 else 26324 fatal_insn ("Unable to generate load/store offset for fusion", offset); 26325 26326 return; 26327} 26328 26329/* Given an address, convert it into the addis and load offset parts. Addresses 26330 created during the peephole2 process look like: 26331 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL)) 26332 (unspec [(...)] UNSPEC_TOCREL)) */ 26333 26334static void 26335fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo) 26336{ 26337 rtx hi, lo; 26338 26339 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) 26340 { 26341 hi = XEXP (addr, 0); 26342 lo = XEXP (addr, 1); 26343 } 26344 else 26345 gcc_unreachable (); 26346 26347 *p_hi = hi; 26348 *p_lo = lo; 26349} 26350 26351/* Return a string to fuse an addis instruction with a gpr load to the same 26352 register that we loaded up the addis instruction. The address that is used 26353 is the logical address that was formed during peephole2: 26354 (lo_sum (high) (low-part)) 26355 26356 The code is complicated, so we call output_asm_insn directly, and just 26357 return "". */ 26358 26359const char * 26360emit_fusion_gpr_load (rtx target, rtx mem) 26361{ 26362 rtx addis_value; 26363 rtx addr; 26364 rtx load_offset; 26365 const char *load_str = NULL; 26366 machine_mode mode; 26367 26368 if (GET_CODE (mem) == ZERO_EXTEND) 26369 mem = XEXP (mem, 0); 26370 26371 gcc_assert (REG_P (target) && MEM_P (mem)); 26372 26373 addr = XEXP (mem, 0); 26374 fusion_split_address (addr, &addis_value, &load_offset); 26375 26376 /* Now emit the load instruction to the same register. 
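
   The complete fused pair for, say, an SImode TOC-relative load into r3
   is then (illustrative):

	addis 3,2,sym@toc@ha
	lwz 3,sym@toc@l(3)

   i.e. both the addis and the dependent load target the same register.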
*/ 26377 mode = GET_MODE (mem); 26378 switch (mode) 26379 { 26380 case E_QImode: 26381 load_str = "lbz"; 26382 break; 26383 26384 case E_HImode: 26385 load_str = "lhz"; 26386 break; 26387 26388 case E_SImode: 26389 case E_SFmode: 26390 load_str = "lwz"; 26391 break; 26392 26393 case E_DImode: 26394 case E_DFmode: 26395 gcc_assert (TARGET_POWERPC64); 26396 load_str = "ld"; 26397 break; 26398 26399 default: 26400 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem)); 26401 } 26402 26403 /* Emit the addis instruction. */ 26404 emit_fusion_addis (target, addis_value); 26405 26406 /* Emit the D-form load instruction. */ 26407 emit_fusion_load (target, target, load_offset, load_str); 26408 26409 return ""; 26410} 26411 26412 26413#ifdef RS6000_GLIBC_ATOMIC_FENV 26414/* Function declarations for rs6000_atomic_assign_expand_fenv. */ 26415static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl; 26416#endif 26417 26418/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ 26419 26420static void 26421rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 26422{ 26423 if (!TARGET_HARD_FLOAT) 26424 { 26425#ifdef RS6000_GLIBC_ATOMIC_FENV 26426 if (atomic_hold_decl == NULL_TREE) 26427 { 26428 atomic_hold_decl 26429 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 26430 get_identifier ("__atomic_feholdexcept"), 26431 build_function_type_list (void_type_node, 26432 double_ptr_type_node, 26433 NULL_TREE)); 26434 TREE_PUBLIC (atomic_hold_decl) = 1; 26435 DECL_EXTERNAL (atomic_hold_decl) = 1; 26436 } 26437 26438 if (atomic_clear_decl == NULL_TREE) 26439 { 26440 atomic_clear_decl 26441 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 26442 get_identifier ("__atomic_feclearexcept"), 26443 build_function_type_list (void_type_node, 26444 NULL_TREE)); 26445 TREE_PUBLIC (atomic_clear_decl) = 1; 26446 DECL_EXTERNAL (atomic_clear_decl) = 1; 26447 } 26448 26449 tree const_double = build_qualified_type (double_type_node, 26450 TYPE_QUAL_CONST); 26451 tree const_double_ptr = build_pointer_type (const_double); 26452 if (atomic_update_decl == NULL_TREE) 26453 { 26454 atomic_update_decl 26455 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 26456 get_identifier ("__atomic_feupdateenv"), 26457 build_function_type_list (void_type_node, 26458 const_double_ptr, 26459 NULL_TREE)); 26460 TREE_PUBLIC (atomic_update_decl) = 1; 26461 DECL_EXTERNAL (atomic_update_decl) = 1; 26462 } 26463 26464 tree fenv_var = create_tmp_var_raw (double_type_node); 26465 TREE_ADDRESSABLE (fenv_var) = 1; 26466 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, 26467 build4 (TARGET_EXPR, double_type_node, fenv_var, 26468 void_node, NULL_TREE, NULL_TREE)); 26469 26470 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr); 26471 *clear = build_call_expr (atomic_clear_decl, 0); 26472 *update = build_call_expr (atomic_update_decl, 1, 26473 fold_convert (const_double_ptr, fenv_addr)); 26474#endif 26475 return; 26476 } 26477 26478 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; 26479 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; 26480 tree call_mffs = build_call_expr (mffs, 0); 26481 26482 /* Generates the equivalent of feholdexcept (&fenv_var) 26483 26484 *fenv_var = __builtin_mffs (); 26485 double fenv_hold; 26486 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; 26487 __builtin_mtfsf (0xff, fenv_hold); */ 26488 26489 /* Mask to clear everything except for the rounding modes and non-IEEE 26490 arithmetic flag. 
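   (The mffs result keeps the 32-bit FPSCR in the low word of the double;
   the two least-significant bits are the RN rounding-mode field and the
   bit above them is NI, which is why the low three bits are kept by the
   mask below.)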
  */
  const unsigned HOST_WIDE_INT hold_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
			   NULL_TREE, NULL_TREE);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

     double fenv_clear = __builtin_mffs ();
     *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
     __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask that zeroes the entire lower word of the environment value, i.e.
     all FPSCR bits.  */
  const unsigned HOST_WIDE_INT clear_exception_mask
    = HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
			    call_mffs, NULL_TREE, NULL_TREE);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

     double old_fenv = __builtin_mffs ();
     double fenv_update;
     *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
				(*(uint64_t*)fenv_var & 0x1ff80fff);
     __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask
    = HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask
    = HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
			     call_mffs, NULL_TREE, NULL_TREE);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

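/* Convert the two V2DF vectors SRC1 and SRC2 into a single V4SF vector DST
   by converting each double-precision element to single precision and
   merging the partial results with vmrgew, taking endianness into
   account.  */

void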
26584rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2) 26585{ 26586 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 26587 26588 rtx_tmp0 = gen_reg_rtx (V2DFmode); 26589 rtx_tmp1 = gen_reg_rtx (V2DFmode); 26590 26591 /* The destination of the vmrgew instruction layout is: 26592 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0]. 26593 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the 26594 vmrgew instruction will be correct. */ 26595 if (BYTES_BIG_ENDIAN) 26596 { 26597 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2, 26598 GEN_INT (0))); 26599 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2, 26600 GEN_INT (3))); 26601 } 26602 else 26603 { 26604 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3))); 26605 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0))); 26606 } 26607 26608 rtx_tmp2 = gen_reg_rtx (V4SFmode); 26609 rtx_tmp3 = gen_reg_rtx (V4SFmode); 26610 26611 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0)); 26612 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1)); 26613 26614 if (BYTES_BIG_ENDIAN) 26615 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); 26616 else 26617 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); 26618} 26619 26620void 26621rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2) 26622{ 26623 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 26624 26625 rtx_tmp0 = gen_reg_rtx (V2DImode); 26626 rtx_tmp1 = gen_reg_rtx (V2DImode); 26627 26628 /* The destination of the vmrgew instruction layout is: 26629 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[0]. 26630 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the 26631 vmrgew instruction will be correct. */ 26632 if (BYTES_BIG_ENDIAN) 26633 { 26634 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0))); 26635 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3))); 26636 } 26637 else 26638 { 26639 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3))); 26640 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0))); 26641 } 26642 26643 rtx_tmp2 = gen_reg_rtx (V4SFmode); 26644 rtx_tmp3 = gen_reg_rtx (V4SFmode); 26645 26646 if (signed_convert) 26647 { 26648 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0)); 26649 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1)); 26650 } 26651 else 26652 { 26653 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0)); 26654 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1)); 26655 } 26656 26657 if (BYTES_BIG_ENDIAN) 26658 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3)); 26659 else 26660 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2)); 26661} 26662 26663void 26664rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1, 26665 rtx src2) 26666{ 26667 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3; 26668 26669 rtx_tmp0 = gen_reg_rtx (V2DFmode); 26670 rtx_tmp1 = gen_reg_rtx (V2DFmode); 26671 26672 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0))); 26673 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3))); 26674 26675 rtx_tmp2 = gen_reg_rtx (V4SImode); 26676 rtx_tmp3 = gen_reg_rtx (V4SImode); 26677 26678 if (signed_convert) 26679 { 26680 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0)); 26681 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1)); 26682 } 26683 else 26684 { 26685 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0)); 26686 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1)); 26687 } 26688 26689 emit_insn (gen_p8_vmrgew_v4si 
(dst, rtx_tmp2, rtx_tmp3)); 26690} 26691 26692/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ 26693 26694static bool 26695rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode, 26696 optimization_type opt_type) 26697{ 26698 switch (op) 26699 { 26700 case rsqrt_optab: 26701 return (opt_type == OPTIMIZE_FOR_SPEED 26702 && RS6000_RECIP_AUTO_RSQRTE_P (mode1)); 26703 26704 default: 26705 return true; 26706 } 26707} 26708 26709/* Implement TARGET_CONSTANT_ALIGNMENT. */ 26710 26711static HOST_WIDE_INT 26712rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align) 26713{ 26714 if (TREE_CODE (exp) == STRING_CST 26715 && (STRICT_ALIGNMENT || !optimize_size)) 26716 return MAX (align, BITS_PER_WORD); 26717 return align; 26718} 26719 26720/* Implement TARGET_STARTING_FRAME_OFFSET. */ 26721 26722static HOST_WIDE_INT 26723rs6000_starting_frame_offset (void) 26724{ 26725 if (FRAME_GROWS_DOWNWARD) 26726 return 0; 26727 return RS6000_STARTING_FRAME_OFFSET; 26728} 26729 26730 26731/* Create an alias for a mangled name where we have changed the mangling (in 26732 GCC 8.1, we used U10__float128, and now we use u9__ieee128). This is called 26733 via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME. */ 26734 26735#if TARGET_ELF && RS6000_WEAK 26736static void 26737rs6000_globalize_decl_name (FILE * stream, tree decl) 26738{ 26739 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0); 26740 26741 targetm.asm_out.globalize_label (stream, name); 26742 26743 if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z') 26744 { 26745 tree save_asm_name = DECL_ASSEMBLER_NAME (decl); 26746 const char *old_name; 26747 26748 ieee128_mangling_gcc_8_1 = true; 26749 lang_hooks.set_decl_assembler_name (decl); 26750 old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 26751 SET_DECL_ASSEMBLER_NAME (decl, save_asm_name); 26752 ieee128_mangling_gcc_8_1 = false; 26753 26754 if (strcmp (name, old_name) != 0) 26755 { 26756 fprintf (stream, "\t.weak %s\n", old_name); 26757 fprintf (stream, "\t.set %s,%s\n", old_name, name); 26758 } 26759 } 26760} 26761#endif 26762 26763 26764/* On 64-bit Linux and Freebsd systems, possibly switch the long double library 26765 function names from <foo>l to <foo>f128 if the default long double type is 26766 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h 26767 include file switches the names on systems that support long double as IEEE 26768 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly. 26769 In the future, glibc will export names like __ieee128_sinf128 and we can 26770 switch to using those instead of using sinf128, which pollutes the user's 26771 namespace. 26772 26773 This will switch the names for Fortran math functions as well (which doesn't 26774 use math.h). However, Fortran needs other changes to the compiler and 26775 library before you can switch the real*16 type at compile time. 26776 26777 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We 26778 only do this if the default is that long double is IBM extended double, and 26779 the user asked for IEEE 128-bit. 
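
   For example (illustrative), when the IEEE 128-bit long double ABI is in
   effect on such a system, a call to __builtin_sinl whose long double
   argument and result use TFmode is emitted as a call to "sinf128" rather
   than "sinl".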
*/ 26780 26781static tree 26782rs6000_mangle_decl_assembler_name (tree decl, tree id) 26783{ 26784 if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 26785 && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) ) 26786 { 26787 size_t len = IDENTIFIER_LENGTH (id); 26788 const char *name = IDENTIFIER_POINTER (id); 26789 26790 if (name[len - 1] == 'l') 26791 { 26792 bool uses_ieee128_p = false; 26793 tree type = TREE_TYPE (decl); 26794 machine_mode ret_mode = TYPE_MODE (type); 26795 26796 /* See if the function returns a IEEE 128-bit floating point type or 26797 complex type. */ 26798 if (ret_mode == TFmode || ret_mode == TCmode) 26799 uses_ieee128_p = true; 26800 else 26801 { 26802 function_args_iterator args_iter; 26803 tree arg; 26804 26805 /* See if the function passes a IEEE 128-bit floating point type 26806 or complex type. */ 26807 FOREACH_FUNCTION_ARGS (type, arg, args_iter) 26808 { 26809 machine_mode arg_mode = TYPE_MODE (arg); 26810 if (arg_mode == TFmode || arg_mode == TCmode) 26811 { 26812 uses_ieee128_p = true; 26813 break; 26814 } 26815 } 26816 } 26817 26818 /* If we passed or returned an IEEE 128-bit floating point type, 26819 change the name. */ 26820 if (uses_ieee128_p) 26821 { 26822 char *name2 = (char *) alloca (len + 4); 26823 memcpy (name2, name, len - 1); 26824 strcpy (name2 + len - 1, "f128"); 26825 id = get_identifier (name2); 26826 } 26827 } 26828 } 26829 26830 return id; 26831} 26832 26833/* Predict whether the given loop in gimple will be transformed in the RTL 26834 doloop_optimize pass. */ 26835 26836static bool 26837rs6000_predict_doloop_p (struct loop *loop) 26838{ 26839 gcc_assert (loop); 26840 26841 /* On rs6000, targetm.can_use_doloop_p is actually 26842 can_use_doloop_if_innermost. Just ensure the loop is innermost. */ 26843 if (loop->inner != NULL) 26844 { 26845 if (dump_file && (dump_flags & TDF_DETAILS)) 26846 fprintf (dump_file, "Predict doloop failure due to" 26847 " loop nesting.\n"); 26848 return false; 26849 } 26850 26851 return true; 26852} 26853 26854/* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */ 26855 26856static bool 26857rs6000_cannot_substitute_mem_equiv_p (rtx mem) 26858{ 26859 gcc_assert (MEM_P (mem)); 26860 26861 /* curr_insn_transform()'s handling of subregs cannot handle altivec AND: 26862 type addresses, so don't allow MEMs with those address types to be 26863 substituted as an equivalent expression. See PR93974 for details. */ 26864 if (GET_CODE (XEXP (mem, 0)) == AND) 26865 return true; 26866 26867 return false; 26868} 26869 26870/* Implement TARGET_INVALID_CONVERSION. */ 26871 26872static const char * 26873rs6000_invalid_conversion (const_tree fromtype, const_tree totype) 26874{ 26875 /* Make sure we're working with the canonical types. */ 26876 if (TYPE_CANONICAL (fromtype) != NULL_TREE) 26877 fromtype = TYPE_CANONICAL (fromtype); 26878 if (TYPE_CANONICAL (totype) != NULL_TREE) 26879 totype = TYPE_CANONICAL (totype); 26880 26881 machine_mode frommode = TYPE_MODE (fromtype); 26882 machine_mode tomode = TYPE_MODE (totype); 26883 26884 if (frommode != tomode) 26885 { 26886 /* Do not allow conversions to/from PXImode and POImode types. 
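
     For example (hypothetical source), initializing or casting between a
     __vector_quad value and a vector double value is diagnosed with
     "invalid conversion from type __vector_quad" (or the corresponding
     "to type" message for the other direction).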
  */
      if (frommode == PXImode)
	return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == PXImode)
	return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == POImode)
	return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == POImode)
	return N_("invalid conversion to type %<__vector_pair%>");
    }

  /* Conversion allowed.  */
  return NULL;
}

/* If the given TYPE is one of the MMA opaque types, emit the corresponding
   error message and return true, otherwise return false.  */

static inline bool
check_and_error_invalid_use (tree type)
{
  tree mv = TYPE_MAIN_VARIANT (type);
  if (mv == vector_quad_type_node)
    {
      error ("type %<__vector_quad%> requires the %qs option", "-mmma");
      return true;
    }
  else if (mv == vector_pair_type_node)
    {
      error ("type %<__vector_pair%> requires the %qs option", "-mmma");
      return true;
    }
  return false;
}

/* There are currently only two opaque types, the __vector_quad and
   __vector_pair built-in types.  They are target specific and only
   available when MMA is supported.  When MMA is supported this simply
   returns false.  Otherwise it checks whether the given gimple STMT is an
   assignment, asm or call statement that uses either of these opaque types
   unexpectedly; if so, it emits an error message and returns true,
   otherwise it returns false.  */

bool
rs6000_opaque_type_invalid_use_p (gimple *stmt)
{
  if (TARGET_MMA)
    return false;

  if (stmt)
    {
      /* The usage of MMA opaque types is very limited for now, so checking
	 gassign, gasm and gcall is enough so far.  */
      if (gassign *ga = dyn_cast<gassign *> (stmt))
	{
	  tree lhs = gimple_assign_lhs (ga);
	  tree type = TREE_TYPE (lhs);
	  if (check_and_error_invalid_use (type))
	    return true;
	}
      else if (gasm *gs = dyn_cast<gasm *> (stmt))
	{
	  unsigned ninputs = gimple_asm_ninputs (gs);
	  for (unsigned i = 0; i < ninputs; i++)
	    {
	      tree op = gimple_asm_input_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	  unsigned noutputs = gimple_asm_noutputs (gs);
	  for (unsigned i = 0; i < noutputs; i++)
	    {
	      tree op = gimple_asm_output_op (gs, i);
	      tree val = TREE_VALUE (op);
	      tree type = TREE_TYPE (val);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
      else if (gcall *gc = dyn_cast<gcall *> (stmt))
	{
	  unsigned nargs = gimple_call_num_args (gc);
	  for (unsigned i = 0; i < nargs; i++)
	    {
	      tree arg = gimple_call_arg (gc, i);
	      tree type = TREE_TYPE (arg);
	      if (check_and_error_invalid_use (type))
		return true;
	    }
	}
    }

  return false;
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"
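
/* Illustrative (hypothetical) example for rs6000_opaque_type_invalid_use_p
   above: in a function compiled without MMA support, a gimple assignment
   whose left-hand side has type __vector_quad (for instance "a = b;" where
   both are declared as __vector_quad) is diagnosed with
   "type __vector_quad requires the -mmma option".  */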