1/* Subroutines used for code generation on IA-32. 2 Copyright (C) 1988-2020 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify 7it under the terms of the GNU General Public License as published by 8the Free Software Foundation; either version 3, or (at your option) 9any later version. 10 11GCC is distributed in the hope that it will be useful, 12but WITHOUT ANY WARRANTY; without even the implied warranty of 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14GNU General Public License for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20#define IN_TARGET_CODE 1 21 22#include "config.h" 23#include "system.h" 24#include "coretypes.h" 25#include "backend.h" 26#include "rtl.h" 27#include "tree.h" 28#include "memmodel.h" 29#include "gimple.h" 30#include "cfghooks.h" 31#include "cfgloop.h" 32#include "df.h" 33#include "tm_p.h" 34#include "stringpool.h" 35#include "expmed.h" 36#include "optabs.h" 37#include "regs.h" 38#include "emit-rtl.h" 39#include "recog.h" 40#include "cgraph.h" 41#include "diagnostic.h" 42#include "cfgbuild.h" 43#include "alias.h" 44#include "fold-const.h" 45#include "attribs.h" 46#include "calls.h" 47#include "stor-layout.h" 48#include "varasm.h" 49#include "output.h" 50#include "insn-attr.h" 51#include "flags.h" 52#include "except.h" 53#include "explow.h" 54#include "expr.h" 55#include "cfgrtl.h" 56#include "common/common-target.h" 57#include "langhooks.h" 58#include "reload.h" 59#include "gimplify.h" 60#include "dwarf2.h" 61#include "tm-constrs.h" 62#include "cselib.h" 63#include "sched-int.h" 64#include "opts.h" 65#include "tree-pass.h" 66#include "context.h" 67#include "pass_manager.h" 68#include "target-globals.h" 69#include "gimple-iterator.h" 70#include "tree-vectorizer.h" 71#include "shrink-wrap.h" 72#include "builtins.h" 
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-options.h"
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
#include "function-abi.h"

/* This file should be included last.  */
#include "target-def.h"

/* Forward declarations for helpers defined later in this file.  */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx);


#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   Modes wider than DImode share the last table slot.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)


/* Cost table selected by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Cost table selected by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
*/
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
};

/* The "default" register map used in 32bit mode: maps a GCC hard
   register number to the corresponding debug-info register number.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};

/* The "default" register map used in 64bit mode.
*/
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};

/* Define parameter passing and return registers.  */

/* Registers used for integer argument passing in the SysV 64-bit ABI.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Registers used for integer argument passing in the MS 64-bit ABI.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Registers used for integer return values in 64-bit mode.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};

/* Define the structure for the machine field in struct function.
*/

/* One cached stack slot, identified by its machine mode and an index N
   within that mode; entries are chained into a singly linked list.  */
struct GTY(()) stack_local_entry {
  unsigned short mode;			/* Machine mode of the slot.  */
  unsigned short n;			/* Index of the slot within MODE.  */
  rtx rtl;				/* RTL referring to the slot.  */
  struct stack_local_entry *next;	/* Next entry in the list.  */
};

/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of register classes (eightbytes) a single argument
   can occupy when being classified.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init;


/* Forward declarations for functions defined later in this file.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);


/* Whether -mtune= or -march= were specified */
int ix86_tune_defaulted;
int ix86_arch_specified;

/* Return true if a red-zone is in use.  We can't use red-zone when
   there are local indirect jumps, like "indirect_jump" or "tablejump",
   which jumps to another place in the function, since "call" in the
   indirect thunk pushes the return address onto stack, destroying
   red-zone.

   TODO: If we can reserve the first 2 WORDs, for PUSH and, another
   for CALL, in red-zone, we can allow local indirect jumps with
   indirect thunk.
*/

bool
ix86_using_red_zone (void)
{
  return (TARGET_RED_ZONE
	  && !TARGET_64BIT_MS_ABI
	  && (!cfun->machine->has_local_indirect_jump
	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
}

/* Return true if profiling code should be emitted before the
   prologue; that is the case when -mfentry is in effect
   (flag_fentry nonzero).  Otherwise return false.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}

/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers,
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }
}

/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.
*/
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	}
    }
}


/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  We
     assume the user knows what they are doing.  */
  const unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  /* See if arch, tune, etc. are the same.  */
  else if (caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}

/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  /* Large data sections only exist in the medium code models.  */
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  if (exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* Automatic variables are never large data.
*/
  if (VAR_P (exp) && !is_global_var (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      /* An explicit section placement decides: only the .ldata/.lbss
	 sections are considered large.  */
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  Also,
	 int_size_in_bytes returns -1 if size can vary or is larger than
	 an integer in which case also it is safer to assume that it goes in
	 large data.  */
      if (size <= 0 || size > ix86_section_threshold)
	return true;
    }

  return false;
}

/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP

/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}

/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.
*/

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (ix86_in_large_data_p (decl))
    flags |= SECTION_LARGE;

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
      || strncmp (name, ".gnu.linkonce.lb.",
		  sizeof (".gnu.linkonce.lb.") - 1) == 0)
    flags |= SECTION_BSS;

  return flags;
}

/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}

#ifdef COMMON_ASM_OP

#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif

/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */
void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif

/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.
*/

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, int align)
{
  /* Large medium-model objects go into .lbss; everything else into the
     normal .bss section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}

/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  bool bind_global = decl && !targetm.binds_local_p (decl);

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->no_caller_saved_registers)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}

/* Determine from TYPE the calling convention as an IX86_CALLCVT_* mask.
*/

unsigned int
ix86_get_callcvt (const_tree type)
{
  /* Everything uses the plain C convention in 64-bit mode; the
     alternative conventions below are 32-bit only.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  unsigned int cvt = 0;
  tree attrs = TYPE_ATTRIBUTES (type);

  if (attrs != NULL_TREE)
    {
      /* At most one base convention can be requested by attribute.  */
      if (lookup_attribute ("cdecl", attrs))
	cvt |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	cvt |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	cvt |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	cvt |= IX86_CALLCVT_THISCALL;

      /* regparm/sseregparm aren't allowed together with thiscall or
	 fastcall, which dictate the register usage themselves.  */
      if ((cvt & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    cvt |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    cvt |= IX86_CALLCVT_SSEREGPARM;
	}

      /* An explicitly requested base convention is final.  */
      if (IX86_BASE_CALLCVT(cvt) != 0)
	return cvt;
    }

  bool is_stdarg = stdarg_p (type);

  /* -mrtd makes stdcall the default for non-variadic functions.  */
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | cvt;

  /* A non-variadic MS-ABI method with no other convention bits defaults
     to thiscall; everything else defaults to cdecl.  */
  if (cvt == 0
      && !is_stdarg
      && TREE_CODE (type) == METHOD_TYPE
      && ix86_function_type_abi (type) == MS_ABI)
    return IX86_CALLCVT_THISCALL;

  return IX86_CALLCVT_CDECL | cvt;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated.
*/

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  /* Only function and method types carry i386 calling-convention
     attributes; anything else is trivially compatible.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  /* Both the calling convention and the regparm count must agree.  */
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}

/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* In 64-bit mode the regparm count is fixed by the ABI.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  /* An explicit regparm attribute overrides the -mregparm default.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          return regparm;
        }
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
        target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention.  Checking
         just the 'optimize' flag here would mean that with
         __attribute__((optimize (...))) the caller could use the regparm
         convention and the callee not, or vice versa.  Instead look at
         whether the callee itself is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
          && !(profile_flag && !flag_fentry))
        {
          if (target->local && target->can_change_signature)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a
                 fixed register variable.  */
              for (local_regparm = 0; local_regparm < REGPARM_MAX;
                   local_regparm++)
                if (fixed_regs[local_regparm])
                  break;

              /* We don't want to use regparm(3) for nested functions as
                 these use a static chain pointer in the third argument.  */
              if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
                local_regparm = 2;

              /* Save a register for the split stack.  */
              if (flag_split_stack)
                {
                  if (local_regparm == 3)
                    local_regparm = 2;
                  else if (local_regparm == 2
                           && DECL_STATIC_CHAIN (target->decl))
                    local_regparm = 1;
                }

              /* Each fixed register usage increases register pressure,
                 so less registers should be used for argument passing.
                 This functionality can be overridden by an explicit
                 regparm value.  */
              for (regno = AX_REG; regno <= DI_REG; regno++)
                if (fixed_regs[regno])
                  globals++;

              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }

  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in situations where we imply
   the SSE calling convention but the function is called from another
   function with SSE disabled.  Otherwise return 0.
*/

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* sseregparm is a 32-bit-only convention.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          /* The requested convention cannot be honored without SSE;
             emit a hard error when WARN is set.  */
          if (warn)
            {
              if (decl)
                error ("calling %qD with attribute sseregparm without "
                       "SSE/SSE2 enabled", decl);
              else
                error ("calling %qT with attribute sseregparm without "
                       "SSE/SSE2 enabled", type);
            }
          return 0;
        }

      return 2;
    }

  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
        {
          /* Refuse to produce wrong code when local function with SSE enabled
             is called from SSE disabled function.
             FIXME: We need a way to detect these cases cross-ltrans partition
             and avoid using SSE calling conventions on local functions called
             from function with SSE disabled.  For now at least delay the
             warning until we know we are going to produce wrong code.
             See PR66047  */
          if (!TARGET_SSE && warn)
            return -1;
          return TARGET_SSE2_P (target_opts_for_fn (target->decl)
                                ->x_ix86_isa_flags) ? 2 : 1;
        }
    }

  return 0;
}

/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.
*/

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}

/* Return true if the hidden aggregate-return pointer of a function of
   type FNTYPE should be left on the stack for the caller (i.e. NOT be
   popped by the callee on return).  */

static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      /* An explicit callee_pop_aggregate_return attribute decides;
         argument 0 means the callee does not pop the pointer.  */
      attr = lookup_attribute ("callee_pop_aggregate_return",
                               TYPE_ATTRIBUTES (fntype));
      if (attr)
        return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
         return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
        return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}

/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.
*/

static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  /* stdcall, fastcall and thiscall are callee-pop conventions for
     non-variadic functions: pop the whole stack argument block.  */
  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
               | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}

/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
         is irrelevant for matching constraints.  */
      if (UNARY_P (op))
        op = XEXP (op, 0);

      /* Look through a subreg of a hard register, remembering the
         byte offset it applies to the underlying register.  */
      if (SUBREG_P (op))
        {
          if (REG_P (SUBREG_REG (op))
              && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
            offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
                                          GET_MODE (SUBREG_REG (op)),
                                          SUBREG_BYTE (op),
                                          GET_MODE (op));
          op = SUBREG_REG (op);
        }

      /* Only hard-register operands need validating here.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
        continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* The operand is acceptable if some preferred alternative
         admits the hard register.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
        {
          if (!TEST_BIT (preferred, j))
            continue;
          if (op_alt[i].anything_ok
              || (op_alt[i].matches != -1
                  && operands_match_p
                  (recog_data.operand[i],
                   recog_data.operand[op_alt[i].matches]))
              || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
            {
              win = true;
              break;
            }
        }

      if (!win)
        return false;
    }

  return true;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
                       : HOST_WIDE_INT_C (0x7fff8000))
                     : (HOST_WIDE_INT_1 << X86_32_ASAN_BIT_OFFSET);
}

/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
    return true;

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}

/* Return if we do not know how to pass ARG solely in registers.  */

static bool
ix86_must_pass_in_stack (const function_arg_info &arg)
{
  if (must_pass_in_stack_var_size_or_pad (arg))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && arg.mode == TImode
          && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
}

/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  /* The 64-bit MS ABI reserves a 32-byte register-parameter area.  */
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}

/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class);
}

/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      /* Diagnose the unsupported combination only once per run.  */
      static int warned;
      if (TARGET_X32 && !warned)
        {
          error ("X32 does not support %<ms_abi%> attribute");
          warned = 1;
        }

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
           && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}

/* Return the calling ABI of FNDECL, or the default ABI if FNDECL is null.  */
enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
   specifying the call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  return cfun ? cfun->machine->call_abi : ix86_abi;
}

/* Return true if FN has the ms_hook_prologue attribute; reject (with an
   error) its use on nested functions.  */
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "%<ms_hook_prologue%> attribute is not compatible "
                  "with nested function");
      else
        return true;
    }
  return false;
}

/* Return true if FN has the naked attribute.  */
bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}

/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
                                tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      /* Emit 0xCC (int3) filler bytes before the label so a hot-patch
         trampoline can be installed there.  */
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
        {
          /* leaq [%rsp + 0], %rsp  */
          fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
                 asm_out_file);
        }
      else
        {
          /* movl.s %edi, %edi
             push %ebp
             movl.s %esp, %ebp  */
          fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
        }
    }
}

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.
*/
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}

/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */
bool
ix86_use_pseudo_pic_reg (void)
{
  if ((TARGET_64BIT
       && (ix86_cmodel == CM_SMALL_PIC
           || TARGET_PECOFF))
      || !flag_pic)
    return false;
  return true;
}

/* Initialize large model PIC register.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* Compute the GOT address as the %rip-relative address of LABEL plus
     the label-to-GOT offset materialized in TMP_REG.  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Turn the label into a deleted-label note, keeping its name alive
     for assembly/debug output.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}

/* Create and initialize PIC register if required.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
        ix86_init_large_pic_reg (R11_REG);
      else
        emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
         to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
                ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
                : pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
        emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  /* Insert the initialization on the entry edge so it runs exactly once,
     before any use of the PIC register.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl,
                      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  memset (cum, 0, sizeof (*cum));

  /* Determine the callee's ABI, preferring the cgraph node's real
     symbol when one is available.  */
  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
        {
          target = target->function_symbol ();
          local_info_node = cgraph_node::local_info_node (target->decl);
          cum->call_abi = ix86_function_abi (target->decl);
        }
      else
        cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    cum->call_abi = ix86_function_type_abi (fntype);

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
                    ? X86_64_REGPARM_MAX
                    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        {
          cum->sse_nregs = (cum->call_abi == SYSV_ABI
                            ? X86_64_SSE_REGPARM_MAX
                            : X86_64_MS_SSE_REGPARM_MAX);
        }
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  cum->decl = fndecl;

  /* Decide whether to warn about empty record arguments: only when
     -Wabi is active and an empty type is followed by a non-empty one
     in the argument list.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
        {
          if (argtype == error_mark_node || VOID_TYPE_P (argtype))
            break;
          if (TYPE_EMPTY_P (argtype))
            seen_empty_type = true;
          else if (seen_empty_type)
            {
              cum->warn_empty = true;
              break;
            }
        }
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          /* Since in 32-bit, variable arguments are always passed on
             stack, there is scratch register available for indirect
             sibcall.  */
          cfun->machine->arg_reg_available = true;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          cum->warn_avx512f = false;
          cum->warn_avx = false;
          cum->warn_sse = false;
          cum->warn_mmx = false;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
            {
              cum->nregs = 1;
              cum->fastcall = 1;  /* Same first register as in fastcall.  */
            }
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}

/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If INT_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.
*/

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
                   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          /* There are no XFmode vector modes.  */
          if (innermode == XFmode)
            return mode;

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          FOR_EACH_MODE_FROM (mode, mode)
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                /* Each case below warns (at most once per kind and
                   direction, via the static flags) when the natural
                   vector mode needs an ISA extension that is not
                   enabled, since that changes the ABI.  */
                if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
                  {
                    static bool warnedavx512f;
                    static bool warnedavx512f_ret;

                    if (cum && cum->warn_avx512f && !warnedavx512f)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector argument "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f = true;
                      }
                    else if (in_return && !warnedavx512f_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector return "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
                  {
                    static bool warnedavx;
                    static bool warnedavx_ret;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector argument "
                                     "without AVX enabled changes the ABI"))
                          warnedavx = true;
                      }
                    else if (in_return && !warnedavx_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector return "
                                     "without AVX enabled changes the ABI"))
                          warnedavx_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (((size == 8 && TARGET_64BIT) || size == 16)
                         && !TARGET_SSE
                         && !TARGET_IAMCU)
                  {
                    static bool warnedsse;
                    static bool warnedsse_ret;

                    if (cum && cum->warn_sse && !warnedsse)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector argument "
                                     "without SSE enabled changes the ABI"))
                          warnedsse = true;
                      }
                    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector return "
                                     "without SSE enabled changes the ABI"))
                          warnedsse_ret = true;
                      }
                  }
                else if ((size == 8 && !TARGET_64BIT)
                         && (!cfun
                             || cfun->machine->func_type == TYPE_NORMAL)
                         && !TARGET_MMX
                         && !TARGET_IAMCU)
                  {
                    static bool warnedmmx;
                    static bool warnedmmx_ret;

                    if (cum && cum->warn_mmx && !warnedmmx)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector argument "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx = true;
                      }
                    else if (in_return && !warnedmmx_ret)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector return "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx_ret = true;
                      }
                  }
                return mode;
              }

          gcc_unreachable ();
        }
    }

  return mode;
}

/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.
*/

static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      /* For BLKmode, wrap the register in a one-element PARALLEL at
         offset zero.  */
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}

/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
     (INTEGERSI merged with SSESF stays INTEGERSI: both halves fit in
     32 bits.)  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}

/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
        return 0;
    }

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.
We return 0 to 2083 signalize memory class, so handle it as special case. */ 2084 if (!words) 2085 { 2086 classes[0] = X86_64_NO_CLASS; 2087 return 1; 2088 } 2089 2090 /* Classify each field of record and merge classes. */ 2091 switch (TREE_CODE (type)) 2092 { 2093 case RECORD_TYPE: 2094 /* And now merge the fields of structure. */ 2095 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 2096 { 2097 if (TREE_CODE (field) == FIELD_DECL) 2098 { 2099 int num; 2100 2101 if (TREE_TYPE (field) == error_mark_node) 2102 continue; 2103 2104 /* Bitfields are always classified as integer. Handle them 2105 early, since later code would consider them to be 2106 misaligned integers. */ 2107 if (DECL_BIT_FIELD (field)) 2108 { 2109 for (i = (int_bit_position (field) 2110 + (bit_offset % 64)) / 8 / 8; 2111 i < ((int_bit_position (field) + (bit_offset % 64)) 2112 + tree_to_shwi (DECL_SIZE (field)) 2113 + 63) / 8 / 8; i++) 2114 classes[i] 2115 = merge_classes (X86_64_INTEGER_CLASS, classes[i]); 2116 } 2117 else 2118 { 2119 int pos; 2120 2121 type = TREE_TYPE (field); 2122 2123 /* Flexible array member is ignored. 
*/ 2124 if (TYPE_MODE (type) == BLKmode 2125 && TREE_CODE (type) == ARRAY_TYPE 2126 && TYPE_SIZE (type) == NULL_TREE 2127 && TYPE_DOMAIN (type) != NULL_TREE 2128 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) 2129 == NULL_TREE)) 2130 { 2131 static bool warned; 2132 2133 if (!warned && warn_psabi) 2134 { 2135 warned = true; 2136 inform (input_location, 2137 "the ABI of passing struct with" 2138 " a flexible array member has" 2139 " changed in GCC 4.4"); 2140 } 2141 continue; 2142 } 2143 num = classify_argument (TYPE_MODE (type), type, 2144 subclasses, 2145 (int_bit_position (field) 2146 + bit_offset) % 512); 2147 if (!num) 2148 return 0; 2149 pos = (int_bit_position (field) 2150 + (bit_offset % 64)) / 8 / 8; 2151 for (i = 0; i < num && (i + pos) < words; i++) 2152 classes[i + pos] 2153 = merge_classes (subclasses[i], classes[i + pos]); 2154 } 2155 } 2156 } 2157 break; 2158 2159 case ARRAY_TYPE: 2160 /* Arrays are handled as small records. */ 2161 { 2162 int num; 2163 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 2164 TREE_TYPE (type), subclasses, bit_offset); 2165 if (!num) 2166 return 0; 2167 2168 /* The partial classes are now full classes. */ 2169 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 2170 subclasses[0] = X86_64_SSE_CLASS; 2171 if (subclasses[0] == X86_64_INTEGERSI_CLASS 2172 && !((bit_offset % 64) == 0 && bytes == 4)) 2173 subclasses[0] = X86_64_INTEGER_CLASS; 2174 2175 for (i = 0; i < words; i++) 2176 classes[i] = subclasses[i % num]; 2177 2178 break; 2179 } 2180 case UNION_TYPE: 2181 case QUAL_UNION_TYPE: 2182 /* Unions are similar to RECORD_TYPE but offset is always 0. 
2183 */ 2184 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 2185 { 2186 if (TREE_CODE (field) == FIELD_DECL) 2187 { 2188 int num; 2189 2190 if (TREE_TYPE (field) == error_mark_node) 2191 continue; 2192 2193 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 2194 TREE_TYPE (field), subclasses, 2195 bit_offset); 2196 if (!num) 2197 return 0; 2198 for (i = 0; i < num && i < words; i++) 2199 classes[i] = merge_classes (subclasses[i], classes[i]); 2200 } 2201 } 2202 break; 2203 2204 default: 2205 gcc_unreachable (); 2206 } 2207 2208 if (words > 2) 2209 { 2210 /* When size > 16 bytes, if the first one isn't 2211 X86_64_SSE_CLASS or any other ones aren't 2212 X86_64_SSEUP_CLASS, everything should be passed in 2213 memory. */ 2214 if (classes[0] != X86_64_SSE_CLASS) 2215 return 0; 2216 2217 for (i = 1; i < words; i++) 2218 if (classes[i] != X86_64_SSEUP_CLASS) 2219 return 0; 2220 } 2221 2222 /* Final merger cleanup. */ 2223 for (i = 0; i < words; i++) 2224 { 2225 /* If one class is MEMORY, everything should be passed in 2226 memory. */ 2227 if (classes[i] == X86_64_MEMORY_CLASS) 2228 return 0; 2229 2230 /* The X86_64_SSEUP_CLASS should be always preceded by 2231 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ 2232 if (classes[i] == X86_64_SSEUP_CLASS 2233 && classes[i - 1] != X86_64_SSE_CLASS 2234 && classes[i - 1] != X86_64_SSEUP_CLASS) 2235 { 2236 /* The first one should never be X86_64_SSEUP_CLASS. */ 2237 gcc_assert (i != 0); 2238 classes[i] = X86_64_SSE_CLASS; 2239 } 2240 2241 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, 2242 everything should be passed in memory. */ 2243 if (classes[i] == X86_64_X87UP_CLASS 2244 && (classes[i - 1] != X86_64_X87_CLASS)) 2245 { 2246 static bool warned; 2247 2248 /* The first one should never be X86_64_X87UP_CLASS. 
*/ 2249 gcc_assert (i != 0); 2250 if (!warned && warn_psabi) 2251 { 2252 warned = true; 2253 inform (input_location, 2254 "the ABI of passing union with %<long double%>" 2255 " has changed in GCC 4.4"); 2256 } 2257 return 0; 2258 } 2259 } 2260 return words; 2261 } 2262 2263 /* Compute alignment needed. We align all types to natural boundaries with 2264 exception of XFmode that is aligned to 64bits. */ 2265 if (mode != VOIDmode && mode != BLKmode) 2266 { 2267 int mode_alignment = GET_MODE_BITSIZE (mode); 2268 2269 if (mode == XFmode) 2270 mode_alignment = 128; 2271 else if (mode == XCmode) 2272 mode_alignment = 256; 2273 if (COMPLEX_MODE_P (mode)) 2274 mode_alignment /= 2; 2275 /* Misaligned fields are always returned in memory. */ 2276 if (bit_offset % mode_alignment) 2277 return 0; 2278 } 2279 2280 /* for V1xx modes, just use the base mode */ 2281 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode 2282 && GET_MODE_UNIT_SIZE (mode) == bytes) 2283 mode = GET_MODE_INNER (mode); 2284 2285 /* Classification of atomic types. */ 2286 switch (mode) 2287 { 2288 case E_SDmode: 2289 case E_DDmode: 2290 classes[0] = X86_64_SSE_CLASS; 2291 return 1; 2292 case E_TDmode: 2293 classes[0] = X86_64_SSE_CLASS; 2294 classes[1] = X86_64_SSEUP_CLASS; 2295 return 2; 2296 case E_DImode: 2297 case E_SImode: 2298 case E_HImode: 2299 case E_QImode: 2300 case E_CSImode: 2301 case E_CHImode: 2302 case E_CQImode: 2303 { 2304 int size = bit_offset + (int) GET_MODE_BITSIZE (mode); 2305 2306 /* Analyze last 128 bits only. 
*/ 2307 size = (size - 1) & 0x7f; 2308 2309 if (size < 32) 2310 { 2311 classes[0] = X86_64_INTEGERSI_CLASS; 2312 return 1; 2313 } 2314 else if (size < 64) 2315 { 2316 classes[0] = X86_64_INTEGER_CLASS; 2317 return 1; 2318 } 2319 else if (size < 64+32) 2320 { 2321 classes[0] = X86_64_INTEGER_CLASS; 2322 classes[1] = X86_64_INTEGERSI_CLASS; 2323 return 2; 2324 } 2325 else if (size < 64+64) 2326 { 2327 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2328 return 2; 2329 } 2330 else 2331 gcc_unreachable (); 2332 } 2333 case E_CDImode: 2334 case E_TImode: 2335 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 2336 return 2; 2337 case E_COImode: 2338 case E_OImode: 2339 /* OImode shouldn't be used directly. */ 2340 gcc_unreachable (); 2341 case E_CTImode: 2342 return 0; 2343 case E_SFmode: 2344 if (!(bit_offset % 64)) 2345 classes[0] = X86_64_SSESF_CLASS; 2346 else 2347 classes[0] = X86_64_SSE_CLASS; 2348 return 1; 2349 case E_DFmode: 2350 classes[0] = X86_64_SSEDF_CLASS; 2351 return 1; 2352 case E_XFmode: 2353 classes[0] = X86_64_X87_CLASS; 2354 classes[1] = X86_64_X87UP_CLASS; 2355 return 2; 2356 case E_TFmode: 2357 classes[0] = X86_64_SSE_CLASS; 2358 classes[1] = X86_64_SSEUP_CLASS; 2359 return 2; 2360 case E_SCmode: 2361 classes[0] = X86_64_SSE_CLASS; 2362 if (!(bit_offset % 64)) 2363 return 1; 2364 else 2365 { 2366 static bool warned; 2367 2368 if (!warned && warn_psabi) 2369 { 2370 warned = true; 2371 inform (input_location, 2372 "the ABI of passing structure with %<complex float%>" 2373 " member has changed in GCC 4.4"); 2374 } 2375 classes[1] = X86_64_SSESF_CLASS; 2376 return 2; 2377 } 2378 case E_DCmode: 2379 classes[0] = X86_64_SSEDF_CLASS; 2380 classes[1] = X86_64_SSEDF_CLASS; 2381 return 2; 2382 case E_XCmode: 2383 classes[0] = X86_64_COMPLEX_X87_CLASS; 2384 return 1; 2385 case E_TCmode: 2386 /* This modes is larger than 16 bytes. 
*/ 2387 return 0; 2388 case E_V8SFmode: 2389 case E_V8SImode: 2390 case E_V32QImode: 2391 case E_V16HImode: 2392 case E_V4DFmode: 2393 case E_V4DImode: 2394 classes[0] = X86_64_SSE_CLASS; 2395 classes[1] = X86_64_SSEUP_CLASS; 2396 classes[2] = X86_64_SSEUP_CLASS; 2397 classes[3] = X86_64_SSEUP_CLASS; 2398 return 4; 2399 case E_V8DFmode: 2400 case E_V16SFmode: 2401 case E_V8DImode: 2402 case E_V16SImode: 2403 case E_V32HImode: 2404 case E_V64QImode: 2405 classes[0] = X86_64_SSE_CLASS; 2406 classes[1] = X86_64_SSEUP_CLASS; 2407 classes[2] = X86_64_SSEUP_CLASS; 2408 classes[3] = X86_64_SSEUP_CLASS; 2409 classes[4] = X86_64_SSEUP_CLASS; 2410 classes[5] = X86_64_SSEUP_CLASS; 2411 classes[6] = X86_64_SSEUP_CLASS; 2412 classes[7] = X86_64_SSEUP_CLASS; 2413 return 8; 2414 case E_V4SFmode: 2415 case E_V4SImode: 2416 case E_V16QImode: 2417 case E_V8HImode: 2418 case E_V2DFmode: 2419 case E_V2DImode: 2420 classes[0] = X86_64_SSE_CLASS; 2421 classes[1] = X86_64_SSEUP_CLASS; 2422 return 2; 2423 case E_V1TImode: 2424 case E_V1DImode: 2425 case E_V2SFmode: 2426 case E_V2SImode: 2427 case E_V4HImode: 2428 case E_V8QImode: 2429 classes[0] = X86_64_SSE_CLASS; 2430 return 1; 2431 case E_BLKmode: 2432 case E_VOIDmode: 2433 return 0; 2434 default: 2435 gcc_assert (VECTOR_MODE_P (mode)); 2436 2437 if (bytes > 16) 2438 return 0; 2439 2440 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); 2441 2442 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 2443 classes[0] = X86_64_INTEGERSI_CLASS; 2444 else 2445 classes[0] = X86_64_INTEGER_CLASS; 2446 classes[1] = X86_64_INTEGER_CLASS; 2447 return 1 + (bytes > 8); 2448 } 2449} 2450 2451/* Examine the argument and return set number of register required in each 2452 class. Return true iff parameter should be passed in memory. 
 */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  /* classify_argument returning 0 means "pass in memory".  */
  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	/* x87 classes are only usable for return values; as an argument
	   the value goes in memory.  */
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}

/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  Returns a REG, a PARALLEL
   describing the register pieces, or NULL when the value must be passed
   in memory (or for a zero-sized aggregate).  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      /* Each diagnostic is issued at most once per compilation, tracked
	 by the static flags above.  */
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested 24 bytes we
		   don't have mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  /* Consume the following SSEUP eightbytes (if any) as part of
	     one wider SSE register; I is advanced past them.  */
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}

/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return a number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}

/* 64-bit SysV counterpart of function_arg_advance_32.  Consumes the
   integer/SSE registers needed by the argument, or rounds CUM->words up
   to the argument's boundary and advances it when the argument goes on
   the stack.  Returns the number of integer registers consumed.  */

static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}

/* MS ABI counterpart: every by-value argument occupies exactly one slot
   of 1, 2, 4 or 8 bytes (anything else is passed indirectly).  Returns 1
   if a register was consumed, 0 otherwise.  */

static int
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
      return 1;
    }
  return 0;
}

/* Update the data in CUM to advance over argument ARG.
*/ 2923 2924static void 2925ix86_function_arg_advance (cumulative_args_t cum_v, 2926 const function_arg_info &arg) 2927{ 2928 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 2929 machine_mode mode = arg.mode; 2930 HOST_WIDE_INT bytes, words; 2931 int nregs; 2932 2933 /* The argument of interrupt handler is a special case and is 2934 handled in ix86_function_arg. */ 2935 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) 2936 return; 2937 2938 bytes = arg.promoted_size_in_bytes (); 2939 words = CEIL (bytes, UNITS_PER_WORD); 2940 2941 if (arg.type) 2942 mode = type_natural_mode (arg.type, NULL, false); 2943 2944 if (TARGET_64BIT) 2945 { 2946 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; 2947 2948 if (call_abi == MS_ABI) 2949 nregs = function_arg_advance_ms_64 (cum, bytes, words); 2950 else 2951 nregs = function_arg_advance_64 (cum, mode, arg.type, words, 2952 arg.named); 2953 } 2954 else 2955 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words); 2956 2957 if (!nregs) 2958 { 2959 /* Track if there are outgoing arguments on stack. */ 2960 if (cum->caller) 2961 cfun->machine->outgoing_args_on_stack = true; 2962 } 2963} 2964 2965/* Define where to put the arguments to a function. 2966 Value is zero to push the argument on the stack, 2967 or a hard register in which to store the argument. 2968 2969 MODE is the argument's machine mode. 2970 TYPE is the data type of the argument (as a tree). 2971 This is null for libcalls where that information may 2972 not be available. 2973 CUM is a variable of type CUMULATIVE_ARGS which gives info about 2974 the preceding args and about the function being called. 2975 NAMED is nonzero if this argument is a named parameter 2976 (otherwise it is an extra parameter matching an ellipsis). 
 */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}

/* 64-bit SysV flavor of ix86_function_arg.  A VOIDmode MODE denotes the
   hidden AL argument carrying the number of SSE registers used by a
   varargs call; otherwise delegate to construct_container.  */

static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers [cum->regno],
			      cum->sse_regno);
}

/* MS ABI flavor of ix86_function_arg.  Arguments use the first four
   slots; a VOIDmode MODE returns the magic -2 marking an MS-ABI call.  */

static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}

/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt/exception handlers get their (pointer, optional error
     code) arguments from fixed stack slots relative to the argument
     pointer, not from the normal argument-passing sequence.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
    mode = type_natural_mode (arg.type, cum, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}

/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

	  if (tree type = arg.type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
	/* SysV 64-bit: only variable-sized types go by reference.  */
	return true;
    }

  return false;
}

/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}

/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371 XXX: This function is obsolete and is only used for checking psABI 3372 compatibility with previous versions of GCC. */ 3373 3374static unsigned int 3375ix86_compat_function_arg_boundary (machine_mode mode, 3376 const_tree type, unsigned int align) 3377{ 3378 /* In 32bit, only _Decimal128 and __float128 are aligned to their 3379 natural boundaries. */ 3380 if (!TARGET_64BIT && mode != TDmode && mode != TFmode) 3381 { 3382 /* i386 ABI defines all arguments to be 4 byte aligned. We have to 3383 make an exception for SSE modes since these require 128bit 3384 alignment. 3385 3386 The handling here differs from field_alignment. ICC aligns MMX 3387 arguments to 4 byte boundaries, while structure fields are aligned 3388 to 8 byte boundaries. */ 3389 if (!type) 3390 { 3391 if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) 3392 align = PARM_BOUNDARY; 3393 } 3394 else 3395 { 3396 if (!ix86_compat_aligned_value_p (type)) 3397 align = PARM_BOUNDARY; 3398 } 3399 } 3400 if (align > BIGGEST_ALIGNMENT) 3401 align = BIGGEST_ALIGNMENT; 3402 return align; 3403} 3404 3405/* Return true when TYPE should be 128bit aligned for 32bit argument 3406 passing ABI. */ 3407 3408static bool 3409ix86_contains_aligned_value_p (const_tree type) 3410{ 3411 machine_mode mode = TYPE_MODE (type); 3412 3413 if (mode == XFmode || mode == XCmode) 3414 return false; 3415 3416 if (TYPE_ALIGN (type) < 128) 3417 return false; 3418 3419 if (AGGREGATE_TYPE_P (type)) 3420 { 3421 /* Walk the aggregates recursively. */ 3422 switch (TREE_CODE (type)) 3423 { 3424 case RECORD_TYPE: 3425 case UNION_TYPE: 3426 case QUAL_UNION_TYPE: 3427 { 3428 tree field; 3429 3430 /* Walk all the structure fields. */ 3431 for (field = TYPE_FIELDS (type); 3432 field; 3433 field = DECL_CHAIN (field)) 3434 { 3435 if (TREE_CODE (field) == FIELD_DECL 3436 && ix86_contains_aligned_value_p (TREE_TYPE (field))) 3437 return true; 3438 } 3439 break; 3440 } 3441 3442 case ARRAY_TYPE: 3443 /* Just for use if some languages passes arrays by value. 
*/ 3444 if (ix86_contains_aligned_value_p (TREE_TYPE (type))) 3445 return true; 3446 break; 3447 3448 default: 3449 gcc_unreachable (); 3450 } 3451 } 3452 else 3453 return TYPE_ALIGN (type) >= 128; 3454 3455 return false; 3456} 3457 3458/* Gives the alignment boundary, in bits, of an argument with the 3459 specified mode and type. */ 3460 3461static unsigned int 3462ix86_function_arg_boundary (machine_mode mode, const_tree type) 3463{ 3464 unsigned int align; 3465 if (type) 3466 { 3467 /* Since the main variant type is used for call, we convert it to 3468 the main variant type. */ 3469 type = TYPE_MAIN_VARIANT (type); 3470 align = TYPE_ALIGN (type); 3471 if (TYPE_EMPTY_P (type)) 3472 return PARM_BOUNDARY; 3473 } 3474 else 3475 align = GET_MODE_ALIGNMENT (mode); 3476 if (align < PARM_BOUNDARY) 3477 align = PARM_BOUNDARY; 3478 else 3479 { 3480 static bool warned; 3481 unsigned int saved_align = align; 3482 3483 if (!TARGET_64BIT) 3484 { 3485 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ 3486 if (!type) 3487 { 3488 if (mode == XFmode || mode == XCmode) 3489 align = PARM_BOUNDARY; 3490 } 3491 else if (!ix86_contains_aligned_value_p (type)) 3492 align = PARM_BOUNDARY; 3493 3494 if (align < 128) 3495 align = PARM_BOUNDARY; 3496 } 3497 3498 if (warn_psabi 3499 && !warned 3500 && align != ix86_compat_function_arg_boundary (mode, type, 3501 saved_align)) 3502 { 3503 warned = true; 3504 inform (input_location, 3505 "the ABI for passing parameters with %d-byte" 3506 " alignment has changed in GCC 4.6", 3507 align / BITS_PER_UNIT); 3508 } 3509 } 3510 3511 return align; 3512} 3513 3514/* Return true if N is a possible register number of function value. 
 */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;
    case DX_REG:
      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}

/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}

/* Return the rtx for a value returned under the 64-bit SysV ABI.
   VALTYPE may be NULL for libcalls, in which case the register class
   is chosen from MODE alone.  */

static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}

/* Return the rtx for a value returned under the 32-bit MS ABI.  */

static rtx
function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
		      const_tree fntype, const_tree fn, const_tree valtype)
{
  unsigned int regno;

  /* Floating point return values in %st(0)
     (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).
*/ 3676 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 3677 && (GET_MODE_SIZE (mode) > 8 3678 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) 3679 { 3680 regno = FIRST_FLOAT_REG; 3681 return gen_rtx_REG (orig_mode, regno); 3682 } 3683 else 3684 return function_value_32(orig_mode, mode, fntype,fn); 3685} 3686 3687static rtx 3688function_value_ms_64 (machine_mode orig_mode, machine_mode mode, 3689 const_tree valtype) 3690{ 3691 unsigned int regno = AX_REG; 3692 3693 if (TARGET_SSE) 3694 { 3695 switch (GET_MODE_SIZE (mode)) 3696 { 3697 case 16: 3698 if (valtype != NULL_TREE 3699 && !VECTOR_INTEGER_TYPE_P (valtype) 3700 && !VECTOR_INTEGER_TYPE_P (valtype) 3701 && !INTEGRAL_TYPE_P (valtype) 3702 && !VECTOR_FLOAT_TYPE_P (valtype)) 3703 break; 3704 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) 3705 && !COMPLEX_MODE_P (mode)) 3706 regno = FIRST_SSE_REG; 3707 break; 3708 case 8: 3709 case 4: 3710 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) 3711 break; 3712 if (mode == SFmode || mode == DFmode) 3713 regno = FIRST_SSE_REG; 3714 break; 3715 default: 3716 break; 3717 } 3718 } 3719 return gen_rtx_REG (orig_mode, regno); 3720} 3721 3722static rtx 3723ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, 3724 machine_mode orig_mode, machine_mode mode) 3725{ 3726 const_tree fn, fntype; 3727 3728 fn = NULL_TREE; 3729 if (fntype_or_decl && DECL_P (fntype_or_decl)) 3730 fn = fntype_or_decl; 3731 fntype = fn ? 
 TREE_TYPE (fn) : fntype_or_decl;

  if (ix86_function_type_abi (fntype) == MS_ABI)
    {
      if (TARGET_64BIT)
	return function_value_ms_64 (orig_mode, mode, valtype);
      else
	return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    }
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

/* Return the place a value of type VALTYPE is returned; presumably the
   TARGET_FUNCTION_VALUE hook (third parameter, the unused bool, would
   be "outgoing" — confirm against the hook table).  */

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}

/* Pointer function arguments and return values are promoted to
   word_mode for normal functions.  */

static machine_mode
ix86_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (cfun->machine->func_type == TYPE_NORMAL
      && type != NULL_TREE
      && POINTER_TYPE_P (type))
    {
      /* Pointers extend unsigned on x86-64.  */
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

/* Return the place a libcall result of MODE is returned; no type
   information is available for libcalls.  */

rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}

/* Return true iff type is returned in memory.
 */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  /* SysV: in memory iff the value does not fit in registers
	     (examine_argument returns false in that case).  */
	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescibes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
#endif
}


/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  /* The four fields of the SysV x86-64 va_list record.  */
  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}

/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  if (TARGET_64BIT)
    {
      /* Initialize ABI specific va_list builtin types.

	 In lto1, we can encounter two va_list types:
	 - one as a result of the type-merge across TUs, and
	 - the one constructed here.
	 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
	 a type identity check in canonical_va_list_type based on
	 TYPE_MAIN_VARIANT (which we used to have) will not work.
	 Instead, we tag each va_list_type_node with its unique attribute, and
	 look for the attribute in the type identity check in
	 canonical_va_list_type.

	 Tagging sysv_va_list_type_node directly with the attribute is
	 problematic since it's a array of one record, which will degrade into a
	 pointer to record when used as parameter (see build_va_arg comments for
	 an example), dropping the attribute in the process.  So we tag the
	 record instead.  */

      /* For SYSV_ABI we use an array of one record.  */
      sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();

      /* For MS_ABI we use plain pointer to argument area.  */
      tree char_ptr_type = build_pointer_type (char_type_node);
      tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
			     TYPE_ATTRIBUTES (char_ptr_type));
      ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);

      return ((ix86_abi == MS_ABI)
	      ? ms_va_list_type_node
	      : sysv_va_list_type_node);
    }
  else
    {
      /* For i386 we use plain pointer to argument area.  */
      return build_pointer_type (char_type_node);
    }
}

/* Worker function for TARGET_SETUP_INCOMING_VARARGS.
 */

/* Spill the named-register arguments of a 64-bit SysV varargs function
   to the register save area so va_arg can find them.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Save only the integer registers not consumed by named args.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  /* SSE slots live right above the GPR save area.  */
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}

/* For a 64-bit MS-ABI varargs function, copy the remaining register
   parameters to the incoming-argument stack area.  */

static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}

static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v,
			     const function_arg_info &arg,
			     int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  /* Only the 64-bit ABIs save registers; 32-bit varargs live entirely
     on the stack.  */
  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}

/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}

/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  /* Emit the copy at function entry, before any other code.  */
	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset: byte offset of the next GP register slot.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* fp_offset: SSE slots start after all GP slots.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}

/* Implement va_arg.
 */

/* Target hook TARGET_GIMPLIFY_VA_ARG_EXPR for the SysV x86-64 ABI.
   Emits GIMPLE into *PRE_P that fetches the next argument of type TYPE
   from a va_list VALIST, either from the register save area or from the
   stack overflow area, and returns a tree for the fetched value.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* Pull the four fields (gp_offset, fp_offset, overflow_arg_area,
     reg_save_area) out of the va_list record type.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Arguments passed by reference are fetched as a pointer and
     dereferenced at the end.  */
  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* Over-aligned aggregates cannot be read directly from the
	 register save area; they must be staged through a temporary.  */
      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      /* SSE slots in the save area are 16 bytes apart.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      /* Integer slots in the save area are 8 bytes apart.  */
	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  /* Copy the value piece by piece from the save area into the
	     temporary, one register slot at a time.  */
	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      /* Clamp the last piece so we never read past the end of
		 the argument; fall back to QImode if no integer mode
		 of the exact leftover size exists.  */
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  /* Partial piece: copy only the valid bytes.  */
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      /* Round the overflow pointer up to ALIGN bytes.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance the overflow pointer past the argument.  */
  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}

/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  The MEM may be wrapped in SUBREGs and may
   appear as the first operand of a PARALLEL pattern.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (SUBREG_P (mem))
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  /* Volatile MEMs are only acceptable when volatile_ok is set.  */
  return volatile_ok || !MEM_VOLATILE_P (mem);
}

/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}

/* Initialize the table of extra 80387 mathematical constants.
 */

static void
init_ext_80387_constants (void)
{
  /* Decimal strings for log10(2), ln(2), log2(e), log2(10) and pi,
     matching the fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions.  */
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}

/* Return non-zero if the constant is something that
   can be loaded with a special instruction.
   Return values: -1 not an x87 float constant; 0 no special load;
   1 fldz; 2 fld1; 3..7 extended constants (see table above);
   8 -0.0 (fldz;fchs); 9 -1.0 (fld1;fchs).  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
      && !flag_rounding_math)
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      /* Negated constants are split later; emit nothing here.  */
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return const_double_from_real_value (ext_80387_constants_table[i],
				       XFmode);
}

/* Return 1 if X is all bits 0 and 2 if X is all bits 1
   in supported SSE/AVX vector mode.  Return 0 otherwise.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      /* All-ones requires an ISA wide enough for the vector size.  */
      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  /* xmm16-xmm31 need EVEX-encoded vpxord; without AVX512VL we
	     must operate on the full zmm register.  */
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorpd\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorpd\t%x0, %x0, %x0"
		    : "vxorpd\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxorq\t%x0, %x0, %x0"
		    : "vpxorq\t%g0, %g0, %g0");

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorps\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorps\t%x0, %x0, %x0"
		    : "vxorps\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");

	default:
	  gcc_unreachable ();
	}
    }
  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  /* vpternlogd with imm 0xFF sets every bit of the destination.  */
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX
		    ? "vpcmpeqd\t%0, %0, %0"
		    : "pcmpeqd\t%0, %0");
	  else if (TARGET_AVX512VL)
	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	  else
	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	default:
	  gcc_unreachable ();
	}
   }

  gcc_unreachable ();
}

/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  DST is the destination register
   of the load.  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  /* Only all-zeros (1) qualifies for SSE; for the x87 stack any of the
     special constants (>= 1) does.  */
  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}

/* Predicate for pre-reload splitters with associated instructions,
   which can match any time before the split1 pass (usually combine),
   then are unconditionally split in that pass and should not be
   matched again afterwards.
 */

bool
ix86_pre_reload_split (void)
{
  /* True only while pseudos may still be created and split1 has not
     yet run on this function.  */
  return (can_create_pseudo_p ()
	  && !(cfun->curr_properties & PROP_rtl_split_insns));
}

/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
   TARGET_AVX512VL or it is a register to register move which can
   be done with zmm register move.  */

static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
		 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  bool misaligned_p = (misaligned_operand (operands[0], mode)
		       || misaligned_operand (operands[1], mode));
  /* EVEX encoding is forced by a 512-bit operation or by use of
     xmm16-xmm31/ymm16-ymm31.  */
  bool evex_reg_p = (size == 64
		     || EXT_REX_SSE_REG_P (operands[0])
		     || EXT_REX_SSE_REG_P (operands[1]));
  machine_mode scalar_mode;

  const char *opcode = NULL;
  enum
  {
    opcode_int,
    opcode_float,
    opcode_double
  } type = opcode_int;

  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
	 AVX512VL is disabled, LRA can still generate reg to
	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
	 modes.  */
      if (memory_operand (operands[0], mode)
	  || memory_operand (operands[1], mode))
	gcc_unreachable ();
      /* Widen to a full zmm move.  */
      size = 64;
      switch (type)
	{
	case opcode_int:
	  opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  break;
	case opcode_float:
	  opcode = misaligned_p ? "vmovups" : "vmovaps";
	  break;
	case opcode_double:
	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
	  break;
	}
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
	{
	case E_SFmode:
	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  break;
	case E_DFmode:
	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
	  break;
	case E_TFmode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      /* Byte/word element moves need AVX512BW for the narrow EVEX
	 forms; otherwise fall back to 64-bit element moves.  */
      switch (scalar_mode)
	{
	case E_QImode:
	  if (evex_reg_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_HImode:
	  if (evex_reg_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SImode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_DImode:
	case E_TImode:
	case E_OImode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_XImode:
	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();

  /* Emit with %g (zmm), %t (ymm) or %x (xmm) operand modifiers
     according to the move width.  */
  switch (size)
    {
    case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}

/* Return the template of the TYPE_SSEMOV instruction to move
   operands[1] into operands[0].  */

const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
	  && (GENERAL_REG_P (operands[0])
	      || GENERAL_REG_P (operands[1])))
	return "%vmovd\t{%1, %0|%0, %1}";
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_DF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}

/* Return true if OP contains a SYMBOL_REF or LABEL_REF anywhere in
   its RTL structure (recursive walk over all sub-rtxes).  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}

/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.
*/ 5196 5197bool 5198ix86_can_use_return_insn_p (void) 5199{ 5200 if (ix86_function_naked (current_function_decl)) 5201 return false; 5202 5203 /* Don't use `ret' instruction in interrupt handler. */ 5204 if (! reload_completed 5205 || frame_pointer_needed 5206 || cfun->machine->func_type != TYPE_NORMAL) 5207 return 0; 5208 5209 /* Don't allow more than 32k pop, since that's all we can do 5210 with one instruction. */ 5211 if (crtl->args.pops_args && crtl->args.size >= 32768) 5212 return 0; 5213 5214 struct ix86_frame &frame = cfun->machine->frame; 5215 return (frame.stack_pointer_offset == UNITS_PER_WORD 5216 && (frame.nregs + frame.nsseregs) == 0); 5217} 5218 5219/* Return stack frame size. get_frame_size () returns used stack slots 5220 during compilation, which may be optimized out later. If stack frame 5221 is needed, stack_frame_required should be true. */ 5222 5223static HOST_WIDE_INT 5224ix86_get_frame_size (void) 5225{ 5226 if (cfun->machine->stack_frame_required) 5227 return get_frame_size (); 5228 else 5229 return 0; 5230} 5231 5232/* Value should be nonzero if functions must have frame pointers. 5233 Zero means the frame pointer need not be set up (and parms may 5234 be accessed via the stack pointer) in functions that seem suitable. */ 5235 5236static bool 5237ix86_frame_pointer_required (void) 5238{ 5239 /* If we accessed previous frames, then the generated code expects 5240 to be able to access the saved ebp value in our frame. */ 5241 if (cfun->machine->accesses_prev_frame) 5242 return true; 5243 5244 /* Several x86 os'es need a frame pointer for other reasons, 5245 usually pertaining to setjmp. */ 5246 if (SUBTARGET_FRAME_POINTER_REQUIRED) 5247 return true; 5248 5249 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ 5250 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) 5251 return true; 5252 5253 /* Win64 SEH, very large frames need a frame-pointer as maximum stack 5254 allocation is 4GB. 
*/ 5255 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE) 5256 return true; 5257 5258 /* SSE saves require frame-pointer when stack is misaligned. */ 5259 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) 5260 return true; 5261 5262 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER 5263 turns off the frame pointer by default. Turn it back on now if 5264 we've not got a leaf function. */ 5265 if (TARGET_OMIT_LEAF_FRAME_POINTER 5266 && (!crtl->is_leaf 5267 || ix86_current_function_calls_tls_descriptor)) 5268 return true; 5269 5270 if (crtl->profile && !flag_fentry) 5271 return true; 5272 5273 return false; 5274} 5275 5276/* Record that the current function accesses previous call frames. */ 5277 5278void 5279ix86_setup_frame_addresses (void) 5280{ 5281 cfun->machine->accesses_prev_frame = 1; 5282} 5283 5284#ifndef USE_HIDDEN_LINKONCE 5285# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) 5286# define USE_HIDDEN_LINKONCE 1 5287# else 5288# define USE_HIDDEN_LINKONCE 0 5289# endif 5290#endif 5291 5292/* Label count for call and return thunks. It is used to make unique 5293 labels in call and return thunks. */ 5294static int indirectlabelno; 5295 5296/* True if call thunk function is needed. */ 5297static bool indirect_thunk_needed = false; 5298 5299/* Bit masks of integer registers, which contain branch target, used 5300 by call thunk functions. */ 5301static int indirect_thunks_used; 5302 5303/* True if return thunk function is needed. */ 5304static bool indirect_return_needed = false; 5305 5306/* True if return thunk function via CX is needed. */ 5307static bool indirect_return_via_cx; 5308 5309#ifndef INDIRECT_LABEL 5310# define INDIRECT_LABEL "LIND" 5311#endif 5312 5313/* Indicate what prefix is needed for an indirect branch. */ 5314enum indirect_thunk_prefix 5315{ 5316 indirect_thunk_prefix_none, 5317 indirect_thunk_prefix_nt 5318}; 5319 5320/* Return the prefix needed for an indirect branch INSN. 
 */

enum indirect_thunk_prefix
indirect_thunk_need_prefix (rtx_insn *insn)
{
  enum indirect_thunk_prefix need_prefix;
  if ((cfun->machine->indirect_branch_type
       == indirect_branch_thunk_extern)
      && ix86_notrack_prefixed_insn_p (insn))
    {
      /* NOTRACK prefix is only used with external thunk so that it
	 can be properly updated to support CET at run-time.  */
      need_prefix = indirect_thunk_prefix_nt;
    }
  else
    need_prefix = indirect_thunk_prefix_none;
  return need_prefix;
}

/* Fills in the label name that should be used for the indirect thunk.
   NAME receives the label; REGNO is the branch-target register or
   INVALID_REGNUM for a stack-based thunk; RET_P selects a return thunk
   (only valid with INVALID_REGNUM or CX_REG).  */

static void
indirect_thunk_name (char name[32], unsigned int regno,
		     enum indirect_thunk_prefix need_prefix,
		     bool ret_p)
{
  /* Return thunks via a register are only supported through CX.  */
  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    gcc_unreachable ();

  if (USE_HIDDEN_LINKONCE)
    {
      const char *prefix;

      if (need_prefix == indirect_thunk_prefix_nt
	  && regno != INVALID_REGNUM)
	{
	  /* NOTRACK prefix is only used with external thunk via
	     register so that NOTRACK prefix can be added to indirect
	     branch via register to support CET at run-time.  */
	  prefix = "_nt";
	}
      else
	prefix = "";

      const char *ret = ret_p ? "return" : "indirect";

      if (regno != INVALID_REGNUM)
	{
	  /* Legacy registers get their "r"/"e" size prefix spelled out
	     (e.g. __x86_indirect_thunk_rax).  */
	  const char *reg_prefix;
	  if (LEGACY_INT_REGNO_P (regno))
	    reg_prefix = TARGET_64BIT ? "r" : "e";
	  else
	    reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
		   ret, prefix, reg_prefix, reg_names[regno]);
	}
      else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    }
  else
    {
      if (regno != INVALID_REGNUM)
	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
      else
	{
	  if (ret_p)
	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
	}
    }
}

/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

	call L2
  L1:
	pause
	lfence
	jmp L1
  L2:
	lea WORD_SIZE(%sp), %sp
	ret
 */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      /* CFA is now one word further from the stack pointer.  */
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the pushed return address with the target so
	 the final `ret' branches to it.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA: drop the word pushed by the call above.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
}

/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	/* Emit the thunk as a hidden comdat so multiple objects share
	   one copy.  */
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs ("\t.hidden\t", asm_out_file);
	assemble_name (asm_out_file, name);
	putc ('\n', asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.
*/ 5549 final_start_function (emit_barrier (), asm_out_file, 1); 5550 5551 output_indirect_thunk (regno); 5552 5553 final_end_function (); 5554 init_insn_lengths (); 5555 free_after_compilation (cfun); 5556 set_cfun (NULL); 5557 current_function_decl = NULL; 5558} 5559 5560static int pic_labels_used; 5561 5562/* Fills in the label name that should be used for a pc thunk for 5563 the given register. */ 5564 5565static void 5566get_pc_thunk_name (char name[32], unsigned int regno) 5567{ 5568 gcc_assert (!TARGET_64BIT); 5569 5570 if (USE_HIDDEN_LINKONCE) 5571 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); 5572 else 5573 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 5574} 5575 5576 5577/* This function generates code for -fpic that loads %ebx with 5578 the return address of the caller and then returns. */ 5579 5580static void 5581ix86_code_end (void) 5582{ 5583 rtx xops[2]; 5584 unsigned int regno; 5585 5586 if (indirect_return_needed) 5587 output_indirect_thunk_function (indirect_thunk_prefix_none, 5588 INVALID_REGNUM, true); 5589 if (indirect_return_via_cx) 5590 output_indirect_thunk_function (indirect_thunk_prefix_none, 5591 CX_REG, true); 5592 if (indirect_thunk_needed) 5593 output_indirect_thunk_function (indirect_thunk_prefix_none, 5594 INVALID_REGNUM, false); 5595 5596 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) 5597 { 5598 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; 5599 if ((indirect_thunks_used & (1 << i))) 5600 output_indirect_thunk_function (indirect_thunk_prefix_none, 5601 regno, false); 5602 } 5603 5604 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) 5605 { 5606 char name[32]; 5607 tree decl; 5608 5609 if ((indirect_thunks_used & (1 << regno))) 5610 output_indirect_thunk_function (indirect_thunk_prefix_none, 5611 regno, false); 5612 5613 if (!(pic_labels_used & (1 << regno))) 5614 continue; 5615 5616 get_pc_thunk_name (name, regno); 5617 5618 decl = build_decl (BUILTINS_LOCATION, 
FUNCTION_DECL, 5619 get_identifier (name), 5620 build_function_type_list (void_type_node, NULL_TREE)); 5621 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, 5622 NULL_TREE, void_type_node); 5623 TREE_PUBLIC (decl) = 1; 5624 TREE_STATIC (decl) = 1; 5625 DECL_IGNORED_P (decl) = 1; 5626 5627#if TARGET_MACHO 5628 if (TARGET_MACHO) 5629 { 5630 switch_to_section (darwin_sections[picbase_thunk_section]); 5631 fputs ("\t.weak_definition\t", asm_out_file); 5632 assemble_name (asm_out_file, name); 5633 fputs ("\n\t.private_extern\t", asm_out_file); 5634 assemble_name (asm_out_file, name); 5635 putc ('\n', asm_out_file); 5636 ASM_OUTPUT_LABEL (asm_out_file, name); 5637 DECL_WEAK (decl) = 1; 5638 } 5639 else 5640#endif 5641 if (USE_HIDDEN_LINKONCE) 5642 { 5643 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); 5644 5645 targetm.asm_out.unique_section (decl, 0); 5646 switch_to_section (get_named_section (decl, NULL, 0)); 5647 5648 targetm.asm_out.globalize_label (asm_out_file, name); 5649 fputs ("\t.hidden\t", asm_out_file); 5650 assemble_name (asm_out_file, name); 5651 putc ('\n', asm_out_file); 5652 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 5653 } 5654 else 5655 { 5656 switch_to_section (text_section); 5657 ASM_OUTPUT_LABEL (asm_out_file, name); 5658 } 5659 5660 DECL_INITIAL (decl) = make_node (BLOCK); 5661 current_function_decl = decl; 5662 allocate_struct_function (decl, false); 5663 init_function_start (decl); 5664 /* We're about to hide the function body from callees of final_* by 5665 emitting it directly; tell them we're a thunk, if they care. */ 5666 cfun->is_thunk = true; 5667 first_function_block_is_cold = false; 5668 /* Make sure unwind info is emitted for the thunk if needed. */ 5669 final_start_function (emit_barrier (), asm_out_file, 1); 5670 5671 /* Pad stack IP move with 4 instructions (two NOPs count 5672 as one instruction). 
*/ 5673 if (TARGET_PAD_SHORT_FUNCTION) 5674 { 5675 int i = 8; 5676 5677 while (i--) 5678 fputs ("\tnop\n", asm_out_file); 5679 } 5680 5681 xops[0] = gen_rtx_REG (Pmode, regno); 5682 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); 5683 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); 5684 output_asm_insn ("%!ret", NULL); 5685 final_end_function (); 5686 init_insn_lengths (); 5687 free_after_compilation (cfun); 5688 set_cfun (NULL); 5689 current_function_decl = NULL; 5690 } 5691 5692 if (flag_split_stack) 5693 file_end_indicate_split_stack (); 5694} 5695 5696/* Emit code for the SET_GOT patterns. */ 5697 5698const char * 5699output_set_got (rtx dest, rtx label) 5700{ 5701 rtx xops[3]; 5702 5703 xops[0] = dest; 5704 5705 if (TARGET_VXWORKS_RTP && flag_pic) 5706 { 5707 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ 5708 xops[2] = gen_rtx_MEM (Pmode, 5709 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); 5710 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 5711 5712 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. 5713 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as 5714 an unadorned address. */ 5715 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); 5716 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; 5717 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); 5718 return ""; 5719 } 5720 5721 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 5722 5723 if (flag_pic) 5724 { 5725 char name[32]; 5726 get_pc_thunk_name (name, REGNO (dest)); 5727 pic_labels_used |= 1 << REGNO (dest); 5728 5729 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 5730 xops[2] = gen_rtx_MEM (QImode, xops[2]); 5731 output_asm_insn ("%!call\t%X2", xops); 5732 5733#if TARGET_MACHO 5734 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. 5735 This is what will be referenced by the Mach-O PIC subsystem. 
*/ 5736 if (machopic_should_output_picbase_label () || !label) 5737 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); 5738 5739 /* When we are restoring the pic base at the site of a nonlocal label, 5740 and we decided to emit the pic base above, we will still output a 5741 local label used for calculating the correction offset (even though 5742 the offset will be 0 in that case). */ 5743 if (label) 5744 targetm.asm_out.internal_label (asm_out_file, "L", 5745 CODE_LABEL_NUMBER (label)); 5746#endif 5747 } 5748 else 5749 { 5750 if (TARGET_MACHO) 5751 /* We don't need a pic base, we're not producing pic. */ 5752 gcc_unreachable (); 5753 5754 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); 5755 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); 5756 targetm.asm_out.internal_label (asm_out_file, "L", 5757 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 5758 } 5759 5760 if (!TARGET_MACHO) 5761 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); 5762 5763 return ""; 5764} 5765 5766/* Generate an "push" pattern for input ARG. */ 5767 5768rtx 5769gen_push (rtx arg) 5770{ 5771 struct machine_function *m = cfun->machine; 5772 5773 if (m->fs.cfa_reg == stack_pointer_rtx) 5774 m->fs.cfa_offset += UNITS_PER_WORD; 5775 m->fs.sp_offset += UNITS_PER_WORD; 5776 5777 if (REG_P (arg) && GET_MODE (arg) != word_mode) 5778 arg = gen_rtx_REG (word_mode, REGNO (arg)); 5779 5780 return gen_rtx_SET (gen_rtx_MEM (word_mode, 5781 gen_rtx_PRE_DEC (Pmode, 5782 stack_pointer_rtx)), 5783 arg); 5784} 5785 5786/* Generate an "pop" pattern for input ARG. */ 5787 5788rtx 5789gen_pop (rtx arg) 5790{ 5791 if (REG_P (arg) && GET_MODE (arg) != word_mode) 5792 arg = gen_rtx_REG (word_mode, REGNO (arg)); 5793 5794 return gen_rtx_SET (arg, 5795 gen_rtx_MEM (word_mode, 5796 gen_rtx_POST_INC (Pmode, 5797 stack_pointer_rtx))); 5798} 5799 5800/* Return >= 0 if there is an unused call-clobbered register available 5801 for the entire function. 
 */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      /* Scan hard registers 2..0 for one that is neither the DRAP
         register nor ever live in this function.  */
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}

/* Return true if REGNO is used by the epilogue.  */

bool
ix86_epilogue_uses (int regno)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  return (epilogue_completed
          && cfun->machine->no_caller_saved_registers
          && !fixed_regs[regno]
          && !STACK_REGNO_P (regno)
          && !MMX_REGNO_P (regno));
}

/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
ix86_hard_regno_scratch_ok (unsigned int regno)
{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after epilogue and use REGNO as scratch
     register only if it has been used before to avoid saving and
     restoring it.  */
  return (!cfun->machine->no_caller_saved_registers
          || (!epilogue_completed
              && df_regs_ever_live_p (regno)));
}

/* Return TRUE if we need to save REGNO.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  if (cfun->machine->no_caller_saved_registers)
    {
      /* Don't preserve registers used for function return value.  */
      rtx reg = crtl->return_rtx;
      if (reg)
        {
          unsigned int i = REGNO (reg);
          unsigned int nregs = REG_NREGS (reg);
          /* Check each hard register making up the return value.  */
          while (nregs-- > 0)
            if ((i + nregs) == regno)
              return false;
        }

      return (df_regs_ever_live_p (regno)
              && !fixed_regs[regno]
              && !STACK_REGNO_P (regno)
              && !MMX_REGNO_P (regno)
              && (regno != HARD_FRAME_POINTER_REGNUM
                  || !frame_pointer_needed));
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
        {
          /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
             _mcount in prologue.  */
          if (!TARGET_64BIT && flag_pic && crtl->profile)
            return true;
        }
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
               || crtl->profile
               || crtl->calls_eh_return
               || crtl->uses_const_pool
               || cfun->has_nonlocal_label)
        /* Only save the PIC register if no call-clobbered alternative
           is available to hold the PIC base instead.  */
        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      /* EH_RETURN_DATA_REGNO yields INVALID_REGNUM past the last
         EH data register.  */
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      /* Registers saved/restored by the out-of-line stubs are not
         handled here.  */
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
                       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
        return false;
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_or_fixed_reg_p (regno)
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}

/* Return
number of saved general prupose registers. */ 5938 5939static int 5940ix86_nsaved_regs (void) 5941{ 5942 int nregs = 0; 5943 int regno; 5944 5945 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5946 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) 5947 nregs ++; 5948 return nregs; 5949} 5950 5951/* Return number of saved SSE registers. */ 5952 5953static int 5954ix86_nsaved_sseregs (void) 5955{ 5956 int nregs = 0; 5957 int regno; 5958 5959 if (!TARGET_64BIT_MS_ABI) 5960 return 0; 5961 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 5962 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) 5963 nregs ++; 5964 return nregs; 5965} 5966 5967/* Given FROM and TO register numbers, say whether this elimination is 5968 allowed. If stack alignment is needed, we can only replace argument 5969 pointer with hard frame pointer, or replace frame pointer with stack 5970 pointer. Otherwise, frame pointer elimination is automatically 5971 handled and all other eliminations are valid. */ 5972 5973static bool 5974ix86_can_eliminate (const int from, const int to) 5975{ 5976 if (stack_realign_fp) 5977 return ((from == ARG_POINTER_REGNUM 5978 && to == HARD_FRAME_POINTER_REGNUM) 5979 || (from == FRAME_POINTER_REGNUM 5980 && to == STACK_POINTER_REGNUM)); 5981 else 5982 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; 5983} 5984 5985/* Return the offset between two registers, one to be eliminated, and the other 5986 its replacement, at the start of a routine. 
*/ 5987 5988HOST_WIDE_INT 5989ix86_initial_elimination_offset (int from, int to) 5990{ 5991 struct ix86_frame &frame = cfun->machine->frame; 5992 5993 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 5994 return frame.hard_frame_pointer_offset; 5995 else if (from == FRAME_POINTER_REGNUM 5996 && to == HARD_FRAME_POINTER_REGNUM) 5997 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 5998 else 5999 { 6000 gcc_assert (to == STACK_POINTER_REGNUM); 6001 6002 if (from == ARG_POINTER_REGNUM) 6003 return frame.stack_pointer_offset; 6004 6005 gcc_assert (from == FRAME_POINTER_REGNUM); 6006 return frame.stack_pointer_offset - frame.frame_pointer_offset; 6007 } 6008} 6009 6010/* Emits a warning for unsupported msabi to sysv pro/epilogues. */ 6011void warn_once_call_ms2sysv_xlogues (const char *feature) 6012{ 6013 static bool warned_once = false; 6014 if (!warned_once) 6015 { 6016 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", 6017 feature); 6018 warned_once = true; 6019 } 6020} 6021 6022/* Return the probing interval for -fstack-clash-protection. */ 6023 6024static HOST_WIDE_INT 6025get_probe_interval (void) 6026{ 6027 if (flag_stack_clash_protection) 6028 return (HOST_WIDE_INT_1U 6029 << param_stack_clash_protection_probe_interval); 6030 else 6031 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); 6032} 6033 6034/* When using -fsplit-stack, the allocation routines set a field in 6035 the TCB to the bottom of the stack plus this much space, measured 6036 in bytes. */ 6037 6038#define SPLIT_STACK_AVAILABLE 256 6039 6040/* Fill structure ix86_frame about frame of currently computed function. 
 */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
        }

      else if (ix86_static_chain_on_stack)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("static call chains");
        }

      /* Finally, compute which registers the stub will manage.  */
      else
        {
          unsigned count = xlogue_layout::count_stub_managed_regs ();
          m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
          m->call_ms2sysv_pad_in = 0;
        }
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* 64-bit MS ABI seem to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.
 */
  if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
        && crtl->preferred_stack_boundary < 128)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor
          || (TARGET_MACHO && crtl->profile)
          || ix86_incoming_stack_boundary < 128))
    {
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.

         Calling this hook multiple times with the same frame requirements
         must produce the same layout, since the RA might otherwise be
         unable to reach a fixed point or might fail its final sanity checks.
         This means that once we've assumed that a function does or doesn't
         have a particular size, we have to stick to that assumption
         regardless of how the function has changed since.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        m->use_fast_prologue_epilogue = false;
      else
        {
          /* Cache the expensive_function_p result so repeated layout
             computations with the same COUNT stay consistent.  */
          if (count != frame->expensive_count)
            {
              frame->expensive_count = count;
              frame->expensive_p = expensive_function_p (count);
            }
          m->use_fast_prologue_epilogue = !frame->expensive_p;
        }
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
          && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
         register save area, but the stack frame for the local function
         may require a greater alignment if using AVX/2/512.  In order
         to avoid wasting space, we first calculate the space needed for
         the rest of the register saves, add that to the stack pointer,
         and then realign the stack to the boundary of the start of the
         frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
        {
          if (m->call_ms2sysv)
            {
              m->call_ms2sysv_pad_in = 0;
              space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
            }

          else if (frame->nsseregs)
            /* The only ABI that has saved SSE registers (Win64) also has a
               16-byte aligned default stack.  However, many programs violate
               the ABI, and Wine64 forces stack realignment to compensate.  */
            space_needed = frame->nsseregs * 16;

          sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

          /* 64-bit frame->va_arg_size should always be a multiple of 16, but
             rounding to be pedantic.  */
          space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
        }
      else
        space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
         before this point are not directly comparable with values below
         this point.  Use sp_valid_at to determine if the stack pointer is
         valid for a given offset, fp_valid_at for the frame pointer, or
         choose_baseaddr to have a base register chosen for you.

         Note that the result of (frame->stack_realign_offset
         & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
                                   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
        {
          m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
          offset += xlogue_layout::get_instance ().get_stack_space_used ();
        }

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
        {
          /* If the incoming stack boundary is at least 16 bytes, or DRAP is
             required and the DRAP re-alignment boundary is at least 16 bytes,
             then we want the SSE register save area properly aligned.  */
          if (ix86_incoming_stack_boundary >= 128
              || (stack_realign_drap && stack_alignment_needed >= 16))
            offset = ROUND_UP (offset, 16);
          offset += frame->nsseregs * 16;
        }
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
          && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
      /* If stack clash probing needs a loop, then it needs a
         scratch register.  But the returned register is only guaranteed
         to be safe to use after register saves are complete.  So if
         stack clash protections are enabled and the allocated frame is
         larger than the probe interval, then use pushes to save
         callee saved registers.  */
      || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
         area, see the SEH code in config/i386/winnt.c for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
         the establisher frame for __builtin_frame_address (0) or else if the
         frame overflows the SEH maximum frame size.

         Note that the value returned by __builtin_frame_address (0) is quite
         constrained, because setjmp is piggybacked on the SEH machinery with
         recent versions of MinGW:

         # elif defined(__SEH__)
         # if defined(__aarch64__) || defined(_ARM64_)
         # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
         # elif (__MINGW_GCC_VERSION < 40702)
         # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
         # else
         # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
         # endif

         and the second argument passed to _setjmp, if not null, is forwarded
         to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
         built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
        = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
        {
          /* The resulting diff will be a multiple of 16 lower than 255,
             i.e. at most 240 as required by the unwind data structure.  */
          frame->hard_frame_pointer_offset += (diff & 15);
        }
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
        }
      else
        frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}

/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.
*/ 6405 6406static inline int 6407choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) 6408{ 6409 int len = 4; 6410 6411 if (offset == 0) 6412 { 6413 /* EBP and R13 cannot be encoded without an offset. */ 6414 len = (regno == BP_REG || regno == R13_REG); 6415 } 6416 else if (IN_RANGE (offset, -128, 127)) 6417 len = 1; 6418 6419 /* ESP and R12 must be encoded with a SIB byte. */ 6420 if (regno == SP_REG || regno == R12_REG) 6421 len++; 6422 6423 return len; 6424} 6425 6426/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in 6427 the frame save area. The register is saved at CFA - CFA_OFFSET. */ 6428 6429static bool 6430sp_valid_at (HOST_WIDE_INT cfa_offset) 6431{ 6432 const struct machine_frame_state &fs = cfun->machine->fs; 6433 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) 6434 { 6435 /* Validate that the cfa_offset isn't in a "no-man's land". */ 6436 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); 6437 return false; 6438 } 6439 return fs.sp_valid; 6440} 6441 6442/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in 6443 the frame save area. The register is saved at CFA - CFA_OFFSET. */ 6444 6445static inline bool 6446fp_valid_at (HOST_WIDE_INT cfa_offset) 6447{ 6448 const struct machine_frame_state &fs = cfun->machine->fs; 6449 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) 6450 { 6451 /* Validate that the cfa_offset isn't in a "no-man's land". */ 6452 gcc_assert (cfa_offset >= fs.sp_realigned_offset); 6453 return false; 6454 } 6455 return fs.fp_valid; 6456} 6457 6458/* Choose a base register based upon alignment requested, speed and/or 6459 size. 
*/ 6460 6461static void 6462choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, 6463 HOST_WIDE_INT &base_offset, 6464 unsigned int align_reqested, unsigned int *align) 6465{ 6466 const struct machine_function *m = cfun->machine; 6467 unsigned int hfp_align; 6468 unsigned int drap_align; 6469 unsigned int sp_align; 6470 bool hfp_ok = fp_valid_at (cfa_offset); 6471 bool drap_ok = m->fs.drap_valid; 6472 bool sp_ok = sp_valid_at (cfa_offset); 6473 6474 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; 6475 6476 /* Filter out any registers that don't meet the requested alignment 6477 criteria. */ 6478 if (align_reqested) 6479 { 6480 if (m->fs.realigned) 6481 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; 6482 /* SEH unwind code does do not currently support REG_CFA_EXPRESSION 6483 notes (which we would need to use a realigned stack pointer), 6484 so disable on SEH targets. */ 6485 else if (m->fs.sp_realigned) 6486 sp_align = crtl->stack_alignment_needed; 6487 6488 hfp_ok = hfp_ok && hfp_align >= align_reqested; 6489 drap_ok = drap_ok && drap_align >= align_reqested; 6490 sp_ok = sp_ok && sp_align >= align_reqested; 6491 } 6492 6493 if (m->use_fast_prologue_epilogue) 6494 { 6495 /* Choose the base register most likely to allow the most scheduling 6496 opportunities. Generally FP is valid throughout the function, 6497 while DRAP must be reloaded within the epilogue. But choose either 6498 over the SP due to increased encoding size. */ 6499 6500 if (hfp_ok) 6501 { 6502 base_reg = hard_frame_pointer_rtx; 6503 base_offset = m->fs.fp_offset - cfa_offset; 6504 } 6505 else if (drap_ok) 6506 { 6507 base_reg = crtl->drap_reg; 6508 base_offset = 0 - cfa_offset; 6509 } 6510 else if (sp_ok) 6511 { 6512 base_reg = stack_pointer_rtx; 6513 base_offset = m->fs.sp_offset - cfa_offset; 6514 } 6515 } 6516 else 6517 { 6518 HOST_WIDE_INT toffset; 6519 int len = 16, tlen; 6520 6521 /* Choose the base register with the smallest address encoding. 
6522 With a tie, choose FP > DRAP > SP. */ 6523 if (sp_ok) 6524 { 6525 base_reg = stack_pointer_rtx; 6526 base_offset = m->fs.sp_offset - cfa_offset; 6527 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); 6528 } 6529 if (drap_ok) 6530 { 6531 toffset = 0 - cfa_offset; 6532 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); 6533 if (tlen <= len) 6534 { 6535 base_reg = crtl->drap_reg; 6536 base_offset = toffset; 6537 len = tlen; 6538 } 6539 } 6540 if (hfp_ok) 6541 { 6542 toffset = m->fs.fp_offset - cfa_offset; 6543 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); 6544 if (tlen <= len) 6545 { 6546 base_reg = hard_frame_pointer_rtx; 6547 base_offset = toffset; 6548 } 6549 } 6550 } 6551 6552 /* Set the align return value. */ 6553 if (align) 6554 { 6555 if (base_reg == stack_pointer_rtx) 6556 *align = sp_align; 6557 else if (base_reg == crtl->drap_reg) 6558 *align = drap_align; 6559 else if (base_reg == hard_frame_pointer_rtx) 6560 *align = hfp_align; 6561 } 6562} 6563 6564/* Return an RTX that points to CFA_OFFSET within the stack frame and 6565 the alignment of address. If ALIGN is non-null, it should point to 6566 an alignment value (in bits) that is preferred or zero and will 6567 recieve the alignment of the base register that was selected, 6568 irrespective of rather or not CFA_OFFSET is a multiple of that 6569 alignment value. If it is possible for the base register offset to be 6570 non-immediate then SCRATCH_REGNO should specify a scratch register to 6571 use. 6572 6573 The valid base registers are taken from CFUN->MACHINE->FS. */ 6574 6575static rtx 6576choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, 6577 unsigned int scratch_regno = INVALID_REGNUM) 6578{ 6579 rtx base_reg = NULL; 6580 HOST_WIDE_INT base_offset = 0; 6581 6582 /* If a specific alignment is requested, try to get a base register 6583 with that alignment first. 
*/ 6584 if (align && *align) 6585 choose_basereg (cfa_offset, base_reg, base_offset, *align, align); 6586 6587 if (!base_reg) 6588 choose_basereg (cfa_offset, base_reg, base_offset, 0, align); 6589 6590 gcc_assert (base_reg != NULL); 6591 6592 rtx base_offset_rtx = GEN_INT (base_offset); 6593 6594 if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) 6595 { 6596 gcc_assert (scratch_regno != INVALID_REGNUM); 6597 6598 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); 6599 emit_move_insn (scratch_reg, base_offset_rtx); 6600 6601 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); 6602 } 6603 6604 return plus_constant (Pmode, base_reg, base_offset); 6605} 6606 6607/* Emit code to save registers in the prologue. */ 6608 6609static void 6610ix86_emit_save_regs (void) 6611{ 6612 unsigned int regno; 6613 rtx_insn *insn; 6614 6615 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) 6616 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) 6617 { 6618 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); 6619 RTX_FRAME_RELATED_P (insn) = 1; 6620 } 6621} 6622 6623/* Emit a single register save at CFA - CFA_OFFSET. */ 6624 6625static void 6626ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, 6627 HOST_WIDE_INT cfa_offset) 6628{ 6629 struct machine_function *m = cfun->machine; 6630 rtx reg = gen_rtx_REG (mode, regno); 6631 rtx mem, addr, base, insn; 6632 unsigned int align = GET_MODE_ALIGNMENT (mode); 6633 6634 addr = choose_baseaddr (cfa_offset, &align); 6635 mem = gen_frame_mem (mode, addr); 6636 6637 /* The location aligment depends upon the base register. */ 6638 align = MIN (GET_MODE_ALIGNMENT (mode), align); 6639 gcc_assert (! 
(cfa_offset & (align / BITS_PER_UNIT - 1))); 6640 set_mem_align (mem, align); 6641 6642 insn = emit_insn (gen_rtx_SET (mem, reg)); 6643 RTX_FRAME_RELATED_P (insn) = 1; 6644 6645 base = addr; 6646 if (GET_CODE (base) == PLUS) 6647 base = XEXP (base, 0); 6648 gcc_checking_assert (REG_P (base)); 6649 6650 /* When saving registers into a re-aligned local stack frame, avoid 6651 any tricky guessing by dwarf2out. */ 6652 if (m->fs.realigned) 6653 { 6654 gcc_checking_assert (stack_realign_drap); 6655 6656 if (regno == REGNO (crtl->drap_reg)) 6657 { 6658 /* A bit of a hack. We force the DRAP register to be saved in 6659 the re-aligned stack frame, which provides us with a copy 6660 of the CFA that will last past the prologue. Install it. */ 6661 gcc_checking_assert (cfun->machine->fs.fp_valid); 6662 addr = plus_constant (Pmode, hard_frame_pointer_rtx, 6663 cfun->machine->fs.fp_offset - cfa_offset); 6664 mem = gen_rtx_MEM (mode, addr); 6665 add_reg_note (insn, REG_CFA_DEF_CFA, mem); 6666 } 6667 else 6668 { 6669 /* The frame pointer is a stable reference within the 6670 aligned frame. Use it. */ 6671 gcc_checking_assert (cfun->machine->fs.fp_valid); 6672 addr = plus_constant (Pmode, hard_frame_pointer_rtx, 6673 cfun->machine->fs.fp_offset - cfa_offset); 6674 mem = gen_rtx_MEM (mode, addr); 6675 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); 6676 } 6677 } 6678 6679 else if (base == stack_pointer_rtx && m->fs.sp_realigned 6680 && cfa_offset >= m->fs.sp_realigned_offset) 6681 { 6682 gcc_checking_assert (stack_realign_fp); 6683 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); 6684 } 6685 6686 /* The memory may not be relative to the current CFA register, 6687 which means that we may need to generate a new pattern for 6688 use by the unwind info. 
*/ 6689 else if (base != m->fs.cfa_reg) 6690 { 6691 addr = plus_constant (Pmode, m->fs.cfa_reg, 6692 m->fs.cfa_offset - cfa_offset); 6693 mem = gen_rtx_MEM (mode, addr); 6694 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); 6695 } 6696} 6697 6698/* Emit code to save registers using MOV insns. 6699 First register is stored at CFA - CFA_OFFSET. */ 6700static void 6701ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) 6702{ 6703 unsigned int regno; 6704 6705 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 6706 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) 6707 { 6708 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); 6709 cfa_offset -= UNITS_PER_WORD; 6710 } 6711} 6712 6713/* Emit code to save SSE registers using MOV insns. 6714 First register is stored at CFA - CFA_OFFSET. */ 6715static void 6716ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) 6717{ 6718 unsigned int regno; 6719 6720 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 6721 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) 6722 { 6723 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); 6724 cfa_offset -= GET_MODE_SIZE (V4SFmode); 6725 } 6726} 6727 6728static GTY(()) rtx queued_cfa_restores; 6729 6730/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack 6731 manipulation insn. The value is on the stack at CFA - CFA_OFFSET. 6732 Don't add the note if the previously saved value will be left untouched 6733 within stack red-zone till return, as unwinders can find the same value 6734 in the register and on the stack. 
*/

static void
ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  /* Within the red zone the save slot stays intact until return, so the
     unwinder can find the value either place; skip the note.  */
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  /* Find the tail of the queued list, then splice the whole list onto
     the front of INSN's notes.  */
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}

/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  If SET_CFA, this adjustment moves the CFA register from SRC
   to DEST.  */

static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		    (Pmode, dest, src, addend));
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Track the effect of this adjustment on the frame-state view of the
     stack pointer.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}

/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
	1. parameter passing register.
	2. GOT pointer.
   We reuse static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}

/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute ("force_align_arg_pointer",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}

/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
  if (ix86_tls_descriptor_calls_expanded_in_cfun
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;
}

/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by an asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      /* Copy DRAP into a pseudo right after the function entry point.  */
      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;	/* True if REG was pushed and must be restored later.  */
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  /* No register is free; spill one around the use.  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.
   This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (sr->reg));

	  /* The RTX FRAME_RELATED_P mechanism doesn't know about pop.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  /* Reload the register from its save slot; the slot itself
	     stays part of the local frame until the epilogue.  */
	  rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   This differs from the next routine in that it tries hard to prevent
   attacks that jump the stack guard.  Thus it is never allowed to allocate
   more than PROBE_INTERVAL bytes of stack space without a suitable
   probe.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size,
					 const bool int_registers_saved)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  HOST_WIDE_INT probe_interval = get_probe_interval ();
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual.  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (residual), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (rounded_size - size), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 9 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 4 * get_probe_interval ())
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = get_probe_interval (); i < size; i += get_probe_interval ())
	{
	  if (first_probe)
	    {
	      adjust = 2 * get_probe_interval () + dope;
	      first_probe = false;
	    }
	  else
	    adjust = get_probe_interval ();

	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + get_probe_interval () + dope;
      else
	adjust = size + get_probe_interval () - i;

      emit_insn (gen_rtx_SET (stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    (get_probe_interval ()
						     + dope))));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, get_probe_interval ());


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (get_probe_interval () + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      if (rounded_size <= (HOST_WIDE_INT_1 << 31))
	emit_insn (gen_rtx_SET (sr.reg,
				plus_constant (Pmode, stack_pointer_rtx,
					       -rounded_size)));
      else
	{
	  /* The offset doesn't fit in a 32-bit displacement; build it
	     in the scratch register instead.  */
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  emit_insn (gen_rtx_SET (sr.reg,
				  gen_rtx_PLUS (Pmode, sr.reg,
						stack_pointer_rtx)));
	}


      /* Step 3: the loop

	 do
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }
	 while (SP != LAST_ADDR)

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    (get_probe_interval ()
						     + dope))));

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      get_probe_interval () + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.
*/ 7530 xops[0] = stack_pointer_rtx; 7531 xops[1] = reg; 7532 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); 7533 7534 /* Branch. */ 7535 fputs ("\tjne\t", asm_out_file); 7536 assemble_name_raw (asm_out_file, loop_lab); 7537 fputc ('\n', asm_out_file); 7538 7539 return ""; 7540} 7541 7542/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, 7543 inclusive. These are offsets from the current stack pointer. 7544 7545 INT_REGISTERS_SAVED is true if integer registers have already been 7546 pushed on the stack. */ 7547 7548static void 7549ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, 7550 const bool int_registers_saved) 7551{ 7552 /* See if we have a constant small number of probes to generate. If so, 7553 that's the easy case. The run-time loop is made up of 6 insns in the 7554 generic case while the compile-time loop is made up of n insns for n # 7555 of intervals. */ 7556 if (size <= 6 * get_probe_interval ()) 7557 { 7558 HOST_WIDE_INT i; 7559 7560 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until 7561 it exceeds SIZE. If only one probe is needed, this will not 7562 generate any code. Then probe at FIRST + SIZE. */ 7563 for (i = get_probe_interval (); i < size; i += get_probe_interval ()) 7564 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, 7565 -(first + i))); 7566 7567 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, 7568 -(first + size))); 7569 } 7570 7571 /* Otherwise, do the same as above, but in a loop. Note that we must be 7572 extra careful with variables wrapping around because we might be at 7573 the very top (or the very bottom) of the address space and we have 7574 to be able to handle this case properly; in particular, we use an 7575 equality test for the loop condition. 
*/ 7576 else 7577 { 7578 /* We expect the GP registers to be saved when probes are used 7579 as the probing sequences might need a scratch register and 7580 the routine to allocate one assumes the integer registers 7581 have already been saved. */ 7582 gcc_assert (int_registers_saved); 7583 7584 HOST_WIDE_INT rounded_size, last; 7585 struct scratch_reg sr; 7586 7587 get_scratch_register_on_entry (&sr); 7588 7589 7590 /* Step 1: round SIZE to the previous multiple of the interval. */ 7591 7592 rounded_size = ROUND_DOWN (size, get_probe_interval ()); 7593 7594 7595 /* Step 2: compute initial and final value of the loop counter. */ 7596 7597 /* TEST_OFFSET = FIRST. */ 7598 emit_move_insn (sr.reg, GEN_INT (-first)); 7599 7600 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ 7601 last = first + rounded_size; 7602 7603 7604 /* Step 3: the loop 7605 7606 do 7607 { 7608 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL 7609 probe at TEST_ADDR 7610 } 7611 while (TEST_ADDR != LAST_ADDR) 7612 7613 probes at FIRST + N * PROBE_INTERVAL for values of N from 1 7614 until it is equal to ROUNDED_SIZE. */ 7615 7616 emit_insn 7617 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last))); 7618 7619 7620 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time 7621 that SIZE is equal to ROUNDED_SIZE. */ 7622 7623 if (size != rounded_size) 7624 emit_stack_probe (plus_constant (Pmode, 7625 gen_rtx_PLUS (Pmode, 7626 stack_pointer_rtx, 7627 sr.reg), 7628 rounded_size - size)); 7629 7630 release_scratch_register_on_entry (&sr, size, true); 7631 } 7632 7633 /* Make sure nothing is scheduled before we are done. */ 7634 emit_insn (gen_blockage ()); 7635} 7636 7637/* Probe a range of stack addresses from REG to END, inclusive. These are 7638 offsets from the current stack pointer. 
*/ 7639 7640const char * 7641output_probe_stack_range (rtx reg, rtx end) 7642{ 7643 static int labelno = 0; 7644 char loop_lab[32]; 7645 rtx xops[3]; 7646 7647 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); 7648 7649 /* Loop. */ 7650 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); 7651 7652 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ 7653 xops[0] = reg; 7654 xops[1] = GEN_INT (get_probe_interval ()); 7655 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); 7656 7657 /* Probe at TEST_ADDR. */ 7658 xops[0] = stack_pointer_rtx; 7659 xops[1] = reg; 7660 xops[2] = const0_rtx; 7661 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); 7662 7663 /* Test if TEST_ADDR == LAST_ADDR. */ 7664 xops[0] = reg; 7665 xops[1] = end; 7666 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); 7667 7668 /* Branch. */ 7669 fputs ("\tjne\t", asm_out_file); 7670 assemble_name_raw (asm_out_file, loop_lab); 7671 fputc ('\n', asm_out_file); 7672 7673 return ""; 7674} 7675 7676/* Set stack_frame_required to false if stack frame isn't required. 7677 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack 7678 slot used if stack frame is required and CHECK_STACK_SLOT is true. */ 7679 7680static void 7681ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, 7682 bool check_stack_slot) 7683{ 7684 HARD_REG_SET set_up_by_prologue, prologue_used; 7685 basic_block bb; 7686 7687 CLEAR_HARD_REG_SET (prologue_used); 7688 CLEAR_HARD_REG_SET (set_up_by_prologue); 7689 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); 7690 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); 7691 add_to_hard_reg_set (&set_up_by_prologue, Pmode, 7692 HARD_FRAME_POINTER_REGNUM); 7693 7694 /* The preferred stack alignment is the minimum stack alignment. 
*/ 7695 if (stack_alignment > crtl->preferred_stack_boundary) 7696 stack_alignment = crtl->preferred_stack_boundary; 7697 7698 bool require_stack_frame = false; 7699 7700 FOR_EACH_BB_FN (bb, cfun) 7701 { 7702 rtx_insn *insn; 7703 FOR_BB_INSNS (bb, insn) 7704 if (NONDEBUG_INSN_P (insn) 7705 && requires_stack_frame_p (insn, prologue_used, 7706 set_up_by_prologue)) 7707 { 7708 require_stack_frame = true; 7709 7710 if (check_stack_slot) 7711 { 7712 /* Find the maximum stack alignment. */ 7713 subrtx_iterator::array_type array; 7714 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) 7715 if (MEM_P (*iter) 7716 && (reg_mentioned_p (stack_pointer_rtx, 7717 *iter) 7718 || reg_mentioned_p (frame_pointer_rtx, 7719 *iter))) 7720 { 7721 unsigned int alignment = MEM_ALIGN (*iter); 7722 if (alignment > stack_alignment) 7723 stack_alignment = alignment; 7724 } 7725 } 7726 } 7727 } 7728 7729 cfun->machine->stack_frame_required = require_stack_frame; 7730} 7731 7732/* Finalize stack_realign_needed and frame_pointer_needed flags, which 7733 will guide prologue/epilogue to be generated in correct form. */ 7734 7735static void 7736ix86_finalize_stack_frame_flags (void) 7737{ 7738 /* Check if stack realign is really needed after reload, and 7739 stores result in cfun */ 7740 unsigned int incoming_stack_boundary 7741 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary 7742 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); 7743 unsigned int stack_alignment 7744 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor 7745 ? crtl->max_used_stack_slot_alignment 7746 : crtl->stack_alignment_needed); 7747 unsigned int stack_realign 7748 = (incoming_stack_boundary < stack_alignment); 7749 bool recompute_frame_layout_p = false; 7750 7751 if (crtl->stack_realign_finalized) 7752 { 7753 /* After stack_realign_needed is finalized, we can't no longer 7754 change it. 
*/ 7755 gcc_assert (crtl->stack_realign_needed == stack_realign); 7756 return; 7757 } 7758 7759 /* It is always safe to compute max_used_stack_alignment. We 7760 compute it only if 128-bit aligned load/store may be generated 7761 on misaligned stack slot which will lead to segfault. */ 7762 bool check_stack_slot 7763 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128); 7764 ix86_find_max_used_stack_alignment (stack_alignment, 7765 check_stack_slot); 7766 7767 /* If the only reason for frame_pointer_needed is that we conservatively 7768 assumed stack realignment might be needed or -fno-omit-frame-pointer 7769 is used, but in the end nothing that needed the stack alignment had 7770 been spilled nor stack access, clear frame_pointer_needed and say we 7771 don't need stack realignment. */ 7772 if ((stack_realign || (!flag_omit_frame_pointer && optimize)) 7773 && frame_pointer_needed 7774 && crtl->is_leaf 7775 && crtl->sp_is_unchanging 7776 && !ix86_current_function_calls_tls_descriptor 7777 && !crtl->accesses_prior_frames 7778 && !cfun->calls_alloca 7779 && !crtl->calls_eh_return 7780 /* See ira_setup_eliminable_regset for the rationale. */ 7781 && !(STACK_CHECK_MOVING_SP 7782 && flag_stack_check 7783 && flag_exceptions 7784 && cfun->can_throw_non_call_exceptions) 7785 && !ix86_frame_pointer_required () 7786 && ix86_get_frame_size () == 0 7787 && ix86_nsaved_sseregs () == 0 7788 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) 7789 { 7790 if (cfun->machine->stack_frame_required) 7791 { 7792 /* Stack frame is required. If stack alignment needed is less 7793 than incoming stack boundary, don't realign stack. */ 7794 stack_realign = incoming_stack_boundary < stack_alignment; 7795 if (!stack_realign) 7796 { 7797 crtl->max_used_stack_slot_alignment 7798 = incoming_stack_boundary; 7799 crtl->stack_alignment_needed 7800 = incoming_stack_boundary; 7801 /* Also update preferred_stack_boundary for leaf 7802 functions. 
*/ 7803 crtl->preferred_stack_boundary 7804 = incoming_stack_boundary; 7805 } 7806 } 7807 else 7808 { 7809 /* If drap has been set, but it actually isn't live at the 7810 start of the function, there is no reason to set it up. */ 7811 if (crtl->drap_reg) 7812 { 7813 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; 7814 if (! REGNO_REG_SET_P (DF_LR_IN (bb), 7815 REGNO (crtl->drap_reg))) 7816 { 7817 crtl->drap_reg = NULL_RTX; 7818 crtl->need_drap = false; 7819 } 7820 } 7821 else 7822 cfun->machine->no_drap_save_restore = true; 7823 7824 frame_pointer_needed = false; 7825 stack_realign = false; 7826 crtl->max_used_stack_slot_alignment = incoming_stack_boundary; 7827 crtl->stack_alignment_needed = incoming_stack_boundary; 7828 crtl->stack_alignment_estimated = incoming_stack_boundary; 7829 if (crtl->preferred_stack_boundary > incoming_stack_boundary) 7830 crtl->preferred_stack_boundary = incoming_stack_boundary; 7831 df_finish_pass (true); 7832 df_scan_alloc (NULL); 7833 df_scan_blocks (); 7834 df_compute_regs_ever_live (true); 7835 df_analyze (); 7836 7837 if (flag_var_tracking) 7838 { 7839 /* Since frame pointer is no longer available, replace it with 7840 stack pointer - UNITS_PER_WORD in debug insns. */ 7841 df_ref ref, next; 7842 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); 7843 ref; ref = next) 7844 { 7845 next = DF_REF_NEXT_REG (ref); 7846 if (!DF_REF_INSN_INFO (ref)) 7847 continue; 7848 7849 /* Make sure the next ref is for a different instruction, 7850 so that we're not affected by the rescan. 
*/ 7851 rtx_insn *insn = DF_REF_INSN (ref); 7852 while (next && DF_REF_INSN (next) == insn) 7853 next = DF_REF_NEXT_REG (next); 7854 7855 if (DEBUG_INSN_P (insn)) 7856 { 7857 bool changed = false; 7858 for (; ref != next; ref = DF_REF_NEXT_REG (ref)) 7859 { 7860 rtx *loc = DF_REF_LOC (ref); 7861 if (*loc == hard_frame_pointer_rtx) 7862 { 7863 *loc = plus_constant (Pmode, 7864 stack_pointer_rtx, 7865 -UNITS_PER_WORD); 7866 changed = true; 7867 } 7868 } 7869 if (changed) 7870 df_insn_rescan (insn); 7871 } 7872 } 7873 } 7874 7875 recompute_frame_layout_p = true; 7876 } 7877 } 7878 else if (crtl->max_used_stack_slot_alignment >= 128 7879 && cfun->machine->stack_frame_required) 7880 { 7881 /* We don't need to realign stack. max_used_stack_alignment is 7882 used to decide how stack frame should be aligned. This is 7883 independent of any psABIs nor 32-bit vs 64-bit. */ 7884 cfun->machine->max_used_stack_alignment 7885 = stack_alignment / BITS_PER_UNIT; 7886 } 7887 7888 if (crtl->stack_realign_needed != stack_realign) 7889 recompute_frame_layout_p = true; 7890 crtl->stack_realign_needed = stack_realign; 7891 crtl->stack_realign_finalized = true; 7892 if (recompute_frame_layout_p) 7893 ix86_compute_frame_layout (); 7894} 7895 7896/* Delete SET_GOT right after entry block if it is allocated to reg. 
*/

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  /* A set_got insn is a PARALLEL whose first element sets REG
	     from an UNSPEC_SET_GOT.  */
	  rtx vec = XVECEXP (pat, 0, 0);
	  if (GET_CODE (vec) == SET
	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}

/* Build a SET moving REG to/from the frame slot at FRAME_REG + OFFSET.
   If STORE is true the memory is the destination, otherwise the
   source.  */

static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  if (offset)
    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

/* Build a SET loading REG from the frame slot at FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

/* Build a SET storing REG to the frame slot at FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}

/* Emit the out-of-line register-save stub call used for the ms_abi to
   sysv_abi transition: set up RAX as the stub's base pointer and emit a
   PARALLEL containing the stub USE plus one frame store per clobbered
   register described by FRAME.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  /* One vector slot for the stub USE, plus one per saved register.  */
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     regardless of whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  /* The stub stores SSE registers, so the base must be 16-byte
     aligned.  */
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
					: XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}

/* Generate and return an insn body to AND X with Y.  */

static rtx_insn *
gen_and2_insn (rtx x, rtx y)
{
  enum insn_code icode = optab_handler (and_optab, GET_MODE (x));

  gcc_assert (insn_operand_matches (icode, 0, x));
  gcc_assert (insn_operand_matches (icode, 1, x));
  gcc_assert (insn_operand_matches (icode, 2, y));

  return GEN_FCN (icode) (x, x, y);
}

/* Expand the prologue into a bunch of separate insns.
*/ 8002 8003void 8004ix86_expand_prologue (void) 8005{ 8006 struct machine_function *m = cfun->machine; 8007 rtx insn, t; 8008 HOST_WIDE_INT allocate; 8009 bool int_registers_saved; 8010 bool sse_registers_saved; 8011 bool save_stub_call_needed; 8012 rtx static_chain = NULL_RTX; 8013 8014 if (ix86_function_naked (current_function_decl)) 8015 return; 8016 8017 ix86_finalize_stack_frame_flags (); 8018 8019 /* DRAP should not coexist with stack_realign_fp */ 8020 gcc_assert (!(crtl->drap_reg && stack_realign_fp)); 8021 8022 memset (&m->fs, 0, sizeof (m->fs)); 8023 8024 /* Initialize CFA state for before the prologue. */ 8025 m->fs.cfa_reg = stack_pointer_rtx; 8026 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; 8027 8028 /* Track SP offset to the CFA. We continue tracking this after we've 8029 swapped the CFA register away from SP. In the case of re-alignment 8030 this is fudged; we're interested to offsets within the local frame. */ 8031 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; 8032 m->fs.sp_valid = true; 8033 m->fs.sp_realigned = false; 8034 8035 const struct ix86_frame &frame = cfun->machine->frame; 8036 8037 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) 8038 { 8039 /* We should have already generated an error for any use of 8040 ms_hook on a nested function. */ 8041 gcc_checking_assert (!ix86_static_chain_on_stack); 8042 8043 /* Check if profiling is active and we shall use profiling before 8044 prologue variant. If so sorry. */ 8045 if (crtl->profile && flag_fentry != 0) 8046 sorry ("%<ms_hook_prologue%> attribute is not compatible " 8047 "with %<-mfentry%> for 32-bit"); 8048 8049 /* In ix86_asm_output_function_label we emitted: 8050 8b ff movl.s %edi,%edi 8051 55 push %ebp 8052 8b ec movl.s %esp,%ebp 8053 8054 This matches the hookable function prologue in Win32 API 8055 functions in Microsoft Windows XP Service Pack 2 and newer. 8056 Wine uses this to enable Windows apps to hook the Win32 API 8057 functions provided by Wine. 
8058 8059 What that means is that we've already set up the frame pointer. */ 8060 8061 if (frame_pointer_needed 8062 && !(crtl->drap_reg && crtl->stack_realign_needed)) 8063 { 8064 rtx push, mov; 8065 8066 /* We've decided to use the frame pointer already set up. 8067 Describe this to the unwinder by pretending that both 8068 push and mov insns happen right here. 8069 8070 Putting the unwind info here at the end of the ms_hook 8071 is done so that we can make absolutely certain we get 8072 the required byte sequence at the start of the function, 8073 rather than relying on an assembler that can produce 8074 the exact encoding required. 8075 8076 However it does mean (in the unpatched case) that we have 8077 a 1 insn window where the asynchronous unwind info is 8078 incorrect. However, if we placed the unwind info at 8079 its correct location we would have incorrect unwind info 8080 in the patched case. Which is probably all moot since 8081 I don't expect Wine generates dwarf2 unwind info for the 8082 system libraries that use this feature. */ 8083 8084 insn = emit_insn (gen_blockage ()); 8085 8086 push = gen_push (hard_frame_pointer_rtx); 8087 mov = gen_rtx_SET (hard_frame_pointer_rtx, 8088 stack_pointer_rtx); 8089 RTX_FRAME_RELATED_P (push) = 1; 8090 RTX_FRAME_RELATED_P (mov) = 1; 8091 8092 RTX_FRAME_RELATED_P (insn) = 1; 8093 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 8094 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); 8095 8096 /* Note that gen_push incremented m->fs.cfa_offset, even 8097 though we didn't emit the push insn here. */ 8098 m->fs.cfa_reg = hard_frame_pointer_rtx; 8099 m->fs.fp_offset = m->fs.cfa_offset; 8100 m->fs.fp_valid = true; 8101 } 8102 else 8103 { 8104 /* The frame pointer is not needed so pop %ebp again. 8105 This leaves us with a pristine state. 
*/ 8106 emit_insn (gen_pop (hard_frame_pointer_rtx)); 8107 } 8108 } 8109 8110 /* The first insn of a function that accepts its static chain on the 8111 stack is to push the register that would be filled in by a direct 8112 call. This insn will be skipped by the trampoline. */ 8113 else if (ix86_static_chain_on_stack) 8114 { 8115 static_chain = ix86_static_chain (cfun->decl, false); 8116 insn = emit_insn (gen_push (static_chain)); 8117 emit_insn (gen_blockage ()); 8118 8119 /* We don't want to interpret this push insn as a register save, 8120 only as a stack adjustment. The real copy of the register as 8121 a save will be done later, if needed. */ 8122 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); 8123 t = gen_rtx_SET (stack_pointer_rtx, t); 8124 add_reg_note (insn, REG_CFA_ADJUST_CFA, t); 8125 RTX_FRAME_RELATED_P (insn) = 1; 8126 } 8127 8128 /* Emit prologue code to adjust stack alignment and setup DRAP, in case 8129 of DRAP is needed and stack realignment is really needed after reload */ 8130 if (stack_realign_drap) 8131 { 8132 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8133 8134 /* Can't use DRAP in interrupt function. */ 8135 if (cfun->machine->func_type != TYPE_NORMAL) 8136 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " 8137 "in interrupt service routine. This may be worked " 8138 "around by avoiding functions with aggregate return."); 8139 8140 /* Only need to push parameter pointer reg if it is caller saved. */ 8141 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) 8142 { 8143 /* Push arg pointer reg */ 8144 insn = emit_insn (gen_push (crtl->drap_reg)); 8145 RTX_FRAME_RELATED_P (insn) = 1; 8146 } 8147 8148 /* Grab the argument pointer. */ 8149 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); 8150 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); 8151 RTX_FRAME_RELATED_P (insn) = 1; 8152 m->fs.cfa_reg = crtl->drap_reg; 8153 m->fs.cfa_offset = 0; 8154 8155 /* Align the stack. 
*/ 8156 insn = emit_insn (gen_and2_insn (stack_pointer_rtx, 8157 GEN_INT (-align_bytes))); 8158 RTX_FRAME_RELATED_P (insn) = 1; 8159 8160 /* Replicate the return address on the stack so that return 8161 address can be reached via (argp - 1) slot. This is needed 8162 to implement macro RETURN_ADDR_RTX and intrinsic function 8163 expand_builtin_return_addr etc. */ 8164 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); 8165 t = gen_frame_mem (word_mode, t); 8166 insn = emit_insn (gen_push (t)); 8167 RTX_FRAME_RELATED_P (insn) = 1; 8168 8169 /* For the purposes of frame and register save area addressing, 8170 we've started over with a new frame. */ 8171 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; 8172 m->fs.realigned = true; 8173 8174 if (static_chain) 8175 { 8176 /* Replicate static chain on the stack so that static chain 8177 can be reached via (argp - 2) slot. This is needed for 8178 nested function with stack realignment. */ 8179 insn = emit_insn (gen_push (static_chain)); 8180 RTX_FRAME_RELATED_P (insn) = 1; 8181 } 8182 } 8183 8184 int_registers_saved = (frame.nregs == 0); 8185 sse_registers_saved = (frame.nsseregs == 0); 8186 save_stub_call_needed = (m->call_ms2sysv); 8187 gcc_assert (sse_registers_saved || !save_stub_call_needed); 8188 8189 if (frame_pointer_needed && !m->fs.fp_valid) 8190 { 8191 /* Note: AT&T enter does NOT have reversed args. Enter is probably 8192 slower on all targets. Also sdb didn't like it. 
*/ 8193 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 8194 RTX_FRAME_RELATED_P (insn) = 1; 8195 8196 if (m->fs.sp_offset == frame.hard_frame_pointer_offset) 8197 { 8198 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 8199 RTX_FRAME_RELATED_P (insn) = 1; 8200 8201 if (m->fs.cfa_reg == stack_pointer_rtx) 8202 m->fs.cfa_reg = hard_frame_pointer_rtx; 8203 m->fs.fp_offset = m->fs.sp_offset; 8204 m->fs.fp_valid = true; 8205 } 8206 } 8207 8208 if (!int_registers_saved) 8209 { 8210 /* If saving registers via PUSH, do so now. */ 8211 if (!frame.save_regs_using_mov) 8212 { 8213 ix86_emit_save_regs (); 8214 int_registers_saved = true; 8215 gcc_assert (m->fs.sp_offset == frame.reg_save_offset); 8216 } 8217 8218 /* When using red zone we may start register saving before allocating 8219 the stack frame saving one cycle of the prologue. However, avoid 8220 doing this if we have to probe the stack; at least on x86_64 the 8221 stack probe can turn into a call that clobbers a red zone location. */ 8222 else if (ix86_using_red_zone () 8223 && (! TARGET_STACK_PROBE 8224 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) 8225 { 8226 ix86_emit_save_regs_using_mov (frame.reg_save_offset); 8227 int_registers_saved = true; 8228 } 8229 } 8230 8231 if (stack_realign_fp) 8232 { 8233 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8234 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); 8235 8236 /* Record last valid frame pointer offset. */ 8237 m->fs.sp_realigned_fp_last = frame.reg_save_offset; 8238 8239 /* The computation of the size of the re-aligned stack frame means 8240 that we must allocate the size of the register save area before 8241 performing the actual alignment. Otherwise we cannot guarantee 8242 that there's enough storage above the realignment point. 
*/ 8243 allocate = frame.reg_save_offset - m->fs.sp_offset 8244 + frame.stack_realign_allocate; 8245 if (allocate) 8246 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8247 GEN_INT (-allocate), -1, false); 8248 8249 /* Align the stack. */ 8250 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes))); 8251 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); 8252 m->fs.sp_realigned_offset = m->fs.sp_offset 8253 - frame.stack_realign_allocate; 8254 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. 8255 Beyond this point, stack access should be done via choose_baseaddr or 8256 by using sp_valid_at and fp_valid_at to determine the correct base 8257 register. Henceforth, any CFA offset should be thought of as logical 8258 and not physical. */ 8259 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); 8260 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); 8261 m->fs.sp_realigned = true; 8262 8263 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which 8264 is needed to describe where a register is saved using a realigned 8265 stack pointer, so we need to invalidate the stack pointer for that 8266 target. */ 8267 if (TARGET_SEH) 8268 m->fs.sp_valid = false; 8269 8270 /* If SP offset is non-immediate after allocation of the stack frame, 8271 then emit SSE saves or stub call prior to allocating the rest of the 8272 stack frame. This is less efficient for the out-of-line stub because 8273 we can't combine allocations across the call barrier, but it's better 8274 than using a scratch register. 
*/ 8275 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset 8276 - m->fs.sp_realigned_offset), 8277 Pmode)) 8278 { 8279 if (!sse_registers_saved) 8280 { 8281 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); 8282 sse_registers_saved = true; 8283 } 8284 else if (save_stub_call_needed) 8285 { 8286 ix86_emit_outlined_ms2sysv_save (frame); 8287 save_stub_call_needed = false; 8288 } 8289 } 8290 } 8291 8292 allocate = frame.stack_pointer_offset - m->fs.sp_offset; 8293 8294 if (flag_stack_usage_info) 8295 { 8296 /* We start to count from ARG_POINTER. */ 8297 HOST_WIDE_INT stack_size = frame.stack_pointer_offset; 8298 8299 /* If it was realigned, take into account the fake frame. */ 8300 if (stack_realign_drap) 8301 { 8302 if (ix86_static_chain_on_stack) 8303 stack_size += UNITS_PER_WORD; 8304 8305 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) 8306 stack_size += UNITS_PER_WORD; 8307 8308 /* This over-estimates by 1 minimal-stack-alignment-unit but 8309 mitigates that by counting in the new return address slot. */ 8310 current_function_dynamic_stack_size 8311 += crtl->stack_alignment_needed / BITS_PER_UNIT; 8312 } 8313 8314 current_function_static_stack_size = stack_size; 8315 } 8316 8317 /* On SEH target with very large frame size, allocate an area to save 8318 SSE registers (as the very large allocation won't be described). */ 8319 if (TARGET_SEH 8320 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE 8321 && !sse_registers_saved) 8322 { 8323 HOST_WIDE_INT sse_size 8324 = frame.sse_reg_save_offset - frame.reg_save_offset; 8325 8326 gcc_assert (int_registers_saved); 8327 8328 /* No need to do stack checking as the area will be immediately 8329 written. 
*/ 8330 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8331 GEN_INT (-sse_size), -1, 8332 m->fs.cfa_reg == stack_pointer_rtx); 8333 allocate -= sse_size; 8334 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); 8335 sse_registers_saved = true; 8336 } 8337 8338 /* The stack has already been decremented by the instruction calling us 8339 so probe if the size is non-negative to preserve the protection area. */ 8340 if (allocate >= 0 8341 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK 8342 || flag_stack_clash_protection)) 8343 { 8344 if (flag_stack_clash_protection) 8345 { 8346 ix86_adjust_stack_and_probe_stack_clash (allocate, 8347 int_registers_saved); 8348 allocate = 0; 8349 } 8350 else if (STACK_CHECK_MOVING_SP) 8351 { 8352 if (!(crtl->is_leaf && !cfun->calls_alloca 8353 && allocate <= get_probe_interval ())) 8354 { 8355 ix86_adjust_stack_and_probe (allocate, int_registers_saved); 8356 allocate = 0; 8357 } 8358 } 8359 else 8360 { 8361 HOST_WIDE_INT size = allocate; 8362 8363 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) 8364 size = 0x80000000 - get_stack_check_protect () - 1; 8365 8366 if (TARGET_STACK_PROBE) 8367 { 8368 if (crtl->is_leaf && !cfun->calls_alloca) 8369 { 8370 if (size > get_probe_interval ()) 8371 ix86_emit_probe_stack_range (0, size, int_registers_saved); 8372 } 8373 else 8374 ix86_emit_probe_stack_range (0, 8375 size + get_stack_check_protect (), 8376 int_registers_saved); 8377 } 8378 else 8379 { 8380 if (crtl->is_leaf && !cfun->calls_alloca) 8381 { 8382 if (size > get_probe_interval () 8383 && size > get_stack_check_protect ()) 8384 ix86_emit_probe_stack_range (get_stack_check_protect (), 8385 (size 8386 - get_stack_check_protect ()), 8387 int_registers_saved); 8388 } 8389 else 8390 ix86_emit_probe_stack_range (get_stack_check_protect (), size, 8391 int_registers_saved); 8392 } 8393 } 8394 } 8395 8396 if (allocate == 0) 8397 ; 8398 else if (!ix86_target_stack_probe () 8399 || frame.stack_pointer_offset 
< CHECK_STACK_LIMIT) 8400 { 8401 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8402 GEN_INT (-allocate), -1, 8403 m->fs.cfa_reg == stack_pointer_rtx); 8404 } 8405 else 8406 { 8407 rtx eax = gen_rtx_REG (Pmode, AX_REG); 8408 rtx r10 = NULL; 8409 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); 8410 bool eax_live = ix86_eax_live_at_start_p (); 8411 bool r10_live = false; 8412 8413 if (TARGET_64BIT) 8414 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); 8415 8416 if (eax_live) 8417 { 8418 insn = emit_insn (gen_push (eax)); 8419 allocate -= UNITS_PER_WORD; 8420 /* Note that SEH directives need to continue tracking the stack 8421 pointer even after the frame pointer has been set up. */ 8422 if (sp_is_cfa_reg || TARGET_SEH) 8423 { 8424 if (sp_is_cfa_reg) 8425 m->fs.cfa_offset += UNITS_PER_WORD; 8426 RTX_FRAME_RELATED_P (insn) = 1; 8427 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 8428 gen_rtx_SET (stack_pointer_rtx, 8429 plus_constant (Pmode, 8430 stack_pointer_rtx, 8431 -UNITS_PER_WORD))); 8432 } 8433 } 8434 8435 if (r10_live) 8436 { 8437 r10 = gen_rtx_REG (Pmode, R10_REG); 8438 insn = emit_insn (gen_push (r10)); 8439 allocate -= UNITS_PER_WORD; 8440 if (sp_is_cfa_reg || TARGET_SEH) 8441 { 8442 if (sp_is_cfa_reg) 8443 m->fs.cfa_offset += UNITS_PER_WORD; 8444 RTX_FRAME_RELATED_P (insn) = 1; 8445 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 8446 gen_rtx_SET (stack_pointer_rtx, 8447 plus_constant (Pmode, 8448 stack_pointer_rtx, 8449 -UNITS_PER_WORD))); 8450 } 8451 } 8452 8453 emit_move_insn (eax, GEN_INT (allocate)); 8454 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax)); 8455 8456 /* Use the fact that AX still contains ALLOCATE. 
*/ 8457 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub 8458 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax)); 8459 8460 if (sp_is_cfa_reg || TARGET_SEH) 8461 { 8462 if (sp_is_cfa_reg) 8463 m->fs.cfa_offset += allocate; 8464 RTX_FRAME_RELATED_P (insn) = 1; 8465 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 8466 gen_rtx_SET (stack_pointer_rtx, 8467 plus_constant (Pmode, stack_pointer_rtx, 8468 -allocate))); 8469 } 8470 m->fs.sp_offset += allocate; 8471 8472 /* Use stack_pointer_rtx for relative addressing so that code works for 8473 realigned stack. But this means that we need a blockage to prevent 8474 stores based on the frame pointer from being scheduled before. */ 8475 if (r10_live && eax_live) 8476 { 8477 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); 8478 emit_move_insn (gen_rtx_REG (word_mode, R10_REG), 8479 gen_frame_mem (word_mode, t)); 8480 t = plus_constant (Pmode, t, UNITS_PER_WORD); 8481 emit_move_insn (gen_rtx_REG (word_mode, AX_REG), 8482 gen_frame_mem (word_mode, t)); 8483 emit_insn (gen_memory_blockage ()); 8484 } 8485 else if (eax_live || r10_live) 8486 { 8487 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); 8488 emit_move_insn (gen_rtx_REG (word_mode, 8489 (eax_live ? AX_REG : R10_REG)), 8490 gen_frame_mem (word_mode, t)); 8491 emit_insn (gen_memory_blockage ()); 8492 } 8493 } 8494 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); 8495 8496 /* If we havn't already set up the frame pointer, do so now. 
*/ 8497 if (frame_pointer_needed && !m->fs.fp_valid) 8498 { 8499 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, 8500 GEN_INT (frame.stack_pointer_offset 8501 - frame.hard_frame_pointer_offset)); 8502 insn = emit_insn (insn); 8503 RTX_FRAME_RELATED_P (insn) = 1; 8504 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); 8505 8506 if (m->fs.cfa_reg == stack_pointer_rtx) 8507 m->fs.cfa_reg = hard_frame_pointer_rtx; 8508 m->fs.fp_offset = frame.hard_frame_pointer_offset; 8509 m->fs.fp_valid = true; 8510 } 8511 8512 if (!int_registers_saved) 8513 ix86_emit_save_regs_using_mov (frame.reg_save_offset); 8514 if (!sse_registers_saved) 8515 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); 8516 else if (save_stub_call_needed) 8517 ix86_emit_outlined_ms2sysv_save (frame); 8518 8519 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT 8520 in PROLOGUE. */ 8521 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) 8522 { 8523 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); 8524 insn = emit_insn (gen_set_got (pic)); 8525 RTX_FRAME_RELATED_P (insn) = 1; 8526 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); 8527 emit_insn (gen_prologue_use (pic)); 8528 /* Deleting already emmitted SET_GOT if exist and allocated to 8529 REAL_PIC_OFFSET_TABLE_REGNUM. */ 8530 ix86_elim_entry_set_got (pic); 8531 } 8532 8533 if (crtl->drap_reg && !crtl->stack_realign_needed) 8534 { 8535 /* vDRAP is setup but after reload it turns out stack realign 8536 isn't necessary, here we will emit prologue to setup DRAP 8537 without stack realign adjustment */ 8538 t = choose_baseaddr (0, NULL); 8539 emit_insn (gen_rtx_SET (crtl->drap_reg, t)); 8540 } 8541 8542 /* Prevent instructions from being scheduled into register save push 8543 sequence when access to the redzone area is done through frame pointer. 
   The offset between the frame pointer and the stack pointer is calculated
   relative to the value of the stack pointer at the end of the function
   prologue, and moving instructions that access redzone area via frame
   pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}

/* Emit code to restore REG using a POP insn, updating the epilogue
   frame-state machine (cfun->machine->fs) and attaching the unwind
   (CFA) notes needed to keep the debug/EH info consistent.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  /* A pop deallocates one word, moving SP toward the caller's frame.  */
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* SP is the CFA register: record the one-word CFA adjustment
	 caused by the pop.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}

/* Emit code to restore all saved general registers using POP insns,
   in ascending hard-register order.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}

/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes.
 */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE requires a valid frame pointer; afterwards SP points just
     above the popped saved-FP slot and FP is no longer valid.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA register was the frame pointer we just popped; switch
	 the CFA back to being SP-relative.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}

/* Emit code to restore saved general registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx mem;
	rtx_insn *insn;

	mem = choose_baseaddr (cfa_offset, NULL);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (!(cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}

/* Emit the register-restore part of an ms2sysv epilogue by invoking an
   out-of-line xlogue restore stub, either as a call (USE_CALL) or as a
   tail jump; STYLE is passed through to pro_epilogue_adjust_stack.  */
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
				    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Setup RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
		    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
		    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  /* Size the PARALLEL: one element per restored register, plus the
     use/return/SP-restore elements added below.  */
  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ?
			    5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
	{
	  /* Tail-call with a frame pointer: the stub pops the frame via
	     RBP (SP = RBP + 8, RBP = [RBP]), so express that in the
	     pattern along with a memory clobber.  */
	  rtx rbp = gen_rtx_REG (DImode, BP_REG);
	  gcc_assert (m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

	  tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8));
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
	  RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
	  tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
	  RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
	}
      else
	{
	  /* If no hard frame pointer, we set R10 to the SP restore value.  */
	  gcc_assert (!m->fs.fp_valid);
	  gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	  gcc_assert (m->fs.sp_valid);

	  r10 = gen_rtx_REG (DImode, R10_REG);
	  tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset));
	  emit_insn (gen_rtx_SET (r10, tmp));

	  RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
	}
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last, because RSI is
	 the stub's base pointer and must stay valid until the end.  */
      if (r.regno == SI_REG)
	{
	  gcc_assert (!rsi_frame_load);
	  rsi_frame_load = frame_load;
	  rsi_restore_offset = r.offset;
	}
      else
	{
	  RTVEC_ELT (v, vi++) = frame_load;
	  ix86_add_cfa_restore_note (NULL, reg, r.offset);
	}
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
			     rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  /* All reserved PARALLEL slots must have been filled.  */
  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
	ix86_emit_leave (insn);
      else
	{
	  /* Need CFA adjust note.  */
	  tmp = gen_rtx_SET (stack_pointer_rtx, r10);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
	}
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (dealloc), style,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
}

/* Restore function stack, frame, and registers.
 */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  /* Saved so the frame state can be rewound at the end: the epilogue may
     be expanded more than once (e.g. for sibcalls plus a normal return).  */
  struct machine_frame_state frame_state_save = m->fs;
  bool restore_regs_via_mov;
  bool using_drap;
  bool restore_stub_is_tail = false;

  if (ix86_function_naked (current_function_decl))
    {
      /* The program should not reach this point.  */
      emit_insn (gen_ud2 ());
      return;
    }

  ix86_finalize_stack_frame_flags ();
  const struct ix86_frame &frame = cfun->machine->frame;

  /* Re-derive which of SP/FP are valid frame references on entry to
     the epilogue, and sanity-check them against the computed frame.  */
  m->fs.sp_realigned = stack_realign_fp;
  m->fs.sp_valid = stack_realign_fp
		   || !frame_pointer_needed
		   || crtl->sp_is_unchanging;
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below return address and error code in
	 exception handler.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    reg_save_offset -= 2 * UNITS_PER_WORD;

  /* Decide between MOV-based and POP-based register restores.  */
  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp cannot be used then
     using a move instruction to restore the register since it's
     less work than reloading sp and popping the register.  */
  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore SSE regs via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && sp_valid_at (frame.stack_realign_offset + 1)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (m->call_ms2sysv)
    {
      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;

      /* We cannot use a tail-call for the stub if:
	 1. We have to pop incoming args,
	 2. We have additional int regs to restore, or
	 3. A sibling call will be the tail-call, or
	 4. We are emitting an eh_return_internal epilogue.

	 TODO: Item 4 has not yet been tested!

	 If any of the above are true, we will call the stub rather than
	 jump to it.  */
      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
    }

  /* If using out-of-line stub that is a tail-call, then...  */
  if (m->call_ms2sysv && restore_stub_is_tail)
    {
      /* TODO: paranoid tests. (remove eventually)  */
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);
      gcc_assert (!m->fs.fp_valid);
      gcc_assert (!m->fs.realigned);
      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
      gcc_assert (!crtl->drap_reg);
      gcc_assert (!frame.nregs);
    }
  else if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx sa = EH_RETURN_STACKADJ_RTX;
	  rtx_insn *insn;

	  /* %ecx can't be used for both DRAP register and eh_return.  */
	  if (crtl->drap_reg)
	    gcc_assert (REGNO (crtl->drap_reg) != CX_REG);

	  /* regparm nested functions don't work with eh_return.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (sa, t));

	      /* NB: eh_return epilogues must restore the frame pointer
		 in word_mode since the upper 32 bits of RBP register
		 can have any values.  */
	      t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
	      rtx frame_reg = gen_rtx_REG (word_mode,
					   HARD_FRAME_POINTER_REGNUM);
	      insn = emit_move_insn (frame_reg, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn, unless the control flow instrumentation
		 is done.  In this case the SP is used later and we have
		 to reset CFA to SP.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style,
					 flag_cf_protection);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	  m->fs.sp_realigned = false;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test are
	     set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  If the stack pointer was realigned, it needs
	 to be restored now.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid || m->fs.sp_realigned
	  || (TARGET_SEH
	      && (m->fs.sp_offset - reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (sp_valid_at (frame.hfp_save_offset)
	  && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave (NULL);
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }

  if (using_drap)
    {
      /* Recover the original stack pointer from the DRAP register and
	 switch the CFA back to being SP-relative.  */
      int param_ptr_offset = UNITS_PER_WORD;
      rtx_insn *insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.sp_realigned);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (cfun->machine->func_type != TYPE_NORMAL)
    emit_jump_insn (gen_interrupt_return ());
  else if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx_insn *insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  /* The popped value is the return address, now live in %ecx.  */
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else if (!m->call_ms2sysv || !restore_stub_is_tail)
    {
      /* In case of return from EH a simple return cannot be used
	 as a return address will be compared with a shadow stack
	 return address.  Use indirect jump instead.  */
      if (style == 2 && flag_cf_protection)
	{
	  /* Register used in indirect jump must be in word_mode.  But
	     Pmode may not be the same as word_mode for x32.  */
	  rtx ecx = gen_rtx_REG (word_mode, CX_REG);
	  rtx_insn *insn;

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_internal ());
    }

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}

/* Reset from the function's potential modifications.
*/

/* Presumably the TARGET_ASM_FUNCTION_EPILOGUE hook -- TODO confirm against
   the hook table in this file.  Resets the hard PIC register number after
   the body has been emitted, and on Mach-O makes sure the function body
   neither ends in a label nor is completely empty (Mach-O cannot cope
   with labels at the end of objects).  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* If the PIC register was materialized as a hard register (rather
     than a pseudo), restore its canonical register number for any
     subsequent output.  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
	 it looks like we might want one, take special action.
	 First, collect any sequence of deleted debug labels.  */
      while (insn
	     && NOTE_P (insn)
	     && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
	{
	  /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	     notes only, instead set their CODE_LABEL_NUMBER to -1,
	     otherwise there would be code generation differences
	     in between -g and -g0.  */
	  if (NOTE_P (insn) && NOTE_KIND (insn)
	      == NOTE_INSN_DELETED_DEBUG_LABEL)
	    deleted_debug_label = insn;
	  insn = PREV_INSN (insn);
	}

      /* If we have:
	   label:
	     barrier
	 then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
	insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
	{
	  if (LABEL_P (insn)
	      || (NOTE_P (insn)
		  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	    /* Trailing label.  */
	    fputs ("\tnop\n", file);
	  else if (cfun && ! cfun->is_thunk)
	    {
	      /* See if we have a completely empty function body, skipping
	         the special case of the picbase thunk emitted as asm.  */
	      while (insn && ! INSN_P (insn))
		insn = PREV_INSN (insn);
	      /* If we don't find any insns, we've got an empty function body;
		 I.e. completely empty - without a return or branch.  This is
		 taken as the case where a function body has been removed
		 because it contains an inline __builtin_unreachable().  GCC
		 declares that reaching __builtin_unreachable() means UB so
		 we're not obliged to do anything special; however, we want
		 non-zero-sized function bodies.  To meet this, and help the
		 user out, let's trap the case.  */
	      if (insn == NULL)
		fputs ("\tud2\n", file);
	    }
	}
      else if (deleted_debug_label)
	/* Nothing but deleted debug labels remained; neutralize their
	   label numbers so -g and -g0 produce identical code.  */
	for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	  if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	    CODE_LABEL_NUMBER (insn) = -1;
    }
}

/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.

   Returns INVALID_REGNUM (after issuing a "sorry" diagnostic) when the
   calling convention leaves no usable register.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  /* fastcall passes arguments in %ecx/%edx; %eax is free unless
	     it holds the static chain.  */
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("%<-fsplit-stack%> does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (is_thiscall)
	{
	  /* thiscall passes `this' in %ecx; %edx is free unless it holds
	     the static chain, in which case fall back to %eax.  */
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return DX_REG;
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      /* The static chain occupies %ecx; with two register
		 parameters as well there is nothing left.  */
	      if (regparm >= 2)
		{
		  sorry ("%<-fsplit-stack%> does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("%<-fsplit-stack%> does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}

/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Return location of the stack guard value in the TLS block, as a
   MEM in the TLS address space.  Requires the target to define
   TARGET_THREAD_SPLIT_STACK_OFFSET.  */

rtx
ix86_split_stack_guard (void)
{
  int offset;
  addr_space_t as = DEFAULT_TLS_SEG_REG;
  rtx r;

  gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
  gcc_unreachable ();
#endif

  r = GEN_INT (offset);
  r = gen_const_mem (Pmode, r);
  set_mem_addr_space (r, as);

  return r;
}

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  Emits a comparison of
   the stack pointer against the guard in the TCB and, on the unlikely
   path, a call to __morestack (or __morestack_large_model) to grow
   the stack.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use gen_add in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  /* Offset does not fit in a 32-bit immediate; load it first.  */
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	  && !TARGET_PECOFF)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    {
	      split_stack_fn_large
		= gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
	    }
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx_code_label *label;
	      rtx x;

	      /* Materialize the GOT base, then load the address of
		 __morestack_large_model from its GOT slot.  */
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_add2_insn (reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  /* Pack args_size into the upper and allocate into the lower
	     32 bits of r10 (the double shift avoids UB for a 32-bit
	     HOST_WIDE_INT shift count).  */
	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      /* 32-bit: push the two arguments and let the call pop them.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
	 call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}

/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.
*/

static void
ix86_live_on_entry (bitmap regs)
{
  /* Only needed when the split-stack prologue writes the varargs
     scratch register before the regular prologue runs.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}

/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction into *OUT (base, index, displacement, scale and
   segment).  Return 0 if the structure of the address is grossly off.
   Return -1 if the address contains ASHIFT, so it is not strictly valid,
   but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return 0;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
	  if (addr == NULL_RTX)
	    return 0;

	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      /* Flatten the left-leaning PLUS chain into at most four
	 addends; more than that cannot form a valid address.  */
      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      /* Classify each addend as index*scale, segment UNSPEC,
	 base/index register, or displacement.  */
      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      /* Only the thread-pointer UNSPEC is acceptable here; it
		 selects the TLS segment register.  */
	      if (XINT (op, 1) == UNSPEC_TP
	          && TARGET_TLS_DIRECT_SEG_REFS
	          && seg == ADDR_SPACE_GENERIC)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!REG_P (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  /* The index must be a (possibly SUBREG-wrapped) register.  */
  if (index)
    {
      if (REG_P (index))
	;
      else if (SUBREG_P (index)
	       && REG_P (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling;
     %esp cannot be encoded in the index slot, so swap it into base.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
	  || REGNO (index_reg) == FRAME_POINTER_REGNUM
	  || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
	  || REGNO (base_reg) == FRAME_POINTER_REGNUM
	  || REGNO (base_reg) == BP_REG
	  || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}

/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.base)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
	  || !pic_offset_table_rtx
	  || !REG_P (parts.index)
	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}

/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}

/* True if operand X should be loaded from GOT.
*/

bool
ix86_force_load_from_GOT_p (rtx x)
{
  /* Non-local function symbols in non-PIC code are loaded through the
     GOT (with the GOT32X relocation on 32-bit) rather than through the
     PLT, when -fno-plt is in effect or the decl carries "noplt".  */
  return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
	  && !TARGET_PECOFF && !TARGET_MACHO
	  && !flag_pic
	  && ix86_cmodel != CM_LARGE
	  && GET_CODE (x) == SYMBOL_REF
	  && SYMBOL_REF_FUNCTION_P (x)
	  && (!flag_plt
	      || (SYMBOL_REF_DECL (x)
		  && lookup_attribute ("noplt",
				       DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
	  && !SYMBOL_REF_LOCAL_P (x));
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      /* Strip an optional "symbol + constant offset".  */
      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* Wide integer constants are only valid when they can be loaded
	 directly, or when they are a standard SSE constant.  */
      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  if (!standard_sse_constant_p (x, mode))
	    return false;
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (machine_mode mode, rtx x)
{
  /* We can put any immediate constant in memory.  */
  switch (GET_CODE (x))
    {
    CASE_CONST_ANY:
      return false;

    default:
      break;
    }

  return !ix86_legitimate_constant_p (mode, x);
}

/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}


/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}

/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1))
	    break;
	  if (GET_CODE (op0) == UNSPEC
	      && (XINT (op0, 1) == UNSPEC_DTPOFF
		  || XINT (op0, 1) == UNSPEC_NTPOFF)
	      && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
	    return true;
	  /* Reject offsets outside +-16MB; larger offsets risk
	     overflowing the 32-bit PC-relative relocation.  */
	  if (INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol needs always to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Non-external-weak function symbols need to be resolved only
		 for the large model.  Non-external symbols don't need to be
		 resolved for large and medium models.  For the small model,
		 we don't need to resolve anything here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0)
		   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
		  || !SYMBOL_REF_EXTERNAL_P (op0)
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && (SYMBOL_REF_LOCAL_P (op0)
		       || (HAVE_LD_PIE_COPYRELOC
			   && flag_pie
			   && !SYMBOL_REF_WEAK (op0)
			   && !SYMBOL_REF_FUNCTION_P (op0)))
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
	 distance of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32-bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}

/* Determine if op is suitable RTX for an address register.
   Return naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */

static rtx
ix86_validate_address_register (rtx op)
{
  machine_mode mode = GET_MODE (op);

  /* Only SImode or DImode registers can form the address.  */
  if (mode != SImode && mode != DImode)
    return NULL_RTX;

  if (REG_P (op))
    return op;
  else if (SUBREG_P (op))
    {
      rtx reg = SUBREG_REG (op);

      if (!REG_P (reg))
	return NULL_RTX;

      mode = GET_MODE (reg);

      /* Don't allow SUBREGs that span more than a word.
	 It can
	 lead to spill failures when the register is one word out
	 of a two word structure.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return NULL_RTX;

      /* Allow only SUBREGs of non-eliminable hard registers.  */
      if (register_no_elim_operand (reg, mode))
	return reg;
    }

  /* Op is not a register.  */
  return NULL_RTX;
}

/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
		       )))
	{
	  /* Target of the UNSPEC cases above that are valid only as PIC
	     references.  */
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.
   Uses strict register checking (third argument 1).  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.
 */

alias_set_type
ix86_GOT_alias_set (void)
{
  /* Allocated lazily on first use, then cached for the rest of the
     compilation.  */
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}

/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      /* Legitimize both operands recursively and recombine.  */
	      rtx base = legitimize_pic_address (op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (GET_CODE (base) == SYMBOL_REF
			  || GET_CODE (base) == LABEL_REF))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}

/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      /* The only supported widening is SImode -> DImode, enforced by
	 the asserts below; it is done via zero extension.  */
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}

/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      /* In the large PIC model reference the helper as @PLTOFF relative
	 to the PIC register.  */
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.
 */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");

      /* Record the global-dynamic TLS model in the symbol flags.  */
      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}

/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.

   Handles the GLOBAL_DYNAMIC, LOCAL_DYNAMIC, INITIAL_EXEC and LOCAL_EXEC
   TLS access models (see the switch below).  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}

/* Return true if OP refers to a TLS address.
 */
bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      rtx op = *iter;
      if (MEM_P (op))
	{
	  /* Walk down the left spine of PLUS chains in the address,
	     looking for an UNSPEC_TP (possibly zero-extended) term.  */
	  rtx *x = &XEXP (op, 0);
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    return true;
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }

  return false;
}

/* Rewrite *LOC so that it refers to a default TLS address space.  */
void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Drop the UNSPEC_TP term and move the MEM into the
			 TLS segment address space instead.  */
		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}

/* Rewrite instruction pattern involving TLS address
   so that it refers to a default TLS address space.  */
rtx
ix86_rewrite_tls_address (rtx pattern)
{
  pattern = copy_insn (pattern);
  ix86_rewrite_tls_address_1 (&pattern);
  return pattern;
}

/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

/* Hash descriptor for the DECL -> import-decl cache below, keyed by the
   original decl pointer; entries persist only while the source decl is
   still GC-marked.  */
struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  static inline hashval_t hash (tree_map *m) { return m->hash; }
  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};

static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);

  /* Reuse a previously created import decl if we already have one.  */
  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
  h = *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}

/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }
  return NULL_RTX;
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbols get rewritten first, according to their TLS model;
     this applies both to a bare SYMBOL_REF and to (const (plus sym off)).  */
  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  /* Next, PE/COFF dllimport/refptr indirections.  */
  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  /* Then PIC rewriting of symbolic addresses.  */
  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* Find which of the two constants folds into the displacement;
	     the remaining one becomes OTHER and absorbs it.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* A MULT that survived canonicalization cannot be encoded directly
	 as an index; compute it into a register.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* Last resort: force whichever operand is not already a register
	 into one, widening to Pmode if force_operand produced a
	 narrower value.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}

/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* 'P' requests a PLT reference for non-local symbols.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand, then the relocation suffix the
	 UNSPEC encodes (@GOT, @GOTOFF, TLS variants, ...).  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  SIZE is the byte size of
   the relocation (4 or 8); an 8-byte value is emitted as a 4-byte
   @dtpoff followed by a zero high word.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}

/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.
 */

static bool
ix86_pic_register_p (rtx x)
{
  /* A cselib VALUE stands for whatever register it was equivalent to.  */
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
    return true;
  else if (!REG_P (x))
    return false;
  else if (pic_offset_table_rtx)
    {
      if (REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      /* After the PIC pseudo has been allocated to a hard register,
	 match the hard register whose original regno was the pseudo.  */
      if (HARD_REGISTER_P (x)
	  && !HARD_REGISTER_P (pic_offset_table_rtx)
	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      return false;
    }
  else
    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}

/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses, i.e. a
   segment-based address whose displacement wraps UNSPEC_NTPOFF.
   Returns ORIG_X unchanged if the pattern does not match.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Skip over a constant offset wrapped inside the CONST.  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  /* Re-add the offset we skipped above, then rebuild index and base.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec [sym] PCREL) offset)): reassemble as
	 offset + sym.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      /* (const (unspec [sym] GOTPCREL/PCREL)): strip the indirection.  */
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  /* Neither operand is the PIC register: treat the whole
	     inner PLUS as a non-PIC addend.  */
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  /* Peel the CONST wrapper and any constant offset inside it.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_RTP)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  /* Narrow the result back to the original access mode if needed.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}

/* The normal instantiation of the above template.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  return ix86_delegitimize_address_1 (x, false);
}

/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.
 */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      /* Look through (const [(plus ...)] (unspec [sym] GOTPCREL/PCREL))
	 and return the wrapped symbol as the base term.  */
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && CONST_INT_P (XEXP (term, 1)))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address_1 (x, true);
}

/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
   symbol easily into the .debug_info section, so we need not to
   delegitimize, but instead assemble as @gotoff.
   Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as _GLOBAL_OFFSET_TABLE_-. expression.  */

static bool
ix86_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
    return true;

  if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
    return true;

  return false;
}

/* Write to FILE the condition-code suffix ("e", "ne", "a", ...) that
   selects comparison CODE under flags mode MODE, for use in setcc/cmov/jcc
   mnemonics.  If REVERSE, emit the suffix for the reversed condition.
   FP selects the fcmov-style spelling where the two differ (see GTU
   below).  Aborts on CODE/MODE combinations that have no encoding.  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  /* FP compares are first mapped onto the equivalent integer CC test.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	/* In the CCA/CCC/CCO/CCP/CCS modes, EQ tests the named flag
	   directly rather than the zero flag.  */
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}

/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
   If CODE is 'V', print naked full integer register name without %.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax registers carry a '%' prefix, except for the naked 'V'
     modifier.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Translate the modifier letter into the operand size (in bytes) we
     should pretend the register has; 0 means a "high" byte register.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* These registers have no assembler-level name at all.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get an 'r' (64-bit) or 'e' (32-bit)
	 prefix; REX registers are handled by the suffix logic below.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* 'high' byte registers (%ah etc.).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      if (SSE_REGNO_P (regno))
	{
	  /* Rewrite the leading 'x' of the xmm name to 'y'/'z' for
	     256/512-bit SSE registers.  */
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]" */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  /* 'd' with AVX prints the register twice, comma-separated.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}

/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print embedded rounding and sae.
   r -- print only sae.
12488 z -- print the opcode suffix for the size of the current operand. 12489 Z -- likewise, with special suffixes for x87 instructions. 12490 * -- print a star (in certain assembler syntax) 12491 A -- print an absolute memory reference. 12492 E -- print address with DImode register names if TARGET_64BIT. 12493 w -- print the operand as if it's a "word" (HImode) even if it isn't. 12494 s -- print a shift double count, followed by the assemblers argument 12495 delimiter. 12496 b -- print the QImode name of the register for the indicated operand. 12497 %b0 would print %al if operands[0] is reg 0. 12498 w -- likewise, print the HImode name of the register. 12499 k -- likewise, print the SImode name of the register. 12500 q -- likewise, print the DImode name of the register. 12501 x -- likewise, print the V4SFmode name of the register. 12502 t -- likewise, print the V8SFmode name of the register. 12503 g -- likewise, print the V16SFmode name of the register. 12504 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 12505 y -- print "st(0)" instead of "st" as a register. 12506 d -- print duplicated register operand for AVX instruction. 12507 D -- print condition for SSE cmp instruction. 12508 P -- if PIC, print an @PLT suffix. 12509 p -- print raw symbol name. 12510 X -- don't print any sort of PIC '@' suffix for a symbol. 12511 & -- print some in-use local-dynamic symbol name. 12512 H -- print a memory address offset by 8; used for sse high-parts 12513 Y -- print condition for XOP pcom* instruction. 12514 V -- print naked full integer register name without %. 12515 + -- print a branch hint as 'cs' or 'ds' prefix 12516 ; -- print a semicolon (after prefixes due to bug in older gas). 12517 ~ -- print "i" if TARGET_AVX2, "f" otherwise. 12518 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode 12519 M -- print addr32 prefix for TARGET_X32 with VSIB address. 12520 ! -- print NOTRACK prefix for jxx/call/ret instructions if required. 
12521 */ 12522 12523void 12524ix86_print_operand (FILE *file, rtx x, int code) 12525{ 12526 if (code) 12527 { 12528 switch (code) 12529 { 12530 case 'A': 12531 switch (ASSEMBLER_DIALECT) 12532 { 12533 case ASM_ATT: 12534 putc ('*', file); 12535 break; 12536 12537 case ASM_INTEL: 12538 /* Intel syntax. For absolute addresses, registers should not 12539 be surrounded by braces. */ 12540 if (!REG_P (x)) 12541 { 12542 putc ('[', file); 12543 ix86_print_operand (file, x, 0); 12544 putc (']', file); 12545 return; 12546 } 12547 break; 12548 12549 default: 12550 gcc_unreachable (); 12551 } 12552 12553 ix86_print_operand (file, x, 0); 12554 return; 12555 12556 case 'E': 12557 /* Wrap address in an UNSPEC to declare special handling. */ 12558 if (TARGET_64BIT) 12559 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); 12560 12561 output_address (VOIDmode, x); 12562 return; 12563 12564 case 'L': 12565 if (ASSEMBLER_DIALECT == ASM_ATT) 12566 putc ('l', file); 12567 return; 12568 12569 case 'W': 12570 if (ASSEMBLER_DIALECT == ASM_ATT) 12571 putc ('w', file); 12572 return; 12573 12574 case 'B': 12575 if (ASSEMBLER_DIALECT == ASM_ATT) 12576 putc ('b', file); 12577 return; 12578 12579 case 'Q': 12580 if (ASSEMBLER_DIALECT == ASM_ATT) 12581 putc ('l', file); 12582 return; 12583 12584 case 'S': 12585 if (ASSEMBLER_DIALECT == ASM_ATT) 12586 putc ('s', file); 12587 return; 12588 12589 case 'T': 12590 if (ASSEMBLER_DIALECT == ASM_ATT) 12591 putc ('t', file); 12592 return; 12593 12594 case 'O': 12595#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 12596 if (ASSEMBLER_DIALECT != ASM_ATT) 12597 return; 12598 12599 switch (GET_MODE_SIZE (GET_MODE (x))) 12600 { 12601 case 2: 12602 putc ('w', file); 12603 break; 12604 12605 case 4: 12606 putc ('l', file); 12607 break; 12608 12609 case 8: 12610 putc ('q', file); 12611 break; 12612 12613 default: 12614 output_operand_lossage ("invalid operand size for operand " 12615 "code 'O'"); 12616 return; 12617 } 12618 12619 putc ('.', file); 12620#endif 
12621 return; 12622 12623 case 'z': 12624 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 12625 { 12626 /* Opcodes don't get size suffixes if using Intel opcodes. */ 12627 if (ASSEMBLER_DIALECT == ASM_INTEL) 12628 return; 12629 12630 switch (GET_MODE_SIZE (GET_MODE (x))) 12631 { 12632 case 1: 12633 putc ('b', file); 12634 return; 12635 12636 case 2: 12637 putc ('w', file); 12638 return; 12639 12640 case 4: 12641 putc ('l', file); 12642 return; 12643 12644 case 8: 12645 putc ('q', file); 12646 return; 12647 12648 default: 12649 output_operand_lossage ("invalid operand size for operand " 12650 "code 'z'"); 12651 return; 12652 } 12653 } 12654 12655 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 12656 warning (0, "non-integer operand used with operand code %<z%>"); 12657 /* FALLTHRU */ 12658 12659 case 'Z': 12660 /* 387 opcodes don't get size suffixes if using Intel opcodes. */ 12661 if (ASSEMBLER_DIALECT == ASM_INTEL) 12662 return; 12663 12664 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 12665 { 12666 switch (GET_MODE_SIZE (GET_MODE (x))) 12667 { 12668 case 2: 12669#ifdef HAVE_AS_IX86_FILDS 12670 putc ('s', file); 12671#endif 12672 return; 12673 12674 case 4: 12675 putc ('l', file); 12676 return; 12677 12678 case 8: 12679#ifdef HAVE_AS_IX86_FILDQ 12680 putc ('q', file); 12681#else 12682 fputs ("ll", file); 12683#endif 12684 return; 12685 12686 default: 12687 break; 12688 } 12689 } 12690 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 12691 { 12692 /* 387 opcodes don't get size suffixes 12693 if the operands are registers. 
*/ 12694 if (STACK_REG_P (x)) 12695 return; 12696 12697 switch (GET_MODE_SIZE (GET_MODE (x))) 12698 { 12699 case 4: 12700 putc ('s', file); 12701 return; 12702 12703 case 8: 12704 putc ('l', file); 12705 return; 12706 12707 case 12: 12708 case 16: 12709 putc ('t', file); 12710 return; 12711 12712 default: 12713 break; 12714 } 12715 } 12716 else 12717 { 12718 output_operand_lossage ("invalid operand type used with " 12719 "operand code 'Z'"); 12720 return; 12721 } 12722 12723 output_operand_lossage ("invalid operand size for operand code 'Z'"); 12724 return; 12725 12726 case 'd': 12727 case 'b': 12728 case 'w': 12729 case 'k': 12730 case 'q': 12731 case 'h': 12732 case 't': 12733 case 'g': 12734 case 'y': 12735 case 'x': 12736 case 'X': 12737 case 'P': 12738 case 'p': 12739 case 'V': 12740 break; 12741 12742 case 's': 12743 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) 12744 { 12745 ix86_print_operand (file, x, 0); 12746 fputs (", ", file); 12747 } 12748 return; 12749 12750 case 'Y': 12751 switch (GET_CODE (x)) 12752 { 12753 case NE: 12754 fputs ("neq", file); 12755 break; 12756 case EQ: 12757 fputs ("eq", file); 12758 break; 12759 case GE: 12760 case GEU: 12761 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); 12762 break; 12763 case GT: 12764 case GTU: 12765 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? 
"gt" : "unle", file); 12766 break; 12767 case LE: 12768 case LEU: 12769 fputs ("le", file); 12770 break; 12771 case LT: 12772 case LTU: 12773 fputs ("lt", file); 12774 break; 12775 case UNORDERED: 12776 fputs ("unord", file); 12777 break; 12778 case ORDERED: 12779 fputs ("ord", file); 12780 break; 12781 case UNEQ: 12782 fputs ("ueq", file); 12783 break; 12784 case UNGE: 12785 fputs ("nlt", file); 12786 break; 12787 case UNGT: 12788 fputs ("nle", file); 12789 break; 12790 case UNLE: 12791 fputs ("ule", file); 12792 break; 12793 case UNLT: 12794 fputs ("ult", file); 12795 break; 12796 case LTGT: 12797 fputs ("une", file); 12798 break; 12799 default: 12800 output_operand_lossage ("operand is not a condition code, " 12801 "invalid operand code 'Y'"); 12802 return; 12803 } 12804 return; 12805 12806 case 'D': 12807 /* Little bit of braindamage here. The SSE compare instructions 12808 does use completely different names for the comparisons that the 12809 fp conditional moves. */ 12810 switch (GET_CODE (x)) 12811 { 12812 case UNEQ: 12813 if (TARGET_AVX) 12814 { 12815 fputs ("eq_us", file); 12816 break; 12817 } 12818 /* FALLTHRU */ 12819 case EQ: 12820 fputs ("eq", file); 12821 break; 12822 case UNLT: 12823 if (TARGET_AVX) 12824 { 12825 fputs ("nge", file); 12826 break; 12827 } 12828 /* FALLTHRU */ 12829 case LT: 12830 fputs ("lt", file); 12831 break; 12832 case UNLE: 12833 if (TARGET_AVX) 12834 { 12835 fputs ("ngt", file); 12836 break; 12837 } 12838 /* FALLTHRU */ 12839 case LE: 12840 fputs ("le", file); 12841 break; 12842 case UNORDERED: 12843 fputs ("unord", file); 12844 break; 12845 case LTGT: 12846 if (TARGET_AVX) 12847 { 12848 fputs ("neq_oq", file); 12849 break; 12850 } 12851 /* FALLTHRU */ 12852 case NE: 12853 fputs ("neq", file); 12854 break; 12855 case GE: 12856 if (TARGET_AVX) 12857 { 12858 fputs ("ge", file); 12859 break; 12860 } 12861 /* FALLTHRU */ 12862 case UNGE: 12863 fputs ("nlt", file); 12864 break; 12865 case GT: 12866 if (TARGET_AVX) 12867 { 12868 fputs 
("gt", file); 12869 break; 12870 } 12871 /* FALLTHRU */ 12872 case UNGT: 12873 fputs ("nle", file); 12874 break; 12875 case ORDERED: 12876 fputs ("ord", file); 12877 break; 12878 default: 12879 output_operand_lossage ("operand is not a condition code, " 12880 "invalid operand code 'D'"); 12881 return; 12882 } 12883 return; 12884 12885 case 'F': 12886 case 'f': 12887#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 12888 if (ASSEMBLER_DIALECT == ASM_ATT) 12889 putc ('.', file); 12890 gcc_fallthrough (); 12891#endif 12892 12893 case 'C': 12894 case 'c': 12895 if (!COMPARISON_P (x)) 12896 { 12897 output_operand_lossage ("operand is not a condition code, " 12898 "invalid operand code '%c'", code); 12899 return; 12900 } 12901 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 12902 code == 'c' || code == 'f', 12903 code == 'F' || code == 'f', 12904 file); 12905 return; 12906 12907 case 'H': 12908 if (!offsettable_memref_p (x)) 12909 { 12910 output_operand_lossage ("operand is not an offsettable memory " 12911 "reference, invalid operand code 'H'"); 12912 return; 12913 } 12914 /* It doesn't actually matter what mode we use here, as we're 12915 only going to use this for printing. */ 12916 x = adjust_address_nv (x, DImode, 8); 12917 /* Output 'qword ptr' for intel assembler dialect. */ 12918 if (ASSEMBLER_DIALECT == ASM_INTEL) 12919 code = 'q'; 12920 break; 12921 12922 case 'K': 12923 if (!CONST_INT_P (x)) 12924 { 12925 output_operand_lossage ("operand is not an integer, invalid " 12926 "operand code 'K'"); 12927 return; 12928 } 12929 12930 if (INTVAL (x) & IX86_HLE_ACQUIRE) 12931#ifdef HAVE_AS_IX86_HLE 12932 fputs ("xacquire ", file); 12933#else 12934 fputs ("\n" ASM_BYTE "0xf2\n\t", file); 12935#endif 12936 else if (INTVAL (x) & IX86_HLE_RELEASE) 12937#ifdef HAVE_AS_IX86_HLE 12938 fputs ("xrelease ", file); 12939#else 12940 fputs ("\n" ASM_BYTE "0xf3\n\t", file); 12941#endif 12942 /* We do not want to print value of the operand. 
*/ 12943 return; 12944 12945 case 'N': 12946 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) 12947 fputs ("{z}", file); 12948 return; 12949 12950 case 'r': 12951 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) 12952 { 12953 output_operand_lossage ("operand is not a specific integer, " 12954 "invalid operand code 'r'"); 12955 return; 12956 } 12957 12958 if (ASSEMBLER_DIALECT == ASM_INTEL) 12959 fputs (", ", file); 12960 12961 fputs ("{sae}", file); 12962 12963 if (ASSEMBLER_DIALECT == ASM_ATT) 12964 fputs (", ", file); 12965 12966 return; 12967 12968 case 'R': 12969 if (!CONST_INT_P (x)) 12970 { 12971 output_operand_lossage ("operand is not an integer, invalid " 12972 "operand code 'R'"); 12973 return; 12974 } 12975 12976 if (ASSEMBLER_DIALECT == ASM_INTEL) 12977 fputs (", ", file); 12978 12979 switch (INTVAL (x)) 12980 { 12981 case ROUND_NEAREST_INT | ROUND_SAE: 12982 fputs ("{rn-sae}", file); 12983 break; 12984 case ROUND_NEG_INF | ROUND_SAE: 12985 fputs ("{rd-sae}", file); 12986 break; 12987 case ROUND_POS_INF | ROUND_SAE: 12988 fputs ("{ru-sae}", file); 12989 break; 12990 case ROUND_ZERO | ROUND_SAE: 12991 fputs ("{rz-sae}", file); 12992 break; 12993 default: 12994 output_operand_lossage ("operand is not a specific integer, " 12995 "invalid operand code 'R'"); 12996 } 12997 12998 if (ASSEMBLER_DIALECT == ASM_ATT) 12999 fputs (", ", file); 13000 13001 return; 13002 13003 case '*': 13004 if (ASSEMBLER_DIALECT == ASM_ATT) 13005 putc ('*', file); 13006 return; 13007 13008 case '&': 13009 { 13010 const char *name = get_some_local_dynamic_name (); 13011 if (name == NULL) 13012 output_operand_lossage ("'%%&' used without any " 13013 "local dynamic TLS references"); 13014 else 13015 assemble_name (file, name); 13016 return; 13017 } 13018 13019 case '+': 13020 { 13021 rtx x; 13022 13023 if (!optimize 13024 || optimize_function_for_size_p (cfun) 13025 || !TARGET_BRANCH_PREDICTION_HINTS) 13026 return; 13027 13028 x = find_reg_note (current_output_insn, REG_BR_PROB, 
0); 13029 if (x) 13030 { 13031 int pred_val = profile_probability::from_reg_br_prob_note 13032 (XINT (x, 0)).to_reg_br_prob_base (); 13033 13034 if (pred_val < REG_BR_PROB_BASE * 45 / 100 13035 || pred_val > REG_BR_PROB_BASE * 55 / 100) 13036 { 13037 bool taken = pred_val > REG_BR_PROB_BASE / 2; 13038 bool cputaken 13039 = final_forward_branch_p (current_output_insn) == 0; 13040 13041 /* Emit hints only in the case default branch prediction 13042 heuristics would fail. */ 13043 if (taken != cputaken) 13044 { 13045 /* We use 3e (DS) prefix for taken branches and 13046 2e (CS) prefix for not taken branches. */ 13047 if (taken) 13048 fputs ("ds ; ", file); 13049 else 13050 fputs ("cs ; ", file); 13051 } 13052 } 13053 } 13054 return; 13055 } 13056 13057 case ';': 13058#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX 13059 putc (';', file); 13060#endif 13061 return; 13062 13063 case '~': 13064 putc (TARGET_AVX2 ? 'i' : 'f', file); 13065 return; 13066 13067 case 'M': 13068 if (TARGET_X32) 13069 { 13070 /* NB: 32-bit indices in VSIB address are sign-extended 13071 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is 13072 sign-extended to 0xfffffffff7fa3010 which is invalid 13073 address. Add addr32 prefix if there is no base 13074 register nor symbol. 
*/ 13075 bool ok; 13076 struct ix86_address parts; 13077 ok = ix86_decompose_address (x, &parts); 13078 gcc_assert (ok && parts.index == NULL_RTX); 13079 if (parts.base == NULL_RTX 13080 && (parts.disp == NULL_RTX 13081 || !symbolic_operand (parts.disp, 13082 GET_MODE (parts.disp)))) 13083 fputs ("addr32 ", file); 13084 } 13085 return; 13086 13087 case '^': 13088 if (TARGET_64BIT && Pmode != word_mode) 13089 fputs ("addr32 ", file); 13090 return; 13091 13092 case '!': 13093 if (ix86_notrack_prefixed_insn_p (current_output_insn)) 13094 fputs ("notrack ", file); 13095 return; 13096 13097 default: 13098 output_operand_lossage ("invalid operand code '%c'", code); 13099 } 13100 } 13101 13102 if (REG_P (x)) 13103 print_reg (x, code, file); 13104 13105 else if (MEM_P (x)) 13106 { 13107 rtx addr = XEXP (x, 0); 13108 13109 /* No `byte ptr' prefix for call instructions ... */ 13110 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') 13111 { 13112 machine_mode mode = GET_MODE (x); 13113 const char *size; 13114 13115 /* Check for explicit size override codes. */ 13116 if (code == 'b') 13117 size = "BYTE"; 13118 else if (code == 'w') 13119 size = "WORD"; 13120 else if (code == 'k') 13121 size = "DWORD"; 13122 else if (code == 'q') 13123 size = "QWORD"; 13124 else if (code == 'x') 13125 size = "XMMWORD"; 13126 else if (code == 't') 13127 size = "YMMWORD"; 13128 else if (code == 'g') 13129 size = "ZMMWORD"; 13130 else if (mode == BLKmode) 13131 /* ... or BLKmode operands, when not overridden. 
*/ 13132 size = NULL; 13133 else 13134 switch (GET_MODE_SIZE (mode)) 13135 { 13136 case 1: size = "BYTE"; break; 13137 case 2: size = "WORD"; break; 13138 case 4: size = "DWORD"; break; 13139 case 8: size = "QWORD"; break; 13140 case 12: size = "TBYTE"; break; 13141 case 16: 13142 if (mode == XFmode) 13143 size = "TBYTE"; 13144 else 13145 size = "XMMWORD"; 13146 break; 13147 case 32: size = "YMMWORD"; break; 13148 case 64: size = "ZMMWORD"; break; 13149 default: 13150 gcc_unreachable (); 13151 } 13152 if (size) 13153 { 13154 fputs (size, file); 13155 fputs (" PTR ", file); 13156 } 13157 } 13158 13159 if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) 13160 output_operand_lossage ("invalid constraints for operand"); 13161 else 13162 ix86_print_operand_address_as 13163 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); 13164 } 13165 13166 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) 13167 { 13168 long l; 13169 13170 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); 13171 13172 if (ASSEMBLER_DIALECT == ASM_ATT) 13173 putc ('$', file); 13174 /* Sign extend 32bit SFmode immediate to 8 bytes. */ 13175 if (code == 'q') 13176 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", 13177 (unsigned long long) (int) l); 13178 else 13179 fprintf (file, "0x%08x", (unsigned int) l); 13180 } 13181 13182 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) 13183 { 13184 long l[2]; 13185 13186 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); 13187 13188 if (ASSEMBLER_DIALECT == ASM_ATT) 13189 putc ('$', file); 13190 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); 13191 } 13192 13193 /* These float cases don't actually occur as immediate operands. 
*/ 13194 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) 13195 { 13196 char dstr[30]; 13197 13198 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 13199 fputs (dstr, file); 13200 } 13201 13202 else 13203 { 13204 /* We have patterns that allow zero sets of memory, for instance. 13205 In 64-bit mode, we should probably support all 8-byte vectors, 13206 since we can in fact encode that into an immediate. */ 13207 if (GET_CODE (x) == CONST_VECTOR) 13208 { 13209 if (x != CONST0_RTX (GET_MODE (x))) 13210 output_operand_lossage ("invalid vector immediate"); 13211 x = const0_rtx; 13212 } 13213 13214 if (code != 'P' && code != 'p') 13215 { 13216 if (CONST_INT_P (x)) 13217 { 13218 if (ASSEMBLER_DIALECT == ASM_ATT) 13219 putc ('$', file); 13220 } 13221 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 13222 || GET_CODE (x) == LABEL_REF) 13223 { 13224 if (ASSEMBLER_DIALECT == ASM_ATT) 13225 putc ('$', file); 13226 else 13227 fputs ("OFFSET FLAT:", file); 13228 } 13229 } 13230 if (CONST_INT_P (x)) 13231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 13232 else if (flag_pic || MACHOPIC_INDIRECT) 13233 output_pic_addr_const (file, x, code); 13234 else 13235 output_addr_const (file, x); 13236 } 13237} 13238 13239static bool 13240ix86_print_operand_punct_valid_p (unsigned char code) 13241{ 13242 return (code == '*' || code == '+' || code == '&' || code == ';' 13243 || code == '~' || code == '^' || code == '!'); 13244} 13245 13246/* Print a memory operand whose address is ADDR. 
*/ 13247 13248static void 13249ix86_print_operand_address_as (FILE *file, rtx addr, 13250 addr_space_t as, bool no_rip) 13251{ 13252 struct ix86_address parts; 13253 rtx base, index, disp; 13254 int scale; 13255 int ok; 13256 bool vsib = false; 13257 int code = 0; 13258 13259 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) 13260 { 13261 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); 13262 gcc_assert (parts.index == NULL_RTX); 13263 parts.index = XVECEXP (addr, 0, 1); 13264 parts.scale = INTVAL (XVECEXP (addr, 0, 2)); 13265 addr = XVECEXP (addr, 0, 0); 13266 vsib = true; 13267 } 13268 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) 13269 { 13270 gcc_assert (TARGET_64BIT); 13271 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); 13272 code = 'q'; 13273 } 13274 else 13275 ok = ix86_decompose_address (addr, &parts); 13276 13277 gcc_assert (ok); 13278 13279 base = parts.base; 13280 index = parts.index; 13281 disp = parts.disp; 13282 scale = parts.scale; 13283 13284 if (ADDR_SPACE_GENERIC_P (as)) 13285 as = parts.seg; 13286 else 13287 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); 13288 13289 if (!ADDR_SPACE_GENERIC_P (as)) 13290 { 13291 if (ASSEMBLER_DIALECT == ASM_ATT) 13292 putc ('%', file); 13293 13294 switch (as) 13295 { 13296 case ADDR_SPACE_SEG_FS: 13297 fputs ("fs:", file); 13298 break; 13299 case ADDR_SPACE_SEG_GS: 13300 fputs ("gs:", file); 13301 break; 13302 default: 13303 gcc_unreachable (); 13304 } 13305 } 13306 13307 /* Use one byte shorter RIP relative addressing for 64bit mode. 
*/ 13308 if (TARGET_64BIT && !base && !index && !no_rip) 13309 { 13310 rtx symbol = disp; 13311 13312 if (GET_CODE (disp) == CONST 13313 && GET_CODE (XEXP (disp, 0)) == PLUS 13314 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 13315 symbol = XEXP (XEXP (disp, 0), 0); 13316 13317 if (GET_CODE (symbol) == LABEL_REF 13318 || (GET_CODE (symbol) == SYMBOL_REF 13319 && SYMBOL_REF_TLS_MODEL (symbol) == 0)) 13320 base = pc_rtx; 13321 } 13322 13323 if (!base && !index) 13324 { 13325 /* Displacement only requires special attention. */ 13326 if (CONST_INT_P (disp)) 13327 { 13328 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) 13329 fputs ("ds:", file); 13330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); 13331 } 13332 /* Load the external function address via the GOT slot to avoid PLT. */ 13333 else if (GET_CODE (disp) == CONST 13334 && GET_CODE (XEXP (disp, 0)) == UNSPEC 13335 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL 13336 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) 13337 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) 13338 output_pic_addr_const (file, disp, 0); 13339 else if (flag_pic) 13340 output_pic_addr_const (file, disp, 0); 13341 else 13342 output_addr_const (file, disp); 13343 } 13344 else 13345 { 13346 /* Print SImode register names to force addr32 prefix. 
*/ 13347 if (SImode_address_operand (addr, VOIDmode)) 13348 { 13349 if (flag_checking) 13350 { 13351 gcc_assert (TARGET_64BIT); 13352 switch (GET_CODE (addr)) 13353 { 13354 case SUBREG: 13355 gcc_assert (GET_MODE (addr) == SImode); 13356 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); 13357 break; 13358 case ZERO_EXTEND: 13359 case AND: 13360 gcc_assert (GET_MODE (addr) == DImode); 13361 break; 13362 default: 13363 gcc_unreachable (); 13364 } 13365 } 13366 gcc_assert (!code); 13367 code = 'k'; 13368 } 13369 else if (code == 0 13370 && TARGET_X32 13371 && disp 13372 && CONST_INT_P (disp) 13373 && INTVAL (disp) < -16*1024*1024) 13374 { 13375 /* X32 runs in 64-bit mode, where displacement, DISP, in 13376 address DISP(%r64), is encoded as 32-bit immediate sign- 13377 extended from 32-bit to 64-bit. For -0x40000300(%r64), 13378 address is %r64 + 0xffffffffbffffd00. When %r64 < 13379 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, 13380 which is invalid for x32. The correct address is %r64 13381 - 0x40000300 == 0xf7ffdd64. To properly encode 13382 -0x40000300(%r64) for x32, we zero-extend negative 13383 displacement by forcing addr32 prefix which truncates 13384 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should 13385 zero-extend all negative displacements, including -1(%rsp). 13386 However, for small negative displacements, sign-extension 13387 won't cause overflow. We only zero-extend negative 13388 displacements if they < -16*1024*1024, which is also used 13389 to check legitimate address displacements for PIC. */ 13390 code = 'k'; 13391 } 13392 13393 /* Since the upper 32 bits of RSP are always zero for x32, 13394 we can encode %esp as %rsp to avoid 0x67 prefix if 13395 there is no index register. 
*/ 13396 if (TARGET_X32 && Pmode == SImode 13397 && !index && base && REG_P (base) && REGNO (base) == SP_REG) 13398 code = 'q'; 13399 13400 if (ASSEMBLER_DIALECT == ASM_ATT) 13401 { 13402 if (disp) 13403 { 13404 if (flag_pic) 13405 output_pic_addr_const (file, disp, 0); 13406 else if (GET_CODE (disp) == LABEL_REF) 13407 output_asm_label (disp); 13408 else 13409 output_addr_const (file, disp); 13410 } 13411 13412 putc ('(', file); 13413 if (base) 13414 print_reg (base, code, file); 13415 if (index) 13416 { 13417 putc (',', file); 13418 print_reg (index, vsib ? 0 : code, file); 13419 if (scale != 1 || vsib) 13420 fprintf (file, ",%d", scale); 13421 } 13422 putc (')', file); 13423 } 13424 else 13425 { 13426 rtx offset = NULL_RTX; 13427 13428 if (disp) 13429 { 13430 /* Pull out the offset of a symbol; print any symbol itself. */ 13431 if (GET_CODE (disp) == CONST 13432 && GET_CODE (XEXP (disp, 0)) == PLUS 13433 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 13434 { 13435 offset = XEXP (XEXP (disp, 0), 1); 13436 disp = gen_rtx_CONST (VOIDmode, 13437 XEXP (XEXP (disp, 0), 0)); 13438 } 13439 13440 if (flag_pic) 13441 output_pic_addr_const (file, disp, 0); 13442 else if (GET_CODE (disp) == LABEL_REF) 13443 output_asm_label (disp); 13444 else if (CONST_INT_P (disp)) 13445 offset = disp; 13446 else 13447 output_addr_const (file, disp); 13448 } 13449 13450 putc ('[', file); 13451 if (base) 13452 { 13453 print_reg (base, code, file); 13454 if (offset) 13455 { 13456 if (INTVAL (offset) >= 0) 13457 putc ('+', file); 13458 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 13459 } 13460 } 13461 else if (offset) 13462 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 13463 else 13464 putc ('0', file); 13465 13466 if (index) 13467 { 13468 putc ('+', file); 13469 print_reg (index, vsib ? 
0 : code, file); 13470 if (scale != 1 || vsib) 13471 fprintf (file, "*%d", scale); 13472 } 13473 putc (']', file); 13474 } 13475 } 13476} 13477 13478static void 13479ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) 13480{ 13481 if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) 13482 output_operand_lossage ("invalid constraints for operand"); 13483 else 13484 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); 13485} 13486 13487/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ 13488 13489static bool 13490i386_asm_output_addr_const_extra (FILE *file, rtx x) 13491{ 13492 rtx op; 13493 13494 if (GET_CODE (x) != UNSPEC) 13495 return false; 13496 13497 op = XVECEXP (x, 0, 0); 13498 switch (XINT (x, 1)) 13499 { 13500 case UNSPEC_GOTOFF: 13501 output_addr_const (file, op); 13502 fputs ("@gotoff", file); 13503 break; 13504 case UNSPEC_GOTTPOFF: 13505 output_addr_const (file, op); 13506 /* FIXME: This might be @TPOFF in Sun ld. */ 13507 fputs ("@gottpoff", file); 13508 break; 13509 case UNSPEC_TPOFF: 13510 output_addr_const (file, op); 13511 fputs ("@tpoff", file); 13512 break; 13513 case UNSPEC_NTPOFF: 13514 output_addr_const (file, op); 13515 if (TARGET_64BIT) 13516 fputs ("@tpoff", file); 13517 else 13518 fputs ("@ntpoff", file); 13519 break; 13520 case UNSPEC_DTPOFF: 13521 output_addr_const (file, op); 13522 fputs ("@dtpoff", file); 13523 break; 13524 case UNSPEC_GOTNTPOFF: 13525 output_addr_const (file, op); 13526 if (TARGET_64BIT) 13527 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 
13528 "@gottpoff(%rip)" : "@gottpoff[rip]", file); 13529 else 13530 fputs ("@gotntpoff", file); 13531 break; 13532 case UNSPEC_INDNTPOFF: 13533 output_addr_const (file, op); 13534 fputs ("@indntpoff", file); 13535 break; 13536#if TARGET_MACHO 13537 case UNSPEC_MACHOPIC_OFFSET: 13538 output_addr_const (file, op); 13539 putc ('-', file); 13540 machopic_output_function_base_name (file); 13541 break; 13542#endif 13543 13544 default: 13545 return false; 13546 } 13547 13548 return true; 13549} 13550 13551 13552/* Output code to perform a 387 binary operation in INSN, one of PLUS, 13553 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 13554 is the expression of the binary operation. The output may either be 13555 emitted here, or returned to the caller, like all output_* functions. 13556 13557 There is no guarantee that the operands are the same mode, as they 13558 might be within FLOAT or FLOAT_EXTEND expressions. */ 13559 13560#ifndef SYSV386_COMPAT 13561/* Set to 1 for compatibility with brain-damaged assemblers. No-one 13562 wants to fix the assemblers because that causes incompatibility 13563 with gcc. No-one wants to fix gcc because that causes 13564 incompatibility with assemblers... You can use the option of 13565 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. 
*/ 13566#define SYSV386_COMPAT 1 13567#endif 13568 13569const char * 13570output_387_binary_op (rtx_insn *insn, rtx *operands) 13571{ 13572 static char buf[40]; 13573 const char *p; 13574 bool is_sse 13575 = (SSE_REG_P (operands[0]) 13576 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); 13577 13578 if (is_sse) 13579 p = "%v"; 13580 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 13581 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 13582 p = "fi"; 13583 else 13584 p = "f"; 13585 13586 strcpy (buf, p); 13587 13588 switch (GET_CODE (operands[3])) 13589 { 13590 case PLUS: 13591 p = "add"; break; 13592 case MINUS: 13593 p = "sub"; break; 13594 case MULT: 13595 p = "mul"; break; 13596 case DIV: 13597 p = "div"; break; 13598 default: 13599 gcc_unreachable (); 13600 } 13601 13602 strcat (buf, p); 13603 13604 if (is_sse) 13605 { 13606 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; 13607 strcat (buf, p); 13608 13609 if (TARGET_AVX) 13610 p = "\t{%2, %1, %0|%0, %1, %2}"; 13611 else 13612 p = "\t{%2, %0|%0, %2}"; 13613 13614 strcat (buf, p); 13615 return buf; 13616 } 13617 13618 /* Even if we do not want to check the inputs, this documents input 13619 constraints. Which helps in understanding the following code. */ 13620 if (flag_checking) 13621 { 13622 if (STACK_REG_P (operands[0]) 13623 && ((REG_P (operands[1]) 13624 && REGNO (operands[0]) == REGNO (operands[1]) 13625 && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) 13626 || (REG_P (operands[2]) 13627 && REGNO (operands[0]) == REGNO (operands[2]) 13628 && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) 13629 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 13630 ; /* ok */ 13631 else 13632 gcc_unreachable (); 13633 } 13634 13635 switch (GET_CODE (operands[3])) 13636 { 13637 case MULT: 13638 case PLUS: 13639 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 13640 std::swap (operands[1], operands[2]); 13641 13642 /* know operands[0] == operands[1]. 
*/ 13643 13644 if (MEM_P (operands[2])) 13645 { 13646 p = "%Z2\t%2"; 13647 break; 13648 } 13649 13650 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 13651 { 13652 if (STACK_TOP_P (operands[0])) 13653 /* How is it that we are storing to a dead operand[2]? 13654 Well, presumably operands[1] is dead too. We can't 13655 store the result to st(0) as st(0) gets popped on this 13656 instruction. Instead store to operands[2] (which I 13657 think has to be st(1)). st(1) will be popped later. 13658 gcc <= 2.8.1 didn't have this check and generated 13659 assembly code that the Unixware assembler rejected. */ 13660 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 13661 else 13662 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 13663 break; 13664 } 13665 13666 if (STACK_TOP_P (operands[0])) 13667 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 13668 else 13669 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 13670 break; 13671 13672 case MINUS: 13673 case DIV: 13674 if (MEM_P (operands[1])) 13675 { 13676 p = "r%Z1\t%1"; 13677 break; 13678 } 13679 13680 if (MEM_P (operands[2])) 13681 { 13682 p = "%Z2\t%2"; 13683 break; 13684 } 13685 13686 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 13687 { 13688#if SYSV386_COMPAT 13689 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 13690 derived assemblers, confusingly reverse the direction of 13691 the operation for fsub{r} and fdiv{r} when the 13692 destination register is not st(0). The Intel assembler 13693 doesn't have this brain damage. Read !SYSV386_COMPAT to 13694 figure out what the hardware really does. */ 13695 if (STACK_TOP_P (operands[0])) 13696 p = "{p\t%0, %2|rp\t%2, %0}"; 13697 else 13698 p = "{rp\t%2, %0|p\t%0, %2}"; 13699#else 13700 if (STACK_TOP_P (operands[0])) 13701 /* As above for fmul/fadd, we can't store to st(0). 
*/ 13702 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 13703 else 13704 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 13705#endif 13706 break; 13707 } 13708 13709 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 13710 { 13711#if SYSV386_COMPAT 13712 if (STACK_TOP_P (operands[0])) 13713 p = "{rp\t%0, %1|p\t%1, %0}"; 13714 else 13715 p = "{p\t%1, %0|rp\t%0, %1}"; 13716#else 13717 if (STACK_TOP_P (operands[0])) 13718 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 13719 else 13720 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 13721#endif 13722 break; 13723 } 13724 13725 if (STACK_TOP_P (operands[0])) 13726 { 13727 if (STACK_TOP_P (operands[1])) 13728 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 13729 else 13730 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 13731 break; 13732 } 13733 else if (STACK_TOP_P (operands[1])) 13734 { 13735#if SYSV386_COMPAT 13736 p = "{\t%1, %0|r\t%0, %1}"; 13737#else 13738 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 13739#endif 13740 } 13741 else 13742 { 13743#if SYSV386_COMPAT 13744 p = "{r\t%2, %0|\t%0, %2}"; 13745#else 13746 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 13747#endif 13748 } 13749 break; 13750 13751 default: 13752 gcc_unreachable (); 13753 } 13754 13755 strcat (buf, p); 13756 return buf; 13757} 13758 13759/* Return needed mode for entity in optimize_mode_switching pass. */ 13760 13761static int 13762ix86_dirflag_mode_needed (rtx_insn *insn) 13763{ 13764 if (CALL_P (insn)) 13765 { 13766 if (cfun->machine->func_type == TYPE_NORMAL) 13767 return X86_DIRFLAG_ANY; 13768 else 13769 /* No need to emit CLD in interrupt handler for TARGET_CLD. */ 13770 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; 13771 } 13772 13773 if (recog_memoized (insn) < 0) 13774 return X86_DIRFLAG_ANY; 13775 13776 if (get_attr_type (insn) == TYPE_STR) 13777 { 13778 /* Emit cld instruction if stringops are used in the function. 
*/ 13779 if (cfun->machine->func_type == TYPE_NORMAL) 13780 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; 13781 else 13782 return X86_DIRFLAG_RESET; 13783 } 13784 13785 return X86_DIRFLAG_ANY; 13786} 13787 13788/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ 13789 13790static bool 13791ix86_check_avx_upper_register (const_rtx exp) 13792{ 13793 return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; 13794} 13795 13796/* Return needed mode for entity in optimize_mode_switching pass. */ 13797 13798static int 13799ix86_avx_u128_mode_needed (rtx_insn *insn) 13800{ 13801 if (CALL_P (insn)) 13802 { 13803 rtx link; 13804 13805 /* Needed mode is set to AVX_U128_CLEAN if there are 13806 no 256bit or 512bit modes used in function arguments. */ 13807 for (link = CALL_INSN_FUNCTION_USAGE (insn); 13808 link; 13809 link = XEXP (link, 1)) 13810 { 13811 if (GET_CODE (XEXP (link, 0)) == USE) 13812 { 13813 rtx arg = XEXP (XEXP (link, 0), 0); 13814 13815 if (ix86_check_avx_upper_register (arg)) 13816 return AVX_U128_DIRTY; 13817 } 13818 } 13819 13820 /* If the function is known to preserve some SSE registers, 13821 RA and previous passes can legitimately rely on that for 13822 modes wider than 256 bits. It's only safe to issue a 13823 vzeroupper if all SSE registers are clobbered. */ 13824 const function_abi &abi = insn_callee_abi (insn); 13825 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS], 13826 abi.mode_clobbers (V4DImode))) 13827 return AVX_U128_ANY; 13828 13829 return AVX_U128_CLEAN; 13830 } 13831 13832 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. 13833 Hardware changes state only when a 256bit register is written to, 13834 but we need to prevent the compiler from moving optimal insertion 13835 point above eventual read from 256bit or 512 bit register. 
*/ 13836 subrtx_iterator::array_type array; 13837 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) 13838 if (ix86_check_avx_upper_register (*iter)) 13839 return AVX_U128_DIRTY; 13840 13841 return AVX_U128_ANY; 13842} 13843 13844/* Return mode that i387 must be switched into 13845 prior to the execution of insn. */ 13846 13847static int 13848ix86_i387_mode_needed (int entity, rtx_insn *insn) 13849{ 13850 enum attr_i387_cw mode; 13851 13852 /* The mode UNINITIALIZED is used to store control word after a 13853 function call or ASM pattern. The mode ANY specify that function 13854 has no requirements on the control word and make no changes in the 13855 bits we are interested in. */ 13856 13857 if (CALL_P (insn) 13858 || (NONJUMP_INSN_P (insn) 13859 && (asm_noperands (PATTERN (insn)) >= 0 13860 || GET_CODE (PATTERN (insn)) == ASM_INPUT))) 13861 return I387_CW_UNINITIALIZED; 13862 13863 if (recog_memoized (insn) < 0) 13864 return I387_CW_ANY; 13865 13866 mode = get_attr_i387_cw (insn); 13867 13868 switch (entity) 13869 { 13870 case I387_ROUNDEVEN: 13871 if (mode == I387_CW_ROUNDEVEN) 13872 return mode; 13873 break; 13874 13875 case I387_TRUNC: 13876 if (mode == I387_CW_TRUNC) 13877 return mode; 13878 break; 13879 13880 case I387_FLOOR: 13881 if (mode == I387_CW_FLOOR) 13882 return mode; 13883 break; 13884 13885 case I387_CEIL: 13886 if (mode == I387_CW_CEIL) 13887 return mode; 13888 break; 13889 13890 default: 13891 gcc_unreachable (); 13892 } 13893 13894 return I387_CW_ANY; 13895} 13896 13897/* Return mode that entity must be switched into 13898 prior to the execution of insn. 
   */

static int
ix86_mode_needed (int entity, rtx_insn *insn)
{
  /* Dispatch to the per-entity handler; this implements the
     TARGET_MODE_NEEDED hook for the mode-switching pass.  */
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_needed (insn);
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}

/* note_stores callback: check if a 256bit or 512bit AVX register is
   referenced in stores; if DEST is such a register, set *DATA (a bool)
   to true.  */

static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
 {
   if (ix86_check_avx_upper_register (dest))
     {
       bool *used = (bool *) data;
       *used = true;
     }
 }

/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* vzeroupper/vzeroall explicitly clear the upper state.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}

/* Return the mode that an insn results in.
   */

static int
ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
  /* TARGET_MODE_AFTER hook: only AVX_U128 state can change as a
     side effect of an insn; the other entities keep MODE.  */
  switch (entity)
    {
    case X86_DIRFLAG:
      return mode;
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return mode;
    default:
      gcc_unreachable ();
    }
}

/* Return the direction-flag state assumed at entry to the current
   function.  */

static int
ix86_dirflag_mode_entry (void)
{
  /* For TARGET_CLD or in the interrupt handler we can't assume
     direction flag state at function entry.  */
  if (TARGET_CLD
      || cfun->machine->func_type != TYPE_NORMAL)
    return X86_DIRFLAG_ANY;

  return X86_DIRFLAG_RESET;
}

/* Return the AVX upper-128bit state assumed at entry to the current
   function, judging from the incoming argument RTL.  */

static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit or 512bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx_upper_register (incoming))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_entry ();
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}

/* Return the AVX upper-128bit state required at exit from the current
   function.  */

static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
     or 512 bit modes used in the function return register.  */
  if (reg && ix86_check_avx_upper_register (reg))
    return AVX_U128_DIRTY;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
     modes used in function arguments, otherwise return AVX_U128_CLEAN.
   */
  return ix86_avx_u128_mode_entry ();
}

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return X86_DIRFLAG_ANY;
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}

/* TARGET_MODE_PRIORITY hook: mode N itself is its priority,
   i.e. modes are tried in numerical order.  */

static int
ix86_mode_priority (int, int n)
{
  return n;
}

/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.
   */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* Store the current control word (fnstcw), then load it into REG so
     the rounding-control bits can be rewritten.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  /* Bits 0x0c00 of the control word select the rounding mode; clear
     and/or set them according to the requested MODE.  */
  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  /* Save the modified control word into its dedicated stack slot, from
     where the rounding patterns reload it with fldcw.  */
  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}

/* Generate one or more insns to set ENTITY to MODE.
   */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  /* TARGET_MODE_EMIT hook: emit the actual mode-switching insns.  */
  switch (entity)
    {
    case X86_DIRFLAG:
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	emit_insn (gen_avx_vzeroupper ());
      break;
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  /* Switch to the insn's required rounding mode around the store.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  /* Skip the leading 'p' (pop) when the value must stay on the stack.  */
  p = "p%Z0\t%0";
  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  /* Restore the previous control word.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);

  return "";
}

/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      /* Assembler lacks ffreep mnemonic: emit the raw opcode bytes
	 (0xdf 0xc0+i) via a data directive instead.  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  /* For fcomi the compared operands are 0/1; otherwise operand 0 is
     the fnstsw destination and the compared operands are 1/2.  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      /* Append the 'p' (pop) suffix only when %st dies.  */
      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  gcc_assert (!unordered_p);
	  p = "ficom";
	}
      else
	p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  output_asm_insn (buf, operands);
  /* The caller reads the comparison result via fnstsw.  */
  return "fnstsw\t%0";
}

/* Output one element of an address vector (jump table), namely the
   label with number VALUE.  */

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

/* Output one element of an address-difference vector: label VALUE
   relative to label REL (or to a suitable base in PIC code).  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}

/* Maximum AGU stall, and how far the LEA heuristics search, both in
   half-cycles.  */
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.
   */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  /* With a missing neighbour, conservatively round up to the start of
     the next full cycle (DISTANCE & 1 pads odd half-cycle counts).  */
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  /* If NEXT uses a register that PREV defines, the insns are dependent:
     advance to the next cycle; otherwise just add one half-cycle.  */
  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
	return distance + (distance & 1) + 2;

  return distance + 1;
}

/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx_insn *insn)
{
  df_ref def;

  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
	&& !DF_REF_IS_ARTIFICIAL (def)
	&& (regno1 == DF_REF_REGNO (def)
	    || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref use;

  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
      return true;

  return false;
}

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.
   */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  /* Walk backwards from START until we hit INSN, the head of the
     block, or exceed the search threshold.  */
  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      /* Only a non-LEA (non-AGU) definition counts; an LEA
		 definition terminates the search without a match.  */
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.
   */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First search within INSN's own basic block.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop: continue the scan from the block's own end.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  /* Otherwise take the shortest distance over all predecessor
	     blocks in which a definition was found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  /* Convert half-cycles to cycles.  */
  return distance >> 1;
}

/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.

   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  /* Walk forwards from START until we hit INSN, the block's end, or
     exceed the search threshold.  */
  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}

/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.
   */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First search within INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-loop: continue the scan from the block's own head.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  /* Otherwise take the shortest distance over all successor
	     blocks in which a use was found.  */
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  /* Convert half-cycles to cycles.  */
  return distance >> 1;
}

/* Define this macro to tune LEA priority vs ADD, it take effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_BONNELL)
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  /* Remember recog_data content.  */
  rtx_insn *rinsn = recog_data.insn;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call extract_insn_cached.  If this function
     is called from define_split conditions, that can break insn splitting,
     because split_insns works by clearing recog_data.insn and then modifying
     recog_data.operand array and match the various split conditions.  */
  if (recog_data.insn != rinsn)
    recog_data.insn = NULL;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}

/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref use;
  bitmap live;

  /* Scan forward to the end of the block: flags may be clobbered if
     they are redefined before any use, or dead at the block's exit.  */
  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  FOR_EACH_INSN_USE (use, insn)
	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}

/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}

/* Return true if we should emit lea instruction instead of mov
   instruction.
   */

bool
ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}

/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  /* Check if it is OK to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_BONNELL
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}

/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}

/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      /* Recurse over each element of the PARALLEL.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code,
			machine_mode,
			rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.
 */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  /* A memory destination must match the source selected by HIGH.  */
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  /* Two memory sources are only ok as a movddup (needs SSE3) when
     they are the same location.  */
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  Otherwise only element 0 gets VALUE and the rest
   are zero.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    /* Integer vector modes: VALUE must be replicated (VECT true).  */
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.
 */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Choose the integer mode that matches the element width, and
     whether the result is a vector or a scalar.  */
  switch (mode)
    {
    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      /* Scalar 128-bit case: no vector constant is built.  */
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Set only the top (sign) bit of the element; invert for a mask
     that excludes the sign bit.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}

/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.
 */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  /* The cases deliberately fall through: each CC mode accepts the
     strictly less constrained requests below it.  */
  switch (set_mode)
    {
    case E_CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    /* These modes only match themselves exactly.  */
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

/* Return the condition-code mode to use for comparing OP0 and OP1
   with comparison CODE.  NOTE(review): presumably the SELECT_CC_MODE
   implementation for x86 — confirm the hook wiring elsewhere.  */

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.
 */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = INVALID_REGNUM;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.
 */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* A mode pair where one is strictly more constrained than the
     other collapses to the more constrained mode.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZmode is compatible with these richer modes; keep the richer
     one.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	/* Any remaining pair of integer CC modes can fall back to
	   the fully general CCmode.  */
	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}

/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.
 */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.
 */

static bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  /* Only applicable to non-PIC ELF targets with GOT-relative call
     support, and only for non-local symbols.  */
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  /* Either -fno-plt globally, or the callee carries the "noplt"
     attribute.  */
  if (!flag_plt
      || (symbol_decl != NULL_TREE
	  && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}

/* Helper to output the jmp/call.  Emits a jump to THUNK_NAME when one
   was chosen, otherwise emits the thunk body inline for REGNO.  */
static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      fprintf (asm_out_file, "\tjmp\t");
      assemble_name (asm_out_file, thunk_name);
      putc ('\n', asm_out_file);
    }
  else
    output_indirect_thunk (regno);
}

/* Output indirect branch via a call and return thunk.  CALL_OP is a
   register which contains the branch target.  XASM is the assembly
   template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   A normal call is converted to:

	call __x86_indirect_thunk_reg

   and a tail call is converted to:

	jmp __x86_indirect_thunk_reg
 */

static void
ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  int regno = REGNO (call_op);

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	{
	  /* Record the thunk as used so it gets emitted later;
	     REX registers are remapped to a compact bit index.  */
	  int i = regno;
	  if (i >= FIRST_REX_INT_REG)
	    i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
	  indirect_thunks_used |= 1 << i;
	}
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  if (sibcall_p)
    ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
  else
    {
      if (thunk_name != NULL)
	{
	  fprintf (asm_out_file, "\tcall\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	  return;
	}

      /* Inline thunk for a normal call: synthesize the call/jmp pair
	 with two local labels.  */
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.
 */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}

/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.  A normal call is
   converted to:

	jmp L2
   L1:
	push CALL_OP
	jmp __x86_indirect_thunk
   L2:
	call L1

   and a tail call is converted to:

	push CALL_OP
	jmp __x86_indirect_thunk
 */

static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
				      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  /* -1 selects the register-less (push-based) thunk name.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
	    TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.
 */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
	{
	  struct ix86_address parts;
	  rtx addr = XEXP (call_op, 0);
	  if (ix86_decompose_address (addr, &parts)
	      && parts.base == stack_pointer_rtx)
	    {
	      /* Since call will adjust stack by -UNITS_PER_WORD,
		 we must convert "disp(stack, index, scale)" to
		 "disp+UNITS_PER_WORD(stack, index, scale)".  */
	      if (parts.index)
		{
		  addr = gen_rtx_MULT (Pmode, parts.index,
				       GEN_INT (parts.scale));
		  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				       addr);
		}
	      else
		addr = stack_pointer_rtx;

	      rtx disp;
	      if (parts.disp != NULL_RTX)
		disp = plus_constant (Pmode, parts.disp,
				      UNITS_PER_WORD);
	      else
		disp = GEN_INT (UNITS_PER_WORD);

	      addr = gen_rtx_PLUS (Pmode, addr, disp);
	      call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
	    }
	}

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}

/* Output indirect branch via a call and return thunk.  CALL_OP is
   the branch target.  XASM is the assembly template for CALL_OP.
   Branch is a tail call if SIBCALL_P is true.
*/ 15637 15638static void 15639ix86_output_indirect_branch (rtx call_op, const char *xasm, 15640 bool sibcall_p) 15641{ 15642 if (REG_P (call_op)) 15643 ix86_output_indirect_branch_via_reg (call_op, sibcall_p); 15644 else 15645 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); 15646} 15647 15648/* Output indirect jump. CALL_OP is the jump target. */ 15649 15650const char * 15651ix86_output_indirect_jmp (rtx call_op) 15652{ 15653 if (cfun->machine->indirect_branch_type != indirect_branch_keep) 15654 { 15655 /* We can't have red-zone since "call" in the indirect thunk 15656 pushes the return address onto stack, destroying red-zone. */ 15657 if (ix86_red_zone_size != 0) 15658 gcc_unreachable (); 15659 15660 ix86_output_indirect_branch (call_op, "%0", true); 15661 return ""; 15662 } 15663 else 15664 return "%!jmp\t%A0"; 15665} 15666 15667/* Output return instrumentation for current function if needed. */ 15668 15669static void 15670output_return_instrumentation (void) 15671{ 15672 if (ix86_instrument_return != instrument_return_none 15673 && flag_fentry 15674 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) 15675 { 15676 if (ix86_flag_record_return) 15677 fprintf (asm_out_file, "1:\n"); 15678 switch (ix86_instrument_return) 15679 { 15680 case instrument_return_call: 15681 fprintf (asm_out_file, "\tcall\t__return__\n"); 15682 break; 15683 case instrument_return_nop5: 15684 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ 15685 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); 15686 break; 15687 case instrument_return_none: 15688 break; 15689 } 15690 15691 if (ix86_flag_record_return) 15692 { 15693 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); 15694 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); 15695 fprintf (asm_out_file, "\t.previous\n"); 15696 } 15697 } 15698} 15699 15700/* Output function return. CALL_OP is the jump target. 
   Add a REP
   prefix to RET if LONG_P is true and function return is kept.  */

const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Jump to the (possibly external) return thunk; record that
	     the thunk body must be emitted when requested.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
			       true);
	  indirect_return_needed |= need_thunk;
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  if (!long_p)
    return "%!ret";

  return "rep%; ret";
}

/* Output indirect function return.  RET_OP is the function return
   target.
 */

const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);
      unsigned int regno = REGNO (ret_op);
      /* The return address is expected in %ecx here.  */
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, regno, need_prefix, true);

	  if (need_thunk)
	    {
	      /* Mark the CX-register thunk for emission.  */
	      indirect_return_via_cx = true;
	      indirect_thunks_used |= 1 << CX_REG;
	    }
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (regno);

      return "";
    }
  else
    return "%!jmp\t%A0";
}

/* Output the assembly for a call instruction.
 */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      /* A "noplt" direct call becomes an indirect jump through
		 the GOT entry.  */
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.
 */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  /* Same GOT-indirection as in the sibcall case, but with a
	     call template.  */
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.
*/ 15921 15922rtx 15923assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) 15924{ 15925 struct stack_local_entry *s; 15926 15927 gcc_assert (n < MAX_386_STACK_LOCALS); 15928 15929 for (s = ix86_stack_locals; s; s = s->next) 15930 if (s->mode == mode && s->n == n) 15931 return validize_mem (copy_rtx (s->rtl)); 15932 15933 s = ggc_alloc<stack_local_entry> (); 15934 s->n = n; 15935 s->mode = mode; 15936 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 15937 15938 s->next = ix86_stack_locals; 15939 ix86_stack_locals = s; 15940 return validize_mem (copy_rtx (s->rtl)); 15941} 15942 15943static void 15944ix86_instantiate_decls (void) 15945{ 15946 struct stack_local_entry *s; 15947 15948 for (s = ix86_stack_locals; s; s = s->next) 15949 if (s->rtl != NULL_RTX) 15950 instantiate_decl_rtl (s->rtl); 15951} 15952 15953/* Check whether x86 address PARTS is a pc-relative address. */ 15954 15955bool 15956ix86_rip_relative_addr_p (struct ix86_address *parts) 15957{ 15958 rtx base, index, disp; 15959 15960 base = parts->base; 15961 index = parts->index; 15962 disp = parts->disp; 15963 15964 if (disp && !base && !index) 15965 { 15966 if (TARGET_64BIT) 15967 { 15968 rtx symbol = disp; 15969 15970 if (GET_CODE (disp) == CONST) 15971 symbol = XEXP (disp, 0); 15972 if (GET_CODE (symbol) == PLUS 15973 && CONST_INT_P (XEXP (symbol, 1))) 15974 symbol = XEXP (symbol, 0); 15975 15976 if (GET_CODE (symbol) == LABEL_REF 15977 || (GET_CODE (symbol) == SYMBOL_REF 15978 && SYMBOL_REF_TLS_MODEL (symbol) == 0) 15979 || (GET_CODE (symbol) == UNSPEC 15980 && (XINT (symbol, 1) == UNSPEC_GOTPCREL 15981 || XINT (symbol, 1) == UNSPEC_PCREL 15982 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) 15983 return true; 15984 } 15985 } 15986 return false; 15987} 15988 15989/* Calculate the length of the memory address in the instruction encoding. 15990 Includes addr32 prefix, does not include the one-byte modrm, opcode, 15991 or other prefixes. We never generate addr32 prefix for LEA insn. 
 */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses contribute no extra encoding bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* One byte for a segment override prefix, if any.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  /* Constraint K means an 8-bit signed immediate: one byte.  */
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}

/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.
 */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	/* At most one immediate operand is expected.  */
	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    /* A value in [-128, 127] fits the sign-extended 8-bit
	       immediate alternative.  */
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}

/* Compute default value for "length_address" attribute.
 */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      /* Advance to this operand's constraint string for the
		 matched alternative.  */
	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}

/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.
 */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}


static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Returns true if lhs of insn is HW function argument register and set up
   is_spilled to true if it is likely spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore it.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}

/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.
*/
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      /* First nondebug insn before the call is not an argument move:
	 nothing to chain.  */
      return NULL;
    }

  first_arg = last;
  /* Walk backwards collecting the contiguous chain of argument moves.  */
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}

/* Add output or anti dependency from insn to first_arg to restrict its code
   motion.  */
static void
avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}

/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.
*/
static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);

  /* Scan BB backwards for the last single-set non-jump insn and pin it
     behind ARG.  */
  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}

/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb =  BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}

/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */
static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}

/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}


/* Implement TARGET_STATIC_RTX_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_static_rtx_alignment (machine_mode mode)
{
  if (mode == DFmode)
    return 64;
  if (ALIGN_MODE_128 (mode))
    return MAX (128, GET_MODE_ALIGNMENT (mode));
  return GET_MODE_ALIGNMENT (mode);
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
      HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
      return MAX (mode_align, align);
    }
  /* Word-align longer strings so the per-word ops in the inline
     string expanders stay aligned, unless optimizing for size.  */
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}

/* Implement TARGET_EMPTY_RECORD_P.  */

static bool
ix86_is_empty_record (const_tree type)
{
  if (!TARGET_64BIT)
    return false;
  return default_is_empty_record (type);
}

/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.
*/

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  /* Don't warn if the function isn't visible outside of the TU.  */
  if (cum->decl && !TREE_PUBLIC (cum->decl))
    return;

  const_tree ctx = get_ultimate_context (cum->decl);
  if (ctx != NULL_TREE
      && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in %<-fabi-version=12%> (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}

/* This hook returns name of multilib ABI.  */

static const char *
ix86_get_multilib_abi_name (void)
{
  if (!(TARGET_64BIT_P (ix86_isa_flags)))
    return "i386";
  else if (TARGET_X32_P (ix86_isa_flags))
    return "x32";
  else
    return "x86_64";
}

/* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   the data type, and ALIGN is the alignment that the object would
   ordinarily have.  */

static int
iamcu_alignment (tree type, int align)
{
  machine_mode mode;

  if (align < 32 || TYPE_USER_ALIGN (type))
    return align;

  /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
     bytes.  */
  mode = TYPE_MODE (strip_array_types (type));
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_INT:
    case MODE_COMPLEX_INT:
    case MODE_COMPLEX_FLOAT:
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      return 32;
    default:
      return align;
    }
}

/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Optional (cache-friendly) over-alignment of large aggregates.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  /* The remaining bumps (DFmode to 64, 128-bit modes to 128) are purely
     optimizations, keyed on the element/field/scalar mode.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}

/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.
*/

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  /* EXP may be either a decl or a bare type.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  /* Mode-based bumps mirroring ix86_data_alignment's optimization part.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}

/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.
*/

unsigned int
ix86_minimum_alignment (tree exp, machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  /* EXP may be either a decl or a bare type.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    {
      gcc_checking_assert (!TARGET_STV);
      return 32;
    }

  return align;
}

/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  /* 0x41 0xbb = REX.B + movl $imm32, %r11d.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  /* 0x49 0xbb = REX.WB + movabs $imm64, %r11.  */
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* 0xe9 = jmp rel32.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}

static bool
ix86_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !ix86_function_naked (current_function_decl);
}

static bool
ix86_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !ix86_function_naked (decl);
}

/* Return the shift count of a vector by scalar shift builtin second argument
   ARG1.  */
static tree
ix86_vector_shift_count (tree arg1)
{
  if (tree_fits_uhwi_p (arg1))
    return arg1;
  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
    {
      /* The count argument is weird, passed in as various 128-bit
	 (or 64-bit) vectors, the low 64 bits from it are the count.
*/ 17118 unsigned char buf[16]; 17119 int len = native_encode_expr (arg1, buf, 16); 17120 if (len == 0) 17121 return NULL_TREE; 17122 tree t = native_interpret_expr (uint64_type_node, buf, len); 17123 if (t && tree_fits_uhwi_p (t)) 17124 return t; 17125 } 17126 return NULL_TREE; 17127} 17128 17129static tree 17130ix86_fold_builtin (tree fndecl, int n_args, 17131 tree *args, bool ignore ATTRIBUTE_UNUSED) 17132{ 17133 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) 17134 { 17135 enum ix86_builtins fn_code 17136 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); 17137 enum rtx_code rcode; 17138 bool is_vshift; 17139 unsigned HOST_WIDE_INT mask; 17140 17141 switch (fn_code) 17142 { 17143 case IX86_BUILTIN_CPU_IS: 17144 case IX86_BUILTIN_CPU_SUPPORTS: 17145 gcc_assert (n_args == 1); 17146 return fold_builtin_cpu (fndecl, args); 17147 17148 case IX86_BUILTIN_NANQ: 17149 case IX86_BUILTIN_NANSQ: 17150 { 17151 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 17152 const char *str = c_getstr (*args); 17153 int quiet = fn_code == IX86_BUILTIN_NANQ; 17154 REAL_VALUE_TYPE real; 17155 17156 if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) 17157 return build_real (type, real); 17158 return NULL_TREE; 17159 } 17160 17161 case IX86_BUILTIN_INFQ: 17162 case IX86_BUILTIN_HUGE_VALQ: 17163 { 17164 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 17165 REAL_VALUE_TYPE inf; 17166 real_inf (&inf); 17167 return build_real (type, inf); 17168 } 17169 17170 case IX86_BUILTIN_TZCNT16: 17171 case IX86_BUILTIN_CTZS: 17172 case IX86_BUILTIN_TZCNT32: 17173 case IX86_BUILTIN_TZCNT64: 17174 gcc_assert (n_args == 1); 17175 if (TREE_CODE (args[0]) == INTEGER_CST) 17176 { 17177 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 17178 tree arg = args[0]; 17179 if (fn_code == IX86_BUILTIN_TZCNT16 17180 || fn_code == IX86_BUILTIN_CTZS) 17181 arg = fold_convert (short_unsigned_type_node, arg); 17182 if (integer_zerop (arg)) 17183 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); 17184 else 
17185 return fold_const_call (CFN_CTZ, type, arg); 17186 } 17187 break; 17188 17189 case IX86_BUILTIN_LZCNT16: 17190 case IX86_BUILTIN_CLZS: 17191 case IX86_BUILTIN_LZCNT32: 17192 case IX86_BUILTIN_LZCNT64: 17193 gcc_assert (n_args == 1); 17194 if (TREE_CODE (args[0]) == INTEGER_CST) 17195 { 17196 tree type = TREE_TYPE (TREE_TYPE (fndecl)); 17197 tree arg = args[0]; 17198 if (fn_code == IX86_BUILTIN_LZCNT16 17199 || fn_code == IX86_BUILTIN_CLZS) 17200 arg = fold_convert (short_unsigned_type_node, arg); 17201 if (integer_zerop (arg)) 17202 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); 17203 else 17204 return fold_const_call (CFN_CLZ, type, arg); 17205 } 17206 break; 17207 17208 case IX86_BUILTIN_BEXTR32: 17209 case IX86_BUILTIN_BEXTR64: 17210 case IX86_BUILTIN_BEXTRI32: 17211 case IX86_BUILTIN_BEXTRI64: 17212 gcc_assert (n_args == 2); 17213 if (tree_fits_uhwi_p (args[1])) 17214 { 17215 unsigned HOST_WIDE_INT res = 0; 17216 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); 17217 unsigned int start = tree_to_uhwi (args[1]); 17218 unsigned int len = (start & 0xff00) >> 8; 17219 start &= 0xff; 17220 if (start >= prec || len == 0) 17221 res = 0; 17222 else if (!tree_fits_uhwi_p (args[0])) 17223 break; 17224 else 17225 res = tree_to_uhwi (args[0]) >> start; 17226 if (len > prec) 17227 len = prec; 17228 if (len < HOST_BITS_PER_WIDE_INT) 17229 res &= (HOST_WIDE_INT_1U << len) - 1; 17230 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); 17231 } 17232 break; 17233 17234 case IX86_BUILTIN_BZHI32: 17235 case IX86_BUILTIN_BZHI64: 17236 gcc_assert (n_args == 2); 17237 if (tree_fits_uhwi_p (args[1])) 17238 { 17239 unsigned int idx = tree_to_uhwi (args[1]) & 0xff; 17240 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) 17241 return args[0]; 17242 if (idx == 0) 17243 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0); 17244 if (!tree_fits_uhwi_p (args[0])) 17245 break; 17246 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]); 17247 res &= 
~(HOST_WIDE_INT_M1U << idx); 17248 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); 17249 } 17250 break; 17251 17252 case IX86_BUILTIN_PDEP32: 17253 case IX86_BUILTIN_PDEP64: 17254 gcc_assert (n_args == 2); 17255 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) 17256 { 17257 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); 17258 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); 17259 unsigned HOST_WIDE_INT res = 0; 17260 unsigned HOST_WIDE_INT m, k = 1; 17261 for (m = 1; m; m <<= 1) 17262 if ((mask & m) != 0) 17263 { 17264 if ((src & k) != 0) 17265 res |= m; 17266 k <<= 1; 17267 } 17268 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); 17269 } 17270 break; 17271 17272 case IX86_BUILTIN_PEXT32: 17273 case IX86_BUILTIN_PEXT64: 17274 gcc_assert (n_args == 2); 17275 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) 17276 { 17277 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); 17278 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); 17279 unsigned HOST_WIDE_INT res = 0; 17280 unsigned HOST_WIDE_INT m, k = 1; 17281 for (m = 1; m; m <<= 1) 17282 if ((mask & m) != 0) 17283 { 17284 if ((src & m) != 0) 17285 res |= k; 17286 k <<= 1; 17287 } 17288 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); 17289 } 17290 break; 17291 17292 case IX86_BUILTIN_MOVMSKPS: 17293 case IX86_BUILTIN_PMOVMSKB: 17294 case IX86_BUILTIN_MOVMSKPD: 17295 case IX86_BUILTIN_PMOVMSKB128: 17296 case IX86_BUILTIN_MOVMSKPD256: 17297 case IX86_BUILTIN_MOVMSKPS256: 17298 case IX86_BUILTIN_PMOVMSKB256: 17299 gcc_assert (n_args == 1); 17300 if (TREE_CODE (args[0]) == VECTOR_CST) 17301 { 17302 HOST_WIDE_INT res = 0; 17303 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) 17304 { 17305 tree e = VECTOR_CST_ELT (args[0], i); 17306 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) 17307 { 17308 if (wi::neg_p (wi::to_wide (e))) 17309 res |= HOST_WIDE_INT_1 << i; 17310 } 17311 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW 
(e)) 17312 { 17313 if (TREE_REAL_CST (e).sign) 17314 res |= HOST_WIDE_INT_1 << i; 17315 } 17316 else 17317 return NULL_TREE; 17318 } 17319 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); 17320 } 17321 break; 17322 17323 case IX86_BUILTIN_PSLLD: 17324 case IX86_BUILTIN_PSLLD128: 17325 case IX86_BUILTIN_PSLLD128_MASK: 17326 case IX86_BUILTIN_PSLLD256: 17327 case IX86_BUILTIN_PSLLD256_MASK: 17328 case IX86_BUILTIN_PSLLD512: 17329 case IX86_BUILTIN_PSLLDI: 17330 case IX86_BUILTIN_PSLLDI128: 17331 case IX86_BUILTIN_PSLLDI128_MASK: 17332 case IX86_BUILTIN_PSLLDI256: 17333 case IX86_BUILTIN_PSLLDI256_MASK: 17334 case IX86_BUILTIN_PSLLDI512: 17335 case IX86_BUILTIN_PSLLQ: 17336 case IX86_BUILTIN_PSLLQ128: 17337 case IX86_BUILTIN_PSLLQ128_MASK: 17338 case IX86_BUILTIN_PSLLQ256: 17339 case IX86_BUILTIN_PSLLQ256_MASK: 17340 case IX86_BUILTIN_PSLLQ512: 17341 case IX86_BUILTIN_PSLLQI: 17342 case IX86_BUILTIN_PSLLQI128: 17343 case IX86_BUILTIN_PSLLQI128_MASK: 17344 case IX86_BUILTIN_PSLLQI256: 17345 case IX86_BUILTIN_PSLLQI256_MASK: 17346 case IX86_BUILTIN_PSLLQI512: 17347 case IX86_BUILTIN_PSLLW: 17348 case IX86_BUILTIN_PSLLW128: 17349 case IX86_BUILTIN_PSLLW128_MASK: 17350 case IX86_BUILTIN_PSLLW256: 17351 case IX86_BUILTIN_PSLLW256_MASK: 17352 case IX86_BUILTIN_PSLLW512_MASK: 17353 case IX86_BUILTIN_PSLLWI: 17354 case IX86_BUILTIN_PSLLWI128: 17355 case IX86_BUILTIN_PSLLWI128_MASK: 17356 case IX86_BUILTIN_PSLLWI256: 17357 case IX86_BUILTIN_PSLLWI256_MASK: 17358 case IX86_BUILTIN_PSLLWI512_MASK: 17359 rcode = ASHIFT; 17360 is_vshift = false; 17361 goto do_shift; 17362 case IX86_BUILTIN_PSRAD: 17363 case IX86_BUILTIN_PSRAD128: 17364 case IX86_BUILTIN_PSRAD128_MASK: 17365 case IX86_BUILTIN_PSRAD256: 17366 case IX86_BUILTIN_PSRAD256_MASK: 17367 case IX86_BUILTIN_PSRAD512: 17368 case IX86_BUILTIN_PSRADI: 17369 case IX86_BUILTIN_PSRADI128: 17370 case IX86_BUILTIN_PSRADI128_MASK: 17371 case IX86_BUILTIN_PSRADI256: 17372 case IX86_BUILTIN_PSRADI256_MASK: 17373 case 
IX86_BUILTIN_PSRADI512: 17374 case IX86_BUILTIN_PSRAQ128_MASK: 17375 case IX86_BUILTIN_PSRAQ256_MASK: 17376 case IX86_BUILTIN_PSRAQ512: 17377 case IX86_BUILTIN_PSRAQI128_MASK: 17378 case IX86_BUILTIN_PSRAQI256_MASK: 17379 case IX86_BUILTIN_PSRAQI512: 17380 case IX86_BUILTIN_PSRAW: 17381 case IX86_BUILTIN_PSRAW128: 17382 case IX86_BUILTIN_PSRAW128_MASK: 17383 case IX86_BUILTIN_PSRAW256: 17384 case IX86_BUILTIN_PSRAW256_MASK: 17385 case IX86_BUILTIN_PSRAW512: 17386 case IX86_BUILTIN_PSRAWI: 17387 case IX86_BUILTIN_PSRAWI128: 17388 case IX86_BUILTIN_PSRAWI128_MASK: 17389 case IX86_BUILTIN_PSRAWI256: 17390 case IX86_BUILTIN_PSRAWI256_MASK: 17391 case IX86_BUILTIN_PSRAWI512: 17392 rcode = ASHIFTRT; 17393 is_vshift = false; 17394 goto do_shift; 17395 case IX86_BUILTIN_PSRLD: 17396 case IX86_BUILTIN_PSRLD128: 17397 case IX86_BUILTIN_PSRLD128_MASK: 17398 case IX86_BUILTIN_PSRLD256: 17399 case IX86_BUILTIN_PSRLD256_MASK: 17400 case IX86_BUILTIN_PSRLD512: 17401 case IX86_BUILTIN_PSRLDI: 17402 case IX86_BUILTIN_PSRLDI128: 17403 case IX86_BUILTIN_PSRLDI128_MASK: 17404 case IX86_BUILTIN_PSRLDI256: 17405 case IX86_BUILTIN_PSRLDI256_MASK: 17406 case IX86_BUILTIN_PSRLDI512: 17407 case IX86_BUILTIN_PSRLQ: 17408 case IX86_BUILTIN_PSRLQ128: 17409 case IX86_BUILTIN_PSRLQ128_MASK: 17410 case IX86_BUILTIN_PSRLQ256: 17411 case IX86_BUILTIN_PSRLQ256_MASK: 17412 case IX86_BUILTIN_PSRLQ512: 17413 case IX86_BUILTIN_PSRLQI: 17414 case IX86_BUILTIN_PSRLQI128: 17415 case IX86_BUILTIN_PSRLQI128_MASK: 17416 case IX86_BUILTIN_PSRLQI256: 17417 case IX86_BUILTIN_PSRLQI256_MASK: 17418 case IX86_BUILTIN_PSRLQI512: 17419 case IX86_BUILTIN_PSRLW: 17420 case IX86_BUILTIN_PSRLW128: 17421 case IX86_BUILTIN_PSRLW128_MASK: 17422 case IX86_BUILTIN_PSRLW256: 17423 case IX86_BUILTIN_PSRLW256_MASK: 17424 case IX86_BUILTIN_PSRLW512: 17425 case IX86_BUILTIN_PSRLWI: 17426 case IX86_BUILTIN_PSRLWI128: 17427 case IX86_BUILTIN_PSRLWI128_MASK: 17428 case IX86_BUILTIN_PSRLWI256: 17429 case IX86_BUILTIN_PSRLWI256_MASK: 
17430 case IX86_BUILTIN_PSRLWI512: 17431 rcode = LSHIFTRT; 17432 is_vshift = false; 17433 goto do_shift; 17434 case IX86_BUILTIN_PSLLVV16HI: 17435 case IX86_BUILTIN_PSLLVV16SI: 17436 case IX86_BUILTIN_PSLLVV2DI: 17437 case IX86_BUILTIN_PSLLVV2DI_MASK: 17438 case IX86_BUILTIN_PSLLVV32HI: 17439 case IX86_BUILTIN_PSLLVV4DI: 17440 case IX86_BUILTIN_PSLLVV4DI_MASK: 17441 case IX86_BUILTIN_PSLLVV4SI: 17442 case IX86_BUILTIN_PSLLVV4SI_MASK: 17443 case IX86_BUILTIN_PSLLVV8DI: 17444 case IX86_BUILTIN_PSLLVV8HI: 17445 case IX86_BUILTIN_PSLLVV8SI: 17446 case IX86_BUILTIN_PSLLVV8SI_MASK: 17447 rcode = ASHIFT; 17448 is_vshift = true; 17449 goto do_shift; 17450 case IX86_BUILTIN_PSRAVQ128: 17451 case IX86_BUILTIN_PSRAVQ256: 17452 case IX86_BUILTIN_PSRAVV16HI: 17453 case IX86_BUILTIN_PSRAVV16SI: 17454 case IX86_BUILTIN_PSRAVV32HI: 17455 case IX86_BUILTIN_PSRAVV4SI: 17456 case IX86_BUILTIN_PSRAVV4SI_MASK: 17457 case IX86_BUILTIN_PSRAVV8DI: 17458 case IX86_BUILTIN_PSRAVV8HI: 17459 case IX86_BUILTIN_PSRAVV8SI: 17460 case IX86_BUILTIN_PSRAVV8SI_MASK: 17461 rcode = ASHIFTRT; 17462 is_vshift = true; 17463 goto do_shift; 17464 case IX86_BUILTIN_PSRLVV16HI: 17465 case IX86_BUILTIN_PSRLVV16SI: 17466 case IX86_BUILTIN_PSRLVV2DI: 17467 case IX86_BUILTIN_PSRLVV2DI_MASK: 17468 case IX86_BUILTIN_PSRLVV32HI: 17469 case IX86_BUILTIN_PSRLVV4DI: 17470 case IX86_BUILTIN_PSRLVV4DI_MASK: 17471 case IX86_BUILTIN_PSRLVV4SI: 17472 case IX86_BUILTIN_PSRLVV4SI_MASK: 17473 case IX86_BUILTIN_PSRLVV8DI: 17474 case IX86_BUILTIN_PSRLVV8HI: 17475 case IX86_BUILTIN_PSRLVV8SI: 17476 case IX86_BUILTIN_PSRLVV8SI_MASK: 17477 rcode = LSHIFTRT; 17478 is_vshift = true; 17479 goto do_shift; 17480 17481 do_shift: 17482 gcc_assert (n_args >= 2); 17483 if (TREE_CODE (args[0]) != VECTOR_CST) 17484 break; 17485 mask = HOST_WIDE_INT_M1U; 17486 if (n_args > 2) 17487 { 17488 /* This is masked shift. 
*/ 17489 if (!tree_fits_uhwi_p (args[n_args - 1]) 17490 || TREE_SIDE_EFFECTS (args[n_args - 2])) 17491 break; 17492 mask = tree_to_uhwi (args[n_args - 1]); 17493 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); 17494 mask |= HOST_WIDE_INT_M1U << elems; 17495 if (mask != HOST_WIDE_INT_M1U 17496 && TREE_CODE (args[n_args - 2]) != VECTOR_CST) 17497 break; 17498 if (mask == (HOST_WIDE_INT_M1U << elems)) 17499 return args[n_args - 2]; 17500 } 17501 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) 17502 break; 17503 if (tree tem = (is_vshift ? integer_one_node 17504 : ix86_vector_shift_count (args[1]))) 17505 { 17506 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); 17507 unsigned HOST_WIDE_INT prec 17508 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); 17509 if (count == 0 && mask == HOST_WIDE_INT_M1U) 17510 return args[0]; 17511 if (count >= prec) 17512 { 17513 if (rcode == ASHIFTRT) 17514 count = prec - 1; 17515 else if (mask == HOST_WIDE_INT_M1U) 17516 return build_zero_cst (TREE_TYPE (args[0])); 17517 } 17518 tree countt = NULL_TREE; 17519 if (!is_vshift) 17520 { 17521 if (count >= prec) 17522 countt = integer_zero_node; 17523 else 17524 countt = build_int_cst (integer_type_node, count); 17525 } 17526 tree_vector_builder builder; 17527 if (mask != HOST_WIDE_INT_M1U || is_vshift) 17528 builder.new_vector (TREE_TYPE (args[0]), 17529 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), 17530 1); 17531 else 17532 builder.new_unary_operation (TREE_TYPE (args[0]), args[0], 17533 false); 17534 unsigned int cnt = builder.encoded_nelts (); 17535 for (unsigned int i = 0; i < cnt; ++i) 17536 { 17537 tree elt = VECTOR_CST_ELT (args[0], i); 17538 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) 17539 return NULL_TREE; 17540 tree type = TREE_TYPE (elt); 17541 if (rcode == LSHIFTRT) 17542 elt = fold_convert (unsigned_type_for (type), elt); 17543 if (is_vshift) 17544 { 17545 countt = VECTOR_CST_ELT (args[1], i); 17546 if (TREE_CODE (countt) != INTEGER_CST 17547 
|| TREE_OVERFLOW (countt)) 17548 return NULL_TREE; 17549 if (wi::neg_p (wi::to_wide (countt)) 17550 || wi::to_widest (countt) >= prec) 17551 { 17552 if (rcode == ASHIFTRT) 17553 countt = build_int_cst (TREE_TYPE (countt), 17554 prec - 1); 17555 else 17556 { 17557 elt = build_zero_cst (TREE_TYPE (elt)); 17558 countt = build_zero_cst (TREE_TYPE (countt)); 17559 } 17560 } 17561 } 17562 else if (count >= prec) 17563 elt = build_zero_cst (TREE_TYPE (elt)); 17564 elt = const_binop (rcode == ASHIFT 17565 ? LSHIFT_EXPR : RSHIFT_EXPR, 17566 TREE_TYPE (elt), elt, countt); 17567 if (!elt || TREE_CODE (elt) != INTEGER_CST) 17568 return NULL_TREE; 17569 if (rcode == LSHIFTRT) 17570 elt = fold_convert (type, elt); 17571 if ((mask & (HOST_WIDE_INT_1U << i)) == 0) 17572 { 17573 elt = VECTOR_CST_ELT (args[n_args - 2], i); 17574 if (TREE_CODE (elt) != INTEGER_CST 17575 || TREE_OVERFLOW (elt)) 17576 return NULL_TREE; 17577 } 17578 builder.quick_push (elt); 17579 } 17580 return builder.build (); 17581 } 17582 break; 17583 17584 default: 17585 break; 17586 } 17587 } 17588 17589#ifdef SUBTARGET_FOLD_BUILTIN 17590 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); 17591#endif 17592 17593 return NULL_TREE; 17594} 17595 17596/* Fold a MD builtin (use ix86_fold_builtin for folding into 17597 constant) in GIMPLE. 
*/

bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code
    = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  tree decl = NULL_TREE;
  tree arg0, arg1, arg2;
  /* For the shift cases below: RCODE is the rtx shift kind, COUNT the
     (scalar) shift amount, and IS_VSHIFT distinguishes per-element
     variable shifts (PS*V*) from shifts by a single count.  */
  enum rtx_code rcode;
  unsigned HOST_WIDE_INT count;
  bool is_vshift;

  switch (fn_code)
    {
    /* __builtin_ia32_{tzcnt,lzcnt}* with a provably non-zero argument
       can be lowered to the generic __builtin_c[tl]z{,ll}.  */
    case IX86_BUILTIN_TZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CTZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_TZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CLZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
      goto fold_tzcnt_lzcnt;

    fold_tzcnt_lzcnt:
      gcc_assert (n_args == 1);
      arg0 = gimple_call_arg (stmt, 0);
      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
	{
	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
	  /* If arg0 is provably non-zero, optimize into generic
	     __builtin_c[tl]z{,ll} function the middle-end handles
	     better.  */
	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
	    return false;

	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_call (decl, 1, arg0);
	  gimple_set_location (g, loc);
	  tree lhs = make_ssa_name (integer_type_node);
	  gimple_call_set_lhs (g, lhs);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  /* The generic builtin returns int; convert to the original
	     lhs type with a separate NOP assignment.  */
	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_BZHI32:
    case IX86_BUILTIN_BZHI64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
	{
	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
	  arg0 = gimple_call_arg (stmt, 0);
	  /* A start index >= precision leaves the operand unchanged,
	     so the call degenerates to a copy of arg0.  */
	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
	    break;
	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_PDEP32:
    case IX86_BUILTIN_PDEP64:
    case IX86_BUILTIN_PEXT32:
    case IX86_BUILTIN_PEXT64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      /* pdep/pext with an all-ones mask is an identity on arg0.  */
      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
	{
	  location_t loc = gimple_location (stmt);
	  arg0 = gimple_call_arg (stmt, 0);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    /* Vector left shifts by a single (immediate or scalar) count.  */
    case IX86_BUILTIN_PSLLD:
    case IX86_BUILTIN_PSLLD128:
    case IX86_BUILTIN_PSLLD128_MASK:
    case IX86_BUILTIN_PSLLD256:
    case IX86_BUILTIN_PSLLD256_MASK:
    case IX86_BUILTIN_PSLLD512:
    case IX86_BUILTIN_PSLLDI:
    case IX86_BUILTIN_PSLLDI128:
    case IX86_BUILTIN_PSLLDI128_MASK:
    case IX86_BUILTIN_PSLLDI256:
    case IX86_BUILTIN_PSLLDI256_MASK:
    case IX86_BUILTIN_PSLLDI512:
    case IX86_BUILTIN_PSLLQ:
    case IX86_BUILTIN_PSLLQ128:
    case IX86_BUILTIN_PSLLQ128_MASK:
    case IX86_BUILTIN_PSLLQ256:
    case IX86_BUILTIN_PSLLQ256_MASK:
    case IX86_BUILTIN_PSLLQ512:
    case IX86_BUILTIN_PSLLQI:
    case IX86_BUILTIN_PSLLQI128:
    case IX86_BUILTIN_PSLLQI128_MASK:
    case IX86_BUILTIN_PSLLQI256:
    case IX86_BUILTIN_PSLLQI256_MASK:
    case IX86_BUILTIN_PSLLQI512:
    case IX86_BUILTIN_PSLLW:
    case IX86_BUILTIN_PSLLW128:
    case IX86_BUILTIN_PSLLW128_MASK:
    case IX86_BUILTIN_PSLLW256:
    case IX86_BUILTIN_PSLLW256_MASK:
    case IX86_BUILTIN_PSLLW512_MASK:
    case IX86_BUILTIN_PSLLWI:
    case IX86_BUILTIN_PSLLWI128:
    case IX86_BUILTIN_PSLLWI128_MASK:
    case IX86_BUILTIN_PSLLWI256:
    case IX86_BUILTIN_PSLLWI256_MASK:
    case IX86_BUILTIN_PSLLWI512_MASK:
      rcode = ASHIFT;
      is_vshift = false;
      goto do_shift;
    /* Vector arithmetic right shifts by a single count.  */
    case IX86_BUILTIN_PSRAD:
    case IX86_BUILTIN_PSRAD128:
    case IX86_BUILTIN_PSRAD128_MASK:
    case IX86_BUILTIN_PSRAD256:
    case IX86_BUILTIN_PSRAD256_MASK:
    case IX86_BUILTIN_PSRAD512:
    case IX86_BUILTIN_PSRADI:
    case IX86_BUILTIN_PSRADI128:
    case IX86_BUILTIN_PSRADI128_MASK:
    case IX86_BUILTIN_PSRADI256:
    case IX86_BUILTIN_PSRADI256_MASK:
    case IX86_BUILTIN_PSRADI512:
    case IX86_BUILTIN_PSRAQ128_MASK:
    case IX86_BUILTIN_PSRAQ256_MASK:
    case IX86_BUILTIN_PSRAQ512:
    case IX86_BUILTIN_PSRAQI128_MASK:
    case IX86_BUILTIN_PSRAQI256_MASK:
    case IX86_BUILTIN_PSRAQI512:
    case IX86_BUILTIN_PSRAW:
    case IX86_BUILTIN_PSRAW128:
    case IX86_BUILTIN_PSRAW128_MASK:
    case IX86_BUILTIN_PSRAW256:
    case IX86_BUILTIN_PSRAW256_MASK:
    case IX86_BUILTIN_PSRAW512:
    case IX86_BUILTIN_PSRAWI:
    case IX86_BUILTIN_PSRAWI128:
    case IX86_BUILTIN_PSRAWI128_MASK:
    case IX86_BUILTIN_PSRAWI256:
    case IX86_BUILTIN_PSRAWI256_MASK:
    case IX86_BUILTIN_PSRAWI512:
      rcode = ASHIFTRT;
      is_vshift = false;
      goto do_shift;
    /* Vector logical right shifts by a single count.  */
    case IX86_BUILTIN_PSRLD:
    case IX86_BUILTIN_PSRLD128:
    case IX86_BUILTIN_PSRLD128_MASK:
    case IX86_BUILTIN_PSRLD256:
    case IX86_BUILTIN_PSRLD256_MASK:
    case IX86_BUILTIN_PSRLD512:
    case IX86_BUILTIN_PSRLDI:
    case IX86_BUILTIN_PSRLDI128:
    case IX86_BUILTIN_PSRLDI128_MASK:
    case IX86_BUILTIN_PSRLDI256:
    case IX86_BUILTIN_PSRLDI256_MASK:
    case IX86_BUILTIN_PSRLDI512:
    case IX86_BUILTIN_PSRLQ:
    case IX86_BUILTIN_PSRLQ128:
    case IX86_BUILTIN_PSRLQ128_MASK:
    case IX86_BUILTIN_PSRLQ256:
    case IX86_BUILTIN_PSRLQ256_MASK:
    case IX86_BUILTIN_PSRLQ512:
    case IX86_BUILTIN_PSRLQI:
    case IX86_BUILTIN_PSRLQI128:
    case IX86_BUILTIN_PSRLQI128_MASK:
    case IX86_BUILTIN_PSRLQI256:
    case IX86_BUILTIN_PSRLQI256_MASK:
    case IX86_BUILTIN_PSRLQI512:
    case IX86_BUILTIN_PSRLW:
    case IX86_BUILTIN_PSRLW128:
    case IX86_BUILTIN_PSRLW128_MASK:
    case IX86_BUILTIN_PSRLW256:
    case IX86_BUILTIN_PSRLW256_MASK:
    case IX86_BUILTIN_PSRLW512:
    case IX86_BUILTIN_PSRLWI:
    case IX86_BUILTIN_PSRLWI128:
    case IX86_BUILTIN_PSRLWI128_MASK:
    case IX86_BUILTIN_PSRLWI256:
    case IX86_BUILTIN_PSRLWI256_MASK:
    case IX86_BUILTIN_PSRLWI512:
      rcode = LSHIFTRT;
      is_vshift = false;
      goto do_shift;
    /* Per-element variable left shifts (vpsllv*).  */
    case IX86_BUILTIN_PSLLVV16HI:
    case IX86_BUILTIN_PSLLVV16SI:
    case IX86_BUILTIN_PSLLVV2DI:
    case IX86_BUILTIN_PSLLVV2DI_MASK:
    case IX86_BUILTIN_PSLLVV32HI:
    case IX86_BUILTIN_PSLLVV4DI:
    case IX86_BUILTIN_PSLLVV4DI_MASK:
    case IX86_BUILTIN_PSLLVV4SI:
    case IX86_BUILTIN_PSLLVV4SI_MASK:
    case IX86_BUILTIN_PSLLVV8DI:
    case IX86_BUILTIN_PSLLVV8HI:
    case IX86_BUILTIN_PSLLVV8SI:
    case IX86_BUILTIN_PSLLVV8SI_MASK:
      rcode = ASHIFT;
      is_vshift = true;
      goto do_shift;
    /* Per-element variable arithmetic right shifts (vpsrav*).  */
    case IX86_BUILTIN_PSRAVQ128:
    case IX86_BUILTIN_PSRAVQ256:
    case IX86_BUILTIN_PSRAVV16HI:
    case IX86_BUILTIN_PSRAVV16SI:
    case IX86_BUILTIN_PSRAVV32HI:
    case IX86_BUILTIN_PSRAVV4SI:
    case IX86_BUILTIN_PSRAVV4SI_MASK:
    case IX86_BUILTIN_PSRAVV8DI:
    case IX86_BUILTIN_PSRAVV8HI:
    case IX86_BUILTIN_PSRAVV8SI:
    case IX86_BUILTIN_PSRAVV8SI_MASK:
      rcode = ASHIFTRT;
      is_vshift = true;
      goto do_shift;
    /* Per-element variable logical right shifts (vpsrlv*).  */
    case IX86_BUILTIN_PSRLVV16HI:
    case IX86_BUILTIN_PSRLVV16SI:
    case IX86_BUILTIN_PSRLVV2DI:
    case IX86_BUILTIN_PSRLVV2DI_MASK:
    case IX86_BUILTIN_PSRLVV32HI:
    case IX86_BUILTIN_PSRLVV4DI:
    case IX86_BUILTIN_PSRLVV4DI_MASK:
    case IX86_BUILTIN_PSRLVV4SI:
    case IX86_BUILTIN_PSRLVV4SI_MASK:
    case IX86_BUILTIN_PSRLVV8DI:
    case IX86_BUILTIN_PSRLVV8HI:
    case IX86_BUILTIN_PSRLVV8SI:
    case IX86_BUILTIN_PSRLVV8SI_MASK:
      rcode = LSHIFTRT;
      is_vshift = true;
      goto do_shift;

    do_shift:
      gcc_assert (n_args >= 2);
      if (!gimple_call_lhs (stmt))
	break;
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      if (n_args > 2)
	{
	  /* This is masked shift.  Only optimize if the mask is all ones.  */
	  tree argl = gimple_call_arg (stmt, n_args - 1);
	  if (!tree_fits_uhwi_p (argl))
	    break;
	  unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
	  unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
	  if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
	    break;
	}
      if (is_vshift)
	{
	  /* Variable shifts fold only if every element's count is zero
	     (identity) or every count saturates (shift to zero); mixed
	     or in-range counts are left alone.  */
	  if (TREE_CODE (arg1) != VECTOR_CST)
	    break;
	  count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
	  if (integer_zerop (arg1))
	    count = 0;
	  else if (rcode == ASHIFTRT)
	    break;
	  else
	    for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
	      {
		tree elt = VECTOR_CST_ELT (arg1, i);
		if (!wi::neg_p (wi::to_wide (elt))
		    && wi::to_widest (elt) < count)
		  return false;
	      }
	}
      else
	{
	  arg1 = ix86_vector_shift_count (arg1);
	  if (!arg1)
	    break;
	  count = tree_to_uhwi (arg1);
	}
      if (count == 0)
	{
	  /* Just return the first argument for shift by 0.  */
	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      if (rcode != ASHIFTRT
	  && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
	{
	  /* For shift counts equal or greater than precision, except for
	     arithmetic right shift the result is zero.  */
	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
					   build_zero_cst (TREE_TYPE (arg0)));
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_SHUFPD:
      /* Lower shufpd with a constant selector to a VEC_PERM_EXPR the
	 middle end can optimize further.  */
      arg2 = gimple_call_arg (stmt, 2);
      if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
	{
	  location_t loc = gimple_location (stmt);
	  unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
	  arg0 = gimple_call_arg (stmt, 0);
	  arg1 = gimple_call_arg (stmt, 1);
	  tree itype = long_long_integer_type_node;
	  tree vtype = build_vector_type (itype, 2); /* V2DI */
	  tree_vector_builder elts (vtype, 2, 1);
	  /* Ignore bits other than the lowest 2.  */
	  elts.quick_push (build_int_cst (itype, imask & 1));
	  imask >>= 1;
	  elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
	  tree omask = elts.build ();
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
					   VEC_PERM_EXPR,
					   arg0, arg1, omask);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      // Do not error yet, the constant could be propagated later?
      break;

    default:
      break;
    }

  return false;
}

/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.
*/
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Element mode and lane count of input and output vectors must agree.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      /* Only 2 x double (V2DF) and 4 x float (V4SF) variants exist.  */
      if ((el_mode != DFmode || n != 2)
	  && (el_mode != SFmode || n != 4))
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* Build the SVML entry point name; log is special-cased as "Ln".
     bname+10 skips over the 10-character "__builtin_" prefix.  */
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      /* Strip the trailing 'f' of the float builtin name and append
	 the lane count instead.  */
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}

/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  /* Template "__vr.._"; the two dots are patched below with the
     element kind ('d'/'s') and lane count ('2'/'4').  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      if (el_mode == DFmode && n == 2)
	{
	  name[4] = 'd';
	  name[5] = '2';
	}
      else if (el_mode == SFmode && n == 4)
	{
	  name[4] = 's';
	  name[5] = '4';
	}
      else
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  /* Append the scalar builtin name, minus its "__builtin_" prefix.  */
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype =
build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}

/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
				const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* Scatter stores require at least AVX-512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* The index must be an SImode or DImode integer or pointer.  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* Pick the builtin from the vector mode; 256/128-bit variants
     additionally require AVX-512VL.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
	return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
	return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
	return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
	return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
	return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
	return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
	return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
	code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
	return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}

/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.
*/

static bool
use_rsqrt_p ()
{
  /* The rsqrt approximation is only valid under unsafe-math flags,
     since it is not a correctly-rounded 1.0/sqrt.  */
  return (TARGET_SSE && TARGET_SSE_MATH
	  && flag_finite_math_only
	  && !flag_trapping_math
	  && flag_unsafe_math_optimizations);
}

/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
      for (i = 4; i < 6; ++i)
	{
	  if (ipar[i] < 4 || ipar[i] >= 6)
	    return 0;
	  mask |= (ipar[i] - 4) << i;
	}
      for (i = 6; i < 8; ++i)
	{
	  if (ipar[i] < 6)
	    return 0;
	  mask |= (ipar[i] - 6) << i;
	}
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.
*/
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
	 must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
	if (ipar[i] + 8 != ipar[i + 8])
	  return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
	 movement within the low 128-bit lane, but the high 128-bit
	 lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}

/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.
*/ 18352 for (i = 0; i < nelt; ++i) 18353 { 18354 rtx er = XVECEXP (par, 0, i); 18355 unsigned HOST_WIDE_INT ei; 18356 18357 if (!CONST_INT_P (er)) 18358 return 0; 18359 ei = INTVAL (er); 18360 if (ei >= 2 * nelt) 18361 return 0; 18362 ipar[i] = ei; 18363 } 18364 18365 /* Validate that the halves of the permute are halves. */ 18366 for (i = 0; i < nelt2 - 1; ++i) 18367 if (ipar[i] + 1 != ipar[i + 1]) 18368 return 0; 18369 for (i = nelt2; i < nelt - 1; ++i) 18370 if (ipar[i] + 1 != ipar[i + 1]) 18371 return 0; 18372 18373 /* Reconstruct the mask. */ 18374 for (i = 0; i < 2; ++i) 18375 { 18376 unsigned e = ipar[i * nelt2]; 18377 if (e % nelt2) 18378 return 0; 18379 e /= nelt2; 18380 mask |= e << (i * 4); 18381 } 18382 18383 /* Make sure success has a non-zero value by adding one. */ 18384 return mask + 1; 18385} 18386 18387/* Return a register priority for hard reg REGNO. */ 18388static int 18389ix86_register_priority (int hard_regno) 18390{ 18391 /* ebp and r13 as the base always wants a displacement, r12 as the 18392 base always wants an index. So discourage their usage in an 18393 address. */ 18394 if (hard_regno == R12_REG || hard_regno == R13_REG) 18395 return 0; 18396 if (hard_regno == BP_REG) 18397 return 1; 18398 /* New x86-64 int registers result in bigger code size. Discourage 18399 them. */ 18400 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) 18401 return 2; 18402 /* New x86-64 SSE registers result in bigger code size. Discourage 18403 them. */ 18404 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) 18405 return 2; 18406 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)) 18407 return 1; 18408 /* Usage of AX register results in smaller code. Prefer it. */ 18409 if (hard_regno == AX_REG) 18410 return 4; 18411 return 3; 18412} 18413 18414/* Implement TARGET_PREFERRED_RELOAD_CLASS. 18415 18416 Put float CONST_DOUBLE in the constant pool instead of fp regs. 18417 QImode must go into class Q_REGS. 
18418 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 18419 movdf to do mem-to-mem moves through integer regs. */ 18420 18421static reg_class_t 18422ix86_preferred_reload_class (rtx x, reg_class_t regclass) 18423{ 18424 machine_mode mode = GET_MODE (x); 18425 18426 /* We're only allowed to return a subclass of CLASS. Many of the 18427 following checks fail for NO_REGS, so eliminate that early. */ 18428 if (regclass == NO_REGS) 18429 return NO_REGS; 18430 18431 /* All classes can load zeros. */ 18432 if (x == CONST0_RTX (mode)) 18433 return regclass; 18434 18435 /* Force constants into memory if we are loading a (nonzero) constant into 18436 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK 18437 instructions to load from a constant. */ 18438 if (CONSTANT_P (x) 18439 && (MAYBE_MMX_CLASS_P (regclass) 18440 || MAYBE_SSE_CLASS_P (regclass) 18441 || MAYBE_MASK_CLASS_P (regclass))) 18442 return NO_REGS; 18443 18444 /* Floating-point constants need more complex checks. */ 18445 if (CONST_DOUBLE_P (x)) 18446 { 18447 /* General regs can load everything. */ 18448 if (INTEGER_CLASS_P (regclass)) 18449 return regclass; 18450 18451 /* Floats can load 0 and 1 plus some others. Note that we eliminated 18452 zero above. We only want to wind up preferring 80387 registers if 18453 we plan on doing computation with them. */ 18454 if (IS_STACK_MODE (mode) 18455 && standard_80387_constant_p (x) > 0) 18456 { 18457 /* Limit class to FP regs. */ 18458 if (FLOAT_CLASS_P (regclass)) 18459 return FLOAT_REGS; 18460 } 18461 18462 return NO_REGS; 18463 } 18464 18465 /* Prefer SSE regs only, if we can use them for math. */ 18466 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 18467 return SSE_CLASS_P (regclass) ? regclass : NO_REGS; 18468 18469 /* Generally when we see PLUS here, it's the function invariant 18470 (plus soft-fp const_int). Which can only be computed into general 18471 regs. 
*/ 18472 if (GET_CODE (x) == PLUS) 18473 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; 18474 18475 /* QImode constants are easy to load, but non-constant QImode data 18476 must go into Q_REGS. */ 18477 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 18478 { 18479 if (Q_CLASS_P (regclass)) 18480 return regclass; 18481 else if (reg_class_subset_p (Q_REGS, regclass)) 18482 return Q_REGS; 18483 else 18484 return NO_REGS; 18485 } 18486 18487 return regclass; 18488} 18489 18490/* Discourage putting floating-point values in SSE registers unless 18491 SSE math is being used, and likewise for the 387 registers. */ 18492static reg_class_t 18493ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) 18494{ 18495 /* Restrict the output reload class to the register bank that we are doing 18496 math on. If we would like not to return a subset of CLASS, reject this 18497 alternative: if reload cannot do this, it will still use its choice. */ 18498 machine_mode mode = GET_MODE (x); 18499 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 18500 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; 18501 18502 if (IS_STACK_MODE (mode)) 18503 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; 18504 18505 return regclass; 18506} 18507 18508static reg_class_t 18509ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, 18510 machine_mode mode, secondary_reload_info *sri) 18511{ 18512 /* Double-word spills from general registers to non-offsettable memory 18513 references (zero-extended addresses) require special handling. */ 18514 if (TARGET_64BIT 18515 && MEM_P (x) 18516 && GET_MODE_SIZE (mode) > UNITS_PER_WORD 18517 && INTEGER_CLASS_P (rclass) 18518 && !offsettable_memref_p (x)) 18519 { 18520 sri->icode = (in_p 18521 ? CODE_FOR_reload_noff_load 18522 : CODE_FOR_reload_noff_store); 18523 /* Add the cost of moving address to a temporary. 
*/ 18524 sri->extra_cost = 1; 18525 18526 return NO_REGS; 18527 } 18528 18529 /* QImode spills from non-QI registers require 18530 intermediate register on 32bit targets. */ 18531 if (mode == QImode 18532 && ((!TARGET_64BIT && !in_p 18533 && INTEGER_CLASS_P (rclass) 18534 && MAYBE_NON_Q_CLASS_P (rclass)) 18535 || (!TARGET_AVX512DQ 18536 && MAYBE_MASK_CLASS_P (rclass)))) 18537 { 18538 int regno = true_regnum (x); 18539 18540 /* Return Q_REGS if the operand is in memory. */ 18541 if (regno == -1) 18542 return Q_REGS; 18543 18544 return NO_REGS; 18545 } 18546 18547 /* This condition handles corner case where an expression involving 18548 pointers gets vectorized. We're trying to use the address of a 18549 stack slot as a vector initializer. 18550 18551 (set (reg:V2DI 74 [ vect_cst_.2 ]) 18552 (vec_duplicate:V2DI (reg/f:DI 20 frame))) 18553 18554 Eventually frame gets turned into sp+offset like this: 18555 18556 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) 18557 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) 18558 (const_int 392 [0x188])))) 18559 18560 That later gets turned into: 18561 18562 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) 18563 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) 18564 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) 18565 18566 We'll have the following reload recorded: 18567 18568 Reload 0: reload_in (DI) = 18569 (plus:DI (reg/f:DI 7 sp) 18570 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) 18571 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) 18572 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine 18573 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) 18574 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) 18575 reload_reg_rtx: (reg:V2DI 22 xmm1) 18576 18577 Which isn't going to work since SSE instructions can't handle scalar 18578 additions. 
Returning GENERAL_REGS forces the addition into integer 18579 register and reload can handle subsequent reloads without problems. */ 18580 18581 if (in_p && GET_CODE (x) == PLUS 18582 && SSE_CLASS_P (rclass) 18583 && SCALAR_INT_MODE_P (mode)) 18584 return GENERAL_REGS; 18585 18586 return NO_REGS; 18587} 18588 18589/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ 18590 18591static bool 18592ix86_class_likely_spilled_p (reg_class_t rclass) 18593{ 18594 switch (rclass) 18595 { 18596 case AREG: 18597 case DREG: 18598 case CREG: 18599 case BREG: 18600 case AD_REGS: 18601 case SIREG: 18602 case DIREG: 18603 case SSE_FIRST_REG: 18604 case FP_TOP_REG: 18605 case FP_SECOND_REG: 18606 return true; 18607 18608 default: 18609 break; 18610 } 18611 18612 return false; 18613} 18614 18615/* If we are copying between registers from different register sets 18616 (e.g. FP and integer), we may need a memory location. 18617 18618 The function can't work reliably when one of the CLASSES is a class 18619 containing registers from multiple sets. We avoid this by never combining 18620 different sets in a single alternative in the machine description. 18621 Ensure that this constraint holds to avoid unexpected surprises. 18622 18623 When STRICT is false, we are being called from REGISTER_MOVE_COST, 18624 so do not enforce these sanity checks. 18625 18626 To optimize register_move_cost performance, define inline variant. 
*/ 18627 18628static inline bool 18629inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, 18630 reg_class_t class2, int strict) 18631{ 18632 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) 18633 return false; 18634 18635 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 18636 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 18637 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 18638 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 18639 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 18640 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) 18641 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) 18642 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) 18643 { 18644 gcc_assert (!strict || lra_in_progress); 18645 return true; 18646 } 18647 18648 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 18649 return true; 18650 18651 /* ??? This is a lie. We do have moves between mmx/general, and for 18652 mmx/sse2. But by saying we need secondary memory we discourage the 18653 register allocator from using the mmx registers unless needed. */ 18654 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 18655 return true; 18656 18657 /* Between mask and general, we have moves no larger than word size. */ 18658 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) 18659 { 18660 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)) 18661 || GET_MODE_SIZE (mode) > UNITS_PER_WORD) 18662 return true; 18663 } 18664 18665 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 18666 { 18667 /* SSE1 doesn't have any direct moves from other classes. */ 18668 if (!TARGET_SSE2) 18669 return true; 18670 18671 /* Between SSE and general, we have moves no larger than word size. 
*/ 18672 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)) 18673 || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode) 18674 || GET_MODE_SIZE (mode) > UNITS_PER_WORD) 18675 return true; 18676 18677 /* If the target says that inter-unit moves are more expensive 18678 than moving through memory, then don't generate them. */ 18679 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) 18680 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) 18681 return true; 18682 } 18683 18684 return false; 18685} 18686 18687/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ 18688 18689static bool 18690ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, 18691 reg_class_t class2) 18692{ 18693 return inline_secondary_memory_needed (mode, class1, class2, true); 18694} 18695 18696/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. 18697 18698 get_secondary_mem widens integral modes to BITS_PER_WORD. 18699 There is no need to emit full 64 bit move on 64 bit targets 18700 for integral modes that can be moved using 32 bit move. */ 18701 18702static machine_mode 18703ix86_secondary_memory_needed_mode (machine_mode mode) 18704{ 18705 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) 18706 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); 18707 return mode; 18708} 18709 18710/* Implement the TARGET_CLASS_MAX_NREGS hook. 18711 18712 On the 80386, this is the size of MODE in words, 18713 except in the FP regs, where a single reg is always enough. */ 18714 18715static unsigned char 18716ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) 18717{ 18718 if (MAYBE_INTEGER_CLASS_P (rclass)) 18719 { 18720 if (mode == XFmode) 18721 return (TARGET_64BIT ? 2 : 3); 18722 else if (mode == XCmode) 18723 return (TARGET_64BIT ? 
4 : 6); 18724 else 18725 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 18726 } 18727 else 18728 { 18729 if (COMPLEX_MODE_P (mode)) 18730 return 2; 18731 else 18732 return 1; 18733 } 18734} 18735 18736/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ 18737 18738static bool 18739ix86_can_change_mode_class (machine_mode from, machine_mode to, 18740 reg_class_t regclass) 18741{ 18742 if (from == to) 18743 return true; 18744 18745 /* x87 registers can't do subreg at all, as all values are reformatted 18746 to extended precision. */ 18747 if (MAYBE_FLOAT_CLASS_P (regclass)) 18748 return false; 18749 18750 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) 18751 { 18752 /* Vector registers do not support QI or HImode loads. If we don't 18753 disallow a change to these modes, reload will assume it's ok to 18754 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 18755 the vec_dupv4hi pattern. */ 18756 if (GET_MODE_SIZE (from) < 4) 18757 return false; 18758 } 18759 18760 return true; 18761} 18762 18763/* Return index of MODE in the sse load/store tables. */ 18764 18765static inline int 18766sse_store_index (machine_mode mode) 18767{ 18768 switch (GET_MODE_SIZE (mode)) 18769 { 18770 case 4: 18771 return 0; 18772 case 8: 18773 return 1; 18774 case 16: 18775 return 2; 18776 case 32: 18777 return 3; 18778 case 64: 18779 return 4; 18780 default: 18781 return -1; 18782 } 18783} 18784 18785/* Return the cost of moving data of mode M between a 18786 register and memory. A value of 2 is the default; this cost is 18787 relative to those in `REGISTER_MOVE_COST'. 18788 18789 This function is used extensively by register_move_cost that is used to 18790 build tables at startup. Make it inline in this case. 18791 When IN is 2, return maximum of in and out move cost. 18792 18793 If moving between registers and memory is more expensive than 18794 between two registers, you should define this macro to express the 18795 relative cost. 
18796 18797 Model also increased moving costs of QImode registers in non 18798 Q_REGS classes. 18799 */ 18800static inline int 18801inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) 18802{ 18803 int cost; 18804 if (FLOAT_CLASS_P (regclass)) 18805 { 18806 int index; 18807 switch (mode) 18808 { 18809 case E_SFmode: 18810 index = 0; 18811 break; 18812 case E_DFmode: 18813 index = 1; 18814 break; 18815 case E_XFmode: 18816 index = 2; 18817 break; 18818 default: 18819 return 100; 18820 } 18821 if (in == 2) 18822 return MAX (ix86_cost->hard_register.fp_load [index], 18823 ix86_cost->hard_register.fp_store [index]); 18824 return in ? ix86_cost->hard_register.fp_load [index] 18825 : ix86_cost->hard_register.fp_store [index]; 18826 } 18827 if (SSE_CLASS_P (regclass)) 18828 { 18829 int index = sse_store_index (mode); 18830 if (index == -1) 18831 return 100; 18832 if (in == 2) 18833 return MAX (ix86_cost->hard_register.sse_load [index], 18834 ix86_cost->hard_register.sse_store [index]); 18835 return in ? ix86_cost->hard_register.sse_load [index] 18836 : ix86_cost->hard_register.sse_store [index]; 18837 } 18838 if (MMX_CLASS_P (regclass)) 18839 { 18840 int index; 18841 switch (GET_MODE_SIZE (mode)) 18842 { 18843 case 4: 18844 index = 0; 18845 break; 18846 case 8: 18847 index = 1; 18848 break; 18849 default: 18850 return 100; 18851 } 18852 if (in == 2) 18853 return MAX (ix86_cost->hard_register.mmx_load [index], 18854 ix86_cost->hard_register.mmx_store [index]); 18855 return in ? 
ix86_cost->hard_register.mmx_load [index] 18856 : ix86_cost->hard_register.mmx_store [index]; 18857 } 18858 switch (GET_MODE_SIZE (mode)) 18859 { 18860 case 1: 18861 if (Q_CLASS_P (regclass) || TARGET_64BIT) 18862 { 18863 if (!in) 18864 return ix86_cost->hard_register.int_store[0]; 18865 if (TARGET_PARTIAL_REG_DEPENDENCY 18866 && optimize_function_for_speed_p (cfun)) 18867 cost = ix86_cost->hard_register.movzbl_load; 18868 else 18869 cost = ix86_cost->hard_register.int_load[0]; 18870 if (in == 2) 18871 return MAX (cost, ix86_cost->hard_register.int_store[0]); 18872 return cost; 18873 } 18874 else 18875 { 18876 if (in == 2) 18877 return MAX (ix86_cost->hard_register.movzbl_load, 18878 ix86_cost->hard_register.int_store[0] + 4); 18879 if (in) 18880 return ix86_cost->hard_register.movzbl_load; 18881 else 18882 return ix86_cost->hard_register.int_store[0] + 4; 18883 } 18884 break; 18885 case 2: 18886 if (in == 2) 18887 return MAX (ix86_cost->hard_register.int_load[1], 18888 ix86_cost->hard_register.int_store[1]); 18889 return in ? ix86_cost->hard_register.int_load[1] 18890 : ix86_cost->hard_register.int_store[1]; 18891 default: 18892 if (in == 2) 18893 cost = MAX (ix86_cost->hard_register.int_load[2], 18894 ix86_cost->hard_register.int_store[2]); 18895 else if (in) 18896 cost = ix86_cost->hard_register.int_load[2]; 18897 else 18898 cost = ix86_cost->hard_register.int_store[2]; 18899 /* Multiply with the number of GPR moves needed. */ 18900 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); 18901 } 18902} 18903 18904static int 18905ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) 18906{ 18907 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); 18908} 18909 18910 18911/* Return the cost of moving data from a register in class CLASS1 to 18912 one in class CLASS2. 
18913 18914 It is not required that the cost always equal 2 when FROM is the same as TO; 18915 on some machines it is expensive to move between registers if they are not 18916 general registers. */ 18917 18918static int 18919ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, 18920 reg_class_t class2_i) 18921{ 18922 enum reg_class class1 = (enum reg_class) class1_i; 18923 enum reg_class class2 = (enum reg_class) class2_i; 18924 18925 /* In case we require secondary memory, compute cost of the store followed 18926 by load. In order to avoid bad register allocation choices, we need 18927 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ 18928 18929 if (inline_secondary_memory_needed (mode, class1, class2, false)) 18930 { 18931 int cost = 1; 18932 18933 cost += inline_memory_move_cost (mode, class1, 2); 18934 cost += inline_memory_move_cost (mode, class2, 2); 18935 18936 /* In case of copying from general_purpose_register we may emit multiple 18937 stores followed by single load causing memory size mismatch stall. 18938 Count this as arbitrarily high cost of 20. */ 18939 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD 18940 && TARGET_MEMORY_MISMATCH_STALL 18941 && targetm.class_max_nregs (class1, mode) 18942 > targetm.class_max_nregs (class2, mode)) 18943 cost += 20; 18944 18945 /* In the case of FP/MMX moves, the registers actually overlap, and we 18946 have to switch modes in order to treat them differently. */ 18947 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 18948 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 18949 cost += 20; 18950 18951 return cost; 18952 } 18953 18954 /* Moves between MMX and non-MMX units require secondary memory. */ 18955 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 18956 gcc_unreachable (); 18957 18958 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 18959 return (SSE_CLASS_P (class1) 18960 ? 
ix86_cost->hard_register.sse_to_integer 18961 : ix86_cost->hard_register.integer_to_sse); 18962 18963 if (MAYBE_FLOAT_CLASS_P (class1)) 18964 return ix86_cost->hard_register.fp_move; 18965 if (MAYBE_SSE_CLASS_P (class1)) 18966 { 18967 if (GET_MODE_BITSIZE (mode) <= 128) 18968 return ix86_cost->hard_register.xmm_move; 18969 if (GET_MODE_BITSIZE (mode) <= 256) 18970 return ix86_cost->hard_register.ymm_move; 18971 return ix86_cost->hard_register.zmm_move; 18972 } 18973 if (MAYBE_MMX_CLASS_P (class1)) 18974 return ix86_cost->hard_register.mmx_move; 18975 return 2; 18976} 18977 18978/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in 18979 words of a value of mode MODE but can be less for certain modes in 18980 special long registers. 18981 18982 Actually there are no two word move instructions for consecutive 18983 registers. And only registers 0-3 may have mov byte instructions 18984 applied to them. */ 18985 18986static unsigned int 18987ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) 18988{ 18989 if (GENERAL_REGNO_P (regno)) 18990 { 18991 if (mode == XFmode) 18992 return TARGET_64BIT ? 2 : 3; 18993 if (mode == XCmode) 18994 return TARGET_64BIT ? 4 : 6; 18995 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 18996 } 18997 if (COMPLEX_MODE_P (mode)) 18998 return 2; 18999 /* Register pair for mask registers. */ 19000 if (mode == P2QImode || mode == P2HImode) 19001 return 2; 19002 if (mode == V64SFmode || mode == V64SImode) 19003 return 4; 19004 return 1; 19005} 19006 19007/* Implement REGMODE_NATURAL_SIZE(MODE). */ 19008unsigned int 19009ix86_regmode_natural_size (machine_mode mode) 19010{ 19011 if (mode == P2HImode || mode == P2QImode) 19012 return GET_MODE_SIZE (mode) / 2; 19013 return UNITS_PER_WORD; 19014} 19015 19016/* Implement TARGET_HARD_REGNO_MODE_OK. */ 19017 19018static bool 19019ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 19020{ 19021 /* Flags and only flags can only hold CCmode values. 
*/ 19022 if (CC_REGNO_P (regno)) 19023 return GET_MODE_CLASS (mode) == MODE_CC; 19024 if (GET_MODE_CLASS (mode) == MODE_CC 19025 || GET_MODE_CLASS (mode) == MODE_RANDOM) 19026 return false; 19027 if (STACK_REGNO_P (regno)) 19028 return VALID_FP_MODE_P (mode); 19029 if (MASK_REGNO_P (regno)) 19030 { 19031 /* Register pair only starts at even register number. */ 19032 if ((mode == P2QImode || mode == P2HImode)) 19033 return MASK_PAIR_REGNO_P(regno); 19034 19035 return (VALID_MASK_REG_MODE (mode) 19036 || (TARGET_AVX512BW 19037 && VALID_MASK_AVX512BW_MODE (mode))); 19038 } 19039 19040 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 19041 return false; 19042 19043 if (SSE_REGNO_P (regno)) 19044 { 19045 /* We implement the move patterns for all vector modes into and 19046 out of SSE registers, even when no operation instructions 19047 are available. */ 19048 19049 /* For AVX-512 we allow, regardless of regno: 19050 - XI mode 19051 - any of 512-bit wide vector mode 19052 - any scalar mode. */ 19053 if (TARGET_AVX512F 19054 && (mode == XImode 19055 || VALID_AVX512F_REG_MODE (mode) 19056 || VALID_AVX512F_SCALAR_MODE (mode))) 19057 return true; 19058 19059 /* For AVX-5124FMAPS or AVX-5124VNNIW 19060 allow V64SF and V64SI modes for special regnos. */ 19061 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW) 19062 && (mode == V64SFmode || mode == V64SImode) 19063 && MOD4_SSE_REGNO_P (regno)) 19064 return true; 19065 19066 /* TODO check for QI/HI scalars. */ 19067 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ 19068 if (TARGET_AVX512VL 19069 && (mode == OImode 19070 || mode == TImode 19071 || VALID_AVX256_REG_MODE (mode) 19072 || VALID_AVX512VL_128_REG_MODE (mode))) 19073 return true; 19074 19075 /* xmm16-xmm31 are only available for AVX-512. */ 19076 if (EXT_REX_SSE_REGNO_P (regno)) 19077 return false; 19078 19079 /* OImode and AVX modes are available only when AVX is enabled. 
*/ 19080 return ((TARGET_AVX 19081 && VALID_AVX256_REG_OR_OI_MODE (mode)) 19082 || VALID_SSE_REG_MODE (mode) 19083 || VALID_SSE2_REG_MODE (mode) 19084 || VALID_MMX_REG_MODE (mode) 19085 || VALID_MMX_REG_MODE_3DNOW (mode)); 19086 } 19087 if (MMX_REGNO_P (regno)) 19088 { 19089 /* We implement the move patterns for 3DNOW modes even in MMX mode, 19090 so if the register is available at all, then we can move data of 19091 the given mode into or out of it. */ 19092 return (VALID_MMX_REG_MODE (mode) 19093 || VALID_MMX_REG_MODE_3DNOW (mode)); 19094 } 19095 19096 if (mode == QImode) 19097 { 19098 /* Take care for QImode values - they can be in non-QI regs, 19099 but then they do cause partial register stalls. */ 19100 if (ANY_QI_REGNO_P (regno)) 19101 return true; 19102 if (!TARGET_PARTIAL_REG_STALL) 19103 return true; 19104 /* LRA checks if the hard register is OK for the given mode. 19105 QImode values can live in non-QI regs, so we allow all 19106 registers here. */ 19107 if (lra_in_progress) 19108 return true; 19109 return !can_create_pseudo_p (); 19110 } 19111 /* We handle both integer and floats in the general purpose registers. */ 19112 else if (VALID_INT_MODE_P (mode)) 19113 return true; 19114 else if (VALID_FP_MODE_P (mode)) 19115 return true; 19116 else if (VALID_DFP_MODE_P (mode)) 19117 return true; 19118 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go 19119 on to use that value in smaller contexts, this can easily force a 19120 pseudo to be allocated to GENERAL_REGS. Since this is no worse than 19121 supporting DImode, allow it. */ 19122 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) 19123 return true; 19124 19125 return false; 19126} 19127 19128/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that 19129 saves SSE registers across calls is Win64 (thus no need to check the 19130 current ABI here), and with AVX enabled Win64 only guarantees that 19131 the low 16 bytes are saved. 
*/ 19132 19133static bool 19134ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno, 19135 machine_mode mode) 19136{ 19137 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; 19138} 19139 19140/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a 19141 tieable integer mode. */ 19142 19143static bool 19144ix86_tieable_integer_mode_p (machine_mode mode) 19145{ 19146 switch (mode) 19147 { 19148 case E_HImode: 19149 case E_SImode: 19150 return true; 19151 19152 case E_QImode: 19153 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 19154 19155 case E_DImode: 19156 return TARGET_64BIT; 19157 19158 default: 19159 return false; 19160 } 19161} 19162 19163/* Implement TARGET_MODES_TIEABLE_P. 19164 19165 Return true if MODE1 is accessible in a register that can hold MODE2 19166 without copying. That is, all register classes that can hold MODE2 19167 can also hold MODE1. */ 19168 19169static bool 19170ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) 19171{ 19172 if (mode1 == mode2) 19173 return true; 19174 19175 if (ix86_tieable_integer_mode_p (mode1) 19176 && ix86_tieable_integer_mode_p (mode2)) 19177 return true; 19178 19179 /* MODE2 being XFmode implies fp stack or general regs, which means we 19180 can tie any smaller floating point modes to it. Note that we do not 19181 tie this with TFmode. */ 19182 if (mode2 == XFmode) 19183 return mode1 == SFmode || mode1 == DFmode; 19184 19185 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 19186 that we can tie it with SFmode. */ 19187 if (mode2 == DFmode) 19188 return mode1 == SFmode; 19189 19190 /* If MODE2 is only appropriate for an SSE register, then tie with 19191 any other mode acceptable to SSE registers. 
*/ 19192 if (GET_MODE_SIZE (mode2) == 64 19193 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 19194 return (GET_MODE_SIZE (mode1) == 64 19195 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); 19196 if (GET_MODE_SIZE (mode2) == 32 19197 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 19198 return (GET_MODE_SIZE (mode1) == 32 19199 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); 19200 if (GET_MODE_SIZE (mode2) == 16 19201 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 19202 return (GET_MODE_SIZE (mode1) == 16 19203 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); 19204 19205 /* If MODE2 is appropriate for an MMX register, then tie 19206 with any other mode acceptable to MMX registers. */ 19207 if (GET_MODE_SIZE (mode2) == 8 19208 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 19209 return (GET_MODE_SIZE (mode1) == 8 19210 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); 19211 19212 return false; 19213} 19214 19215/* Return the cost of moving between two registers of mode MODE. 
*/ 19216 19217static int 19218ix86_set_reg_reg_cost (machine_mode mode) 19219{ 19220 unsigned int units = UNITS_PER_WORD; 19221 19222 switch (GET_MODE_CLASS (mode)) 19223 { 19224 default: 19225 break; 19226 19227 case MODE_CC: 19228 units = GET_MODE_SIZE (CCmode); 19229 break; 19230 19231 case MODE_FLOAT: 19232 if ((TARGET_SSE && mode == TFmode) 19233 || (TARGET_80387 && mode == XFmode) 19234 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) 19235 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) 19236 units = GET_MODE_SIZE (mode); 19237 break; 19238 19239 case MODE_COMPLEX_FLOAT: 19240 if ((TARGET_SSE && mode == TCmode) 19241 || (TARGET_80387 && mode == XCmode) 19242 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) 19243 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) 19244 units = GET_MODE_SIZE (mode); 19245 break; 19246 19247 case MODE_VECTOR_INT: 19248 case MODE_VECTOR_FLOAT: 19249 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) 19250 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) 19251 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) 19252 || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) 19253 || ((TARGET_MMX || TARGET_MMX_WITH_SSE) 19254 && VALID_MMX_REG_MODE (mode))) 19255 units = GET_MODE_SIZE (mode); 19256 } 19257 19258 /* Return the cost of moving between two registers of mode MODE, 19259 assuming that the move will be in pieces of at most UNITS bytes. */ 19260 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); 19261} 19262 19263/* Return cost of vector operation in MODE given that scalar version has 19264 COST. 
*/ 19265 19266static int 19267ix86_vec_cost (machine_mode mode, int cost) 19268{ 19269 if (!VECTOR_MODE_P (mode)) 19270 return cost; 19271 19272 if (GET_MODE_BITSIZE (mode) == 128 19273 && TARGET_SSE_SPLIT_REGS) 19274 return cost * 2; 19275 if (GET_MODE_BITSIZE (mode) > 128 19276 && TARGET_AVX256_SPLIT_REGS) 19277 return cost * GET_MODE_BITSIZE (mode) / 128; 19278 return cost; 19279} 19280 19281/* Return cost of multiplication in MODE. */ 19282 19283static int 19284ix86_multiplication_cost (const struct processor_costs *cost, 19285 enum machine_mode mode) 19286{ 19287 machine_mode inner_mode = mode; 19288 if (VECTOR_MODE_P (mode)) 19289 inner_mode = GET_MODE_INNER (mode); 19290 19291 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 19292 return inner_mode == DFmode ? cost->mulsd : cost->mulss; 19293 else if (X87_FLOAT_MODE_P (mode)) 19294 return cost->fmul; 19295 else if (FLOAT_MODE_P (mode)) 19296 return ix86_vec_cost (mode, 19297 inner_mode == DFmode ? cost->mulsd : cost->mulss); 19298 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 19299 { 19300 /* vpmullq is used in this case. No emulation is needed. */ 19301 if (TARGET_AVX512DQ) 19302 return ix86_vec_cost (mode, cost->mulss); 19303 19304 /* V*QImode is emulated with 7-13 insns. */ 19305 if (mode == V16QImode || mode == V32QImode) 19306 { 19307 int extra = 11; 19308 if (TARGET_XOP && mode == V16QImode) 19309 extra = 5; 19310 else if (TARGET_SSSE3) 19311 extra = 6; 19312 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra); 19313 } 19314 /* V*DImode is emulated with 5-8 insns. */ 19315 else if (mode == V2DImode || mode == V4DImode) 19316 { 19317 if (TARGET_XOP && mode == V2DImode) 19318 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3); 19319 else 19320 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5); 19321 } 19322 /* Without sse4.1, we don't have PMULLD; it's emulated with 7 19323 insns, including two PMULUDQ. 
*/
      else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
      else
	return ix86_vec_cost (mode, cost->mulss);
    }
  else
    /* Scalar integer multiply: base setup cost plus a per-set-bit
       charge; 7 is an arbitrary average number of significant bits.  */
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}

/* Return cost of division/modulus in MODE.  */

static int
ix86_division_cost (const struct processor_costs *cost,
		    enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return inner_mode == DFmode ? cost->divsd : cost->divss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fdiv;
  else if (FLOAT_MODE_P (mode))
    /* Vector FP divide: scale the scalar cost by the split factor.  */
    return ix86_vec_cost (mode,
			  inner_mode == DFmode ? cost->divsd : cost->divss);
  else
    return cost->divide[MODE_INDEX (mode)];
}

/* Size cost (used when optimizing for size): two units per byte.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a result of a subreg (i.e. the
   shift count is implicitly truncated).

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
			enum machine_mode mode, bool constant_op1,
			HOST_WIDE_INT op1_val,
			bool speed,
			bool and_in_op1,
			bool shift_and_truncate,
			bool *skip_op0, bool *skip_op1)
{
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* V*QImode is emulated with 1-11 insns.  */
      if (mode == V16QImode || mode == V32QImode)
	{
	  int count = 11;
	  if (TARGET_XOP && mode == V16QImode)
	    {
	      /* For XOP we use vpshab, which requires a broadcast of the
		 value to the variable shift insn.
For constants this
		 means a V16Q const in mem; even when we can perform the
		 shift with one insn set the cost to prefer paddb.  */
	      if (constant_op1)
		{
		  if (skip_op1)
		    *skip_op1 = true;
		  return ix86_vec_cost (mode,
					cost->sse_op
					+ (speed
					   ? 2
					   : COSTS_N_BYTES
					     (GET_MODE_UNIT_SIZE (mode))));
		}
	      count = 3;
	    }
	  else if (TARGET_SSSE3)
	    count = 7;
	  return ix86_vec_cost (mode, cost->sse_op * count);
	}
      else
	return ix86_vec_cost (mode, cost->sse_op);
    }
  /* Scalar shifts wider than a word need a multi-insn sequence.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
	{
	  if (op1_val > 32)
	    return cost->shift_const + COSTS_N_INSNS (2);
	  else
	    return cost->shift_const * 2;
	}
      else
	{
	  if (and_in_op1)
	    return cost->shift_var * 2;
	  else
	    return cost->shift_var * 6 + COSTS_N_INSNS (2);
	}
    }
  else
    {
      if (constant_op1)
	return cost->shift_const;
      else if (shift_and_truncate)
	{
	  if (skip_op0)
	    *skip_op0 = *skip_op1 = true;
	  /* Return the cost after shift-and truncation.  */
	  return cost->shift_var;
	}
      else
	return cost->shift_var;
    }
  /* Not reached: every case above returns.  */
  return cost->shift_const;
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
		int *total, bool speed)
{
  rtx mask;
  enum rtx_code code = GET_CODE (x);
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  /* Pick the tuning costs for speed, the size costs for -Os.  */
  const struct processor_costs *cost
    = speed ?
ix86_tune_cost : &ix86_size_cost;
  int src_cost;

  switch (code)
    {
    case SET:
      /* A plain register-to-register SET has a mode-dependent fixed
	 cost.  */
      if (register_operand (SET_DEST (x), VOIDmode)
	  && register_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}

      if (register_operand (SET_SRC (x), VOIDmode))
	/* Avoid potentially incorrect high cost from rtx_costs
	   for non-tieable SUBREGs.  */
	src_cost = 0;
      else
	{
	  src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);

	  if (CONSTANT_P (SET_SRC (x)))
	    /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
	       a small value, possibly zero for cheap constants.  */
	    src_cost += COSTS_N_INSNS (1);
	}

      *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
      return true;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      /* Immediates usable directly in an instruction are free.  */
      if (x86_64_immediate_operand (x, VOIDmode))
	*total = 0;
      else
	*total = 1;
      return true;

    case CONST_DOUBLE:
      if (IS_STACK_MODE (mode))
	switch (standard_80387_constant_p (x))
	  {
	  case -1:
	  case 0:
	    break;
	  case 1: /* 0.0 */
	    *total = 1;
	    return true;
	  default: /* Other constants */
	    *total = 2;
	    return true;
	  }
      /* FALLTHRU */

    case CONST_VECTOR:
      switch (standard_sse_constant_p (x, mode))
	{
	case 0:
	  break;
	case 1:  /* 0: xor eliminates false dependency */
	  *total = 0;
	  return true;
	default: /* -1: cmp contains false dependency */
	  *total = 1;
	  return true;
	}
      /* FALLTHRU */

    case CONST_WIDE_INT:
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (!TARGET_64BIT && flag_pic)
		+ (GET_MODE_SIZE (mode) <= 4
		   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      /* Shift left by one is just an add.  */
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      /* Shift by 2 or 3 can be an lea when that is cheaper.  */
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      bool skip_op0, skip_op1;
      *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
				       CONST_INT_P (XEXP (x, 1))
				       ? INTVAL (XEXP (x, 1)) : -1,
				       speed,
				       GET_CODE (XEXP (x, 1)) == AND,
				       SUBREG_P (XEXP (x, 1))
				       && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
				       &skip_op0, &skip_op1);
      if (skip_op0 || skip_op1)
	{
	  /* Charge only the operands the helper did not fold into its
	     own estimate.  */
	  if (!skip_op0)
	    *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
	  if (!skip_op1)
	    *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
	  return true;
	}
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

	*total = ix86_vec_cost (mode,
				GET_MODE_INNER (mode) == SFmode
				? cost->fmass : cost->fmasd);
	*total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, mode, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      /* Count the set bits of a constant multiplier.  */
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  /* The constant counts as extended when it fits the
		     narrow mode under the matching extension.  */
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, mode, outer_code, opno, speed)
		    + rtx_cost (op1, mode, outer_code, opno, speed));

	  return true;
	}
      *total = ix86_multiplication_cost (cost, mode);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = ix86_division_cost (cost, mode);
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  /* (plus (plus (mult X 2/4/8) Y) Z) matches a single lea.  */
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      /* (plus (mult X 2/4/8) Y) also matches an lea.  */
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), mode,
				      outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      /* Add with carry, ignore the cost of adding a carry flag.  */
	      if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
		*total = cost->add;
	      else
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				      outer_code, opno, speed);
		}

	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), mode,
				  outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
	  && GET_CODE (XEXP (x, 0)) == MINUS
	  && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
	{
	  *total = cost->add;
	  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      outer_code, opno, speed);
	  *total += rtx_cost (XEXP (x, 1), mode,
			      outer_code, opno, speed);
	  return true;
	}

      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  *total = cost->addss;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_vec_cost (mode, cost->addss);
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  /* Double-word logical op: two adds, with each non-DImode
	     operand charged twice (it is processed in two halves).  */
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  *total = cost->sse_op;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  *total = ix86_vec_cost (mode, cost->sse_op);
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	*total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
				opno, speed)
		    + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
	  return true;
	}

      if (GET_CODE (XEXP (x, 0)) == PLUS
	  && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1)))
	{
	  /* This is an overflow detection, count it as a normal compare.  */
	  *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
			     COMPARE, 0, speed);
	  return true;
	}

      /* The embedded comparison operand is completely free.  */
      if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
	  && XEXP (x, 1) == const0_rtx)
	*total = 0;

      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      else
	*total = ix86_vec_cost (mode, cost->addss);
      return false;

    case FLOAT_TRUNCATE:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = cost->fadd;
      else
	*total = ix86_vec_cost (mode, cost->addss);
      return false;

    case ABS:
      /* SSE requires memory load for the constant operand.  It may make
	 sense to account for this.  Of course the constant operand may or
	 may not be reused.  */
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	*total = cost->sse_op;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode, cost->sse_op);
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	*total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	*total = ix86_vec_cost (mode,
				mode == SFmode ? cost->sqrtss : cost->sqrtsd);
      return false;

    case UNSPEC:
      /* A thread-pointer reference is effectively free.  */
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->sse_op;
      return true;
    case VEC_MERGE:
      mask = XEXP (x, 2);
      /* This is masked instruction, assume the same cost,
	 as nonmasked variant.  */
      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
	*total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
      else
	*total = cost->sse_op;
      return true;

    default:
      return false;
    }
}

#if TARGET_MACHO

/* Counter used to generate unique labels in the emitted stubs.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.
*/
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Lazy-pointer label: "L<num>$lz".  */
  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B.  Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Extended REX SSE registers.  */
  for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Mask register.
*/
  for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

/* Use MS bitfield layout when -mms-bitfields is on (unless overridden
   by the gcc_struct attribute), or when the type itself carries the
   ms_struct attribute.  */

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  /* AGGR is true when the function returns an aggregate in memory;
     the hidden return pointer then shifts 'this' by one slot.  */
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ?
DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  /* thiscall with aggregate return: 'this' is on the stack
	     above the hidden return-pointer slot.  */
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  /* 'this' lives on the stack: skip the return address, and also the
     hidden aggregate-return pointer when present.  */
  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.
*/

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;

  /* Pick a scratch register the callee's calling convention does not
     use for argument passing.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      /* DELTA is not directly usable as an operand; load it
		 into the scratch register first.  */
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into TMP.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  /* Non-local target under PIC: call through the GOT.  */
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit PIC: materialize the GOT base in %ecx and call
	     through the GOT slot.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  /* Force the target address into the scratch register.  */
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);
}

/* Emit the per-file assembler prologue directives.  */

static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}

/* Return the alignment to use for a field of TYPE, where COMPUTED is
   the alignment computed so far.  */

int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  mode = TYPE_MODE (strip_array_types (type));
  if
(mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount || !strcmp (target, "nop"))
    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}

/* If the current function has a fentry_name attribute, store its
   string value in *NAME and return true.  */

static bool
current_fentry_name (const char **name)
{
  tree attr = lookup_attribute ("fentry_name",
				DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}

/* If the current function has a fentry_section attribute, store its
   string value in *NAME and return true.  */

static bool
current_fentry_section (const char **name)
{
  tree attr = lookup_attribute ("fentry_section",
				DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* Emit the ENDBR that was deferred from the function entrance.  */
  if (cfun->machine->endbr_queued_at_entrance)
    fprintf (file, "\t%s\n", TARGET_64BIT ?
"endbr64" : "endbr32"); 20404 20405 const char *mcount_name = MCOUNT_NAME; 20406 20407 if (current_fentry_name (&mcount_name)) 20408 ; 20409 else if (fentry_name) 20410 mcount_name = fentry_name; 20411 else if (flag_fentry) 20412 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; 20413 20414 if (TARGET_64BIT) 20415 { 20416#ifndef NO_PROFILE_COUNTERS 20417 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); 20418#endif 20419 20420 if (!TARGET_PECOFF && flag_pic) 20421 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); 20422 else 20423 x86_print_call_or_nop (file, mcount_name); 20424 } 20425 else if (flag_pic) 20426 { 20427#ifndef NO_PROFILE_COUNTERS 20428 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", 20429 LPREFIX, labelno); 20430#endif 20431 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); 20432 } 20433 else 20434 { 20435#ifndef NO_PROFILE_COUNTERS 20436 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", 20437 LPREFIX, labelno); 20438#endif 20439 x86_print_call_or_nop (file, mcount_name); 20440 } 20441 20442 if (flag_record_mcount 20443 || lookup_attribute ("fentry_section", 20444 DECL_ATTRIBUTES (current_function_decl))) 20445 { 20446 const char *sname = "__mcount_loc"; 20447 20448 if (current_fentry_section (&sname)) 20449 ; 20450 else if (fentry_section) 20451 sname = fentry_section; 20452 20453 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname); 20454 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); 20455 fprintf (file, "\t.previous\n"); 20456 } 20457} 20458 20459/* We don't have exact information about the insn sizes, but we may assume 20460 quite safely that we are informed about all 1 byte insns and memory 20461 address sizes. This is enough to eliminate unnecessary padding in 20462 99% of cases. 
*/

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  /* Estimate inline asm conservatively as 0 bytes; see the
	     header comment about asm statements.  */
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* NOTE(review): lower-bound estimate — one byte plus the known address
     length, or two bytes when no address length is known.  */
  if (l)
    return 1+l;
  else
    return 2;
}

#ifdef ASM_OUTPUT_MAX_SKIP_PAD

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  /* True when the insn most recently removed from the front of the
     window was itself a jump or call.  */
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* Shrink the window from the front until the potential
		 skip cannot make START and INSN share a page.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif

/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.
*/
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Walk back to the nearest active insn or label before RET.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* RET is preceded by a label: replace if any non-fallthru
	     edge of nonzero frequency reaches it (i.e. RET can be a
	     jump target).  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Swap the plain RET for the longer-encoded return form,
	     which avoids the penalty described above.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}

/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx_insn *insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}


/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;		/* 4 acts as the saturation cap.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}

/* Pad short function to 4 instructions.
*/

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}

/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}

/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}

/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;
  extract_insn_cached (insn);
  /* NOTE(review): the operand's mode is not checked here; this appears to
     rely on callers querying only insns with QImode register operands —
     confirm against the insn patterns that use this predicate.  */
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  /* Accept either a full insn or a bare pattern/expression.  */
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn,
		   NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x)
	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
	return true;
    }
  return false;
}

/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}

/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.
*/

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* Nonnegative inputs convert directly via the signed path.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: halve the value, keeping the low bit OR'ed in so the
     final doubling rounds correctly, convert, then double.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}

/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (scalar_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    /* TFmode is always supported on this target.  */
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.
*/
static bool
ix86_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
    return true;
  if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  Map the literal suffixes 'q' and
   'w' to TFmode and XFmode respectively; anything else is unsupported.  */
static machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
		    vec<const char *> &constraints,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  bool saw_asm_flag = false;

  start_sequence ();
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (strncmp (con, "=@cc", 4) != 0)
	continue;
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error ("alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      /* A leading 'n' inverts the requested condition.  */
      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      /* Decode the condition suffix into a CC mode and comparison.  */
      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error ("unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error ("invalid type for %<asm%> flag output");
	  continue;
	}

      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  /* Materialize the flag in QImode, then widen (zero-extend)
	     to the user's output mode.  */
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}

/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, machine_mode mode)
{
  return (mode == CCFPmode
	  ? reverse_condition_maybe_unordered (code)
	  : reverse_condition (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].
*/

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Register-to-register: pop the source if this is its death.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case E_TFmode:
      /* __float128 is "g".  */
      return "g";
    case E_XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* Cached VAR_DECL for the symbol-named TLS stack guard; GC-rooted so the
   decl survives across functions.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Return a tree expression for the stack-protector canary location:
   a volatile access at the configured TLS guard slot (or a named guard
   symbol) when TARGET_SSP_TLS_GUARD, otherwise the default global
   guard.  */
static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
	{
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
  if (flag_pic || TARGET_PECOFF)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      /* Global symbols go through an indirection so the dynamic
	 relocation lands outside read-only EH data.  */
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }

  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;

  return DW_EH_PE_absptr;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype, int)
{
  bool fp = false;
  machine_mode mode = TImode;
  int index;
  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
      case scalar_stmt:
	return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
	/* load/store costs are relative to register move which is 2.  Recompute
	   it to COSTS_N_INSNS so everything have same base.  */
	return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			      : ix86_cost->int_load [2]) / 2;

      case scalar_store:
	return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			      : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
	return ix86_vec_cost (mode,
			      fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
      case unaligned_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      case vector_gather_load:
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->gather_static
				  + ix86_cost->gather_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case vector_scatter_store:
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->scatter_static
				  + ix86_cost->scatter_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
	{
	  /* N element inserts into SSE vectors.  */
	  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
	  if (GET_MODE_BITSIZE (mode) == 256)
	    cost += ix86_vec_cost (mode, ix86_cost->addss);
	  /* One vinserti64x4 and two vinserti128 for combining SSE
	     and AVX256 vectors to AVX512.  */
	  else if (GET_MODE_BITSIZE (mode) == 512)
	    cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
	  return cost;
	}

      default:
	gcc_unreachable ();
    }
}


/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.
*/ 21491 21492static tree 21493ix86_fn_abi_va_list (tree fndecl) 21494{ 21495 if (!TARGET_64BIT) 21496 return va_list_type_node; 21497 gcc_assert (fndecl != NULL_TREE); 21498 21499 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI) 21500 return ms_va_list_type_node; 21501 else 21502 return sysv_va_list_type_node; 21503} 21504 21505/* Returns the canonical va_list type specified by TYPE. If there 21506 is no valid TYPE provided, it return NULL_TREE. */ 21507 21508static tree 21509ix86_canonical_va_list_type (tree type) 21510{ 21511 if (TARGET_64BIT) 21512 { 21513 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type))) 21514 return ms_va_list_type_node; 21515 21516 if ((TREE_CODE (type) == ARRAY_TYPE 21517 && integer_zerop (array_type_nelts (type))) 21518 || POINTER_TYPE_P (type)) 21519 { 21520 tree elem_type = TREE_TYPE (type); 21521 if (TREE_CODE (elem_type) == RECORD_TYPE 21522 && lookup_attribute ("sysv_abi va_list", 21523 TYPE_ATTRIBUTES (elem_type))) 21524 return sysv_va_list_type_node; 21525 } 21526 21527 return NULL_TREE; 21528 } 21529 21530 return std_canonical_va_list_type (type); 21531} 21532 21533/* Iterate through the target-specific builtin types for va_list. 21534 IDX denotes the iterator, *PTREE is set to the result type of 21535 the va_list builtin, and *PNAME to its internal type. 21536 Returns zero if there is no element for this index, otherwise 21537 IDX should be increased upon the next call. 21538 Note, do not iterate a base builtin's name like __builtin_va_list. 21539 Used from c_common_nodes_and_builtins. 
 */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  /* Only the 64-bit ABIs expose the extra __builtin_*_va_list names.  */
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}

#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook


/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      /* A width of 1 means no parallelism; nothing more to compute.  */
      if (width == 1)
	return 1;

      /* Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
	   || ix86_tune == PROCESSOR_ZNVER3)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	return 1;

      /* Account for targets that splits wide vectors into multiple parts.  */
      if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      /* Round the effective width up when dividing by the split factor.  */
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}

/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  /* For each scalar element mode pick the widest vector mode the
     enabled ISA supports, honoring the prefer-128/256 tuning flags.  */
  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  /* The push order encodes preference: earlier entries are tried
     first by the vectorizer.  With ALL set, less-preferred widths
     are still offered, just later in the list.  */
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V16QImode);

  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  return 0;
}

/* Implemenation of targetm.vectorize.get_mask_mode.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
    {
      /* AVX512 k-registers: one mask bit per element.  Byte/word
	 element masking additionally requires AVX512BW.  */
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (nunits);
    }

  /* Otherwise fall back to a vector-of-integers mask with the same
     layout as the data vector.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}



/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* NOTE: the leading "0 &&" deliberately disables this hook; see
     the PRs cited above before re-enabling.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}

/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   but returns a lower bound.
*/ 21781 21782static unsigned int 21783ix86_max_noce_ifcvt_seq_cost (edge e) 21784{ 21785 bool predictable_p = predictable_edge_p (e); 21786 if (predictable_p) 21787 { 21788 if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost) 21789 return param_max_rtl_if_conversion_predictable_cost; 21790 } 21791 else 21792 { 21793 if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost) 21794 return param_max_rtl_if_conversion_unpredictable_cost; 21795 } 21796 21797 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2); 21798} 21799 21800/* Return true if SEQ is a good candidate as a replacement for the 21801 if-convertible sequence described in IF_INFO. */ 21802 21803static bool 21804ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) 21805{ 21806 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p) 21807 { 21808 int cmov_cnt = 0; 21809 /* Punt if SEQ contains more than one CMOV or FCMOV instruction. 21810 Maybe we should allow even more conditional moves as long as they 21811 are used far enough not to stall the CPU, or also consider 21812 IF_INFO->TEST_BB succ edge probabilities. */ 21813 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) 21814 { 21815 rtx set = single_set (insn); 21816 if (!set) 21817 continue; 21818 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE) 21819 continue; 21820 rtx src = SET_SRC (set); 21821 machine_mode mode = GET_MODE (src); 21822 if (GET_MODE_CLASS (mode) != MODE_INT 21823 && GET_MODE_CLASS (mode) != MODE_FLOAT) 21824 continue; 21825 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1))) 21826 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2)))) 21827 continue; 21828 /* insn is CMOV or FCMOV. */ 21829 if (++cmov_cnt > 1) 21830 return false; 21831 } 21832 } 21833 return default_noce_conversion_profitable_p (seq, if_info); 21834} 21835 21836/* Implement targetm.vectorize.init_cost. 
 */

static void *
ix86_init_cost (class loop *)
{
  /* Three accumulators: prologue, body and epilogue cost.  Freed by
     ix86_destroy_cost_data.  */
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		    class _stmt_vec_info *stmt_info, int misalign,
		    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;
  bool scalar_p
    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);

  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
  /* -1 means "no specific cost computed yet"; resolved via the generic
     ix86_builtin_vectorization_cost below if nothing more precise is
     found.  */
  int stmt_cost = - 1;

  bool fp = false;
  /* Fallback modes when no vectype is available.  */
  machine_mode mode = scalar_p ? SImode : TImode;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
      if (scalar_p)
	mode = TYPE_MODE (TREE_TYPE (vectype));
    }

  /* For plain assignments, derive a cost from the RHS operation.  */
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
      /*machine_mode inner_mode = mode;
	if (VECTOR_MODE_P (mode))
	  inner_mode = GET_MODE_INNER (mode);*/

      switch (subcode)
	{
	case PLUS_EXPR:
	case POINTER_PLUS_EXPR:
	case MINUS_EXPR:
	  if (kind == scalar_stmt)
	    {
	      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
		stmt_cost = ix86_cost->addss;
	      else if (X87_FLOAT_MODE_P (mode))
		stmt_cost = ix86_cost->fadd;
	      else
		stmt_cost = ix86_cost->add;
	    }
	  else
	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
				       : ix86_cost->sse_op);
	  break;

	case MULT_EXPR:
	case WIDEN_MULT_EXPR:
	case MULT_HIGHPART_EXPR:
	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
	  break;
	case NEGATE_EXPR:
	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	    stmt_cost = ix86_cost->sse_op;
	  else if (X87_FLOAT_MODE_P (mode))
	    stmt_cost = ix86_cost->fchs;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case RDIV_EXPR:
	case ROUND_MOD_EXPR:
	case EXACT_DIV_EXPR:
	  stmt_cost = ix86_division_cost (ix86_cost, mode);
	  break;

	case RSHIFT_EXPR:
	case LSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  {
	    /* Constant shift counts are cheaper; pass the value when it
	       fits a HOST_WIDE_INT, otherwise -1.  */
	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
	    stmt_cost = ix86_shift_rotate_cost
			   (ix86_cost, mode,
			    TREE_CODE (op2) == INTEGER_CST,
			    cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
			    true, false, false, NULL, NULL);
	  }
	  break;
	case NOP_EXPR:
	  /* Only sign-conversions are free.  */
	  if (tree_nop_conversion_p
		(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
	    stmt_cost = 0;
	  break;

	case BIT_IOR_EXPR:
	case ABS_EXPR:
	case ABSU_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	case BIT_NOT_EXPR:
	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	    stmt_cost = ix86_cost->sse_op;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	default:
	  break;
	}
    }

  /* Internal-function calls: currently only FMA has a dedicated cost.  */
  combined_fn cfn;
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt
      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
    switch (cfn)
      {
      case CFN_FMA:
	stmt_cost = ix86_vec_cost (mode,
				   mode == SFmode ? ix86_cost->fmass
				   : ix86_cost->fmasd);
	break;
      default:
	break;
      }

  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && stmt_info
      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
      && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
    {
      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
    }
  if (stmt_cost == -1)
    stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_BONNELL && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
    count *= 50;  /* FIXME.  */

  retval = (unsigned) (count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instruction per tick, but has in order SIMD pipeline.  */
  if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
       || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  cost[where] += retval;

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
		  unsigned *body_cost, unsigned *epilogue_cost)
{
  /* Report the three accumulators filled in by ix86_add_stmt_cost.  */
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost    = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  /* Releases the array allocated by ix86_init_cost.  */
  free (data);
}

/* Validate target specific memory model bits in VAL.
*/ 22052 22053static unsigned HOST_WIDE_INT 22054ix86_memmodel_check (unsigned HOST_WIDE_INT val) 22055{ 22056 enum memmodel model = memmodel_from_int (val); 22057 bool strong; 22058 22059 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE 22060 |MEMMODEL_MASK) 22061 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE))) 22062 { 22063 warning (OPT_Winvalid_memory_model, 22064 "unknown architecture specific memory model"); 22065 return MEMMODEL_SEQ_CST; 22066 } 22067 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model)); 22068 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) 22069 { 22070 warning (OPT_Winvalid_memory_model, 22071 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger " 22072 "memory model"); 22073 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; 22074 } 22075 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) 22076 { 22077 warning (OPT_Winvalid_memory_model, 22078 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger " 22079 "memory model"); 22080 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; 22081 } 22082 return val; 22083} 22084 22085/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int, 22086 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also 22087 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted, 22088 or number of vecsize_mangle variants that should be emitted. 
 */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num)
{
  int ret = 1;

  /* An explicit simdlen must be a power of two in [2, 1024].  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		  "unsupported simdlen %d", clonei->simdlen);
      return 0;
    }

  /* Only scalar return types with these machine modes are supported;
     aggregates that happen to have one of these modes are rejected.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	/* FALLTHRU */
      default:
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported return type %qT for simd", ret_type);
	return 0;
      }

  tree t;
  int i;
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  /* Walk DECL_ARGUMENTS for definitions (or unprototyped decls),
     otherwise the prototype's TYPE_ARG_TYPES list.  */
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  /* FALLTHRU */
	default:
	  /* Uniform arguments are not passed in vectors, so any type
	     is acceptable for them.  */
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported argument type %qT for simd", arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl))
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
      if (TARGET_AVX512F)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported functions get all four ISA variants; NUM selects
	 which one this invocation describes.  */
      clonei->vecsize_mangle = "bcde"[num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Vector widths per mangling letter: 'b' = SSE2, 'c' = AVX,
     'd' = AVX2, 'e' = AVX512F.  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      /* AVX512 clones take a mask; QImode elements need a wider mask
	 mode than the other element sizes.  */
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* Default simdlen = vector width / characteristic type width.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (TREE_CODE (ret_type) == VOID_TYPE)
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported simdlen %d", clonei->simdlen);
	  return 0;
	}
    }
  return ret;
}

/* If SIMD clone NODE can't be used in a vectorized loop
   in current function, return -1, otherwise return a badness of using it
   (0 if it is most desirable from vecsize_mangle point of view, 1
   slightly less desirable, etc.).  */

static int
ix86_simd_clone_usable (struct cgraph_node *node)
{
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      /* SSE2 clone: usable whenever SSE2 is enabled, but less
	 desirable the wider the actually available ISA is.  */
      if (!TARGET_SSE2)
	return -1;
      if (!TARGET_AVX)
	return 0;
      return TARGET_AVX2 ? 2 : 1;
    case 'c':
      if (!TARGET_AVX)
	return -1;
      return TARGET_AVX2 ? 1 : 0;
    case 'd':
      if (!TARGET_AVX2)
	return -1;
      return 0;
    case 'e':
      if (!TARGET_AVX512F)
	return -1;
      return 0;
    default:
      gcc_unreachable ();
    }
}

/* This function adjusts the unroll factor based on
   the hardware capabilities.  For ex, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important.  This function decides the
   unroll factor using number of memory references
   (value 32 is used) as a heuristic.  */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (!TARGET_ADJUST_UNROLL)
     return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures. */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	  if (const_rtx x = *iter)
	    if (MEM_P (x))
	      {
		/* Accesses wider than four words count double.  */
		machine_mode mode = GET_MODE (x);
		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
		if (n_words > 4)
		  mem_count += 2;
		else
		  mem_count += 1;
	      }
  free (bbs);

  /* Cap the unroll factor so the unrolled body stays near 32 memory
     references; loops with more than 32 are left alone.  */
  if (mem_count && mem_count <=32)
    return MIN (nunroll, 32 / mem_count);

  return nunroll;
}


/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.
 */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do when neither x87 nor SSE math is enabled.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the raised-exception bits gathered in *UPDATE.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* x87 part: save the 7-word FPU environment with fnstenv and
	 clear exceptions with fnclex for *HOLD; read the status word
	 with fnstsw and reload the saved environment for *UPDATE.  */
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
			 NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
		      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
			    fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
				exceptions_var, exceptions_x87,
				NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
			sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      /* SSE part: save MXCSR for *HOLD, then install a modified copy
	 with the low status bits masked off (& 0xffffffc0) and the
	 exception-mask bits set (| 0x1f80).  */
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
				      mxcsr_orig_var, stmxcsr_hold_call,
				      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
				  mxcsr_orig_var,
				  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
			     build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
				     mxcsr_mod_var, hold_mod_val,
				     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
			      hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
			 ldmxcsr_hold_call);
      /* Chain onto any x87 sequence already built above.  */
      if (*hold)
	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
	*hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
			 ldmxcsr_clear_call);
      else
	*clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
					  stxmcsr_update_call);
      if (*update)
	{
	  /* Merge the SSE exception bits with those gathered from the
	     x87 status word.  */
	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
					exceptions_var, exceptions_sse);
	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
					   exceptions_var, exceptions_mod);
	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
			    exceptions_assign);
	}
      else
	*update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
			  exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
			ldmxcsr_update_call);
    }
  /* Finally re-raise the collected exceptions.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
						    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
		    atomic_feraiseexcept_call);
}

#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, common symbol is local only for non-PIE binaries.  For
   x86-64, common symbol is local only for non-PIE binaries or linker
   supports copy reloc in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
				  (!flag_pic
				   || (TARGET_64BIT
				       && HAVE_LD_PIE_COPYRELOC != 0)));
}
#endif

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false.
*/

/* Decompose the address of MEM into a [base + offset] form.  On success
   store the base rtx (a REG or SYMBOL_REF) in *BASE and the constant
   offset in *OFFSET and return true; otherwise return false.  A plain
   REG or SYMBOL_REF address is treated as having offset 0.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip a wrapping CONST so that (symbol_ref + const_int) forms
     are visible below.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && (REG_P (XEXP (addr, 0))
	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  return false;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.

   OPERANDS is laid out as reg/mem pairs: for a load, operands[0]/[2]
   are the destination registers and operands[1]/[3] the source memories;
   for a store the roles are swapped.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
				    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  /* Both instructions must use the same register number.  */
  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  Decide whether the
   named operation OP may be expanded inline for MODE1, depending on
   whether we are optimizing for speed or size (OPT_TYPE).  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
			optimization_type opt_type)
{
  switch (op)
    {
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
      /* Inline expansions of these are only used when optimizing
	 for speed.  */
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      /* With SSE math, no trapping math, and no SSE4.1 (i.e. no
	 ROUNDSS/ROUNDSD), restrict the expansion to speed
	 optimization; in all other configurations allow it.  */
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && !TARGET_SSE4_1)
	return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      /* With SSE4.1 these map to single ROUND* instructions and are
	 always worthwhile; otherwise only when optimizing for speed.  */
      if (SSE_FLOAT_MODE_P (mode1)
	  && TARGET_SSE_MATH
	  && !flag_trapping_math
	  && TARGET_SSE4_1)
	return true;
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();

    default:
      return true;
    }
}

/* Address space support.

   This is not "far pointers" in the 16-bit sense, but an easy way
   to use %fs and %gs segment prefixes.  Therefore:

    (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
    (c) While %fs and %gs are technically subsets of the generic
	address space, they are probably not subsets of each other.
    (d) Since we have no access to the segment base register values
	without resorting to a system call, we cannot convert a
	non-default address space to a default address space.
	Therefore we do not claim %fs or %gs are subsets of generic.

   Therefore we can (mostly) use the default hooks.  */

/* All use of segmentation is assumed to make address 0 valid.  */

static bool
ix86_addr_space_zero_address_valid (addr_space_t as)
{
  return as != ADDR_SPACE_GENERIC;
}

/* Implement TARGET_INIT_LIBFUNCS.  Register the combined signed and
   unsigned divmod libcalls for the double-word integer mode (TImode
   for 64-bit, DImode for 32-bit).  */

static void
ix86_init_libfuncs (void)
{
  if (TARGET_64BIT)
    {
      set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
      set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
    }
  else
    {
      set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
      set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
    }

#if TARGET_MACHO
  darwin_rename_builtins ();
#endif
}

/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
	 in a standards compliant mode, and the implicit precision we
	 provide would be identical were it not for the unpredictable
	 cases.  */
      if (!TARGET_80387)
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
	{
	  if (!(TARGET_SSE && TARGET_SSE_MATH))
	    return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	  else if (TARGET_SSE2)
	    return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	}

      /* If we are in standards compliant mode, but we know we will
	 calculate in unpredictable precision, return
	 FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	 excess precision if the target can't guarantee it will honor
	 it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
	      ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
	      : FLT_EVAL_METHOD_UNPREDICTABLE);
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}

/* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   decrements by exactly 2 no matter what the position was, there is no pushb.

   But as CIE data alignment factor on this arch is -4 for 32bit targets
   and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  return ROUND_UP (bytes, UNITS_PER_WORD);
}

/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.
*/

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.
*/

static void
ix86_run_selftests (void)
{
  /* Dumping tests.  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
22978#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal 22979 22980#undef TARGET_ASM_FUNCTION_EPILOGUE 22981#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 22982 22983#undef TARGET_ENCODE_SECTION_INFO 22984#ifndef SUBTARGET_ENCODE_SECTION_INFO 22985#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 22986#else 22987#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 22988#endif 22989 22990#undef TARGET_ASM_OPEN_PAREN 22991#define TARGET_ASM_OPEN_PAREN "" 22992#undef TARGET_ASM_CLOSE_PAREN 22993#define TARGET_ASM_CLOSE_PAREN "" 22994 22995#undef TARGET_ASM_BYTE_OP 22996#define TARGET_ASM_BYTE_OP ASM_BYTE 22997 22998#undef TARGET_ASM_ALIGNED_HI_OP 22999#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 23000#undef TARGET_ASM_ALIGNED_SI_OP 23001#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 23002#ifdef ASM_QUAD 23003#undef TARGET_ASM_ALIGNED_DI_OP 23004#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 23005#endif 23006 23007#undef TARGET_PROFILE_BEFORE_PROLOGUE 23008#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue 23009 23010#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME 23011#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name 23012 23013#undef TARGET_ASM_UNALIGNED_HI_OP 23014#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 23015#undef TARGET_ASM_UNALIGNED_SI_OP 23016#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 23017#undef TARGET_ASM_UNALIGNED_DI_OP 23018#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 23019 23020#undef TARGET_PRINT_OPERAND 23021#define TARGET_PRINT_OPERAND ix86_print_operand 23022#undef TARGET_PRINT_OPERAND_ADDRESS 23023#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address 23024#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 23025#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p 23026#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA 23027#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra 23028 23029#undef 
TARGET_SCHED_INIT_GLOBAL 23030#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global 23031#undef TARGET_SCHED_ADJUST_COST 23032#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 23033#undef TARGET_SCHED_ISSUE_RATE 23034#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 23035#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 23036#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 23037 ia32_multipass_dfa_lookahead 23038#undef TARGET_SCHED_MACRO_FUSION_P 23039#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p 23040#undef TARGET_SCHED_MACRO_FUSION_PAIR_P 23041#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p 23042 23043#undef TARGET_FUNCTION_OK_FOR_SIBCALL 23044#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 23045 23046#undef TARGET_MEMMODEL_CHECK 23047#define TARGET_MEMMODEL_CHECK ix86_memmodel_check 23048 23049#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 23050#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv 23051 23052#ifdef HAVE_AS_TLS 23053#undef TARGET_HAVE_TLS 23054#define TARGET_HAVE_TLS true 23055#endif 23056#undef TARGET_CANNOT_FORCE_CONST_MEM 23057#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 23058#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 23059#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true 23060 23061#undef TARGET_DELEGITIMIZE_ADDRESS 23062#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 23063 23064#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P 23065#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p 23066 23067#undef TARGET_MS_BITFIELD_LAYOUT_P 23068#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 23069 23070#if TARGET_MACHO 23071#undef TARGET_BINDS_LOCAL_P 23072#define TARGET_BINDS_LOCAL_P darwin_binds_local_p 23073#else 23074#undef TARGET_BINDS_LOCAL_P 23075#define TARGET_BINDS_LOCAL_P ix86_binds_local_p 23076#endif 23077#if TARGET_DLLIMPORT_DECL_ATTRIBUTES 23078#undef TARGET_BINDS_LOCAL_P 23079#define 
TARGET_BINDS_LOCAL_P i386_pe_binds_local_p 23080#endif 23081 23082#undef TARGET_ASM_OUTPUT_MI_THUNK 23083#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 23084#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 23085#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 23086 23087#undef TARGET_ASM_FILE_START 23088#define TARGET_ASM_FILE_START x86_file_start 23089 23090#undef TARGET_OPTION_OVERRIDE 23091#define TARGET_OPTION_OVERRIDE ix86_option_override 23092 23093#undef TARGET_REGISTER_MOVE_COST 23094#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost 23095#undef TARGET_MEMORY_MOVE_COST 23096#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost 23097#undef TARGET_RTX_COSTS 23098#define TARGET_RTX_COSTS ix86_rtx_costs 23099#undef TARGET_ADDRESS_COST 23100#define TARGET_ADDRESS_COST ix86_address_cost 23101 23102#undef TARGET_FLAGS_REGNUM 23103#define TARGET_FLAGS_REGNUM FLAGS_REG 23104#undef TARGET_FIXED_CONDITION_CODE_REGS 23105#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 23106#undef TARGET_CC_MODES_COMPATIBLE 23107#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 23108 23109#undef TARGET_MACHINE_DEPENDENT_REORG 23110#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 23111 23112#undef TARGET_BUILD_BUILTIN_VA_LIST 23113#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 23114 23115#undef TARGET_FOLD_BUILTIN 23116#define TARGET_FOLD_BUILTIN ix86_fold_builtin 23117 23118#undef TARGET_GIMPLE_FOLD_BUILTIN 23119#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin 23120 23121#undef TARGET_COMPARE_VERSION_PRIORITY 23122#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority 23123 23124#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY 23125#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \ 23126 ix86_generate_version_dispatcher_body 23127 23128#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER 23129#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \ 23130 ix86_get_function_versions_dispatcher 23131 
23132#undef TARGET_ENUM_VA_LIST_P 23133#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list 23134 23135#undef TARGET_FN_ABI_VA_LIST 23136#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list 23137 23138#undef TARGET_CANONICAL_VA_LIST_TYPE 23139#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type 23140 23141#undef TARGET_EXPAND_BUILTIN_VA_START 23142#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start 23143 23144#undef TARGET_MD_ASM_ADJUST 23145#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust 23146 23147#undef TARGET_C_EXCESS_PRECISION 23148#define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision 23149#undef TARGET_PROMOTE_PROTOTYPES 23150#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true 23151#undef TARGET_SETUP_INCOMING_VARARGS 23152#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 23153#undef TARGET_MUST_PASS_IN_STACK 23154#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 23155#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS 23156#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args 23157#undef TARGET_FUNCTION_ARG_ADVANCE 23158#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance 23159#undef TARGET_FUNCTION_ARG 23160#define TARGET_FUNCTION_ARG ix86_function_arg 23161#undef TARGET_INIT_PIC_REG 23162#define TARGET_INIT_PIC_REG ix86_init_pic_reg 23163#undef TARGET_USE_PSEUDO_PIC_REG 23164#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg 23165#undef TARGET_FUNCTION_ARG_BOUNDARY 23166#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary 23167#undef TARGET_PASS_BY_REFERENCE 23168#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 23169#undef TARGET_INTERNAL_ARG_POINTER 23170#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 23171#undef TARGET_UPDATE_STACK_BOUNDARY 23172#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary 23173#undef TARGET_GET_DRAP_RTX 23174#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx 23175#undef TARGET_STRICT_ARGUMENT_NAMING 23176#define 
TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 23177#undef TARGET_STATIC_CHAIN 23178#define TARGET_STATIC_CHAIN ix86_static_chain 23179#undef TARGET_TRAMPOLINE_INIT 23180#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init 23181#undef TARGET_RETURN_POPS_ARGS 23182#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args 23183 23184#undef TARGET_WARN_FUNC_RETURN 23185#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return 23186 23187#undef TARGET_LEGITIMATE_COMBINED_INSN 23188#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn 23189 23190#undef TARGET_ASAN_SHADOW_OFFSET 23191#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset 23192 23193#undef TARGET_GIMPLIFY_VA_ARG_EXPR 23194#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 23195 23196#undef TARGET_SCALAR_MODE_SUPPORTED_P 23197#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 23198 23199#undef TARGET_VECTOR_MODE_SUPPORTED_P 23200#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 23201 23202#undef TARGET_C_MODE_FOR_SUFFIX 23203#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix 23204 23205#ifdef HAVE_AS_TLS 23206#undef TARGET_ASM_OUTPUT_DWARF_DTPREL 23207#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 23208#endif 23209 23210#ifdef SUBTARGET_INSERT_ATTRIBUTES 23211#undef TARGET_INSERT_ATTRIBUTES 23212#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 23213#endif 23214 23215#undef TARGET_MANGLE_TYPE 23216#define TARGET_MANGLE_TYPE ix86_mangle_type 23217 23218#undef TARGET_STACK_PROTECT_GUARD 23219#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard 23220 23221#if !TARGET_MACHO 23222#undef TARGET_STACK_PROTECT_FAIL 23223#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 23224#endif 23225 23226#undef TARGET_FUNCTION_VALUE 23227#define TARGET_FUNCTION_VALUE ix86_function_value 23228 23229#undef TARGET_FUNCTION_VALUE_REGNO_P 23230#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p 23231 
23232#undef TARGET_PROMOTE_FUNCTION_MODE 23233#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode 23234 23235#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE 23236#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change 23237 23238#undef TARGET_MEMBER_TYPE_FORCES_BLK 23239#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk 23240 23241#undef TARGET_INSTANTIATE_DECLS 23242#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls 23243 23244#undef TARGET_SECONDARY_RELOAD 23245#define TARGET_SECONDARY_RELOAD ix86_secondary_reload 23246#undef TARGET_SECONDARY_MEMORY_NEEDED 23247#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed 23248#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE 23249#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode 23250 23251#undef TARGET_CLASS_MAX_NREGS 23252#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs 23253 23254#undef TARGET_PREFERRED_RELOAD_CLASS 23255#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class 23256#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS 23257#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class 23258#undef TARGET_CLASS_LIKELY_SPILLED_P 23259#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p 23260 23261#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 23262#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ 23263 ix86_builtin_vectorization_cost 23264#undef TARGET_VECTORIZE_VEC_PERM_CONST 23265#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const 23266#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 23267#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ 23268 ix86_preferred_simd_mode 23269#undef TARGET_VECTORIZE_SPLIT_REDUCTION 23270#define TARGET_VECTORIZE_SPLIT_REDUCTION \ 23271 ix86_split_reduction 23272#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES 23273#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ 23274 ix86_autovectorize_vector_modes 23275#undef 
TARGET_VECTORIZE_GET_MASK_MODE 23276#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode 23277#undef TARGET_VECTORIZE_INIT_COST 23278#define TARGET_VECTORIZE_INIT_COST ix86_init_cost 23279#undef TARGET_VECTORIZE_ADD_STMT_COST 23280#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost 23281#undef TARGET_VECTORIZE_FINISH_COST 23282#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost 23283#undef TARGET_VECTORIZE_DESTROY_COST_DATA 23284#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data 23285 23286#undef TARGET_SET_CURRENT_FUNCTION 23287#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function 23288 23289#undef TARGET_OPTION_VALID_ATTRIBUTE_P 23290#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p 23291 23292#undef TARGET_OPTION_SAVE 23293#define TARGET_OPTION_SAVE ix86_function_specific_save 23294 23295#undef TARGET_OPTION_RESTORE 23296#define TARGET_OPTION_RESTORE ix86_function_specific_restore 23297 23298#undef TARGET_OPTION_POST_STREAM_IN 23299#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in 23300 23301#undef TARGET_OPTION_PRINT 23302#define TARGET_OPTION_PRINT ix86_function_specific_print 23303 23304#undef TARGET_OPTION_FUNCTION_VERSIONS 23305#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions 23306 23307#undef TARGET_CAN_INLINE_P 23308#define TARGET_CAN_INLINE_P ix86_can_inline_p 23309 23310#undef TARGET_LEGITIMATE_ADDRESS_P 23311#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p 23312 23313#undef TARGET_REGISTER_PRIORITY 23314#define TARGET_REGISTER_PRIORITY ix86_register_priority 23315 23316#undef TARGET_REGISTER_USAGE_LEVELING_P 23317#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true 23318 23319#undef TARGET_LEGITIMATE_CONSTANT_P 23320#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p 23321 23322#undef TARGET_COMPUTE_FRAME_LAYOUT 23323#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout 23324 23325#undef TARGET_FRAME_POINTER_REQUIRED 
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name

/* Implement TARGET_LIBC_HAS_FAST_FUNCTION.  Only mempcpy is reported
   as fast, and only when the C library is glibc.  */

static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC)
    return (built_in_function)fcode == BUILT_IN_MEMPCPY;
  else
    return false;
#else
  return false;
#endif
}

#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

/* The target hook vector, built from the TARGET_* macros above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"