1/* Copyright (C) 1988-2020 Free Software Foundation, Inc. 2 3This file is part of GCC. 4 5GCC is free software; you can redistribute it and/or modify 6it under the terms of the GNU General Public License as published by 7the Free Software Foundation; either version 3, or (at your option) 8any later version. 9 10GCC is distributed in the hope that it will be useful, 11but WITHOUT ANY WARRANTY; without even the implied warranty of 12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13GNU General Public License for more details. 14 15You should have received a copy of the GNU General Public License 16along with GCC; see the file COPYING3. If not see 17<http://www.gnu.org/licenses/>. */ 18 19#define IN_TARGET_CODE 1 20 21#include "config.h" 22#include "system.h" 23#include "coretypes.h" 24#include "backend.h" 25#include "rtl.h" 26#include "tree.h" 27#include "memmodel.h" 28#include "gimple.h" 29#include "cfghooks.h" 30#include "cfgloop.h" 31#include "df.h" 32#include "tm_p.h" 33#include "stringpool.h" 34#include "expmed.h" 35#include "optabs.h" 36#include "regs.h" 37#include "emit-rtl.h" 38#include "recog.h" 39#include "cgraph.h" 40#include "diagnostic.h" 41#include "cfgbuild.h" 42#include "alias.h" 43#include "fold-const.h" 44#include "attribs.h" 45#include "calls.h" 46#include "stor-layout.h" 47#include "varasm.h" 48#include "output.h" 49#include "insn-attr.h" 50#include "flags.h" 51#include "except.h" 52#include "explow.h" 53#include "expr.h" 54#include "cfgrtl.h" 55#include "common/common-target.h" 56#include "langhooks.h" 57#include "reload.h" 58#include "gimplify.h" 59#include "dwarf2.h" 60#include "tm-constrs.h" 61#include "cselib.h" 62#include "sched-int.h" 63#include "opts.h" 64#include "tree-pass.h" 65#include "context.h" 66#include "pass_manager.h" 67#include "target-globals.h" 68#include "gimple-iterator.h" 69#include "tree-vectorizer.h" 70#include "shrink-wrap.h" 71#include "builtins.h" 72#include "rtl-iter.h" 73#include "tree-iterator.h" 
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-builtins.h"
#include "i386-features.h"

/* Base names of the out-of-line save/restore stubs; combined with the
   ISA prefix and register count in get_stub_name below.  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0		1		2		3
    Offset:					realigned or	aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP	*/
    XMM15_REG,	/* 0x10	0x18	0x10	0x18 */
    XMM14_REG,	/* 0x20	0x28	0x20	0x28 */
    XMM13_REG,	/* 0x30	0x38	0x30	0x38 */
    XMM12_REG,	/* 0x40	0x48	0x40	0x48 */
    XMM11_REG,	/* 0x50	0x58	0x50	0x58 */
    XMM10_REG,	/* 0x60	0x68	0x60	0x68 */
    XMM9_REG,	/* 0x70	0x78	0x70	0x78 */
    XMM8_REG,	/* 0x80	0x88	0x80	0x88 */
    XMM7_REG,	/* 0x90	0x98	0x90	0x98 */
    XMM6_REG,	/* 0xa0	0xa8	0xa0	0xa8 */
    SI_REG,	/* 0xa8	0xb0	0xa8	0xb0 */
    DI_REG,	/* 0xb0	0xb8	0xb0	0xb8 */
    BX_REG,	/* 0xb8	0xc0	0xb8	0xc0 */
    BP_REG,	/* 0xc0	0xc8	N/A	N/A  (skipped when a HFP is used) */
    R12_REG,	/* 0xc8	0xd0	0xc0	0xc8 */
    R13_REG,	/* 0xd0	0xd8	0xc8	0xd0 */
    R14_REG,	/* 0xd8	0xe0	0xd0	0xd8 */
    R15_REG,	/* 0xe0	0xe8	0xd8	0xe0 */
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				 [STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  The choice is keyed off whether the frame
   is realigned or uses a hard frame pointer, and whether padding of 8
   bytes was inserted before the save area.  */
const class xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.
   Registers are considered in REG_ORDER; the scan stops at the first
   register that does not need saving, since the stubs save a
   contiguous prefix of that order.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      /* BP is managed by the prologue itself when a hard frame pointer
	 is in use, never by the stub.  */
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      /* A skipped BP slot does not consume one of the COUNT managed
	 registers, so extend the scan by one to compensate.  */
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  Computes the save-area offset of every
   register in REG_ORDER relative to the stub's base pointer, laying SSE
   registers out 16-byte aligned (asserted) and GPRs in 8-byte slots.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

/* Return the assembler name of stub STUB handling MIN_REGS + N_EXTRA_REGS
   registers, composing it on first use and caching it in s_stub_names
   (indexed by AVX availability, stub kind and variant).  */
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

/* Next free id for a scalar-to-vector (STV) chain.  */
unsigned scalar_chain::max_id = 0;

namespace {

/* Initialize new chain converting scalar mode SMODE_ computations to
   vector mode VMODE_.  The bitmaps track member insns, the registers
   they define, and registers that will need both modes.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add instruction into chains' queue.  Insns already in the chain or
   already queued are skipped.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

/* Construct a general (SImode/DImode) chain.  insns_conv tracks insns
   outside the chain whose defs require a vector copy; the two counters
   feed the cost computation in compute_convert_gain.  */
general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
  : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}

general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      /* Def is outside the chain: count an integer-to-sse copy, but
	 only once per insn/reg pair.  */
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      /* Def is inside the chain but also used as a scalar elsewhere.  */
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* The linked insn is not convertible: the register will need to
	 exist in both modes.  For a def we mark that def; for a use we
	 mark REF (the def we started from).  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  Worklist-driven:
   analyze_register_chain pushes related candidate insns onto QUEUE.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}

/* Return a cost of building a vector costant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}

/* Compute a gain for chain conversion.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  /* M is the number of GPRs a scalar operation touches: 2 for DImode
     on 32-bit targets, otherwise 1.  */
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  /* On 32-bit, a DImode shift is a multi-insn sequence whose
	     cost depends on the shift count.  */
	  if (m == 2)
	    {
	      if (INTVAL (XEXP (src, 1)) >= 32)
		igain += ix86_cost->add;
	      else
		igain += ix86_cost->shift_const;
	    }

	  igain += ix86_cost->shift_const - ix86_cost->sse_op;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		      - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ??? integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  /* AFTER may end its block (e.g. can throw); emit on the fallthru
     edge instead, splitting it to get a place for the sequence.  */
  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

} // anon namespace

/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}

/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.
*/

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      /* No direct GPR->XMM moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  /* 32-bit target: store the DImode value as two SImode halves.  */
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Load the low half with movd, insert the high half with pinsrd.  */
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  /* Without SSE4.1, load both halves separately and combine
	     them with punpckldq.  */
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}

/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      /* No direct XMM->GPR moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Extract both SImode halves with pextrd.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  /* Without SSE4.1, movd the low half, shift right by 32 and
	     movd again for the high half.  */
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}

/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      /* Preload the memory operand into a fresh vector register.  */
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      /* Materialize the scalar constant as a vector constant with the
	 value in element 0 and zeros (or all-ones for -1) elsewhere.  */
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  /* Non-standard constants must come from the constant pool.  */
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}

/* Convert INSN to vector mode.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  switch (GET_CODE (src))
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 0), insn);
      convert_op (&XEXP (src, 1), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      /* Rewrite -x as 0 - x; there is no vector integer negation.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      /* Rewrite ~x as x ^ -1; there is no vector integer one's
	 complement.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      /* Comparisons against zero are rewritten as a ptest of the value
	 duplicated into both halves of a V2DI register.  */
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}

/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}

/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}

/* Generate copies from defs used by the chain but not defined therein.
   Also populates defs_map which is used later by convert_insn.
*/

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;
  /* Allocate a fresh vector-mode pseudo for every register that
     needs conversion.  */
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    {
      rtx chain_reg = gen_reg_rtx (smode);
      defs_map.put (regno_reg_rtx[id], chain_reg);
    }
  /* For every recorded insn, emit copies for each definition of a
     register that needs conversion.  */
  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
}

/* Convert whole chain creating required register
   conversions and copies.  Returns the number of
   converted insns (0 if the debug counter vetoed it).  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}

/* Return 1 if INSN uses or defines a hard register.
   Hard register uses in a memory address are ignored.
   Clobbers and flags definitions are ignored.  */

static bool
has_non_address_hard_reg (rtx_insn *insn)
{
  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return true;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return true;

  return false;
}

/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  /* The replacement uses ptest, which requires SSE4.1.  */
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  /* The two IOR operands must be the low and high SImode halves
     (in either order) of the same DImode register.  */
  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}

/* The general version of scalar_to_vector_candidate_p.
*/

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  /* INSN must be a single SET not touching hard registers (other than
     the flags register).  */
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      /* Vector arithmetic right shift in DImode needs AVX512VL.  */
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      /* Only constant in-range shift counts are convertible.  */
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      /* Loads and constant stores require a register destination.  */
      return REG_P (dst);

    default:
      return false;
    }

  /* Validate the first operand of the binary/unary operation.  */
  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
	 is optimal.  Only support store from register, standard SSE
	 constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  /* Nonzero only for the standard SSE constants 0 and -1.  */
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
	 optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.
*/

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  /* Reject REGNO if any of its definitions is outside CANDIDATES.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  /* Likewise for its uses.  */
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}

/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Collect registers whose defs/uses are not fully covered by the
     candidate set.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  /* Drop every candidate insn that touches one of those registers.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}

/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.
*/

static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];	/* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.
     TIMODE_P selects between the TImode pass and the SImode/DImode
     pass (see pass_stv::set_pass_param).  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  /* Report once if no mode collected any candidate.  */
  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}

/* Modify the vzeroupper pattern in INSN so that it describes the effect
   that the instruction has on the SSE registers.  LIVE_REGS are the set
   of registers that are live across the instruction.

   For a live register R we use:

     (set (reg:V2DF R) (reg:V2DF R))

   which preserves the low 128 bits but clobbers the upper bits.  */

/* NOTE(review): the self-sets below are emitted in V2DImode, not the
   V2DF sketched above — confirm the mode mismatch in the comment is
   intentional.  */

static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
  rtx pattern = PATTERN (insn);
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
  /* Count how many SSE registers are live; one self-set is added
     for each of them.  */
  unsigned int npats = nregs;
  for (unsigned int i = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	npats--;
    }
  if (npats == 0)
    return;
  /* Element 0 keeps the original vzeroupper body.  */
  rtvec vec = rtvec_alloc (npats + 1);
  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
  for (unsigned int i = 0, j = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	continue;
      rtx reg = gen_rtx_REG (V2DImode, regno);
      ++j;
      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
    }
  XVEC (pattern, 0) = vec;
  INSN_CODE (insn) = -1;
  df_insn_rescan (insn);
}

/* Walk the vzeroupper instructions in the function and annotate them
   with the effect that they have on the SSE registers.  */

static void
ix86_add_reg_usage_to_vzerouppers (void)
{
  basic_block bb;
  rtx_insn *insn;
  auto_bitmap live_regs;

  df_analyze ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* Simulate liveness backwards from the block's live-out set.  */
      bitmap_copy (live_regs, df_get_live_out (bb));
      df_simulate_initialize_backwards (bb, live_regs);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;
	  if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
	    ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
	  df_simulate_one_insn_backwards (bb, insn, live_regs);
	}
    }
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  if (TARGET_VZEROUPPER)
    {
      /* vzeroupper instructions are inserted immediately after reload to
	 account for possible spills from 256bit or 512bit registers.  The
	 pass reuses mode switching infrastructure by re-running mode
	 insertion pass, so disable entities that have already been
	 processed.  */
      for (int i = 0; i < MAX_386_ENTITIES; i++)
	ix86_optimize_mode_switching[i] = 0;

      ix86_optimize_mode_switching[AVX_U128] = 1;

      /* Call optimize_mode_switching.
*/
      g->get_passes ()->execute_pass_mode_switching ();
    }
  ix86_add_reg_usage_to_vzerouppers ();
  return 0;
}

namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Run also when only explicit user vzerouppers are present, so
	 they get their register-usage annotation.  */
      return TARGET_AVX
	     && (TARGET_VZEROUPPER
		 || cfun->machine->has_explicit_vzeroupper);
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The TImode variant is only usable in 64-bit mode.  */
      return ((!timode_p || TARGET_64BIT)
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector (timode_p);
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  /* True for the TImode instance of the pass; selected through
     set_pass_param.  */
  bool timode_p;
}; // class pass_stv

} // anon namespace

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}

/* Inserting ENDBRANCH instructions.  */

static unsigned int
rest_of_insert_endbranch (void)
{
  timevar_push (TV_MACH_DEP);

  rtx cet_eb;
  rtx_insn *insn;
  basic_block bb;

  /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is
     absent among function attributes.  Later an optimization will be
     introduced to make analysis if an address of a static function is
     taken.  A static function whose address is not taken will get a
     nocf_check attribute.  This will allow to reduce the number of EB.  */

  if (!lookup_attribute ("nocf_check",
			 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
      && (!flag_manual_endbr
	  || lookup_attribute ("cf_check",
			       DECL_ATTRIBUTES (cfun->decl)))
      && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	  || ix86_cmodel == CM_LARGE
	  || ix86_cmodel == CM_LARGE_PIC
	  || flag_force_indirect_call
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	      && DECL_DLLIMPORT_P (cfun->decl))))
    {
      /* Queue ENDBR insertion to x86_function_profiler.
*/
      if (crtl->profile && flag_fentry)
	cfun->machine->endbr_queued_at_entrance = true;
      else
	{
	  cet_eb = gen_nop_endbr ();

	  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	  insn = BB_HEAD (bb);
	  emit_insn_before (cet_eb, insn);
	}
    }

  /* Scan all blocks for call sites, switch-table jumps and preserved
     labels that need an ENDBR.  */
  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      bool need_endbr;
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after CALL, which can return more than
		 twice, setjmp-like functions.  */

	      cet_eb = gen_nop_endbr ();
	      emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  cet_eb = gen_nop_endbr ();
		  emit_insn_after (cet_eb, insn);
		}
	      continue;
	    }

	  /* Labels whose address is taken can be reached indirectly.  */
	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      cet_eb = gen_nop_endbr ();
	      emit_insn_after (cet_eb, insn);
	      continue;
	    }
	}
    }

  timevar_pop (TV_MACH_DEP);
  return 0;
}

namespace {

const pass_data pass_data_insert_endbranch =
{
  RTL_PASS, /* type.  */
  "cet", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.
*/ 2089}; 2090 2091class pass_insert_endbranch : public rtl_opt_pass 2092{ 2093public: 2094 pass_insert_endbranch (gcc::context *ctxt) 2095 : rtl_opt_pass (pass_data_insert_endbranch, ctxt) 2096 {} 2097 2098 /* opt_pass methods: */ 2099 virtual bool gate (function *) 2100 { 2101 return ((flag_cf_protection & CF_BRANCH)); 2102 } 2103 2104 virtual unsigned int execute (function *) 2105 { 2106 return rest_of_insert_endbranch (); 2107 } 2108 2109}; // class pass_insert_endbranch 2110 2111} // anon namespace 2112 2113rtl_opt_pass * 2114make_pass_insert_endbranch (gcc::context *ctxt) 2115{ 2116 return new pass_insert_endbranch (ctxt); 2117} 2118 2119/* At entry of the nearest common dominator for basic blocks with 2120 conversions, generate a single 2121 vxorps %xmmN, %xmmN, %xmmN 2122 for all 2123 vcvtss2sd op, %xmmN, %xmmX 2124 vcvtsd2ss op, %xmmN, %xmmX 2125 vcvtsi2ss op, %xmmN, %xmmX 2126 vcvtsi2sd op, %xmmN, %xmmX 2127 2128 NB: We want to generate only a single vxorps to cover the whole 2129 function. The LCM algorithm isn't appropriate here since it may 2130 place a vxorps inside the loop. */ 2131 2132static unsigned int 2133remove_partial_avx_dependency (void) 2134{ 2135 timevar_push (TV_MACH_DEP); 2136 2137 bitmap_obstack_initialize (NULL); 2138 bitmap convert_bbs = BITMAP_ALLOC (NULL); 2139 2140 basic_block bb; 2141 rtx_insn *insn, *set_insn; 2142 rtx set; 2143 rtx v4sf_const0 = NULL_RTX; 2144 2145 auto_vec<rtx_insn *> control_flow_insns; 2146 2147 /* We create invalid RTL initially so defer rescans. 
*/ 2148 df_set_flags (DF_DEFER_INSN_RESCAN); 2149 2150 FOR_EACH_BB_FN (bb, cfun) 2151 { 2152 FOR_BB_INSNS (bb, insn) 2153 { 2154 if (!NONDEBUG_INSN_P (insn)) 2155 continue; 2156 2157 set = single_set (insn); 2158 if (!set) 2159 continue; 2160 2161 if (get_attr_avx_partial_xmm_update (insn) 2162 != AVX_PARTIAL_XMM_UPDATE_TRUE) 2163 continue; 2164 2165 if (!v4sf_const0) 2166 v4sf_const0 = gen_reg_rtx (V4SFmode); 2167 2168 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, 2169 SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and 2170 vec_merge with subreg. */ 2171 rtx src = SET_SRC (set); 2172 rtx dest = SET_DEST (set); 2173 machine_mode dest_mode = GET_MODE (dest); 2174 2175 rtx zero; 2176 machine_mode dest_vecmode; 2177 if (dest_mode == E_SFmode) 2178 { 2179 dest_vecmode = V4SFmode; 2180 zero = v4sf_const0; 2181 } 2182 else 2183 { 2184 dest_vecmode = V2DFmode; 2185 zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); 2186 } 2187 2188 /* Change source to vector mode. */ 2189 src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); 2190 src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, 2191 GEN_INT (HOST_WIDE_INT_1U)); 2192 /* Change destination to vector mode. */ 2193 rtx vec = gen_reg_rtx (dest_vecmode); 2194 /* Generate an XMM vector SET. */ 2195 set = gen_rtx_SET (vec, src); 2196 set_insn = emit_insn_before (set, insn); 2197 df_insn_rescan (set_insn); 2198 2199 if (cfun->can_throw_non_call_exceptions) 2200 { 2201 /* Handle REG_EH_REGION note. */ 2202 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); 2203 if (note) 2204 { 2205 control_flow_insns.safe_push (set_insn); 2206 add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); 2207 } 2208 } 2209 2210 src = gen_rtx_SUBREG (dest_mode, vec, 0); 2211 set = gen_rtx_SET (dest, src); 2212 2213 /* Drop possible dead definitions. 
*/ 2214 PATTERN (insn) = set; 2215 2216 INSN_CODE (insn) = -1; 2217 recog_memoized (insn); 2218 df_insn_rescan (insn); 2219 bitmap_set_bit (convert_bbs, bb->index); 2220 } 2221 } 2222 2223 if (v4sf_const0) 2224 { 2225 /* (Re-)discover loops so that bb->loop_father can be used in the 2226 analysis below. */ 2227 calculate_dominance_info (CDI_DOMINATORS); 2228 loop_optimizer_init (AVOID_CFG_MODIFICATIONS); 2229 2230 /* Generate a vxorps at entry of the nearest dominator for basic 2231 blocks with conversions, which is in the fake loop that 2232 contains the whole function, so that there is only a single 2233 vxorps in the whole function. */ 2234 bb = nearest_common_dominator_for_set (CDI_DOMINATORS, 2235 convert_bbs); 2236 while (bb->loop_father->latch 2237 != EXIT_BLOCK_PTR_FOR_FN (cfun)) 2238 bb = get_immediate_dominator (CDI_DOMINATORS, 2239 bb->loop_father->header); 2240 2241 set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); 2242 2243 insn = BB_HEAD (bb); 2244 while (insn && !NONDEBUG_INSN_P (insn)) 2245 { 2246 if (insn == BB_END (bb)) 2247 { 2248 insn = NULL; 2249 break; 2250 } 2251 insn = NEXT_INSN (insn); 2252 } 2253 if (insn == BB_HEAD (bb)) 2254 set_insn = emit_insn_before (set, insn); 2255 else 2256 set_insn = emit_insn_after (set, 2257 insn ? PREV_INSN (insn) : BB_END (bb)); 2258 df_insn_rescan (set_insn); 2259 loop_optimizer_finalize (); 2260 2261 if (!control_flow_insns.is_empty ()) 2262 { 2263 free_dominance_info (CDI_DOMINATORS); 2264 2265 unsigned int i; 2266 FOR_EACH_VEC_ELT (control_flow_insns, i, insn) 2267 if (control_flow_insn_p (insn)) 2268 { 2269 /* Split the block after insn. There will be a fallthru 2270 edge, which is OK so we keep it. We have to create 2271 the exception edges ourselves. 
*/
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  df_process_deferred_rescans ();
  df_clear_flags (DF_DEFER_INSN_RESCAN);
  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}

namespace {

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (TARGET_AVX
	      && TARGET_SSE_PARTIAL_REG_DEPENDENCY
	      && TARGET_SSE_MATH
	      && optimize
	      && optimize_function_for_speed_p (cfun));
    }

  virtual unsigned int execute (function *)
    {
      return remove_partial_avx_dependency ();
    }
}; // class pass_rpad

} // anon namespace

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}

/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.
   qsort comparator: sorts in descending dispatch_priority order
   (returns c2 - c1).  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.
*/

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  /* Build "return (void *) &version_decl;" -- emitted either
     unconditionally (default version) or under the predicate below.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* No predicate: this is the default version; return it
     unconditionally and keep everything in NEW_BB.  */
  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Emit one predicate call per chain element and AND the integer
     results together into AND_EXPR_VAR.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var>
	     (MIN of non-negative predicate results is zero iff any
	     predicate failed, so this acts as a logical AND).  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* if (and_expr_var > 0) -- all predicates held.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Carve the straight-line sequence into three blocks:
     bb1 = predicate evaluation + condition,
     bb2 = the "return this version" statements,
     bb3 = fallthrough where the next version's test will go.  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  /* The split produced a fallthru edge; relabel it as the true arm of
     the condition.  */
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* bb2 ends in a return, so it must go to the exit block, not fall
     through to bb3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}

/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.
*/

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  /* Must match the layout feature_compare assumes.  */
  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  /* One slot per non-default version; the default is dispatched last
     and needs no entry.  Freed at the end of this function.  */
  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  /* Collect predicate chains and priorities for every version except
     the default at index 0.  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      /* No predicate means nothing to test for this version; skip it.  */
      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  /* Emit one conditional "return &version" block per version; each call
     returns the new fallthrough block for the next condition.  */
  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}

/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.
*/

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  /* gnu_inline versions have no out-of-line body to dispatch to.  */
  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  /* The default version keeps the unsuffixed name.  */
  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  /* "+ 2" covers the '.' separator and the trailing NUL.  */
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  /* Already created for another version of this function; reuse it.  */
  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node: unlink it from its position in
     the doubly-linked chain and splice it in front of FIRST_V.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.
*/

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  tree decl, type, t;

  /* Create resolver function name based on default_decl.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);

  /* The resolver function should return a (void *). */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  /* Compiler-generated, local, never inlined.  */
  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (ifunc_alias_decl) = 0;

  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* Lower the new function and hand its (empty) entry block back to the
     caller, which will fill it with the dispatch conditions.  */
  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  return decl;
}

/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  /* Resolver already generated on a previous call; reuse it.  */
  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  /* Collect every version decl (default first) for the dispatcher.  */
  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  /* The resolver body was built directly in GIMPLE; recompute its call
     graph edges before leaving its cfun context.  */
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}