1/* Global, SSA-based optimizations using mathematical identities. 2 Copyright (C) 2005-2022 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify it 7under the terms of the GNU General Public License as published by the 8Free Software Foundation; either version 3, or (at your option) any 9later version. 10 11GCC is distributed in the hope that it will be useful, but WITHOUT 12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING3. If not see 18<http://www.gnu.org/licenses/>. */ 19 20/* Currently, the only mini-pass in this file tries to CSE reciprocal 21 operations. These are common in sequences such as this one: 22 23 modulus = sqrt(x*x + y*y + z*z); 24 x = x / modulus; 25 y = y / modulus; 26 z = z / modulus; 27 28 that can be optimized to 29 30 modulus = sqrt(x*x + y*y + z*z); 31 rmodulus = 1.0 / modulus; 32 x = x * rmodulus; 33 y = y * rmodulus; 34 z = z * rmodulus; 35 36 We do this for loop invariant divisors, and with this pass whenever 37 we notice that a division has the same divisor multiple times. 38 39 Of course, like in PRE, we don't insert a division if a dominator 40 already has one. However, this cannot be done as an extension of 41 PRE for several reasons. 42 43 First of all, with some experiments it was found out that the 44 transformation is not always useful if there are only two divisions 45 by the same divisor. This is probably because modern processors 46 can pipeline the divisions; on older, in-order processors it should 47 still be effective to optimize two divisions by the same number. 48 We make this a param, and it shall be called N in the remainder of 49 this comment. 
   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.
*/ 86 87#include "config.h" 88#include "system.h" 89#include "coretypes.h" 90#include "backend.h" 91#include "target.h" 92#include "rtl.h" 93#include "tree.h" 94#include "gimple.h" 95#include "predict.h" 96#include "alloc-pool.h" 97#include "tree-pass.h" 98#include "ssa.h" 99#include "optabs-tree.h" 100#include "gimple-pretty-print.h" 101#include "alias.h" 102#include "fold-const.h" 103#include "gimple-fold.h" 104#include "gimple-iterator.h" 105#include "gimplify.h" 106#include "gimplify-me.h" 107#include "stor-layout.h" 108#include "tree-cfg.h" 109#include "tree-dfa.h" 110#include "tree-ssa.h" 111#include "builtins.h" 112#include "internal-fn.h" 113#include "case-cfn-macros.h" 114#include "optabs-libfuncs.h" 115#include "tree-eh.h" 116#include "targhooks.h" 117#include "domwalk.h" 118#include "tree-ssa-math-opts.h" 119 120/* This structure represents one basic block that either computes a 121 division, or is a common dominator for basic block that compute a 122 division. */ 123struct occurrence { 124 /* The basic block represented by this structure. */ 125 basic_block bb = basic_block(); 126 127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal 128 inserted in BB. */ 129 tree recip_def = tree(); 130 131 /* If non-NULL, the SSA_NAME holding the definition for a squared 132 reciprocal inserted in BB. */ 133 tree square_recip_def = tree(); 134 135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that 136 was inserted in BB. */ 137 gimple *recip_def_stmt = nullptr; 138 139 /* Pointer to a list of "struct occurrence"s for blocks dominated 140 by BB. */ 141 struct occurrence *children = nullptr; 142 143 /* Pointer to the next "struct occurrence"s in the list of blocks 144 sharing a common dominator. */ 145 struct occurrence *next = nullptr; 146 147 /* The number of divisions that are in BB before compute_merit. The 148 number of divisions that are in BB or post-dominate it after 149 compute_merit. 
*/ 150 int num_divisions = 0; 151 152 /* True if the basic block has a division, false if it is a common 153 dominator for basic blocks that do. If it is false and trapping 154 math is active, BB is not a candidate for inserting a reciprocal. */ 155 bool bb_has_division = false; 156 157 /* Construct a struct occurrence for basic block BB, and whose 158 children list is headed by CHILDREN. */ 159 occurrence (basic_block bb, struct occurrence *children) 160 : bb (bb), children (children) 161 { 162 bb->aux = this; 163 } 164 165 /* Destroy a struct occurrence and remove it from its basic block. */ 166 ~occurrence () 167 { 168 bb->aux = nullptr; 169 } 170 171 /* Allocate memory for a struct occurrence from OCC_POOL. */ 172 static void* operator new (size_t); 173 174 /* Return memory for a struct occurrence to OCC_POOL. */ 175 static void operator delete (void*, size_t); 176}; 177 178static struct 179{ 180 /* Number of 1.0/X ops inserted. */ 181 int rdivs_inserted; 182 183 /* Number of 1.0/FUNC ops inserted. */ 184 int rfuncs_inserted; 185} reciprocal_stats; 186 187static struct 188{ 189 /* Number of cexpi calls inserted. */ 190 int inserted; 191 192 /* Number of conversions removed. */ 193 int conv_removed; 194 195} sincos_stats; 196 197static struct 198{ 199 /* Number of widening multiplication ops inserted. */ 200 int widen_mults_inserted; 201 202 /* Number of integer multiply-and-accumulate ops inserted. */ 203 int maccs_inserted; 204 205 /* Number of fp fused multiply-add ops inserted. */ 206 int fmas_inserted; 207 208 /* Number of divmod calls inserted. */ 209 int divmod_calls_inserted; 210 211 /* Number of highpart multiplication ops inserted. */ 212 int highpart_mults_inserted; 213} widen_mul_stats; 214 215/* The instance of "struct occurrence" representing the highest 216 interesting block in the dominator tree. */ 217static struct occurrence *occ_head; 218 219/* Allocation pool for getting instances of "struct occurrence". 
*/ 220static object_allocator<occurrence> *occ_pool; 221 222void* occurrence::operator new (size_t n) 223{ 224 gcc_assert (n == sizeof(occurrence)); 225 return occ_pool->allocate_raw (); 226} 227 228void occurrence::operator delete (void *occ, size_t n) 229{ 230 gcc_assert (n == sizeof(occurrence)); 231 occ_pool->remove_raw (occ); 232} 233 234/* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a 235 list of "struct occurrence"s, one per basic block, having IDOM as 236 their common dominator. 237 238 We try to insert NEW_OCC as deep as possible in the tree, and we also 239 insert any other block that is a common dominator for BB and one 240 block already in the tree. */ 241 242static void 243insert_bb (struct occurrence *new_occ, basic_block idom, 244 struct occurrence **p_head) 245{ 246 struct occurrence *occ, **p_occ; 247 248 for (p_occ = p_head; (occ = *p_occ) != NULL; ) 249 { 250 basic_block bb = new_occ->bb, occ_bb = occ->bb; 251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb); 252 if (dom == bb) 253 { 254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC 255 from its list. */ 256 *p_occ = occ->next; 257 occ->next = new_occ->children; 258 new_occ->children = occ; 259 260 /* Try the next block (it may as well be dominated by BB). */ 261 } 262 263 else if (dom == occ_bb) 264 { 265 /* OCC_BB dominates BB. Tail recurse to look deeper. */ 266 insert_bb (new_occ, dom, &occ->children); 267 return; 268 } 269 270 else if (dom != idom) 271 { 272 gcc_assert (!dom->aux); 273 274 /* There is a dominator between IDOM and BB, add it and make 275 two children out of NEW_OCC and OCC. First, remove OCC from 276 its list. */ 277 *p_occ = occ->next; 278 new_occ->next = occ; 279 occ->next = NULL; 280 281 /* None of the previous blocks has DOM as a dominator: if we tail 282 recursed, we would reexamine them uselessly. Just switch BB with 283 DOM, and go on looking for blocks dominated by DOM. 
*/ 284 new_occ = new occurrence (dom, new_occ); 285 } 286 287 else 288 { 289 /* Nothing special, go on with the next element. */ 290 p_occ = &occ->next; 291 } 292 } 293 294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */ 295 new_occ->next = *p_head; 296 *p_head = new_occ; 297} 298 299/* Register that we found a division in BB. 300 IMPORTANCE is a measure of how much weighting to give 301 that division. Use IMPORTANCE = 2 to register a single 302 division. If the division is going to be found multiple 303 times use 1 (as it is with squares). */ 304 305static inline void 306register_division_in (basic_block bb, int importance) 307{ 308 struct occurrence *occ; 309 310 occ = (struct occurrence *) bb->aux; 311 if (!occ) 312 { 313 occ = new occurrence (bb, NULL); 314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head); 315 } 316 317 occ->bb_has_division = true; 318 occ->num_divisions += importance; 319} 320 321 322/* Compute the number of divisions that postdominate each block in OCC and 323 its children. */ 324 325static void 326compute_merit (struct occurrence *occ) 327{ 328 struct occurrence *occ_child; 329 basic_block dom = occ->bb; 330 331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next) 332 { 333 basic_block bb; 334 if (occ_child->children) 335 compute_merit (occ_child); 336 337 if (flag_exceptions) 338 bb = single_noncomplex_succ (dom); 339 else 340 bb = dom; 341 342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb)) 343 occ->num_divisions += occ_child->num_divisions; 344 } 345} 346 347 348/* Return whether USE_STMT is a floating-point division by DEF. */ 349static inline bool 350is_division_by (gimple *use_stmt, tree def) 351{ 352 return is_gimple_assign (use_stmt) 353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR 354 && gimple_assign_rhs2 (use_stmt) == def 355 /* Do not recognize x / x as valid division, as we are getting 356 confused later by replacing all immediate uses x in such 357 a stmt. 
*/ 358 && gimple_assign_rhs1 (use_stmt) != def 359 && !stmt_can_throw_internal (cfun, use_stmt); 360} 361 362/* Return TRUE if USE_STMT is a multiplication of DEF by A. */ 363static inline bool 364is_mult_by (gimple *use_stmt, tree def, tree a) 365{ 366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN 367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR) 368 { 369 tree op0 = gimple_assign_rhs1 (use_stmt); 370 tree op1 = gimple_assign_rhs2 (use_stmt); 371 372 return (op0 == def && op1 == a) 373 || (op0 == a && op1 == def); 374 } 375 return 0; 376} 377 378/* Return whether USE_STMT is DEF * DEF. */ 379static inline bool 380is_square_of (gimple *use_stmt, tree def) 381{ 382 return is_mult_by (use_stmt, def, def); 383} 384 385/* Return whether USE_STMT is a floating-point division by 386 DEF * DEF. */ 387static inline bool 388is_division_by_square (gimple *use_stmt, tree def) 389{ 390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN 391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR 392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt) 393 && !stmt_can_throw_internal (cfun, use_stmt)) 394 { 395 tree denominator = gimple_assign_rhs2 (use_stmt); 396 if (TREE_CODE (denominator) == SSA_NAME) 397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def); 398 } 399 return 0; 400} 401 402/* Walk the subset of the dominator tree rooted at OCC, setting the 403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in 404 the given basic block. The field may be left NULL, of course, 405 if it is not possible or profitable to do the optimization. 406 407 DEF_BSI is an iterator pointing at the statement defining DEF. 408 If RECIP_DEF is set, a dominator already has a computation that can 409 be used. 410 411 If should_insert_square_recip is set, then this also inserts 412 the square of the reciprocal immediately after the definition 413 of the reciprocal. 
*/

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costing loop.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  Scan forward from
	     the block's first real statement to the first division by DEF
	     (or by DEF * DEF) and place the reciprocal right before it.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  /* Propagate RECIP_DEF (and its square) down the subtree, so dominated
     blocks reuse the dominating definition instead of inserting a new
     one.  */
  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}

/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  /* BB->aux was set to the block's occurrence by register_division_in.  */
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  /* BB->aux was set to the block's occurrence by register_division_in.  */
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  delete occ;

  /* Now ensure that we don't recurse unless it is necessary.  The
     common case (no children) is handled iteratively by the caller's
     loop; only sibling chains of nodes that do have children recurse.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}

/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = t;
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).
*/

static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  /* Only handle the exact form x = 1.0 / t with t an SSA name.  */
  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  /* t must be defined by a sqrt call (any sqrt variant).  */
  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  bool mult_on_main_path = false;

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      if (is_gimple_debug (use_stmt))
	continue;
      if (is_square_of (use_stmt, x))
	{
	  sqr_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else if (is_mult_by (use_stmt, x, a))
	{
	  mult_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else
	has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
	 x = 1.0 / t
	 into
	 tmp1 = 1.0 / a
	 r1 = tmp1.  */

      sqr_ssa_name
	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
	{
	  fprintf (dump_file, "Replacing original division\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
	  fprintf (dump_file, "with new division\n");
	}
      /* Rebuild the division with 'a' as the divisor, replacing the old
	 statement in place and repointing DEF_GSI at the new one.  */
      stmt
	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
			       gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      delete_div = false;
      gimple *sqr_stmt;
      unsigned int i;
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
	  update_stmt (sqr_stmt);
	}
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
	 r2 = t (The original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Replacing squaring multiplication\n");
	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	      fprintf (dump_file, "with assignment\n");
	    }
	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
	  fold_stmt_inplace (&gsi2);
	  update_stmt (mult_stmt);
	  if (dump_file)
	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	}
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
	= gimple_build_assign (x, MULT_EXPR,
			       orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
      update_stmt (stmt);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    /* The division was replaced but X itself is now dead.  */
    release_ssa_name (x);
}

/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */
static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t0 = a / def
       t1 = b / def
       t2 = c / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  /* Register every division by DEF (weight 2) and every division by
     DEF * DEF (weight 1, seen twice) in the dominator-tree subset.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		   * replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt, powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  /* Tear down the occurrence tree; the destructors also clear bb->aux.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.
*/

internal_fn
internal_fn_reciprocal (gcall *call)
{
  internal_fn ifn;

  switch (gimple_call_combined_fn (call))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      ifn = IFN_RSQRT;
      break;

    default:
      return IFN_LAST;
    }

  /* Only use the internal function if the target can expand it directly.  */
  tree_pair types = direct_internal_fn_types (ifn, call);
  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
    return IFN_LAST;

  return ifn;
}

/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  /* The occurrence machinery requires every bb->aux to start out clear.  */
  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  /* First handle default defs of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      /* Re-fetch: execute_cse_reciprocals_1 may have changed the
		 statement the iterator points at.  */
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      /* No internal function: fall back to a
			 target-provided reciprocal builtin, if any.  */
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      /* The call changes between a built-in and an internal
			 function; it must be rebuilt from scratch.  */
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		      update_stmt (call);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  /* Turn every division by ARG1 into a multiplication.  */
		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}

/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.  */

static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  /* Default defs and names occurring in abnormal PHIs cannot be
     replaced without violating SSA invariants.  */
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      /* Only consider conversions of the same source to a compatible
	 type whose result is safe to substitute.  */
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
	{
	  /* Same block: scan forward from USE_STMT; if DEF_STMT is
	     found later in the block, USE_STMT comes first and hence
	     dominates it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
	    gsi_next (&gsi);
	  use_dominates = !gsi_end_p (gsi);
	}
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
			       gimple_bb (def_stmt)))
	use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
			       gimple_bb (use_stmt)))
	use_dominates = true;
      else
	continue;

      if (use_dominates)
	{
	  /* Make the dominating conversion the prevailing one.  */
	  std::swap (name, lhs);
	  std::swap (def_stmt, use_stmt);
	}
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
	{
	  sincos_stats.conv_removed++;

	  /* Redirect all uses of the redundant conversion to the
	     prevailing name, then delete it.  Removing a statement may
	     leave dead EH edges; purging them changes the CFG.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  replace_uses_by (lhs, name);
	  if (gsi_remove (&gsi, true)
	      && gimple_purge_dead_eh_edges (use_bb))
	    *cfg_changed = true;
	  release_defs (use_stmt);
	}
    }

  return name;
}

/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.
 */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  /* USE_STMT below *TOP_BB: record it, *TOP_BB still dominates all.  */
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  /* First occurrence, or USE_STMT above *TOP_BB: it becomes the new
     dominating block.  */
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    /* Unrelated in the dominator tree: cannot CSE this occurrence.  */
    return false;

  return true;
}

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type = NULL_TREE;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  /* First CSE equivalent conversions of NAME so more sin/cos/cexpi
     uses share one SSA name.  */
  name = execute_cse_conv_1 (name, &cfg_changed);

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	  continue;
	}

      tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
      if (!type)
	{
	  type = t;
	  t = TREE_TYPE (name);
	}
      /* This checks that NAME has the right type in the first round,
	 and, in subsequent rounds, that the built_in type is the same
	 type, or a compatible type.  */
      if (type != t && !types_compatible_p (type, t))
	return false;
    }
  /* Nothing to CSE unless at least two distinct flavors were seen.  */
  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      /* NAME is defined in TOP_BB itself: insert right after its def.  */
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".
   For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };


/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  /* Recurse along the optimal power tree: x**n = x**(n-j) * x**j
     with j = powi_table[n], plus one multiply to combine them.  */
  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = absu_hwi (n);

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  /* Window method: peel off POWI_WINDOW_SIZE low bits at a time when
     VAL is odd, otherwise halve it (one squaring).  */
  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}

/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.
 */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 unsigned HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  /* Small exponents already synthesized are reused directly.  */
  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      /* Cache before recursing so shared subexpressions are reused.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      /* Window method: split off the low POWI_WINDOW_SIZE bits.  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      /* Even exponent: one squaring of x**(n/2).  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_one_cst (type);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Tiny exponents are always worth it; otherwise only expand when
     optimizing for speed and the multiply count is within bounds.  */
  if ((n >= -1 && n <= 2)
      || (optimize_function_for_speed_p (cfun)
	  && powi_cost (n) <= POWI_MAX_MULTS))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}

/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name for variable
   TARGET.
Insert the statement prior to GSI's current position, and 1607 return the fresh SSA name.*/ 1608 1609static tree 1610build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc, 1611 const char *name, enum tree_code code, 1612 tree arg0, tree arg1) 1613{ 1614 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name); 1615 gassign *stmt = gimple_build_assign (result, code, arg0, arg1); 1616 gimple_set_location (stmt, loc); 1617 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); 1618 return result; 1619} 1620 1621/* Build a gimple reference operation with the given CODE and argument 1622 ARG, assigning the result to a new SSA name of TYPE with NAME. 1623 Insert the statement prior to GSI's current position, and return 1624 the fresh SSA name. */ 1625 1626static inline tree 1627build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, 1628 const char *name, enum tree_code code, tree arg0) 1629{ 1630 tree result = make_temp_ssa_name (type, NULL, name); 1631 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); 1632 gimple_set_location (stmt, loc); 1633 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); 1634 return result; 1635} 1636 1637/* Build a gimple assignment to cast VAL to TYPE. Insert the statement 1638 prior to GSI's current position, and return the fresh SSA name. */ 1639 1640static tree 1641build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc, 1642 tree type, tree val) 1643{ 1644 tree result = make_ssa_name (type); 1645 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val); 1646 gimple_set_location (stmt, loc); 1647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); 1648 return result; 1649} 1650 1651struct pow_synth_sqrt_info 1652{ 1653 bool *factors; 1654 unsigned int deepest; 1655 unsigned int num_mults; 1656}; 1657 1658/* Return true iff the real value C can be represented as a 1659 sum of powers of 0.5 up to N. That is: 1660 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1. 
1661 Record in INFO the various parameters of the synthesis algorithm such 1662 as the factors a[i], the maximum 0.5 power and the number of 1663 multiplications that will be required. */ 1664 1665bool 1666representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n, 1667 struct pow_synth_sqrt_info *info) 1668{ 1669 REAL_VALUE_TYPE factor = dconsthalf; 1670 REAL_VALUE_TYPE remainder = c; 1671 1672 info->deepest = 0; 1673 info->num_mults = 0; 1674 memset (info->factors, 0, n * sizeof (bool)); 1675 1676 for (unsigned i = 0; i < n; i++) 1677 { 1678 REAL_VALUE_TYPE res; 1679 1680 /* If something inexact happened bail out now. */ 1681 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor)) 1682 return false; 1683 1684 /* We have hit zero. The number is representable as a sum 1685 of powers of 0.5. */ 1686 if (real_equal (&res, &dconst0)) 1687 { 1688 info->factors[i] = true; 1689 info->deepest = i + 1; 1690 return true; 1691 } 1692 else if (!REAL_VALUE_NEGATIVE (res)) 1693 { 1694 remainder = res; 1695 info->factors[i] = true; 1696 info->num_mults++; 1697 } 1698 else 1699 info->factors[i] = false; 1700 1701 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf); 1702 } 1703 return false; 1704} 1705 1706/* Return the tree corresponding to FN being applied 1707 to ARG N times at GSI and LOC. 1708 Look up previous results from CACHE if need be. 1709 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */ 1710 1711static tree 1712get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi, 1713 tree fn, location_t loc, tree *cache) 1714{ 1715 tree res = cache[n]; 1716 if (!res) 1717 { 1718 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache); 1719 res = build_and_insert_call (gsi, loc, fn, prev); 1720 cache[n] = res; 1721 } 1722 1723 return res; 1724} 1725 1726/* Print to STREAM the repeated application of function FNAME to ARG 1727 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print: 1728 "foo (foo (x))". 
*/ 1729 1730static void 1731print_nested_fn (FILE* stream, const char *fname, const char* arg, 1732 unsigned int n) 1733{ 1734 if (n == 0) 1735 fprintf (stream, "%s", arg); 1736 else 1737 { 1738 fprintf (stream, "%s (", fname); 1739 print_nested_fn (stream, fname, arg, n - 1); 1740 fprintf (stream, ")"); 1741 } 1742} 1743 1744/* Print to STREAM the fractional sequence of sqrt chains 1745 applied to ARG, described by INFO. Used for the dump file. */ 1746 1747static void 1748dump_fractional_sqrt_sequence (FILE *stream, const char *arg, 1749 struct pow_synth_sqrt_info *info) 1750{ 1751 for (unsigned int i = 0; i < info->deepest; i++) 1752 { 1753 bool is_set = info->factors[i]; 1754 if (is_set) 1755 { 1756 print_nested_fn (stream, "sqrt", arg, i + 1); 1757 if (i != info->deepest - 1) 1758 fprintf (stream, " * "); 1759 } 1760 } 1761} 1762 1763/* Print to STREAM a representation of raising ARG to an integer 1764 power N. Used for the dump file. */ 1765 1766static void 1767dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n) 1768{ 1769 if (n > 1) 1770 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n); 1771 else if (n == 1) 1772 fprintf (stream, "%s", arg); 1773} 1774 1775/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of 1776 square roots. Place at GSI and LOC. Limit the maximum depth 1777 of the sqrt chains to MAX_DEPTH. Return the tree holding the 1778 result of the expanded sequence or NULL_TREE if the expansion failed. 1779 1780 This routine assumes that ARG1 is a real number with a fractional part 1781 (the integer exponent case will have been handled earlier in 1782 gimple_expand_builtin_pow). 1783 1784 For ARG1 > 0.0: 1785 * For ARG1 composed of a whole part WHOLE_PART and a fractional part 1786 FRAC_PART i.e. 
WHOLE_PART == floor (ARG1) and 1787 FRAC_PART == ARG1 - WHOLE_PART: 1788 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where 1789 POW (ARG0, FRAC_PART) is expanded as a product of square root chains 1790 if it can be expressed as such, that is if FRAC_PART satisfies: 1791 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i)) 1792 where integer a[i] is either 0 or 1. 1793 1794 Example: 1795 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625) 1796 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x))) 1797 1798 For ARG1 < 0.0 there are two approaches: 1799 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1) 1800 is calculated as above. 1801 1802 Example: 1803 POW (x, -5.625) == 1.0 / POW (x, 5.625) 1804 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x)))) 1805 1806 * (B) : WHOLE_PART := - ceil (abs (ARG1)) 1807 FRAC_PART := ARG1 - WHOLE_PART 1808 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART). 1809 Example: 1810 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6) 1811 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6)) 1812 1813 For ARG1 < 0.0 we choose between (A) and (B) depending on 1814 how many multiplications we'd have to do. 1815 So, for the example in (B): POW (x, -5.875), if we were to 1816 follow algorithm (A) we would produce: 1817 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))) 1818 which contains more multiplications than approach (B). 1819 1820 Hopefully, this approach will eliminate potentially expensive POW library 1821 calls when unsafe floating point math is enabled and allow the compiler to 1822 further optimise the multiplies, square roots and divides produced by this 1823 function. 
 */

static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  if (!sqrtfn)
    return NULL_TREE;

  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);

  /* For a negative exponent, also precompute the ceiling split used
     by approach (B) in the overview comment above.  */
  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      /* Prefer approach (B) when its sqrt chain is no deeper and it
	 needs strictly fewer multiplications.  */
      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  /* The whole part must fit a HOST_WIDE_INT exactly.  */
  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }

  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;

	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  /* Approach (A): 1.0 / (powi part * sqrt chains).  */
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	{
	  /* Approach (B): sqrt chains / powi part.  */
	  res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					fract_res, integer_res);
	}
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}

/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* Build the constant 0.25 by halving 1.0 twice via its exponent.  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.
   */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* Don't perform the operation if flag_signaling_nans is on
     and the operand is a signaling NaN.  */
  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
      && ((TREE_CODE (arg0) == REAL_CST
	   && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
	  || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && real_equal (&c, &dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && speed_p
      && hw_sqrt_exists
      && real_equal (&c, &dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      unsigned int max_depth = speed_p
				? param_max_pow_sqrt_depth
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  /* C2_IS_INT records whether 2c is an integer, i.e. whether c is a
     multiple of 0.5; such exponents are excluded below.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}

/* ARG is the argument to a cabs builtin call in GSI with location info
   LOC.  Create a sequence of statements prior to GSI that calculates
   sqrt(R*R + I*I), where R and I are the real and imaginary components
   of ARG, respectively.  Return an expression holding the result.  */

static tree
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
{
  tree real_part, imag_part, addend1, addend2, sum, result;
  tree type = TREE_TYPE (TREE_TYPE (arg));
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  machine_mode mode = TYPE_MODE (type);

  /* Only expand under unsafe math (sqrt(R*R+I*I) may overflow where
     cabs would not), when optimizing this block for speed, and when a
     hardware sqrt exists.  */
  if (!flag_unsafe_math_optimizations
      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
      || !sqrtfn
      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
    return NULL_TREE;

  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    REALPART_EXPR, arg);
  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    real_part, real_part);
  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    IMAGPART_EXPR, arg);
  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    imag_part, imag_part);
  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
  result = build_and_insert_call (gsi, loc, sqrtfn, sum);

  return result;
}

/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.
 */

namespace {

const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos

unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  arg = gimple_call_arg (stmt, 0);
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex,
						  TREE_TYPE (arg))
		      && !targetm.libc_has_function (function_sincos,
						     TREE_TYPE (arg)))
		    break;

		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      /* Expand powi(-1.0, n) as n & 1 ? -1.0 : 1.0 so the
			 exponent need not be a compile-time constant.  */
		      tree t0, t1, cond, one, minus_one;
		      /* Deliberately shadows the enclosing STMT; the shadow
			 ends with this block.  */
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_CABS:
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);
  statistics_counter_event (fun, "conv statements removed",
			    sincos_stats.conv_removed);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}

/* Return true if stmt is a type conversion operation that can be stripped
   when used in a widening multiply operation.  */
static bool
widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
{
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE (result_type) == INTEGER_TYPE)
    {
      tree op_type;
      tree inner_op_type;

      if (!CONVERT_EXPR_CODE_P (rhs_code))
	return false;

      op_type = TREE_TYPE (gimple_assign_lhs (stmt));

      /* If the type of OP has the same precision as the result, then
	 we can strip this conversion.
The multiply operation will be
	 selected to create the correct extension as a by-product.  */
      if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
	return true;

      /* We can also strip a conversion if it preserves the signed-ness of
	 the operation and doesn't narrow the range.  */
      inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      /* If the inner-most type is unsigned, then we can strip any
	 intermediate widening operation.  If it's signed, then the
	 intermediate widening operation must also be signed.  */
      if ((TYPE_UNSIGNED (inner_op_type)
	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
	return true;

      return false;
    }

  return rhs_code == FIXED_CONVERT_EXPR;
}

/* Return true if RHS is a suitable operand for a widening multiplication,
   assuming a target type of TYPE.
   There are two cases:

     - RHS makes some value at least twice as wide.  Store that value
       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.

     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
       but leave *TYPE_OUT untouched.  */

static bool
is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
			tree *new_rhs_out)
{
  gimple *stmt;
  tree type1, rhs1;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      /* Look through a strippable conversion on the defining statement
	 to find the unwidened operand.  */
      stmt = SSA_NAME_DEF_STMT (rhs);
      if (is_gimple_assign (stmt))
	{
	  if (! widening_mult_conversion_strippable_p (type, stmt))
	    rhs1 = rhs;
	  else
	    {
	      rhs1 = gimple_assign_rhs1 (stmt);

	      if (TREE_CODE (rhs1) == INTEGER_CST)
		{
		  *new_rhs_out = rhs1;
		  *type_out = NULL;
		  return true;
		}
	    }
	}
      else
	rhs1 = rhs;

      type1 = TREE_TYPE (rhs1);

      /* The operand must be at most half as wide as the target type.  */
      if (TREE_CODE (type1) != TREE_CODE (type)
	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
	return false;

      *new_rhs_out = rhs1;
      *type_out = type1;
      return true;
    }

  if (TREE_CODE (rhs) == INTEGER_CST)
    {
      *new_rhs_out = rhs;
      *type_out = NULL;
      return true;
    }

  return false;
}

/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.
 */

static bool
is_widening_mult_p (gimple *stmt,
		    tree *type1_out, tree *rhs1_out,
		    tree *type2_out, tree *rhs2_out)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));

  /* Only integer (without trapping overflow) and fixed-point results
     are handled.  */
  if (TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_OVERFLOW_TRAPS (type))
	return false;
    }
  else if (TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
			       rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
			       rhs2_out))
    return false;

  /* A NULL *TYPE1_OUT/*TYPE2_OUT means the operand was an integer
     constant; give it the type of the other operand, provided the
     constant fits.  At most one operand may be constant.  */
  if (*type1_out == NULL)
    {
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
	return false;
      *type1_out = *type2_out;
    }

  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
	return false;
      *type2_out = *type1_out;
    }

  /* Ensure that the larger of the two operands comes first. */
  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
    {
      std::swap (*type1_out, *type2_out);
      std::swap (*rhs1_out, *rhs2_out);
    }

  return true;
}

/* Check to see if the CALL statement is an invocation of copysign
   with 1. being the first argument.  */
static bool
is_copysign_call_with_1 (gimple *call)
{
  gcall *c = dyn_cast <gcall *> (call);
  if (! c)
    return false;

  enum combined_fn code = gimple_call_combined_fn (c);

  if (code == CFN_LAST)
    return false;

  /* Accept both the copysign builtins (all float variants) and the
     internal function form; in each case the first argument must
     be literal 1.  */
  if (builtin_fn_p (code))
    {
      switch (as_builtin_fn (code))
	{
	CASE_FLT_FN (BUILT_IN_COPYSIGN):
	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
	  return real_onep (gimple_call_arg (c, 0));
	default:
	  return false;
	}
    }

  if (internal_fn_p (code))
    {
      switch (as_internal_fn (code))
	{
	case IFN_COPYSIGN:
	  return real_onep (gimple_call_arg (c, 0));
	default:
	  return false;
	}
    }

  return false;
}

/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined, if the
   pattern is not a xorsign pattern or if expansion fails FALSE is
   returned, otherwise TRUE is returned.  */
static bool
convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree treeop0, treeop1, lhs, type;
  location_t loc = gimple_location (stmt);
  lhs = gimple_assign_lhs (stmt);
  treeop0 = gimple_assign_rhs1 (stmt);
  treeop1 = gimple_assign_rhs2 (stmt);
  type = TREE_TYPE (lhs);
  machine_mode mode = TYPE_MODE (type);

  if (HONOR_SNANS (type))
    return false;

  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
    {
      /* The copysign call may define either multiplication operand;
	 after this block CALL0 is the single-use copysign call and
	 TREEOP1 the other (plain) operand.  */
      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
	{
	  call0 = SSA_NAME_DEF_STMT (treeop1);
	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
	    return false;

	  treeop1 = treeop0;
	}
      if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
	return false;

      /* The sign source is the second argument of copysign.  */
      gcall *c = as_a<gcall*> (call0);
      treeop0 = gimple_call_arg (c, 1);

      gcall *call_stmt
	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
      gimple_set_lhs
(call_stmt, lhs);
      gimple_set_location (call_stmt, loc);
      gsi_replace (gsi, call_stmt, true);
      return true;
    }

  return false;
}

/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  scalar_int_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  /* Equal modes means no actual widening would take place.  */
  to_mode = SCALAR_INT_TYPE_MODE (type);
  from_mode = SCALAR_INT_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  /* Pick the widening-multiply optab matching the operand signedness.  */
  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
						  &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      if (op != smul_widen_optab)
	{
	  /* We can use a signed multiply with unsigned types as long as
	     there is a wider mode to use, or it is the smaller of the two
	     types that is unsigned.  Note that type1 >= type2, always.  */
	  if ((TYPE_UNSIGNED (type1)
	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	      || (TYPE_UNSIGNED (type2)
		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	    {
	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
		return false;
	    }

	  op = smul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);

	  if (handler == CODE_FOR_nothing)
	    return false;

	  from_unsigned1 = from_unsigned2 = false;
	}
      else
	{
	  /* Expand can synthesize smul_widen_optab if the target
	     supports umul_widen_optab.  */
	  op = umul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);
	  if (handler == CODE_FOR_nothing)
	    return false;
	}
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				  (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.
 */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  /* Rewrite STMT in place into the widening multiply.  */
  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}

/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
			    enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  /* ERROR_MARK means "operand not defined by a gimple assignment".  */
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  scalar_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
	  if (is_gimple_assign (rhs1_stmt))
	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
	}
      else
	return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
	{
	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
	  if (is_gimple_assign (rhs2_stmt))
	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
	}
      else
	return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one uses, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to single basic block to avoid moving the multiply to a different block
     with a higher execution frequency.
 */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = SCALAR_TYPE_MODE (type);
  from_mode = SCALAR_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
	return false;
      /* We can use a signed multiply with unsigned types as long as
	 there is a wider mode to use, or it is the smaller of the two
	 types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	  || (from_unsigned2
	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	{
	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
	    return false;
	}

      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
					       false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
	{
	  /* Conversion is a truncate.  */
	  if (TYPE_PRECISION (to_type) < data_size)
	    return false;
	}
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
	{
	  /* Conversion is an extend.  Check it's the right sort.  */
	  if (TYPE_UNSIGNED (from_type) != is_unsigned
	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
	    return false;
	}
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
						  from_mode, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned1),
				       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
				       (actual_precision, from_unsigned2),
				       mult_rhs2);

  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
				  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}

/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.
*/ 2995 2996static void 2997convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2) 2998{ 2999 tree type = TREE_TYPE (mul_result); 3000 gimple *use_stmt; 3001 imm_use_iterator imm_iter; 3002 gcall *fma_stmt; 3003 3004 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result) 3005 { 3006 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); 3007 tree addop, mulop1 = op1, result = mul_result; 3008 bool negate_p = false; 3009 gimple_seq seq = NULL; 3010 3011 if (is_gimple_debug (use_stmt)) 3012 continue; 3013 3014 if (is_gimple_assign (use_stmt) 3015 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR) 3016 { 3017 result = gimple_assign_lhs (use_stmt); 3018 use_operand_p use_p; 3019 gimple *neguse_stmt; 3020 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt); 3021 gsi_remove (&gsi, true); 3022 release_defs (use_stmt); 3023 3024 use_stmt = neguse_stmt; 3025 gsi = gsi_for_stmt (use_stmt); 3026 negate_p = true; 3027 } 3028 3029 tree cond, else_value, ops[3]; 3030 tree_code code; 3031 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, 3032 ops, &else_value)) 3033 gcc_unreachable (); 3034 addop = ops[0] == result ? 
ops[1] : ops[0]; 3035 3036 if (code == MINUS_EXPR) 3037 { 3038 if (ops[0] == result) 3039 /* a * b - c -> a * b + (-c) */ 3040 addop = gimple_build (&seq, NEGATE_EXPR, type, addop); 3041 else 3042 /* a - b * c -> (-b) * c + a */ 3043 negate_p = !negate_p; 3044 } 3045 3046 if (negate_p) 3047 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1); 3048 3049 if (seq) 3050 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); 3051 3052 if (cond) 3053 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1, 3054 op2, addop, else_value); 3055 else 3056 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop); 3057 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt)); 3058 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun, 3059 use_stmt)); 3060 gsi_replace (&gsi, fma_stmt, true); 3061 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS 3062 regardless of where the negation occurs. */ 3063 gimple *orig_stmt = gsi_stmt (gsi); 3064 if (fold_stmt (&gsi, follow_all_ssa_edges)) 3065 { 3066 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi))) 3067 gcc_unreachable (); 3068 update_stmt (gsi_stmt (gsi)); 3069 } 3070 3071 if (dump_file && (dump_flags & TDF_DETAILS)) 3072 { 3073 fprintf (dump_file, "Generated FMA "); 3074 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE); 3075 fprintf (dump_file, "\n"); 3076 } 3077 3078 /* If the FMA result is negated in a single use, fold the negation 3079 too. 
*/ 3080 orig_stmt = gsi_stmt (gsi); 3081 use_operand_p use_p; 3082 gimple *neg_stmt; 3083 if (is_gimple_call (orig_stmt) 3084 && gimple_call_internal_p (orig_stmt) 3085 && gimple_call_lhs (orig_stmt) 3086 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME 3087 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt) 3088 && is_gimple_assign (neg_stmt) 3089 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR 3090 && !stmt_could_throw_p (cfun, neg_stmt)) 3091 { 3092 gsi = gsi_for_stmt (neg_stmt); 3093 if (fold_stmt (&gsi, follow_all_ssa_edges)) 3094 { 3095 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi))) 3096 gcc_unreachable (); 3097 update_stmt (gsi_stmt (gsi)); 3098 if (dump_file && (dump_flags & TDF_DETAILS)) 3099 { 3100 fprintf (dump_file, "Folded FMA negation "); 3101 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE); 3102 fprintf (dump_file, "\n"); 3103 } 3104 } 3105 } 3106 3107 widen_mul_stats.fmas_inserted++; 3108 } 3109} 3110 3111/* Data necessary to perform the actual transformation from a multiplication 3112 and an addition to an FMA after decision is taken it should be done and to 3113 then delete the multiplication statement from the function IL. */ 3114 3115struct fma_transformation_info 3116{ 3117 gimple *mul_stmt; 3118 tree mul_result; 3119 tree op1; 3120 tree op2; 3121}; 3122 3123/* Structure containing the current state of FMA deferring, i.e. whether we are 3124 deferring, whether to continue deferring, and all data necessary to come 3125 back and perform all deferred transformations. */ 3126 3127class fma_deferring_state 3128{ 3129public: 3130 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually 3131 do any deferring. 
 */

  fma_deferring_state (bool perform_deferring)
    : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
      m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}

  /* List of FMA candidates for which the transformation has been determined
     possible but which at this point in the BB analysis we do not consider
     beneficial.  */
  auto_vec<fma_transformation_info, 8> m_candidates;

  /* Set of results of multiplication that are part of an already deferred FMA
     candidates.  */
  hash_set<tree> m_mul_result_set;

  /* The PHI that supposedly feeds back result of a FMA to another over loop
     boundary.  */
  gphi *m_initial_phi;

  /* Result of the last produced FMA candidate or NULL if there has not been
     one.  */
  tree m_last_result;

  /* If true, deferring might still be profitable.  If false, transform all
     candidates and no longer defer.  */
  bool m_deferring_p;
};

/* Transform all deferred FMA candidates and mark STATE as no longer
   deferring.  */

static void
cancel_fma_deferring (fma_deferring_state *state)
{
  if (!state->m_deferring_p)
    return;

  for (unsigned i = 0; i < state->m_candidates.length (); i++)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Generating deferred FMA\n");

      const fma_transformation_info &fti = state->m_candidates[i];
      convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);

      /* The multiplication is no longer needed once all of its uses have
	 been turned into FMAs.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
      gsi_remove (&gsi, true);
      release_defs (fti.mul_stmt);
    }
  state->m_deferring_p = false;
}

/* If OP is an SSA name defined by a PHI node, return the PHI statement.
   Otherwise return NULL.  */

static gphi *
result_of_phi (tree op)
{
  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
}

/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
   previously processed BBs marked in LAST_RESULT_SET.  */

static bool
last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
				      hash_set<tree> *last_result_set)
{
  ssa_op_iter iter;
  use_operand_p use;
  FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
    {
      tree t = USE_FROM_PTR (use);
      if (t == state->m_last_result
	  || last_result_set->contains (t))
	return true;
    }

  return false;
}

/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.
   If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
   on MUL_COND, otherwise it is unconditional.

   If STATE indicates that we are deferring FMA transformation, that means
   that we do not produce FMAs for basic blocks which look like:

     <bb 6>
     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
     _65 = _14 * _16;
     accumulator_66 = _65 + accumulator_111;

   or its unrolled version, i.e. with several FMA candidates that feed result
   of one into the addend of another.  Instead, we add them to a list in STATE
   and if we later discover an FMA candidate that is not part of such a chain,
   we go back and perform all deferred past candidates.
*/

static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
		     fma_deferring_state *state, tree mul_cond = NULL_TREE)
{
  tree mul_result = gimple_get_lhs (mul_stmt);
  /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
     if the statement was left just for the side-effects.  */
  if (!mul_result)
    return false;
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt, *neguse_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  /* Contracting a floating-point multiply-add is only allowed when
     -ffp-contract is not "off".  */
  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
  if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
    return false;

  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case,
     it is DCE job.  */
  if (has_zero_uses (mul_result))
    return false;

  /* Deferring is only considered for results narrow enough per
     param_avoid_fma_max_bits.  */
  bool check_defer
    = (state->m_deferring_p
       && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
		    param_avoid_fma_max_bits));
  bool defer = check_defer;
  bool seen_negate_p = false;
  /* Make sure that the multiplication statement becomes dead after
     the transformation, thus that all uses are transformed to FMAs.
     This means we assume that an FMA operation has the same cost
     as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      if (is_gimple_debug (use_stmt))
	continue;

      /* For now restrict these operations to single basic blocks.  In theory
	 we would want to support sinking the multiplication in
	 m = a*b;
	 if ()
	   ma = m + c;
	 else
	   d = m;
	 to form a fma in the then block and sink the multiplication to the
	 else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	return false;

      /* A negate on the multiplication leads to FNMA.  */
      if (is_gimple_assign (use_stmt)
	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
	{
	  ssa_op_iter iter;
	  use_operand_p usep;

	  /* If (due to earlier missed optimizations) we have two
	     negates of the same value, treat them as equivalent
	     to a single negate with multiple uses.  */
	  if (seen_negate_p)
	    return false;

	  result = gimple_assign_lhs (use_stmt);

	  /* Make sure the negate statement becomes dead with this
	     single transformation.  */
	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
			       &use_p, &neguse_stmt))
	    return false;

	  /* Make sure the multiplication isn't also used on that stmt.  */
	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
	    if (USE_FROM_PTR (usep) == mul_result)
	      return false;

	  /* Re-validate.  */
	  use_stmt = neguse_stmt;
	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	    return false;

	  negate_p = seen_negate_p = true;
	}

      tree cond, else_value, ops[3];
      tree_code code;
      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
					      &else_value))
	return false;

      switch (code)
	{
	case MINUS_EXPR:
	  if (ops[1] == result)
	    negate_p = !negate_p;
	  break;
	case PLUS_EXPR:
	  break;
	default:
	  /* FMA can only be formed from PLUS and MINUS.  */
	  return false;
	}

      if (mul_cond && cond != mul_cond)
	return false;

      if (cond)
	{
	  if (cond == result || else_value == result)
	    return false;
	  if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
	    return false;
	}

      /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
	 we'll visit later, we might be able to get a more profitable
	 match with fnma.
	 OTOH, if we don't, a negate / fma pair has likely lower latency
	 than a mult / subtract pair.  */
      if (code == MINUS_EXPR
	  && !negate_p
	  && ops[0] == result
	  && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
	  && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
	  && TREE_CODE (ops[1]) == SSA_NAME
	  && has_single_use (ops[1]))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
	  if (is_gimple_assign (stmt2)
	      && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
	    return false;
	}

      /* We can't handle a * b + a * b.  */
      if (ops[0] == ops[1])
	return false;
      /* If deferring, make sure we are not looking at an instruction that
	 wouldn't have existed if we were not.  */
      if (state->m_deferring_p
	  && (state->m_mul_result_set.contains (ops[0])
	      || state->m_mul_result_set.contains (ops[1])))
	return false;

      if (check_defer)
	{
	  tree use_lhs = gimple_get_lhs (use_stmt);
	  if (state->m_last_result)
	    {
	      /* Keep deferring only while each candidate feeds the next
		 through its addend.  */
	      if (ops[1] == state->m_last_result
		  || ops[0] == state->m_last_result)
		defer = true;
	      else
		defer = false;
	    }
	  else
	    {
	      gcc_checking_assert (!state->m_initial_phi);
	      gphi *phi;
	      if (ops[0] == result)
		phi = result_of_phi (ops[1]);
	      else
		{
		  gcc_assert (ops[1] == result);
		  phi = result_of_phi (ops[0]);
		}

	      if (phi)
		{
		  state->m_initial_phi = phi;
		  defer = true;
		}
	      else
		defer = false;
	    }

	  state->m_last_result = use_lhs;
	  check_defer = false;
	}
      else
	defer = false;

      /* While it is possible to validate whether or not the exact form that
	 we've recognized is available in the backend, the assumption is that
	 if the deferring logic above did not trigger, the transformation is
	 never a loss.  For instance, suppose the target only has the plain FMA
	 pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
	 MUL+SUB for FMA+NEG, which is still two operations.  Consider
	 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
	 form the two NEGs are independent and could be run in parallel.  */
    }

  if (defer)
    {
      /* Record the candidate; it will be transformed later by
	 cancel_fma_deferring if deferring stops paying off.  */
      fma_transformation_info fti;
      fti.mul_stmt = mul_stmt;
      fti.mul_result = mul_result;
      fti.op1 = op1;
      fti.op2 = op2;
      state->m_candidates.safe_push (fti);
      state->m_mul_result_set.add (mul_result);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Deferred generating FMA for multiplication ");
	  print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
	  fprintf (dump_file, "\n");
	}

      return false;
    }
  else
    {
      if (state->m_deferring_p)
	cancel_fma_deferring (state);
      convert_mult_to_fma_1 (mul_result, op1, op2);
      return true;
    }
}


/* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
   a check for non-zero like:
   _1 = x_4(D) * y_5(D);
   *res_7(D) = _1;
   if (x_4(D) != 0)
     goto <bb 3>; [50.00%]
   else
     goto <bb 4>; [50.00%]

   <bb 3> [local count: 536870913]:
   _2 = _1 / x_4(D);
   _9 = _2 != y_5(D);
   _10 = (int) _9;

   <bb 4> [local count: 1073741824]:
   # iftmp.0_3 = PHI <_10(3), 0(2)>
   then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
   optimize the x_4(D) != 0 condition to 1.
*/

static void
maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
			       gimple *div_stmt, bool *cfg_changed)
{
  /* The division and the overflow comparison must be in the same block,
     which must have a unique predecessor ending in the zero check.  */
  basic_block bb = gimple_bb (cond_stmt);
  if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
    return;
  edge pred_edge = single_pred_edge (bb);
  basic_block pred_bb = pred_edge->src;
  if (EDGE_COUNT (pred_bb->succs) != 2)
    return;
  /* The successor edge of PRED_BB that bypasses BB.  */
  edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
  edge other_succ_edge = NULL;
  if (gimple_code (cond_stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (bb->succs) != 2)
	return;
      /* Pick the BB successor edge corresponding to the comparison result
	 that matches the PHI value on the bypass path: for a NE overflow
	 check that is the false edge, otherwise the true edge.  */
      other_succ_edge = EDGE_SUCC (bb, 0);
      if (gimple_cond_code (cond_stmt) == NE_EXPR)
	{
	  if (other_succ_edge->flags & EDGE_TRUE_VALUE)
	    other_succ_edge = EDGE_SUCC (bb, 1);
	}
      else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
	/* Fixed: this used to reassign EDGE_SUCC (bb, 0), a no-op, instead
	   of switching to the other successor edge.  */
	other_succ_edge = EDGE_SUCC (bb, 1);
      if (other_edge->dest != other_succ_edge->dest)
	return;
    }
  else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
    return;
  /* The guarding condition must be x != 0 (or x == 0 with inverted edge
     flags) on the divisor (possibly through a same-precision cast).  */
  gimple *zero_cond = last_stmt (pred_bb);
  if (zero_cond == NULL
      || gimple_code (zero_cond) != GIMPLE_COND
      || (gimple_cond_code (zero_cond)
	  != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
      || !integer_zerop (gimple_cond_rhs (zero_cond)))
    return;
  tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
  if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
    return;
  if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
    {
      /* Allow the divisor to be result of a same precision cast
	 from zero_cond_lhs.  */
      tree rhs2 = gimple_assign_rhs2 (div_stmt);
      if (TREE_CODE (rhs2) != SSA_NAME)
	return;
      gimple *g = SSA_NAME_DEF_STMT (rhs2);
      if (!gimple_assign_cast_p (g)
	  || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
	  || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
	      != TYPE_PRECISION (TREE_TYPE (rhs2))))
	return;
    }
  /* Verify BB contains only the expected statements: members of MUL_STMTS,
     the division, up to 3 casts and finally COND_STMT.  */
  gimple_stmt_iterator gsi = gsi_after_labels (bb);
  mul_stmts.quick_push (div_stmt);
  if (is_gimple_debug (gsi_stmt (gsi)))
    gsi_next_nondebug (&gsi);
  unsigned cast_count = 0;
  while (gsi_stmt (gsi) != cond_stmt)
    {
      /* If original mul_stmt has a single use, allow it in the same bb,
	 we are looking then just at __builtin_mul_overflow_p.
	 Though, in that case the original mul_stmt will be replaced
	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
      gimple *mul_stmt;
      unsigned int i;
      bool ok = false;
      FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
	{
	  if (gsi_stmt (gsi) == mul_stmt)
	    {
	      ok = true;
	      break;
	    }
	}
      /* ++cast_count < 4 permits at most three casts.  */
      if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
	ok = true;
      if (!ok)
	return;
      gsi_next_nondebug (&gsi);
    }
  if (gimple_code (cond_stmt) == GIMPLE_COND)
    {
      /* Both paths into the join block must feed each PHI the same value,
	 otherwise folding the guard would change the PHI result.  */
      basic_block succ_bb = other_edge->dest;
      for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
	   gsi_next (&gpi))
	{
	  gphi *phi = gpi.phi ();
	  tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
	  tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
	  if (!operand_equal_p (v1, v2, 0))
	    return;
	}
    }
  else
    {
      /* COND_STMT is an assignment; its (possibly cast) result must be the
	 sole PHI argument from BB and the bypass path must provide the
	 value the condition would yield when the divisor is zero.  */
      tree lhs = gimple_assign_lhs (cond_stmt);
      if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
	return;
      gsi_next_nondebug (&gsi);
      if (!gsi_end_p (gsi))
	{
	  if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
	    return;
	  gimple *cast_stmt = gsi_stmt (gsi);
	  if (!gimple_assign_cast_p (cast_stmt))
	    return;
	  tree new_lhs = gimple_assign_lhs (cast_stmt);
	  gsi_next_nondebug (&gsi);
	  if (!gsi_end_p (gsi)
	      || !new_lhs
	      || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
	      || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
	    return;
	  lhs = new_lhs;
	}
      edge succ_edge = single_succ_edge (bb);
      basic_block succ_bb = succ_edge->dest;
      gsi = gsi_start_phis (succ_bb);
      if (gsi_end_p (gsi))
	return;
      gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
      gsi_next (&gsi);
      if (!gsi_end_p (gsi))
	return;
      if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
	return;
      tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
      if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (cond_stmt);
	  if (TREE_CODE (cond) == NE_EXPR)
	    {
	      if (!operand_equal_p (other_val,
				    gimple_assign_rhs3 (cond_stmt), 0))
		return;
	    }
	  else if (!operand_equal_p (other_val,
				     gimple_assign_rhs2 (cond_stmt), 0))
	    return;
	}
      else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
	{
	  if (!integer_zerop (other_val))
	    return;
	}
      else if (!integer_onep (other_val))
	return;
    }
  /* Everything matched; the zero check is redundant, fold it to a
     constant so the guarded path is always taken.  */
  gcond *zero_gcond = as_a <gcond *> (zero_cond);
  if (pred_edge->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (zero_gcond);
  else
    gimple_cond_make_false (zero_gcond);
  update_stmt (zero_cond);
  *cfg_changed = true;
}

/* Helper function for arith_overflow_check_p.  Return true
   if VAL1 is equal to VAL2 cast to corresponding integral type
   with other signedness or vice versa.
*/

static bool
arith_cast_equal_p (tree val1, tree val2)
{
  /* Two constants compare by value; otherwise both must be SSA names
     with one being a cast of the other.  */
  if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
    return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
  else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
    return false;
  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
    return true;
  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
    return true;
  return false;
}

/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
   and 0 otherwise.  Note USE_STMT is passed by reference and for
   MULT_EXPR is advanced to the comparison consuming the division
   result when a match is found.  */

static int
arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
			tree maxval, tree *other)
{
  enum tree_code ccode = ERROR_MARK;
  tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree multop = NULL_TREE, divlhs = NULL_TREE;
  gimple *cur_use_stmt = use_stmt;

  if (code == MULT_EXPR)
    {
      /* For r = a * b the overflow check is _1 = r / a (or r / b)
	 followed by a comparison of _1 against the other operand;
	 USE_STMT must be that division.  */
      if (!is_gimple_assign (use_stmt))
	return 0;
      if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
	return 0;
      if (gimple_assign_rhs1 (use_stmt) != lhs)
	return 0;
      if (cast_stmt)
	{
	  if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
	    multop = rhs2;
	  else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
	    multop = rhs1;
	  else
	    return 0;
	}
      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
	multop = rhs2;
      else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
	multop = rhs1;
      else
	return 0;
      if (stmt_ends_bb_p (use_stmt))
	return 0;
      divlhs = gimple_assign_lhs (use_stmt);
      if (!divlhs)
	return 0;
      use_operand_p use;
      /* The division result must have a single use: the comparison.  */
      if (!single_imm_use (divlhs, &use, &cur_use_stmt))
	return 0;
    }
  /* Extract the comparison code and operands from CUR_USE_STMT, which may
     be a GIMPLE_COND, a binary comparison assignment or a COND_EXPR with
     a comparison as its condition.  */
  if (gimple_code (cur_use_stmt) == GIMPLE_COND)
    {
      ccode = gimple_cond_code (cur_use_stmt);
      crhs1 = gimple_cond_lhs (cur_use_stmt);
      crhs2 = gimple_cond_rhs (cur_use_stmt);
    }
  else if (is_gimple_assign (cur_use_stmt))
    {
      if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
	{
	  ccode = gimple_assign_rhs_code (cur_use_stmt);
	  crhs1 = gimple_assign_rhs1 (cur_use_stmt);
	  crhs2 = gimple_assign_rhs2 (cur_use_stmt);
	}
      else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
	{
	  tree cond = gimple_assign_rhs1 (cur_use_stmt);
	  if (COMPARISON_CLASS_P (cond))
	    {
	      ccode = TREE_CODE (cond);
	      crhs1 = TREE_OPERAND (cond, 0);
	      crhs2 = TREE_OPERAND (cond, 1);
	    }
	  else
	    return 0;
	}
      else
	return 0;
    }
  else
    return 0;

  if (TREE_CODE_CLASS (ccode) != tcc_comparison)
    return 0;

  switch (ccode)
    {
    case GT_EXPR:
    case LE_EXPR:
      if (maxval)
	{
	  /* r = a + b; r > maxval or r <= maxval  */
	  if (crhs1 == lhs
	      && TREE_CODE (crhs2) == INTEGER_CST
	      && tree_int_cst_equal (crhs2, maxval))
	    return ccode == GT_EXPR ? 1 : -1;
	  break;
	}
      /* r = a - b; r > a or r <= a
	 r = a + b; a > r or a <= r or b > r or b <= r.  */
      if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
	  || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
	      && crhs2 == lhs))
	return ccode == GT_EXPR ? 1 : -1;
      /* r = ~a; b > r or b <= r.  */
      if (code == BIT_NOT_EXPR && crhs2 == lhs)
	{
	  if (other)
	    *other = crhs1;
	  return ccode == GT_EXPR ? 1 : -1;
	}
      break;
    case LT_EXPR:
    case GE_EXPR:
      if (maxval)
	break;
      /* r = a - b; a < r or a >= r
	 r = a + b; r < a or r >= a or r < b or r >= b.  */
      if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
	  || (code == PLUS_EXPR && crhs1 == lhs
	      && (crhs2 == rhs1 || crhs2 == rhs2)))
	return ccode == LT_EXPR ? 1 : -1;
      /* r = ~a; r < b or r >= b.  */
      if (code == BIT_NOT_EXPR && crhs1 == lhs)
	{
	  if (other)
	    *other = crhs2;
	  return ccode == LT_EXPR ? 1 : -1;
	}
      break;
    case EQ_EXPR:
    case NE_EXPR:
      /* r = a * b; _1 = r / a; _1 == b
	 r = a * b; _1 = r / b; _1 == a
	 r = a * b; _1 = r / a; _1 != b
	 r = a * b; _1 = r / b; _1 != a.  */
      if (code == MULT_EXPR)
	{
	  if (cast_stmt)
	    {
	      if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
		  || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
		{
		  use_stmt = cur_use_stmt;
		  return ccode == NE_EXPR ? 1 : -1;
		}
	    }
	  else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
		   || (crhs2 == divlhs && crhs1 == multop))
	    {
	      use_stmt = cur_use_stmt;
	      return ccode == NE_EXPR ? 1 : -1;
	    }
	}
      break;
    default:
      break;
    }
  return 0;
}

/* Recognize for unsigned x
   x = y - z;
   if (x > y)
   where there are other uses of x and replace it with
   _7 = .SUB_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and similarly for addition.

   Also recognize:
   yc = (type) y;
   zc = (type) z;
   x = yc + zc;
   if (x > max)
   where y and z have unsigned types with maximum max
   and there are other uses of x and all of those cast x
   back to that unsigned type and again replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _9 = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and replace (utype) x with _9.

   Also recognize:
   x = ~z;
   if (y > x)
   and replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _8 = IMAGPART_EXPR <_7>;
   if (_8)

   And also recognize:
   z = x * y;
   if (x != 0)
     goto <bb 3>; [50.00%]
   else
     goto <bb 4>; [50.00%]

   <bb 3> [local count: 536870913]:
   _2 = z / x;
   _9 = _2 != y;
   _10 = (int) _9;

   <bb 4> [local count: 1073741824]:
   # iftmp.0_3 = PHI <_10(3), 0(2)>
   and replace it with
   _7 = .MUL_OVERFLOW (x, y);
   z = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   _9 = _8 != 0;
   iftmp.0_3 = (int) _9;  */

static bool
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
		      enum tree_code code, bool *cfg_changed)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  use_operand_p use_p;
  imm_use_iterator iter;
  bool use_seen = false;
  bool ovf_use_seen = false;
  gimple *use_stmt;
  gimple *add_stmt = NULL;
  bool add_first = false;
  gimple *cond_stmt = NULL;
  gimple *cast_stmt = NULL;
  tree cast_lhs = NULL_TREE;

  gcc_checking_assert (code == PLUS_EXPR
		       || code == MINUS_EXPR
		       || code ==
MULT_EXPR
		       || code == BIT_NOT_EXPR);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type)
      || has_zero_uses (lhs)
      || (code != PLUS_EXPR
	  && code != MULT_EXPR
	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing))
    return false;

  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  /* Classify the immediate uses of LHS into overflow checks
     (OVF_USE_SEEN) and other uses (USE_SEEN); for MULT_EXPR also
     remember a signed same-precision cast of the result.  */
  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
    {
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;

      tree other = NULL_TREE;
      if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
	{
	  if (code == BIT_NOT_EXPR)
	    {
	      gcc_assert (other);
	      if (TREE_CODE (other) != SSA_NAME)
		return false;
	      if (rhs2 == NULL)
		rhs2 = other;
	      else
		return false;
	      cond_stmt = use_stmt;
	    }
	  ovf_use_seen = true;
	}
      else
	{
	  use_seen = true;
	  if (code == MULT_EXPR
	      && cast_stmt == NULL
	      && gimple_assign_cast_p (use_stmt))
	    {
	      cast_lhs = gimple_assign_lhs (use_stmt);
	      if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
		  && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
		  && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
		      == TYPE_PRECISION (TREE_TYPE (lhs))))
		cast_stmt = use_stmt;
	      else
		cast_lhs = NULL_TREE;
	    }
	}
      if (ovf_use_seen && use_seen)
	break;
    }

  if (!ovf_use_seen
      && code == MULT_EXPR
      && cast_stmt)
    {
      /* No direct overflow check found; look for one on the casted
	 (signed) result instead.  */
      if (TREE_CODE (rhs1) != SSA_NAME
	  || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
	return false;
      FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
	{
	  use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;

	  if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
				      NULL_TREE, NULL))
	    ovf_use_seen = true;
	}
    }
  else
    {
      cast_stmt = NULL;
      cast_lhs = NULL_TREE;
    }

  tree maxval = NULL_TREE;
  if (!ovf_use_seen
      || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
      || (code == PLUS_EXPR
	  && optab_handler (uaddv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing)
      || (code == MULT_EXPR
	  && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      /* The direct pattern did not match; try the widened-addition form
	 yc = (type) y; zc = (type) z; x = yc + zc; if (x > max).  */
      if (code != PLUS_EXPR)
	return false;
      if (TREE_CODE (rhs1) != SSA_NAME
	  || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
	return false;
      rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
      tree type1 = TREE_TYPE (rhs1);
      if (!INTEGRAL_TYPE_P (type1)
	  || !TYPE_UNSIGNED (type1)
	  || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
	  || (TYPE_PRECISION (type1)
	      != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
	return false;
      if (TREE_CODE (rhs2) == INTEGER_CST)
	{
	  /* A constant addend must fit in the narrow type.  */
	  if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
				    TYPE_PRECISION (type1),
				    UNSIGNED), 0))
	    return false;
	  rhs2 = fold_convert (type1, rhs2);
	}
      else
	{
	  if (TREE_CODE (rhs2) != SSA_NAME
	      || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
	    return false;
	  rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
	  tree type2 = TREE_TYPE (rhs2);
	  if (!INTEGRAL_TYPE_P (type2)
	      || !TYPE_UNSIGNED (type2)
	      || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
	      || (TYPE_PRECISION (type2)
		  != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
	    return false;
	}
      /* Compute in the wider of the two narrow types.  */
      if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
	type = type1;
      else
	type = TREE_TYPE (rhs2);

      if (TREE_CODE (type) != INTEGER_TYPE
	  || optab_handler (uaddv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing)
	return false;

      maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
						      UNSIGNED));
      ovf_use_seen = false;
      use_seen = false;
      basic_block use_bb = NULL;
      /* All uses must either be the x > max overflow check or casts
	 back to a type no wider than the narrow computation type.  */
      FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
	{
	  use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;

	  if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
	    {
	      ovf_use_seen = true;
	      use_bb = gimple_bb (use_stmt);
	    }
	  else
	    {
	      if (!gimple_assign_cast_p (use_stmt)
		  || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
		return false;
	      tree use_lhs = gimple_assign_lhs (use_stmt);
	      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
		  || (TYPE_PRECISION (TREE_TYPE (use_lhs))
		      > TYPE_PRECISION (type)))
		return false;
	      use_seen = true;
	    }
	}
      if (!ovf_use_seen)
	return false;
      if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
	{
	  if (!use_seen)
	    return false;
	  tree new_rhs1 = make_ssa_name (type);
	  gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  rhs1 = new_rhs1;
	}
      else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
	{
	  if (!use_seen)
	    return false;
	  tree new_rhs2 = make_ssa_name (type);
	  gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  rhs2 = new_rhs2;
	}
      else if (!use_seen)
	{
	  /* If there are no uses of the wider addition, check if
	     forwprop has not created a narrower addition.
	     Require it to be in the same bb as the overflow check.  */
	  FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
	    {
	      use_stmt = USE_STMT (use_p);
	      if (is_gimple_debug (use_stmt))
		continue;

	      if (use_stmt == stmt)
		continue;

	      if (!is_gimple_assign (use_stmt)
		  || gimple_bb (use_stmt) != use_bb
		  || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
		continue;

	      if (gimple_assign_rhs1 (use_stmt) == rhs1)
		{
		  if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
					rhs2, 0))
		    continue;
		}
	      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
		{
		  if (gimple_assign_rhs1 (use_stmt) != rhs2)
		    continue;
		}
	      else
		continue;

	      add_stmt = use_stmt;
	      break;
	    }
	  if (add_stmt == NULL)
	    return false;

	  /* If stmt and add_stmt are in the same bb, we need to find out
	     which one is earlier.  If they are in different bbs, we've
	     checked add_stmt is in the same bb as one of the uses of the
	     stmt lhs, so stmt needs to dominate add_stmt too.  */
	  if (gimple_bb (stmt) == gimple_bb (add_stmt))
	    {
	      gimple_stmt_iterator gsif = *gsi;
	      gimple_stmt_iterator gsib = *gsi;
	      int i;
	      /* Search both forward and backward from stmt and have a small
		 upper bound.  */
	      for (i = 0; i < 128; i++)
		{
		  if (!gsi_end_p (gsib))
		    {
		      gsi_prev_nondebug (&gsib);
		      if (gsi_stmt (gsib) == add_stmt)
			{
			  add_first = true;
			  break;
			}
		    }
		  else if (gsi_end_p (gsif))
		    break;
		  if (!gsi_end_p (gsif))
		    {
		      gsi_next_nondebug (&gsif);
		      if (gsi_stmt (gsif) == add_stmt)
			break;
		    }
		}
	      if (i == 128)
		return false;
	      if (add_first)
		*gsi = gsi_for_stmt (add_stmt);
	    }
	}
    }

  if (code == BIT_NOT_EXPR)
    *gsi = gsi_for_stmt (cond_stmt);

  /* Statements recorded here are later passed to
     maybe_optimize_guarding_check for the MUL_OVERFLOW case.  */
  auto_vec<gimple *, 8> mul_stmts;
  if (code == MULT_EXPR && cast_stmt)
    {
      /* Perform the multiplication in the signed type of the cast,
	 reusing existing casts of the operands where possible.  */
      type = TREE_TYPE (cast_lhs);
      gimple *g = SSA_NAME_DEF_STMT (rhs1);
      if (gimple_assign_cast_p (g)
	  && useless_type_conversion_p (type,
					TREE_TYPE (gimple_assign_rhs1 (g)))
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	rhs1 = gimple_assign_rhs1 (g);
      else
	{
	  g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  rhs1 = gimple_assign_lhs (g);
	  mul_stmts.quick_push (g);
	}
      if (TREE_CODE (rhs2) == INTEGER_CST)
	rhs2 = fold_convert (type, rhs2);
      else
	{
	  g = SSA_NAME_DEF_STMT (rhs2);
	  if (gimple_assign_cast_p (g)
	      && useless_type_conversion_p (type,
					    TREE_TYPE (gimple_assign_rhs1 (g)))
	      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	    rhs2 = gimple_assign_rhs1 (g);
	  else
	    {
	      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
	      gsi_insert_before (gsi, g, GSI_SAME_STMT);
	      rhs2 = gimple_assign_lhs (g);
	      mul_stmts.quick_push (g);
	    }
	}
    }
  /* Build the .{ADD,SUB,MUL}_OVERFLOW call returning a complex value:
     real part is the arithmetic result, imaginary part the overflow flag.  */
  tree ctype = build_complex_type (type);
  gcall *g = gimple_build_call_internal (code == MULT_EXPR
					 ? IFN_MUL_OVERFLOW
					 : code != MINUS_EXPR
					 ?
NE_EXPR : EQ_EXPR, 4294 boolean_type_node, ovf, 4295 build_int_cst (type, 0)); 4296 gimple_assign_set_rhs1 (use_stmt, cond); 4297 } 4298 } 4299 update_stmt (use_stmt); 4300 if (code == MULT_EXPR && use_stmt != orig_use_stmt) 4301 { 4302 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt); 4303 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt, 4304 cfg_changed); 4305 gsi_remove (&gsi2, true); 4306 release_ssa_name (gimple_assign_lhs (orig_use_stmt)); 4307 } 4308 } 4309 if (maxval) 4310 { 4311 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); 4312 gsi_remove (&gsi2, true); 4313 if (add_stmt) 4314 { 4315 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt), 4316 new_lhs); 4317 gsi2 = gsi_for_stmt (add_stmt); 4318 gsi_replace (&gsi2, g, true); 4319 } 4320 } 4321 else if (code == BIT_NOT_EXPR) 4322 { 4323 *gsi = gsi_for_stmt (stmt); 4324 gsi_remove (gsi, true); 4325 release_ssa_name (lhs); 4326 return true; 4327 } 4328 return false; 4329} 4330 4331/* Return true if target has support for divmod. */ 4332 4333static bool 4334target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode) 4335{ 4336 /* If target supports hardware divmod insn, use it for divmod. */ 4337 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing) 4338 return true; 4339 4340 /* Check if libfunc for divmod is available. */ 4341 rtx libfunc = optab_libfunc (divmod_optab, mode); 4342 if (libfunc != NULL_RTX) 4343 { 4344 /* If optab_handler exists for div_optab, perhaps in a wider mode, 4345 we don't want to use the libfunc even if it exists for given mode. */ 4346 machine_mode div_mode; 4347 FOR_EACH_MODE_FROM (div_mode, mode) 4348 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing) 4349 return false; 4350 4351 return targetm.expand_divmod_libfunc != NULL; 4352 } 4353 4354 return false; 4355} 4356 4357/* Check if stmt is candidate for divmod transform. 
   */

static bool
divmod_candidate_p (gassign *stmt)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  machine_mode mode = TYPE_MODE (type);
  optab divmod_optab, div_optab;

  /* Pick the optabs matching the signedness of the division.  */
  if (TYPE_UNSIGNED (type))
    {
      divmod_optab = udivmod_optab;
      div_optab = udiv_optab;
    }
  else
    {
      divmod_optab = sdivmod_optab;
      div_optab = sdiv_optab;
    }

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  /* Disable the transform if either is a constant, since division-by-constant
     may have specialized expansion.  */
  if (CONSTANT_CLASS_P (op1))
    return false;

  if (CONSTANT_CLASS_P (op2))
    {
      if (integer_pow2p (op2))
	return false;

      /* Narrow types are left to the usual division-by-constant expansion;
	 only wide-precision constant divisors fall through to DIVMOD.  */
      if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
	return false;

      /* If the divisor is not power of 2 and the precision wider than
	 HWI, expand_divmod punts on that, so in that case it is better
	 to use divmod optab or libfunc.  Similarly if choose_multiplier
	 might need pre/post shifts of BITS_PER_WORD or more.  */
    }

  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
     expand using the [su]divv optabs.  */
  if (TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
    return false;

  return true;
}

/* This function looks for:
     t1 = a TRUNC_DIV_EXPR b;
     t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
     complex_tmp = DIVMOD (a, b);
     t1 = REALPART_EXPR(complex_tmp);
     t2 = IMAGPART_EXPR(complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */

static bool
convert_to_divmod (gassign *stmt)
{
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (cfun, use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  if (bb == top_bb)
	    {
	      /* Same block: prefer the statement that comes first
		 (smaller uid).  */
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (cfun, use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  /* A lone modulo is not worth a DIVMOD call; require at least one
     division among the collected statements.  */
  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}

/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree stype = TREE_TYPE (lhs);
  tree sarg0 = gimple_assign_rhs1 (stmt);
  tree sarg1 = gimple_assign_rhs2 (stmt);

  /* Only handle an integer shift of an SSA name by a known constant,
     where the shifted value has no other uses.  */
  if (TREE_CODE (stype) != INTEGER_TYPE
      || TREE_CODE (sarg1) != INTEGER_CST
      || TREE_CODE (sarg0) != SSA_NAME
      || !tree_fits_uhwi_p (sarg1)
      || !has_single_use (sarg0))
    return false;

  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
  if (!def)
    return false;

  enum tree_code mcode = gimple_assign_rhs_code (def);
  if (mcode == NOP_EXPR)
    {
      /* Look through a single-use conversion to find the multiply.  */
      tree tmp = gimple_assign_rhs1 (def);
      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
	return false;
      def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
      if (!def)
	return false;
      mcode = gimple_assign_rhs_code (def);
    }

  /* The value being shifted must come from a widening multiply in the
     same basic block.  */
  if (mcode != WIDEN_MULT_EXPR
      || gimple_bb (def) != gimple_bb (stmt))
    return false;
  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
  if (TREE_CODE (mtype) != INTEGER_TYPE
      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
    return false;

  tree mop1 = gimple_assign_rhs1 (def);
  tree mop2 = gimple_assign_rhs2 (def);
  tree optype = TREE_TYPE (mop1);
  bool unsignedp = TYPE_UNSIGNED (optype);
  unsigned int prec = TYPE_PRECISION (optype);

  if (unsignedp != TYPE_UNSIGNED (mtype)
      || TYPE_PRECISION (mtype) != 2 * prec)
    return false;

  /* The shift must select (part of) the high half of the product.  */
  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
  if (bits < prec || bits >= 2 * prec)
    return false;

  /* For the time being, require operands to have the same sign.  */
  if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
    return false;

  machine_mode mode = TYPE_MODE (optype);
  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (tab, mode) == CODE_FOR_nothing)
    return false;

  location_t loc = gimple_location (stmt);
  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
					   MULT_HIGHPART_EXPR, mop1, mop2);
  tree highpart2 = highpart1;
  tree ntype = optype;

  /* Adjust the signedness of the highpart to match the result type
     before any remaining shift.  */
  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
    {
      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
				    : signed_type_for (optype);
      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
    }
  /* If the original shift moved past the high half boundary, shift the
     remaining BITS - PREC bits out of the highpart.  */
  if (bits > prec)
    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
					RSHIFT_EXPR, highpart2,
					build_int_cst (ntype, bits - prec));

  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
  gsi_replace (gsi, new_stmt, true);

  widen_mul_stats.highpart_mults_inserted++;
  return true;
}

/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   and turn it into:
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D));
   if (_1 == 0)
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (_1 == -1)
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (_1 == 1)
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   so that the backend can emit optimal comparison and
   conditional jump sequence.
   */

static void
optimize_spaceship (gimple *stmt)
{
  /* The first comparison of the chain must be an equality test.  */
  enum tree_code code = gimple_cond_code (stmt);
  if (code != EQ_EXPR && code != NE_EXPR)
    return;
  tree arg1 = gimple_cond_lhs (stmt);
  tree arg2 = gimple_cond_rhs (stmt);
  /* Only floating-point operands, only when the target provides a
     spaceship expander, and not for trivially equal operands.  */
  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
      || optab_handler (spaceship_optab,
			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
      || operand_equal_p (arg1, arg2, 0))
    return;

  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
  edge em1 = NULL, e1 = NULL, e2 = NULL;
  /* BB1 is the destination of the edge taken when the operands are
     not equal.  */
  bb1 = EDGE_SUCC (bb0, 1)->dest;
  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
    bb1 = EDGE_SUCC (bb0, 0)->dest;

  /* The second comparison must compare the same operands (in either
     order) and BB1 must contain nothing but that comparison.  */
  gimple *g = last_stmt (bb1);
  if (g == NULL
      || gimple_code (g) != GIMPLE_COND
      || !single_pred_p (bb1)
      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
      || !cond_only_block_p (bb1))
    return;

  /* Canonicalize the second comparison to LT_EXPR or GT_EXPR in terms
     of ARG1 <op> ARG2, accounting for swapped operands and LE/GE codes.  */
  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
			  ? LT_EXPR : GT_EXPR);
  switch (gimple_cond_code (g))
    {
    case LT_EXPR:
    case LE_EXPR:
      break;
    case GT_EXPR:
    case GE_EXPR:
      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
      break;
    default:
      return;
    }

  for (int i = 0; i < 2; ++i)
    {
      /* With NaNs, </<=/>/>= are false, so we need to look for the
	 third comparison on the false edge from whatever non-equality
	 comparison the second comparison is.  */
      if (HONOR_NANS (TREE_TYPE (arg1))
	  && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
	continue;

      /* Look for an optional third comparison of the same operands in
	 a successor of BB1.  */
      bb2 = EDGE_SUCC (bb1, i)->dest;
      g = last_stmt (bb2);
      if (g == NULL
	  || gimple_code (g) != GIMPLE_COND
	  || !single_pred_p (bb2)
	  || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	      ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	      : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
		 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
	  || !cond_only_block_p (bb2)
	  || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
	continue;

      /* Canonicalize the third comparison the same way.  */
      enum tree_code ccode2
	= (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
      switch (gimple_cond_code (g))
	{
	case LT_EXPR:
	case LE_EXPR:
	  break;
	case GT_EXPR:
	case GE_EXPR:
	  ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
	  break;
	default:
	  continue;
	}
      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
	continue;

      /* Record which edge corresponds to .SPACESHIP result -1 (EM1),
	 which to 1 (E1) and which to the remaining outcome (E2).  */
      if ((ccode == LT_EXPR)
	  ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
	{
	  em1 = EDGE_SUCC (bb1, 1 - i);
	  e1 = EDGE_SUCC (bb2, 0);
	  e2 = EDGE_SUCC (bb2, 1);
	  if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
	    std::swap (e1, e2);
	}
      else
	{
	  e1 = EDGE_SUCC (bb1, 1 - i);
	  em1 = EDGE_SUCC (bb2, 0);
	  e2 = EDGE_SUCC (bb2, 1);
	  if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
	    std::swap (em1, e2);
	}
      break;
    }

  /* No third comparison found: classify BB1's two outgoing edges only.  */
  if (em1 == NULL)
    {
      if ((ccode == LT_EXPR)
	  ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
	{
	  em1 = EDGE_SUCC (bb1, 1);
	  e1 = EDGE_SUCC (bb1, 0);
	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
	}
      else
	{
	  em1 = EDGE_SUCC (bb1, 0);
	  e1 = EDGE_SUCC (bb1, 1);
	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
	}
    }

  /* Emit LHS = .SPACESHIP (ARG1, ARG2) before the first comparison.  */
  g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
  tree lhs = make_ssa_name (integer_type_node);
  gimple_call_set_lhs (g, lhs);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  /* Rewrite the first comparison as LHS ==/!= 0.  */
  gcond *cond = as_a <gcond *> (stmt);
  gimple_cond_set_lhs (cond, lhs);
  gimple_cond_set_rhs (cond, integer_zero_node);
  update_stmt (stmt);

  /* Rewrite the second comparison as LHS ==/!= -1 or LHS ==/!= 1,
     depending on which outcome BB1 tests for.  */
  g = last_stmt (bb1);
  cond = as_a <gcond *> (g);
  gimple_cond_set_lhs (cond, lhs);
  if (em1->src == bb1 && e2 != em1)
    {
      gimple_cond_set_rhs (cond, integer_minus_one_node);
      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
				  ? EQ_EXPR : NE_EXPR);
    }
  else
    {
      gcc_assert (e1->src == bb1 && e2 != e1);
      gimple_cond_set_rhs (cond, integer_one_node);
      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
				  ? EQ_EXPR : NE_EXPR);
    }
  update_stmt (g);

  /* Rewrite the third comparison, if one was matched.  */
  if (e2 != e1 && e2 != em1)
    {
      g = last_stmt (bb2);
      cond = as_a <gcond *> (g);
      gimple_cond_set_lhs (cond, lhs);
      if (em1->src == bb2)
	gimple_cond_set_rhs (cond, integer_minus_one_node);
      else
	{
	  gcc_assert (e1->src == bb2);
	  gimple_cond_set_rhs (cond, integer_one_node);
	}
      gimple_cond_set_code (cond,
			    (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
      update_stmt (g);
    }

  /* .SPACESHIP yields only -1, 0, 1 or 2; record that range on LHS.  */
  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
  set_range_info (lhs, VR_RANGE, wm1, w2);
}


/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   or MULT_HIGHPART_EXPR where appropriate.
   */

namespace {

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass only runs with -fexpensive-optimizations at -O1 or above.  */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul

/* Walker class to perform the transformation in reverse dominance order. */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  virtual void after_dom_children (basic_block);

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};

/* Process all statements of BB, dispatching each candidate statement to
   the matching transformation helper.  Called by the dominator walk after
   BB's dominated children have been processed.  */

void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      /* When convert_mult_to_fma succeeds the multiplication has
		 been folded into its uses, so the statement is removed.  */
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      break;

	    case PLUS_EXPR:
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      break;

	    case BIT_NOT_EXPR:
	      /* match_arith_overflow may have removed the statement; in
		 that case GSI already points at the next one.  */
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a<gassign *> (stmt));
	      break;

	    case RSHIFT_EXPR:
	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    CASE_CFN_POW:
	      /* pow (x, 2.0) is x * x, a candidate for FMA formation.  */
	      if (gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				 &dconst2)
		  && convert_mult_to_fma (stmt,
					  gimple_call_arg (stmt, 0),
					  gimple_call_arg (stmt, 0),
					  &fma_state))
		{
		  unlink_stmt_vdef (stmt);
		  if (gsi_remove (&gsi, true)
		      && gimple_purge_dead_eh_edges (bb))
		    *m_cfg_changed_p = true;
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_MUL:
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))

		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_COND)
	optimize_spaceship (stmt);
      gsi_next (&gsi);
    }
  /* Decide the fate of any FMA candidates still deferred at the end of
     the block.  */
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}


/* Entry point of the pass: walk the dominator tree of FUN applying the
   transformations, then report statistics.  Returns TODO_cleanup_cfg
   when any transformation changed the CFG.  */

unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  /* Statement uids give an intra-block ordering, used e.g. by
     convert_to_divmod to pick the first statement in a block.  */
  renumber_gimple_stmt_uids (cfun);

  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);
  statistics_counter_event (fun, "highpart multiplications inserted",
			    widen_mul_stats.highpart_mults_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}