1/* Loop autoparallelization. 2 Copyright (C) 2006, 2007, 2008, 2009, 2010 3 Free Software Foundation, Inc. 4 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and 5 Zdenek Dvorak <dvorakz@suse.cz>. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it under 10the terms of the GNU General Public License as published by the Free 11Software Foundation; either version 3, or (at your option) any later 12version. 13 14GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15WARRANTY; without even the implied warranty of MERCHANTABILITY or 16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "tm.h" 27#include "tree.h" 28#include "rtl.h" 29#include "tree-flow.h" 30#include "cfgloop.h" 31#include "ggc.h" 32#include "tree-data-ref.h" 33#include "diagnostic.h" 34#include "tree-pass.h" 35#include "tree-scalar-evolution.h" 36#include "hashtab.h" 37#include "langhooks.h" 38#include "tree-vectorizer.h" 39 40/* This pass tries to distribute iterations of loops into several threads. 41 The implementation is straightforward -- for each loop we test whether its 42 iterations are independent, and if it is the case (and some additional 43 conditions regarding profitability and correctness are satisfied), we 44 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion 45 machinery do its job. 
46 47 The most of the complexity is in bringing the code into shape expected 48 by the omp expanders: 49 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction 50 variable and that the exit test is at the start of the loop body 51 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable 52 variables by accesses through pointers, and breaking up ssa chains 53 by storing the values incoming to the parallelized loop to a structure 54 passed to the new function as an argument (something similar is done 55 in omp gimplification, unfortunately only a small part of the code 56 can be shared). 57 58 TODO: 59 -- if there are several parallelizable loops in a function, it may be 60 possible to generate the threads just once (using synchronization to 61 ensure that cross-loop dependences are obeyed). 62 -- handling of common scalar dependence patterns (accumulation, ...) 63 -- handling of non-innermost loops */ 64 65/* 66 Reduction handling: 67 currently we use vect_is_simple_reduction() to detect reduction patterns. 68 The code transformation will be introduced by an example. 69 70 71parloop 72{ 73 int sum=1; 74 75 for (i = 0; i < N; i++) 76 { 77 x[i] = i + 3; 78 sum+=x[i]; 79 } 80} 81 82gimple-like code: 83header_bb: 84 85 # sum_29 = PHI <sum_11(5), 1(3)> 86 # i_28 = PHI <i_12(5), 0(3)> 87 D.1795_8 = i_28 + 3; 88 x[i_28] = D.1795_8; 89 sum_11 = D.1795_8 + sum_29; 90 i_12 = i_28 + 1; 91 if (N_6(D) > i_12) 92 goto header_bb; 93 94 95exit_bb: 96 97 # sum_21 = PHI <sum_11(4)> 98 printf (&"%d"[0], sum_21); 99 100 101after reduction transformation (only relevant parts): 102 103parloop 104{ 105 106.... 107 108 109 # Storing the initial value given by the user. # 110 111 .paral_data_store.32.sum.27 = 1; 112 113 #pragma omp parallel num_threads(4) 114 115 #pragma omp for schedule(static) 116 117 # The neutral element corresponding to the particular 118 reduction's operation, e.g. 0 for PLUS_EXPR, 119 1 for MULT_EXPR, etc. 
   replaces the user's initial value.  #

 # sum.27_29 = PHI <sum.27_11, 0>

 sum.27_11 = D.1827_8 + sum.27_29;

 GIMPLE_OMP_CONTINUE

 # Adding this reduction phi is done at create_phi_for_local_result() #
 # sum.27_56 = PHI <sum.27_11, 0>
 GIMPLE_OMP_RETURN

 # Creating the atomic operation is done at
   create_call_for_reduction_1() #

 #pragma omp atomic_load
 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
 D.1840_60 = sum.27_56 + D.1839_59;
 #pragma omp atomic_store (D.1840_60);

 GIMPLE_OMP_RETURN

 # collecting the result after the join of the threads is done at
   create_loads_for_reductions().
   The value computed by the threads is loaded from the
   shared struct.  #


 .paral_data_load.33_52 = &.paral_data_store.32;
 sum_37 = .paral_data_load.33_52->sum.27;
 sum_43 = D.1795_41 + sum_37;

 exit bb:
 # sum_21 = PHI <sum_43, sum_26>
 printf (&"%d"[0], sum_21);

...

}

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};

/* Equality and hash functions for hashtab code.  */

/* Returns nonzero iff the reduction descriptions AA and BB refer to the
   same reduction, i.e. are keyed by the same reduction phi node.
   Equality callback for the reduction hashtable.  */

static int
reduction_info_eq (const void *aa, const void *bb)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;
  const struct reduction_info *b = (const struct reduction_info *) bb;

  return (a->reduc_phi == b->reduc_phi);
}

/* Hash callback for the reduction hashtable.  Hashes on the address of
   the reduction phi node, consistently with reduction_info_eq.  */

static hashval_t
reduction_info_hash (const void *aa)
{
  const struct reduction_info *a = (const struct reduction_info *) aa;

  return htab_hash_pointer (a->reduc_phi);
}

/* Returns the reduction_info entry of REDUCTION_LIST keyed by the
   reduction phi node PHI, or NULL if there is none (or the list is
   empty).  */

static struct reduction_info *
reduction_phi (htab_t reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (htab_elements (reduction_list) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Equality and hash functions for hashtab code.  */

/* Returns nonzero iff the name-copy descriptions AA and BB refer to the
   same SSA name version.  Equality callback for the name-copy
   hashtable.  */

static int
name_to_copy_elt_eq (const void *aa, const void *bb)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;

  return a->version == b->version;
}

/* Hash callback for the name-copy hashtable: the SSA name version itself,
   consistently with name_to_copy_elt_eq.  */

static hashval_t
name_to_copy_elt_hash (const void *aa)
{
  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;

  return (hashval_t) a->version;
}


/* Data dependency analysis. Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).
*/ 249 250static bool 251loop_parallel_p (struct loop *loop) 252{ 253 VEC (ddr_p, heap) * dependence_relations; 254 VEC (data_reference_p, heap) *datarefs; 255 lambda_trans_matrix trans; 256 bool ret = false; 257 258 if (dump_file && (dump_flags & TDF_DETAILS)) 259 { 260 fprintf (dump_file, "Considering loop %d\n", loop->num); 261 if (!loop->inner) 262 fprintf (dump_file, "loop is innermost\n"); 263 else 264 fprintf (dump_file, "loop NOT innermost\n"); 265 } 266 267 /* Check for problems with dependences. If the loop can be reversed, 268 the iterations are independent. */ 269 datarefs = VEC_alloc (data_reference_p, heap, 10); 270 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); 271 compute_data_dependences_for_loop (loop, true, &datarefs, 272 &dependence_relations); 273 if (dump_file && (dump_flags & TDF_DETAILS)) 274 dump_data_dependence_relations (dump_file, dependence_relations); 275 276 trans = lambda_trans_matrix_new (1, 1); 277 LTM_MATRIX (trans)[0][0] = -1; 278 279 if (lambda_transform_legal_p (trans, 1, dependence_relations)) 280 { 281 ret = true; 282 if (dump_file && (dump_flags & TDF_DETAILS)) 283 fprintf (dump_file, " SUCCESS: may be parallelized\n"); 284 } 285 else if (dump_file && (dump_flags & TDF_DETAILS)) 286 fprintf (dump_file, 287 " FAILED: data dependencies exist across iterations\n"); 288 289 free_dependence_relations (dependence_relations); 290 free_data_refs (datarefs); 291 292 return ret; 293} 294 295/* Return true when LOOP contains basic blocks marked with the 296 BB_IRREDUCIBLE_LOOP flag. */ 297 298static inline bool 299loop_has_blocks_with_irreducible_flag (struct loop *loop) 300{ 301 unsigned i; 302 basic_block *bbs = get_loop_body_in_dom_order (loop); 303 bool res = true; 304 305 for (i = 0; i < loop->num_nodes; i++) 306 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP) 307 goto end; 308 309 res = false; 310 end: 311 free (bbs); 312 return res; 313} 314 315/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name. 
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  If GSI is NULL (the caller is rewriting a debug
   stmt), no new statements may be emitted, and NULL is returned when the
   needed address has not already been computed.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip the handled component references to find the base DECL whose
     address we cache in DECL_ADDRESS.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;
  uid = DECL_UID (*var_p);

  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* The address of the base decl has not been taken yet; emit the
	 assignment "bvar = &base" on the ENTRY edge and record the new
	 ssa name.  Not possible when processing a debug stmt.  */
      if (gsi == NULL)
	return NULL;
      addr = build_addr (*var_p, current_function_decl);
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  if (gsi == NULL)
    {
      /* Debug stmt: rewrite the base in place and build the address
	 expression without emitting any code.  */
      if (var_p != &obj)
	{
	  *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
	  name = build_fold_addr_expr_with_type (obj, type);
	}
      return fold_convert (type, name);
    }
  if (var_p != &obj)
    {
      /* OBJ is a component reference; replace its base by an indirect
	 reference through the cached address and gimplify the address of
	 the whole reference before GSI.  */
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
      name = force_gimple_operand (build_addr (obj, current_function_decl),
				   &stmts, true, NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  if (TREE_TYPE (name) != type)
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");
  add_referenced_var (bvar);

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  /* The neutral element of the reduction's operation (e.g. 0 for
     PLUS_EXPR, 1 for MULT_EXPR); remembered in REDUC->INIT for
     create_phi_for_local_result.  */
  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

/* Data passed to eliminate_local_variables_1 while walking the operands
   of a statement in a single-entry single-exit region.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Walk info; the callback retrieves this
				   struct from its address, so it must stay
				   the first member.  */
  edge entry;			/* Entry edge of the region being processed.  */
  htab_t decl_address;		/* Map from decl uid to the ssa name holding
				   its address (see take_address_of).  */
  gimple_stmt_iterator *gsi;	/* Insertion point for helper statements, or
				   NULL when processing a debug bind.  */
  bool changed;			/* Set when the walked expression was
				   modified.  */
  bool reset;			/* Set when a debug bind value could not be
				   rewritten and must be reset.  */
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct reference to the local variable by an
	 indirect reference through its (invariant) address.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Debug bind whose address was never computed; the bind value
	     must be reset by the caller.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				htab_t decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* For debug binds no new statements may be emitted; a NULL GSI makes
	 take_address_of only reuse already computed addresses, and the
	 bind value is reset when that fails.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.
   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Process the non-debug statements first, so that the addresses they
     need are computed and recorded in DECL_ADDRESS; the debug binds are
     handled in a second pass, where they may only reuse those recorded
     addresses (no code may be emitted for them).  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_debug_bind_p (gsi_stmt (gsi)))
	  has_debug_stmt = true;
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.
 */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      /* An SSA name is defined inside the region iff its defining block
	 is dominated by the region entry but not by the region exit.  */
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  /* Any other expression is conservatively considered defined inside
     the region.  */
  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only reserve a slot when a copy may be created; with NO_INSERT the
     lookup returns NULL for names that were never duplicated.  */
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base decl of NAME (once per decl; the copy is reused
     for all ssa names based on it).  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* NAME keeps its original ssa name; only its base decl is replaced
	 below.  */
      gcc_assert (!slot);
      copy = name;
    }

  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.
 */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  mark_virtual_ops_for_renaming (stmt);

  /* Names defined by STMT itself are never copied (copy == name is
     asserted), but their base decls are still replaced.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses that are invariant in the region (i.e. defined outside of it)
     are replaced by fresh duplicates.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}

/* Replaces the variable and the ssa names used in the debug bind STMT
   with the copies recorded in DECL_COPIES and NAME_COPIES.  If a used
   ssa name has no recorded copy, the value of the bind is reset
   instead.  Returns true when STMT refers to a variable that has no
   copy in the region (a DEBUG_EXPR_DECL, or a decl without an entry in
   DECL_COPIES) -- presumably the caller then discards the statement;
   verify at the call site.
 */

static bool
separate_decls_in_region_debug_bind (gimple stmt,
				     htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  var = gimple_debug_bind_get_var (stmt);
  if (TREE_CODE (var) == DEBUG_EXPR_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* The bind value refers to a name that was not copied; the
	     debug information cannot be preserved, so drop the value.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

static int
add_field_for_reduction (void **slot, void *data)
{

  struct reduction_info *const red = (struct reduction_info *) *slot;
  tree const type = (tree) data;
  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
  tree field = build_decl (gimple_location (red->reduc_stmt),
			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.
 */

static int
add_field_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  tree type = (tree) data;
  tree name = ssa_name (elt->version);
  tree var = SSA_NAME_VAR (name);
  tree field = build_decl (DECL_SOURCE_LOCATION (var),
			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* When no iterations ran, the local result is the neutral element
     (REDUC->INIT); otherwise it is the value computed by the loop.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

/* Describes the shared data structure the threads communicate through:
   STORE and LOAD are the decls used to write resp. read it, and
   STORE_BB and LOAD_BB the blocks where the corresponding accesses are
   generated.  */

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* The address of the reduction's field in the shared structure; this
     is the memory location the atomic update operates on.  */
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the field, combine it with the thread-local result
     (PHI_RESULT of REDUC->NEW_PHI) using the reduction's operation, and
     atomically store the combined value back.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.
 */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  tree name;
  tree x;

  /* Load the reduction's field of the shared structure into the ssa name
     that carried the reduction result at the loop exit
     (PHI_RESULT of RED->KEEP_RES).  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The load just emitted replaces the exit phi as the definition of
     NAME; remove the now redundant phi node.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (htab_t reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Initialize LD_ST_DATA->LOAD with the address of the shared
     structure, then emit the per-reduction loads through it.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);

}

/* Callback for htab_traverse.  Store the initial value of the reduction
   (the value of the reduction variable before the loop, saved by
   initialize_reductions in RED->INITIAL_VALUE) into the reduction's
   field of the shared structure.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

static int
create_stores_for_reduction (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  /* STORE.FIELD = INITIAL_VALUE, appended at the end of STORE_BB.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  mark_virtual_ops_for_renaming (stmt);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Nonzero keeps htab_traverse iterating.  */
  return 1;
}

/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

static int
create_loads_and_stores_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;

  /* Before the region: STORE.FIELD = <original ssa name>.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  mark_virtual_ops_for_renaming (stmt);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Inside the region: NEW_NAME = (*LOAD).FIELD.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.
 */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
                          tree *arg_struct, tree *new_arg_struct,
                          struct clsn_data *ld_st_data)

{
  /* BB0 receives the stores to the structure, BB1 the loads from it;
     the region proper starts after BB1.  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
                                    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
                                    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Collect, per statement, the ssa names and decls that need copies;
     debug statements are only noted here and handled in a second pass.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    {
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           name_copies, decl_copies);

          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                has_debug_stmt = true;
              else
                separate_decls_in_region_stmt (entry, exit, stmt,
                                               name_copies, decl_copies);
            }
        }
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
            {
              gimple stmt = gsi_stmt (gsi);

              if (gimple_debug_bind_p (stmt))
                {
                  /* A nonzero return means the bind could not be remapped
                     and has to be dropped.  */
                  if (separate_decls_in_region_debug_bind (stmt,
                                                           name_copies,
                                                           decl_copies))
                    {
                      gsi_remove (&gsi, true);
                      continue;
                    }
                }

              gsi_next (&gsi);
            }
        }

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (BUILTINS_LOCATION,
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
                              type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          /* Create the fields for reductions.  */
          htab_traverse (reduction_list, add_field_for_reduction,
                         type);
        }
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
                     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          htab_traverse (reduction_list, create_stores_for_reduction,
                         ld_st_data);
          clsn_data.load = make_ssa_name (nvar, NULL);
          clsn_data.load_bb = exit->dest;
          clsn_data.store = ld_st_data->store;
          create_final_loads_for_reduction (reduction_list, &clsn_data);
        }
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}

/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

static bool
parallelized_function_p (tree fn)
{
  /* Only artificial decls can be ours; also guards the first call,
     before the bitmap has been allocated.  */
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.
*/ 1273 1274static tree 1275create_loop_fn (void) 1276{ 1277 char buf[100]; 1278 char *tname; 1279 tree decl, type, name, t; 1280 struct function *act_cfun = cfun; 1281 static unsigned loopfn_num; 1282 1283 snprintf (buf, 100, "%s.$loopfn", current_function_name ()); 1284 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++); 1285 clean_symbol_name (tname); 1286 name = get_identifier (tname); 1287 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); 1288 1289 decl = build_decl (BUILTINS_LOCATION, 1290 FUNCTION_DECL, name, type); 1291 if (!parallelized_functions) 1292 parallelized_functions = BITMAP_GGC_ALLOC (); 1293 bitmap_set_bit (parallelized_functions, DECL_UID (decl)); 1294 1295 TREE_STATIC (decl) = 1; 1296 TREE_USED (decl) = 1; 1297 DECL_ARTIFICIAL (decl) = 1; 1298 DECL_IGNORED_P (decl) = 0; 1299 TREE_PUBLIC (decl) = 0; 1300 DECL_UNINLINABLE (decl) = 1; 1301 DECL_EXTERNAL (decl) = 0; 1302 DECL_CONTEXT (decl) = NULL_TREE; 1303 DECL_INITIAL (decl) = make_node (BLOCK); 1304 1305 t = build_decl (BUILTINS_LOCATION, 1306 RESULT_DECL, NULL_TREE, void_type_node); 1307 DECL_ARTIFICIAL (t) = 1; 1308 DECL_IGNORED_P (t) = 1; 1309 DECL_RESULT (decl) = t; 1310 1311 t = build_decl (BUILTINS_LOCATION, 1312 PARM_DECL, get_identifier (".paral_data_param"), 1313 ptr_type_node); 1314 DECL_ARTIFICIAL (t) = 1; 1315 DECL_ARG_TYPE (t) = ptr_type_node; 1316 DECL_CONTEXT (t) = decl; 1317 TREE_USED (t) = 1; 1318 DECL_ARGUMENTS (decl) = t; 1319 1320 allocate_struct_function (decl, false); 1321 1322 /* The call to allocate_struct_function clobbers CFUN, so we need to restore 1323 it. */ 1324 set_cfun (act_cfun); 1325 1326 return decl; 1327} 1328 1329/* Moves the exit condition of LOOP to the beginning of its header, and 1330 duplicates the part of the last iteration that gets disabled to the 1331 exit of the loop. NIT is the number of iterations of the loop 1332 (used to initialize the variables in the duplicated part). 
   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  /* Separate the header's phis from its code; ORIG_HEADER gets the code.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit condition must be of the form CONTROL <op> NIT.  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh name T and recreate the old result RES
         as a phi in ORIG_HEADER fed by T.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
        {
          /* Keep the exit condition phrased in terms of the header phi.  */
          gimple_cond_set_lhs (cond_stmt, t);
          update_stmt (cond_stmt);
          control = t;
        }
    }
  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks dominated before the latch; the latch is last in
     dominator order here — NOTE(review): relies on that ordering.  */
  for (n = 0; bbs[n] != loop->latch; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
                                   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */

  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Virtual phis stay as they are.  */
      if (!is_gimple_reg (res))
        {
          gsi_next (&gsi);
          continue;
        }

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      exit = single_dom_exit (loop);

      if (htab_elements (reduction_list) > 0)
        {
          struct reduction_info *red;

          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
          if (red)
            {
              red->keep_res = phi;
              gsi_next (&gsi);
              continue;
            }
        }
      /* Anything else must be the control variable; there may be only one.  */
      gcc_assert (control_name == NULL_TREE
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
                                    fold_convert (TREE_TYPE (control_name),
                                                  nit_1),
                                    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
                      tree new_data, unsigned n_threads)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.
 */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  /* #pragma omp parallel num_threads(N_THREADS), calling LOOP_FN (DATA).  */
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* PARAM = &DATA; NEW_DATA = (cast) PARAM.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
                                  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  /* The single canonical induction variable and its phi.  */
  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR replaces the preheader value; GIMPLE_OMP_FOR will define it.  */
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
           initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment becomes implicit in GIMPLE_OMP_CONTINUE; remove it.  */
  gsi = gsi_last_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD skips the loop entirely; END is the normal way out.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      /* On GUARD the value is the one from before the loop ...  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
                                                 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      /* ... on END it is the latch value of the header phi.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
                                                cvar_base,
                                                build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);

  return paral_bb;
}

/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
                   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;

  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
         || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN  -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN  -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* NIT >= MIN_PER_THREAD * N_THREADS && !MAY_BE_ZERO.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      /* Force it into a form usable as a COND_EXPR condition.  */
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
                                        new_arg_struct, n_threads);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.
We do it directly here instead of running 1719 a separate expand_omp pass, since it is more efficient, and less likely to 1720 cause troubles with further analyses not being able to deal with the 1721 OMP trees. */ 1722 1723 omp_expand_local (parallel_head); 1724} 1725 1726/* Returns true when LOOP contains vector phi nodes. */ 1727 1728static bool 1729loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED) 1730{ 1731 unsigned i; 1732 basic_block *bbs = get_loop_body_in_dom_order (loop); 1733 gimple_stmt_iterator gsi; 1734 bool res = true; 1735 1736 for (i = 0; i < loop->num_nodes; i++) 1737 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) 1738 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE) 1739 goto end; 1740 1741 res = false; 1742 end: 1743 free (bbs); 1744 return res; 1745} 1746 1747/* Create a reduction_info struct, initialize it with REDUC_STMT 1748 and PHI, insert it to the REDUCTION_LIST. */ 1749 1750static void 1751build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi) 1752{ 1753 PTR *slot; 1754 struct reduction_info *new_reduction; 1755 1756 gcc_assert (reduc_stmt); 1757 1758 if (dump_file && (dump_flags & TDF_DETAILS)) 1759 { 1760 fprintf (dump_file, 1761 "Detected reduction. reduction stmt is: \n"); 1762 print_gimple_stmt (dump_file, reduc_stmt, 0, 0); 1763 fprintf (dump_file, "\n"); 1764 } 1765 1766 new_reduction = XCNEW (struct reduction_info); 1767 1768 new_reduction->reduc_stmt = reduc_stmt; 1769 new_reduction->reduc_phi = phi; 1770 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt); 1771 slot = htab_find_slot (reduction_list, new_reduction, INSERT); 1772 *slot = new_reduction; 1773} 1774 1775/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. 
*/ 1776 1777static void 1778gather_scalar_reductions (loop_p loop, htab_t reduction_list) 1779{ 1780 gimple_stmt_iterator gsi; 1781 loop_vec_info simple_loop_info; 1782 1783 vect_dump = NULL; 1784 simple_loop_info = vect_analyze_loop_form (loop); 1785 1786 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) 1787 { 1788 gimple phi = gsi_stmt (gsi); 1789 affine_iv iv; 1790 tree res = PHI_RESULT (phi); 1791 bool double_reduc; 1792 1793 if (!is_gimple_reg (res)) 1794 continue; 1795 1796 if (!simple_iv (loop, loop, res, &iv, true) 1797 && simple_loop_info) 1798 { 1799 gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc); 1800 if (reduc_stmt && !double_reduc) 1801 build_new_reduction (reduction_list, reduc_stmt, phi); 1802 } 1803 } 1804 destroy_loop_vec_info (simple_loop_info, true); 1805} 1806 1807/* Try to initialize NITER for code generation part. */ 1808 1809static bool 1810try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter) 1811{ 1812 edge exit = single_dom_exit (loop); 1813 1814 gcc_assert (exit); 1815 1816 /* We need to know # of iterations, and there should be no uses of values 1817 defined inside loop outside of it, unless the values are invariants of 1818 the loop. */ 1819 if (!number_of_iterations_exit (loop, exit, niter, false)) 1820 { 1821 if (dump_file && (dump_flags & TDF_DETAILS)) 1822 fprintf (dump_file, " FAILED: number of iterations not known\n"); 1823 return false; 1824 } 1825 1826 return true; 1827} 1828 1829/* Try to initialize REDUCTION_LIST for code generation part. 1830 REDUCTION_LIST describes the reductions. 
 */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every scalar value used after the loop must be the result of a
     recognized reduction; otherwise parallelization is unsafe.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern: \n");
	    }
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the use of VAL inside the loop; that should be the
	     reduction's loop-header phi.  NOTE(review): if no in-loop use
	     exists, REDUC_PHI stays NULL and is handed to reduction_phi —
	     presumably handled there; confirm.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* A header phi that is neither a simple iv nor a known reduction
	 is a loop-carried scalar dependency — give up.  */
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  reduction_list = htab_create (10, reduction_info_hash,
				reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction table is reused across candidate loops.  */
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n",loop->num);
	}

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.
 */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;
      estimated = estimated_loop_iterations_int (loop, false);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated !=-1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop->inner)
	    fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	  else
	    fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	  loop_loc = find_loop_location (loop);
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\nloop at %s:%d: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	}
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
      /* Sanity-check the IR after the heavy CFG/loop surgery above.  */
      verify_flow_info ();
      verify_dominators (CDI_DOMINATORS);
      verify_loop_structure ();
      verify_loop_closed_ssa ();
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solutions
     for ESCAPED and CALLUSED.  */
  if (changed)
    {
      pt_solution_reset (&cfun->gimple_df->escaped);
      pt_solution_reset (&cfun->gimple_df->callused);
    }

  return changed;
}

#include "gt-tree-parloops.h"