1/* Expansion pass for OMP directives. Outlines regions of certain OMP 2 directives to separate functions, converts others into explicit calls to the 3 runtime library (libgomp) and so forth 4 5Copyright (C) 2005-2022 Free Software Foundation, Inc. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it under 10the terms of the GNU General Public License as published by the Free 11Software Foundation; either version 3, or (at your option) any later 12version. 13 14GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15WARRANTY; without even the implied warranty of MERCHANTABILITY or 16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "memmodel.h" 27#include "backend.h" 28#include "target.h" 29#include "rtl.h" 30#include "tree.h" 31#include "gimple.h" 32#include "cfghooks.h" 33#include "tree-pass.h" 34#include "ssa.h" 35#include "optabs.h" 36#include "cgraph.h" 37#include "pretty-print.h" 38#include "diagnostic-core.h" 39#include "fold-const.h" 40#include "stor-layout.h" 41#include "cfganal.h" 42#include "internal-fn.h" 43#include "gimplify.h" 44#include "gimple-iterator.h" 45#include "gimplify-me.h" 46#include "gimple-walk.h" 47#include "tree-cfg.h" 48#include "tree-into-ssa.h" 49#include "tree-ssa.h" 50#include "splay-tree.h" 51#include "cfgloop.h" 52#include "omp-general.h" 53#include "omp-offload.h" 54#include "tree-cfgcleanup.h" 55#include "alloc-pool.h" 56#include "symbol-summary.h" 57#include "gomp-constants.h" 58#include "gimple-pretty-print.h" 59#include "stringpool.h" 60#include "attribs.h" 61#include "tree-eh.h" 62#include "opts.h" 63 64/* OMP region information. 
Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Root of the forest of omp regions built for the current function;
   peers are chained through the NEXT field.  */
static struct omp_region *root_omp_region;

/* NOTE(review): presumably records whether dump output was already
   emitted for an outlined child function — confirm against the uses
   later in this file.  */
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.
*/

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  /* A sections region needs no precomputed loop bounds, so it can
     always be combined.  */
  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  /* Collapsed loops need a constant combined iteration count; the
     combined library entry points only take 'long' bounds.  */
  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  When the simd modifier is present, round
   CHUNK_SIZE up to a multiple of the maximum vectorization factor VF,
   computed as (CHUNK_SIZE + VF - 1) & -VF (the mask form assumes VF is
   a power of two).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  /* A zero chunk size means "unspecified"; leave it alone.  */
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.
*/

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a loop combined into the enclosing parallel, the
	     bounds are carried by the first two _looptemp_ clauses on
	     the parallel directive instead.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      /* The loop entry points take N1, N2, STEP and optionally the
	 chunk size, all as 'long'.  */
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.
*/

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  /* Both the parallel region and its single inner workshare region
     must be fully formed (entry, exit and continue markers).  */
  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      /* Mark both regions and remember the extra library-call
	 arguments for the combined entry point.  */
      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.
*/ 377 378void 379dump_omp_region (FILE *file, struct omp_region *region, int indent) 380{ 381 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, 382 gimple_code_name[region->type]); 383 384 if (region->inner) 385 dump_omp_region (file, region->inner, indent + 4); 386 387 if (region->cont) 388 { 389 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", 390 region->cont->index); 391 } 392 393 if (region->exit) 394 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", 395 region->exit->index); 396 else 397 fprintf (file, "%*s[no exit marker]\n", indent, ""); 398 399 if (region->next) 400 dump_omp_region (file, region->next, indent); 401} 402 403DEBUG_FUNCTION void 404debug_omp_region (struct omp_region *region) 405{ 406 dump_omp_region (stderr, region, 0); 407} 408 409DEBUG_FUNCTION void 410debug_all_omp_regions (void) 411{ 412 dump_omp_region (stderr, root_omp_region, 0); 413} 414 415/* Create a new parallel region starting at STMT inside region PARENT. */ 416 417static struct omp_region * 418new_omp_region (basic_block bb, enum gimple_code type, 419 struct omp_region *parent) 420{ 421 struct omp_region *region = XCNEW (struct omp_region); 422 423 region->outer = parent; 424 region->entry = bb; 425 region->type = type; 426 427 if (parent) 428 { 429 /* This is a nested region. Add it to the list of inner 430 regions in PARENT. */ 431 region->next = parent->inner; 432 parent->inner = region; 433 } 434 else 435 { 436 /* This is a toplevel region. Add it to the list of toplevel 437 regions in ROOT_OMP_REGION. */ 438 region->next = root_omp_region; 439 root_omp_region = region; 440 } 441 442 return region; 443} 444 445/* Release the memory associated with the region tree rooted at REGION. 
*/

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  /* Free all children (recursively), then the region itself.  */
  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  /* No enclosing outlined region: the current function is the context.  */
  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  /* Prepend CHILD_FNDECL onto the vars of the enclosing block
	     so debug info places it in the right lexical scope.  */
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      /* Without an explicit monotonic modifier (and without
		 conditional lastprivate), use the nonmonotonic entry
		 points.  */
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  /* START_IX2 is an offset from the _LOOP_STATIC builtin.  */
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Build the diamond cond_bb -> {then_bb, else_bb} -> bb
	     computing VAL when COND is true and 1 otherwise.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  /* Argument layout: child fn, data, num_threads, [ws_args...], flags.  */
  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      /* GOMP_parallel_reductions returns a value; store it (via a
	 pointer-sized integer) into the reduction temporary.  */
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.
*/

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);

  /* Compose the GOMP_TASK_FLAG_* bits passed to the runtime.  */
  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For taskloop, the iteration bounds and scheduling hints come
	 from the enclosing GIMPLE_OMP_FOR and its clauses.  */
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      /* NUM_TASKS and GRAINSIZE are mutually exclusive; zero means
	 "let the runtime decide".  */
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	{
	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
	    iflags |= GOMP_TASK_FLAG_STRICT;
	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
	}
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
		iflags |= GOMP_TASK_FLAG_STRICT;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else
    {
      if (priority)
	iflags |= GOMP_TASK_FLAG_PRIORITY;
      if (detach)
	iflags |= GOMP_TASK_FLAG_DETACH;
    }

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* Taskloop has no separate COND argument; fold the if clause
	     into the FLAG_IF bit instead.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);

  detach = (detach
	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
	    : null_pointer_node);

  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 10, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority, detach);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.
*/

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  /* Without a depend clause there is nothing to wait for.  */
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  /* Zero for NUM_TEAMS or THREAD_LIMIT means the value is chosen by
     the runtime.  */
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  /* Walk backwards so the resulting chain preserves vector order.  */
  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* Tri-state: -1 = not yet computed, 0/1 cached answer.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* Check the child function's locals ...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ... and the BLOCK vars from the workshare's return up to
		 the parallel's own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

/* Walk the region tree rooted at REGION and remove redundant exit
   barriers from every GIMPLE_OMP_PARALLEL region found.  */

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  basic_block bb;
  gimple_stmt_iterator gsi;
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
					  OMP_CLAUSE_UNTIED) != NULL);

  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *call = gsi_stmt (gsi);
	tree decl;

	/* Only consider calls to external public functions without a
	   body (i.e. the library entry points).  */
	if (is_gimple_call (call)
	    && (decl = gimple_call_fndecl (call))
	    && DECL_EXTERNAL (decl)
	    && TREE_PUBLIC (decl)
	    && DECL_INITIAL (decl) == NULL)
	  {
	    tree built_in;

	    if (DECL_NAME (decl) == thr_num_id)
	      {
		/* In #pragma omp task untied omp_get_thread_num () can change
		   during the execution of the task region.
*/ 1143 if (untied_task) 1144 continue; 1145 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1146 } 1147 else if (DECL_NAME (decl) == num_thr_id) 1148 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1149 else 1150 continue; 1151 1152 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) 1153 || gimple_call_num_args (call) != 0) 1154 continue; 1155 1156 if (flag_exceptions && !TREE_NOTHROW (decl)) 1157 continue; 1158 1159 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE 1160 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), 1161 TREE_TYPE (TREE_TYPE (built_in)))) 1162 continue; 1163 1164 gimple_call_set_fndecl (call, built_in); 1165 } 1166 } 1167} 1168 1169/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be 1170 regimplified. */ 1171 1172static tree 1173expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) 1174{ 1175 tree t = *tp; 1176 1177 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ 1178 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) 1179 return t; 1180 1181 if (TREE_CODE (t) == ADDR_EXPR) 1182 recompute_tree_invariant_for_addr_expr (t); 1183 1184 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 1185 return NULL_TREE; 1186} 1187 1188/* Prepend or append TO = FROM assignment before or after *GSI_P. */ 1189 1190static void 1191expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, 1192 bool after) 1193{ 1194 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); 1195 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, 1196 !after, after ? 
GSI_CONTINUE_LINKING 1197 : GSI_SAME_STMT); 1198 gimple *stmt = gimple_build_assign (to, from); 1199 if (after) 1200 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); 1201 else 1202 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); 1203 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) 1204 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) 1205 { 1206 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1207 gimple_regimplify_operands (stmt, &gsi); 1208 } 1209} 1210 1211/* Prepend or append LHS CODE RHS condition before or after *GSI_P. */ 1212 1213static gcond * 1214expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code, 1215 tree lhs, tree rhs, bool after = false) 1216{ 1217 gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE); 1218 if (after) 1219 gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING); 1220 else 1221 gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT); 1222 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 1223 NULL, NULL) 1224 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 1225 NULL, NULL)) 1226 { 1227 gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt); 1228 gimple_regimplify_operands (cond_stmt, &gsi); 1229 } 1230 return cond_stmt; 1231} 1232 1233/* Expand the OpenMP parallel or task directive starting at REGION. 
 */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  /* A GIMPLE_OMP_TASK with the taskwait_p flag is a stand-alone
     taskwait; no outlining is needed, just emit the runtime call.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  /* For tasks the region to outline ends at the GIMPLE_OMP_CONTINUE
     block; for parallel/teams it ends at the GIMPLE_OMP_RETURN.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  /* Find the .OMP_DATA_I = &.OMP_DATA_O copy assignment; it is
	     asserted to exist in this block.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  /* For tasks, redirect control past the task body with an
	     abnormal edge to the region exit.  */
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  /* Finally emit the library call that launches the outlined body.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.
 */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  /* Running product of the per-loop iteration counts.  */
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      /* DIR is +1 for an upward loop, -1 for a downward one.  */
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations: (range - dir + s) / s, i.e. a
	 ceiling division of the range by the step.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      /* Split IVAR into this loop's index (EXPR = IVAR % iters) and the
	 remaining outer index (IVAR = IVAR / iters).  */
      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      /* V = base + index * step.  */
      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.
If this is the outermost 1710 of the combined collapse > 1 loop constructs, generate code like: 1711 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; 1712 if (cond3 is <) 1713 adj = STEP3 - 1; 1714 else 1715 adj = STEP3 + 1; 1716 count3 = (adj + N32 - N31) / STEP3; 1717 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; 1718 if (cond2 is <) 1719 adj = STEP2 - 1; 1720 else 1721 adj = STEP2 + 1; 1722 count2 = (adj + N22 - N21) / STEP2; 1723 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; 1724 if (cond1 is <) 1725 adj = STEP1 - 1; 1726 else 1727 adj = STEP1 + 1; 1728 count1 = (adj + N12 - N11) / STEP1; 1729 count = count1 * count2 * count3; 1730 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: 1731 count = 0; 1732 and set ZERO_ITER_BB to that bb. If this isn't the outermost 1733 of the combined loop constructs, just initialize COUNTS array 1734 from the _looptemp_ clauses. For loop nests with non-rectangular 1735 loops, do this only for the rectangular loops. Then pick 1736 the loops which reference outer vars in their bound expressions 1737 and the loops which they refer to and for this sub-nest compute 1738 number of iterations. For triangular loops use Faulhaber's formula, 1739 otherwise as a fallback, compute by iterating the loops. 1740 If e.g. 
the sub-nest is
     for (I = N11; I COND1 N12; I += STEP1)
       for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
	 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
   do:
     COUNT = 0;
     for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
       for (tmpj = M21 * tmpi + N21;
	    tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
	 {
	   int tmpk1 = M31 * tmpj + N31;
	   int tmpk2 = M32 * tmpj + N32;
	   if (tmpk1 COND3 tmpk2)
	     {
	       if (COND3 is <)
		 adj = STEP3 - 1;
	       else
		 adj = STEP3 + 1;
	       COUNT += (adj + tmpk2 - tmpk1) / STEP3;
	     }
	 }
   and finally multiply the counts of the rectangular loops not
   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
   store number of iterations of the loops from fd->first_nonrect
   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
   by the counts of rectangular loops not referenced in any non-rectangular
   loops sandwiched in between those.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.
1789 1790 It seems like putting this all together would create much better 1791 scheduling opportunities, and less pressure on the chip's branch 1792 predictor. */ 1793 1794static void 1795expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 1796 basic_block &entry_bb, tree *counts, 1797 basic_block &zero_iter1_bb, int &first_zero_iter1, 1798 basic_block &zero_iter2_bb, int &first_zero_iter2, 1799 basic_block &l2_dom_bb) 1800{ 1801 tree t, type = TREE_TYPE (fd->loop.v); 1802 edge e, ne; 1803 int i; 1804 1805 /* Collapsed loops need work for expansion into SSA form. */ 1806 gcc_assert (!gimple_in_ssa_p (cfun)); 1807 1808 if (gimple_omp_for_combined_into_p (fd->for_stmt) 1809 && TREE_CODE (fd->loop.n2) != INTEGER_CST) 1810 { 1811 gcc_assert (fd->ordered == 0); 1812 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1813 isn't supposed to be handled, as the inner loop doesn't 1814 use it. */ 1815 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 1816 OMP_CLAUSE__LOOPTEMP_); 1817 gcc_assert (innerc); 1818 for (i = 0; i < fd->collapse; i++) 1819 { 1820 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1821 OMP_CLAUSE__LOOPTEMP_); 1822 gcc_assert (innerc); 1823 if (i) 1824 counts[i] = OMP_CLAUSE_DECL (innerc); 1825 else 1826 counts[0] = NULL_TREE; 1827 } 1828 if (fd->non_rect 1829 && fd->last_nonrect == fd->first_nonrect + 1 1830 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v))) 1831 { 1832 tree c[4]; 1833 for (i = 0; i < 4; i++) 1834 { 1835 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1836 OMP_CLAUSE__LOOPTEMP_); 1837 gcc_assert (innerc); 1838 c[i] = OMP_CLAUSE_DECL (innerc); 1839 } 1840 counts[0] = c[0]; 1841 fd->first_inner_iterations = c[1]; 1842 fd->factor = c[2]; 1843 fd->adjn1 = c[3]; 1844 } 1845 return; 1846 } 1847 1848 for (i = fd->collapse; i < fd->ordered; i++) 1849 { 1850 tree itype = TREE_TYPE (fd->loops[i].v); 1851 counts[i] = NULL_TREE; 1852 t = fold_binary 
(fd->loops[i].cond_code, boolean_type_node, 1853 fold_convert (itype, fd->loops[i].n1), 1854 fold_convert (itype, fd->loops[i].n2)); 1855 if (t && integer_zerop (t)) 1856 { 1857 for (i = fd->collapse; i < fd->ordered; i++) 1858 counts[i] = build_int_cst (type, 0); 1859 break; 1860 } 1861 } 1862 bool rect_count_seen = false; 1863 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) 1864 { 1865 tree itype = TREE_TYPE (fd->loops[i].v); 1866 1867 if (i >= fd->collapse && counts[i]) 1868 continue; 1869 if (fd->non_rect) 1870 { 1871 /* Skip loops that use outer iterators in their expressions 1872 during this phase. */ 1873 if (fd->loops[i].m1 || fd->loops[i].m2) 1874 { 1875 counts[i] = build_zero_cst (type); 1876 continue; 1877 } 1878 } 1879 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) 1880 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, 1881 fold_convert (itype, fd->loops[i].n1), 1882 fold_convert (itype, fd->loops[i].n2))) 1883 == NULL_TREE || !integer_onep (t))) 1884 { 1885 gcond *cond_stmt; 1886 tree n1, n2; 1887 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 1888 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, 1889 true, GSI_SAME_STMT); 1890 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 1891 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, 1892 true, GSI_SAME_STMT); 1893 cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code, 1894 n1, n2); 1895 e = split_block (entry_bb, cond_stmt); 1896 basic_block &zero_iter_bb 1897 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; 1898 int &first_zero_iter 1899 = i < fd->collapse ? 
first_zero_iter1 : first_zero_iter2; 1900 if (zero_iter_bb == NULL) 1901 { 1902 gassign *assign_stmt; 1903 first_zero_iter = i; 1904 zero_iter_bb = create_empty_bb (entry_bb); 1905 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); 1906 *gsi = gsi_after_labels (zero_iter_bb); 1907 if (i < fd->collapse) 1908 assign_stmt = gimple_build_assign (fd->loop.n2, 1909 build_zero_cst (type)); 1910 else 1911 { 1912 counts[i] = create_tmp_reg (type, ".count"); 1913 assign_stmt 1914 = gimple_build_assign (counts[i], build_zero_cst (type)); 1915 } 1916 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); 1917 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, 1918 entry_bb); 1919 } 1920 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); 1921 ne->probability = profile_probability::very_unlikely (); 1922 e->flags = EDGE_TRUE_VALUE; 1923 e->probability = ne->probability.invert (); 1924 if (l2_dom_bb == NULL) 1925 l2_dom_bb = entry_bb; 1926 entry_bb = e->dest; 1927 *gsi = gsi_last_nondebug_bb (entry_bb); 1928 } 1929 1930 if (POINTER_TYPE_P (itype)) 1931 itype = signed_type_for (itype); 1932 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 1933 ? -1 : 1)); 1934 t = fold_build2 (PLUS_EXPR, itype, 1935 fold_convert (itype, fd->loops[i].step), t); 1936 t = fold_build2 (PLUS_EXPR, itype, t, 1937 fold_convert (itype, fd->loops[i].n2)); 1938 t = fold_build2 (MINUS_EXPR, itype, t, 1939 fold_convert (itype, fd->loops[i].n1)); 1940 /* ?? We could probably use CEIL_DIV_EXPR instead of 1941 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't 1942 generate the same code in the end because generically we 1943 don't know that the values involved must be negative for 1944 GT?? 
*/ 1945 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 1946 t = fold_build2 (TRUNC_DIV_EXPR, itype, 1947 fold_build1 (NEGATE_EXPR, itype, t), 1948 fold_build1 (NEGATE_EXPR, itype, 1949 fold_convert (itype, 1950 fd->loops[i].step))); 1951 else 1952 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 1953 fold_convert (itype, fd->loops[i].step)); 1954 t = fold_convert (type, t); 1955 if (TREE_CODE (t) == INTEGER_CST) 1956 counts[i] = t; 1957 else 1958 { 1959 if (i < fd->collapse || i != first_zero_iter2) 1960 counts[i] = create_tmp_reg (type, ".count"); 1961 expand_omp_build_assign (gsi, counts[i], t); 1962 } 1963 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) 1964 { 1965 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect) 1966 continue; 1967 if (!rect_count_seen) 1968 { 1969 t = counts[i]; 1970 rect_count_seen = true; 1971 } 1972 else 1973 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); 1974 expand_omp_build_assign (gsi, fd->loop.n2, t); 1975 } 1976 } 1977 if (fd->non_rect && SSA_VAR_P (fd->loop.n2)) 1978 { 1979 gcc_assert (fd->last_nonrect != -1); 1980 1981 counts[fd->last_nonrect] = create_tmp_reg (type, ".count"); 1982 expand_omp_build_assign (gsi, counts[fd->last_nonrect], 1983 build_zero_cst (type)); 1984 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++) 1985 if (fd->loops[i].m1 1986 || fd->loops[i].m2 1987 || fd->loops[i].non_rect_referenced) 1988 break; 1989 if (i == fd->last_nonrect 1990 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect 1991 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 1992 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v))) 1993 { 1994 int o = fd->first_nonrect; 1995 tree itype = TREE_TYPE (fd->loops[o].v); 1996 tree n1o = create_tmp_reg (itype, ".n1o"); 1997 t = fold_convert (itype, unshare_expr (fd->loops[o].n1)); 1998 expand_omp_build_assign (gsi, n1o, t); 1999 tree n2o = create_tmp_reg (itype, ".n2o"); 2000 t = fold_convert (itype, unshare_expr (fd->loops[o].n2)); 2001 
expand_omp_build_assign (gsi, n2o, t); 2002 if (fd->loops[i].m1 && fd->loops[i].m2) 2003 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2), 2004 unshare_expr (fd->loops[i].m1)); 2005 else if (fd->loops[i].m1) 2006 t = fold_build1 (NEGATE_EXPR, itype, 2007 unshare_expr (fd->loops[i].m1)); 2008 else 2009 t = unshare_expr (fd->loops[i].m2); 2010 tree m2minusm1 2011 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, 2012 true, GSI_SAME_STMT); 2013 2014 gimple_stmt_iterator gsi2 = *gsi; 2015 gsi_prev (&gsi2); 2016 e = split_block (entry_bb, gsi_stmt (gsi2)); 2017 e = split_block (e->dest, (gimple *) NULL); 2018 basic_block bb1 = e->src; 2019 entry_bb = e->dest; 2020 *gsi = gsi_after_labels (entry_bb); 2021 2022 gsi2 = gsi_after_labels (bb1); 2023 tree ostep = fold_convert (itype, fd->loops[o].step); 2024 t = build_int_cst (itype, (fd->loops[o].cond_code 2025 == LT_EXPR ? -1 : 1)); 2026 t = fold_build2 (PLUS_EXPR, itype, ostep, t); 2027 t = fold_build2 (PLUS_EXPR, itype, t, n2o); 2028 t = fold_build2 (MINUS_EXPR, itype, t, n1o); 2029 if (TYPE_UNSIGNED (itype) 2030 && fd->loops[o].cond_code == GT_EXPR) 2031 t = fold_build2 (TRUNC_DIV_EXPR, itype, 2032 fold_build1 (NEGATE_EXPR, itype, t), 2033 fold_build1 (NEGATE_EXPR, itype, ostep)); 2034 else 2035 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep); 2036 tree outer_niters 2037 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2038 true, GSI_SAME_STMT); 2039 t = fold_build2 (MINUS_EXPR, itype, outer_niters, 2040 build_one_cst (itype)); 2041 t = fold_build2 (MULT_EXPR, itype, t, ostep); 2042 t = fold_build2 (PLUS_EXPR, itype, n1o, t); 2043 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2044 true, GSI_SAME_STMT); 2045 tree n1, n2, n1e, n2e; 2046 t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 2047 if (fd->loops[i].m1) 2048 { 2049 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); 2050 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1); 2051 n1 = fold_build2 (PLUS_EXPR, 
itype, n1, t); 2052 } 2053 else 2054 n1 = t; 2055 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, 2056 true, GSI_SAME_STMT); 2057 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 2058 if (fd->loops[i].m2) 2059 { 2060 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); 2061 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2); 2062 n2 = fold_build2 (PLUS_EXPR, itype, n2, t); 2063 } 2064 else 2065 n2 = t; 2066 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, 2067 true, GSI_SAME_STMT); 2068 t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 2069 if (fd->loops[i].m1) 2070 { 2071 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1)); 2072 n1e = fold_build2 (MULT_EXPR, itype, last, n1e); 2073 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t); 2074 } 2075 else 2076 n1e = t; 2077 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE, 2078 true, GSI_SAME_STMT); 2079 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 2080 if (fd->loops[i].m2) 2081 { 2082 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2)); 2083 n2e = fold_build2 (MULT_EXPR, itype, last, n2e); 2084 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t); 2085 } 2086 else 2087 n2e = t; 2088 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE, 2089 true, GSI_SAME_STMT); 2090 gcond *cond_stmt 2091 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2092 n1, n2); 2093 e = split_block (bb1, cond_stmt); 2094 e->flags = EDGE_TRUE_VALUE; 2095 e->probability = profile_probability::likely ().guessed (); 2096 basic_block bb2 = e->dest; 2097 gsi2 = gsi_after_labels (bb2); 2098 2099 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2100 n1e, n2e); 2101 e = split_block (bb2, cond_stmt); 2102 e->flags = EDGE_TRUE_VALUE; 2103 e->probability = profile_probability::likely ().guessed (); 2104 gsi2 = gsi_after_labels (e->dest); 2105 2106 tree step = fold_convert (itype, fd->loops[i].step); 2107 t = build_int_cst (itype, (fd->loops[i].cond_code 2108 == 
LT_EXPR ? -1 : 1)); 2109 t = fold_build2 (PLUS_EXPR, itype, step, t); 2110 t = fold_build2 (PLUS_EXPR, itype, t, n2); 2111 t = fold_build2 (MINUS_EXPR, itype, t, n1); 2112 if (TYPE_UNSIGNED (itype) 2113 && fd->loops[i].cond_code == GT_EXPR) 2114 t = fold_build2 (TRUNC_DIV_EXPR, itype, 2115 fold_build1 (NEGATE_EXPR, itype, t), 2116 fold_build1 (NEGATE_EXPR, itype, step)); 2117 else 2118 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 2119 tree first_inner_iterations 2120 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2121 true, GSI_SAME_STMT); 2122 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep); 2123 if (TYPE_UNSIGNED (itype) 2124 && fd->loops[i].cond_code == GT_EXPR) 2125 t = fold_build2 (TRUNC_DIV_EXPR, itype, 2126 fold_build1 (NEGATE_EXPR, itype, t), 2127 fold_build1 (NEGATE_EXPR, itype, step)); 2128 else 2129 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 2130 tree factor 2131 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2132 true, GSI_SAME_STMT); 2133 t = fold_build2 (MINUS_EXPR, itype, outer_niters, 2134 build_one_cst (itype)); 2135 t = fold_build2 (MULT_EXPR, itype, t, outer_niters); 2136 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node); 2137 t = fold_build2 (MULT_EXPR, itype, factor, t); 2138 t = fold_build2 (PLUS_EXPR, itype, 2139 fold_build2 (MULT_EXPR, itype, outer_niters, 2140 first_inner_iterations), t); 2141 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], 2142 fold_convert (type, t)); 2143 2144 basic_block bb3 = create_empty_bb (bb1); 2145 add_bb_to_loop (bb3, bb1->loop_father); 2146 2147 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE); 2148 e->probability = profile_probability::unlikely ().guessed (); 2149 2150 gsi2 = gsi_after_labels (bb3); 2151 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2152 n1e, n2e); 2153 e = split_block (bb3, cond_stmt); 2154 e->flags = EDGE_TRUE_VALUE; 2155 e->probability = profile_probability::likely ().guessed (); 2156 basic_block bb4 = e->dest; 2157 2158 ne = 
make_edge (bb3, entry_bb, EDGE_FALSE_VALUE); 2159 ne->probability = e->probability.invert (); 2160 2161 basic_block bb5 = create_empty_bb (bb2); 2162 add_bb_to_loop (bb5, bb2->loop_father); 2163 2164 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE); 2165 ne->probability = profile_probability::unlikely ().guessed (); 2166 2167 for (int j = 0; j < 2; j++) 2168 { 2169 gsi2 = gsi_after_labels (j ? bb5 : bb4); 2170 t = fold_build2 (MINUS_EXPR, itype, 2171 unshare_expr (fd->loops[i].n1), 2172 unshare_expr (fd->loops[i].n2)); 2173 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1); 2174 tree tem 2175 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2176 true, GSI_SAME_STMT); 2177 t = fold_build2 (MINUS_EXPR, itype, tem, n1o); 2178 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep); 2179 t = fold_build2 (MINUS_EXPR, itype, tem, t); 2180 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2181 true, GSI_SAME_STMT); 2182 t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 2183 if (fd->loops[i].m1) 2184 { 2185 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); 2186 n1 = fold_build2 (MULT_EXPR, itype, tem, n1); 2187 n1 = fold_build2 (PLUS_EXPR, itype, n1, t); 2188 } 2189 else 2190 n1 = t; 2191 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, 2192 true, GSI_SAME_STMT); 2193 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 2194 if (fd->loops[i].m2) 2195 { 2196 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); 2197 n2 = fold_build2 (MULT_EXPR, itype, tem, n2); 2198 n2 = fold_build2 (PLUS_EXPR, itype, n2, t); 2199 } 2200 else 2201 n2 = t; 2202 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, 2203 true, GSI_SAME_STMT); 2204 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem); 2205 2206 cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2207 n1, n2); 2208 e = split_block (gsi_bb (gsi2), cond_stmt); 2209 e->flags = j ? 
EDGE_TRUE_VALUE : EDGE_FALSE_VALUE; 2210 e->probability = profile_probability::unlikely ().guessed (); 2211 ne = make_edge (e->src, bb1, 2212 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE); 2213 ne->probability = e->probability.invert (); 2214 gsi2 = gsi_after_labels (e->dest); 2215 2216 t = fold_build2 (PLUS_EXPR, itype, tem, ostep); 2217 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t); 2218 2219 make_edge (e->dest, bb1, EDGE_FALLTHRU); 2220 } 2221 2222 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1); 2223 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2); 2224 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1); 2225 2226 if (fd->first_nonrect + 1 == fd->last_nonrect) 2227 { 2228 fd->first_inner_iterations = first_inner_iterations; 2229 fd->factor = factor; 2230 fd->adjn1 = n1o; 2231 } 2232 } 2233 else 2234 { 2235 /* Fallback implementation. Evaluate the loops with m1/m2 2236 non-NULL as well as their outer loops at runtime using temporaries 2237 instead of the original iteration variables, and in the 2238 body just bump the counter. 
*/ 2239 gimple_stmt_iterator gsi2 = *gsi; 2240 gsi_prev (&gsi2); 2241 e = split_block (entry_bb, gsi_stmt (gsi2)); 2242 e = split_block (e->dest, (gimple *) NULL); 2243 basic_block cur_bb = e->src; 2244 basic_block next_bb = e->dest; 2245 entry_bb = e->dest; 2246 *gsi = gsi_after_labels (entry_bb); 2247 2248 tree *vs = XALLOCAVEC (tree, fd->last_nonrect); 2249 memset (vs, 0, fd->last_nonrect * sizeof (tree)); 2250 2251 for (i = 0; i <= fd->last_nonrect; i++) 2252 { 2253 if (fd->loops[i].m1 == NULL_TREE 2254 && fd->loops[i].m2 == NULL_TREE 2255 && !fd->loops[i].non_rect_referenced) 2256 continue; 2257 2258 tree itype = TREE_TYPE (fd->loops[i].v); 2259 2260 gsi2 = gsi_after_labels (cur_bb); 2261 tree n1, n2; 2262 t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); 2263 if (fd->loops[i].m1 == NULL_TREE) 2264 n1 = t; 2265 else if (POINTER_TYPE_P (itype)) 2266 { 2267 gcc_assert (integer_onep (fd->loops[i].m1)); 2268 t = fold_convert (sizetype, 2269 unshare_expr (fd->loops[i].n1)); 2270 n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t); 2271 } 2272 else 2273 { 2274 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); 2275 n1 = fold_build2 (MULT_EXPR, itype, 2276 vs[i - fd->loops[i].outer], n1); 2277 n1 = fold_build2 (PLUS_EXPR, itype, n1, t); 2278 } 2279 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, 2280 true, GSI_SAME_STMT); 2281 if (i < fd->last_nonrect) 2282 { 2283 vs[i] = create_tmp_reg (itype, ".it"); 2284 expand_omp_build_assign (&gsi2, vs[i], n1); 2285 } 2286 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 2287 if (fd->loops[i].m2 == NULL_TREE) 2288 n2 = t; 2289 else if (POINTER_TYPE_P (itype)) 2290 { 2291 gcc_assert (integer_onep (fd->loops[i].m2)); 2292 t = fold_convert (sizetype, 2293 unshare_expr (fd->loops[i].n2)); 2294 n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t); 2295 } 2296 else 2297 { 2298 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); 2299 n2 = fold_build2 (MULT_EXPR, itype, 2300 
vs[i - fd->loops[i].outer], n2); 2301 n2 = fold_build2 (PLUS_EXPR, itype, n2, t); 2302 } 2303 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, 2304 true, GSI_SAME_STMT); 2305 if (POINTER_TYPE_P (itype)) 2306 itype = signed_type_for (itype); 2307 if (i == fd->last_nonrect) 2308 { 2309 gcond *cond_stmt 2310 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2311 n1, n2); 2312 e = split_block (cur_bb, cond_stmt); 2313 e->flags = EDGE_TRUE_VALUE; 2314 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); 2315 e->probability = profile_probability::likely ().guessed (); 2316 ne->probability = e->probability.invert (); 2317 gsi2 = gsi_after_labels (e->dest); 2318 2319 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 2320 ? -1 : 1)); 2321 t = fold_build2 (PLUS_EXPR, itype, 2322 fold_convert (itype, fd->loops[i].step), t); 2323 t = fold_build2 (PLUS_EXPR, itype, t, 2324 fold_convert (itype, n2)); 2325 t = fold_build2 (MINUS_EXPR, itype, t, 2326 fold_convert (itype, n1)); 2327 tree step = fold_convert (itype, fd->loops[i].step); 2328 if (TYPE_UNSIGNED (itype) 2329 && fd->loops[i].cond_code == GT_EXPR) 2330 t = fold_build2 (TRUNC_DIV_EXPR, itype, 2331 fold_build1 (NEGATE_EXPR, itype, t), 2332 fold_build1 (NEGATE_EXPR, itype, step)); 2333 else 2334 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 2335 t = fold_convert (type, t); 2336 t = fold_build2 (PLUS_EXPR, type, 2337 counts[fd->last_nonrect], t); 2338 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2339 true, GSI_SAME_STMT); 2340 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t); 2341 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); 2342 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); 2343 break; 2344 } 2345 e = split_block (cur_bb, last_stmt (cur_bb)); 2346 2347 basic_block new_cur_bb = create_empty_bb (cur_bb); 2348 add_bb_to_loop (new_cur_bb, cur_bb->loop_father); 2349 2350 gsi2 = gsi_after_labels (e->dest); 2351 tree step = fold_convert (itype, 2352 unshare_expr 
(fd->loops[i].step)); 2353 if (POINTER_TYPE_P (TREE_TYPE (vs[i]))) 2354 t = fold_build_pointer_plus (vs[i], 2355 fold_convert (sizetype, step)); 2356 else 2357 t = fold_build2 (PLUS_EXPR, itype, vs[i], step); 2358 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2359 true, GSI_SAME_STMT); 2360 expand_omp_build_assign (&gsi2, vs[i], t); 2361 2362 ne = split_block (e->dest, last_stmt (e->dest)); 2363 gsi2 = gsi_after_labels (ne->dest); 2364 2365 expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2); 2366 edge e3, e4; 2367 if (next_bb == entry_bb) 2368 { 2369 e3 = find_edge (ne->dest, next_bb); 2370 e3->flags = EDGE_FALSE_VALUE; 2371 } 2372 else 2373 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE); 2374 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE); 2375 e4->probability = profile_probability::likely ().guessed (); 2376 e3->probability = e4->probability.invert (); 2377 basic_block esrc = e->src; 2378 make_edge (e->src, ne->dest, EDGE_FALLTHRU); 2379 cur_bb = new_cur_bb; 2380 basic_block latch_bb = next_bb; 2381 next_bb = e->dest; 2382 remove_edge (e); 2383 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc); 2384 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest); 2385 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest); 2386 } 2387 } 2388 t = NULL_TREE; 2389 for (i = fd->first_nonrect; i < fd->last_nonrect; i++) 2390 if (!fd->loops[i].non_rect_referenced 2391 && fd->loops[i].m1 == NULL_TREE 2392 && fd->loops[i].m2 == NULL_TREE) 2393 { 2394 if (t == NULL_TREE) 2395 t = counts[i]; 2396 else 2397 t = fold_build2 (MULT_EXPR, type, t, counts[i]); 2398 } 2399 if (t) 2400 { 2401 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t); 2402 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t); 2403 } 2404 if (!rect_count_seen) 2405 t = counts[fd->last_nonrect]; 2406 else 2407 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, 2408 counts[fd->last_nonrect]); 2409 expand_omp_build_assign (gsi, fd->loop.n2, t); 2410 
} 2411 else if (fd->non_rect) 2412 { 2413 tree t = fd->loop.n2; 2414 gcc_assert (TREE_CODE (t) == INTEGER_CST); 2415 int non_rect_referenced = 0, non_rect = 0; 2416 for (i = 0; i < fd->collapse; i++) 2417 { 2418 if ((i < fd->first_nonrect || i > fd->last_nonrect) 2419 && !integer_zerop (counts[i])) 2420 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]); 2421 if (fd->loops[i].non_rect_referenced) 2422 non_rect_referenced++; 2423 if (fd->loops[i].m1 || fd->loops[i].m2) 2424 non_rect++; 2425 } 2426 gcc_assert (non_rect == 1 && non_rect_referenced == 1); 2427 counts[fd->last_nonrect] = t; 2428 } 2429} 2430 2431/* Helper function for expand_omp_{for_*,simd}. Generate code like: 2432 T = V; 2433 V3 = N31 + (T % count3) * STEP3; 2434 T = T / count3; 2435 V2 = N21 + (T % count2) * STEP2; 2436 T = T / count2; 2437 V1 = N11 + T * STEP1; 2438 if this loop doesn't have an inner loop construct combined with it. 2439 If it does have an inner loop construct combined with it and the 2440 iteration count isn't known constant, store values from counts array 2441 into its _looptemp_ temporaries instead. 2442 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect 2443 inclusive), use the count of all those loops together, and either 2444 find quadratic etc. 
equation roots, or as a fallback, do: 2445 COUNT = 0; 2446 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) 2447 for (tmpj = M21 * tmpi + N21; 2448 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2) 2449 { 2450 int tmpk1 = M31 * tmpj + N31; 2451 int tmpk2 = M32 * tmpj + N32; 2452 if (tmpk1 COND3 tmpk2) 2453 { 2454 if (COND3 is <) 2455 adj = STEP3 - 1; 2456 else 2457 adj = STEP3 + 1; 2458 int temp = (adj + tmpk2 - tmpk1) / STEP3; 2459 if (COUNT + temp > T) 2460 { 2461 V1 = tmpi; 2462 V2 = tmpj; 2463 V3 = tmpk1 + (T - COUNT) * STEP3; 2464 goto done; 2465 } 2466 else 2467 COUNT += temp; 2468 } 2469 } 2470 done:; 2471 but for optional innermost or outermost rectangular loops that aren't 2472 referenced by other loop expressions keep doing the division/modulo. */ 2473 2474static void 2475expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, 2476 tree *counts, tree *nonrect_bounds, 2477 gimple *inner_stmt, tree startvar) 2478{ 2479 int i; 2480 if (gimple_omp_for_combined_p (fd->for_stmt)) 2481 { 2482 /* If fd->loop.n2 is constant, then no propagation of the counts 2483 is needed, they are constant. */ 2484 if (TREE_CODE (fd->loop.n2) == INTEGER_CST) 2485 return; 2486 2487 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR 2488 ? gimple_omp_taskreg_clauses (inner_stmt) 2489 : gimple_omp_for_clauses (inner_stmt); 2490 /* First two _looptemp_ clauses are for istart/iend, counts[0] 2491 isn't supposed to be handled, as the inner loop doesn't 2492 use it. 
*/ 2493 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 2494 gcc_assert (innerc); 2495 int count = 0; 2496 if (fd->non_rect 2497 && fd->last_nonrect == fd->first_nonrect + 1 2498 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v))) 2499 count = 4; 2500 for (i = 0; i < fd->collapse + count; i++) 2501 { 2502 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2503 OMP_CLAUSE__LOOPTEMP_); 2504 gcc_assert (innerc); 2505 if (i) 2506 { 2507 tree tem = OMP_CLAUSE_DECL (innerc); 2508 tree t; 2509 if (i < fd->collapse) 2510 t = counts[i]; 2511 else 2512 switch (i - fd->collapse) 2513 { 2514 case 0: t = counts[0]; break; 2515 case 1: t = fd->first_inner_iterations; break; 2516 case 2: t = fd->factor; break; 2517 case 3: t = fd->adjn1; break; 2518 default: gcc_unreachable (); 2519 } 2520 t = fold_convert (TREE_TYPE (tem), t); 2521 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 2522 false, GSI_CONTINUE_LINKING); 2523 gassign *stmt = gimple_build_assign (tem, t); 2524 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2525 } 2526 } 2527 return; 2528 } 2529 2530 tree type = TREE_TYPE (fd->loop.v); 2531 tree tem = create_tmp_reg (type, ".tem"); 2532 gassign *stmt = gimple_build_assign (tem, startvar); 2533 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2534 2535 for (i = fd->collapse - 1; i >= 0; i--) 2536 { 2537 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; 2538 itype = vtype; 2539 if (POINTER_TYPE_P (vtype)) 2540 itype = signed_type_for (vtype); 2541 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect)) 2542 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); 2543 else 2544 t = tem; 2545 if (i == fd->last_nonrect) 2546 { 2547 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, 2548 false, GSI_CONTINUE_LINKING); 2549 tree stopval = t; 2550 tree idx = create_tmp_reg (type, ".count"); 2551 expand_omp_build_assign (gsi, idx, 2552 build_zero_cst (type), true); 2553 basic_block bb_triang = NULL, bb_triang_dom = NULL; 2554 if 
(fd->first_nonrect + 1 == fd->last_nonrect 2555 && (TREE_CODE (fd->loop.n2) == INTEGER_CST 2556 || fd->first_inner_iterations) 2557 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node)) 2558 != CODE_FOR_nothing) 2559 && !integer_zerop (fd->loop.n2)) 2560 { 2561 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1; 2562 tree itype = TREE_TYPE (fd->loops[i].v); 2563 tree first_inner_iterations = fd->first_inner_iterations; 2564 tree factor = fd->factor; 2565 gcond *cond_stmt 2566 = expand_omp_build_cond (gsi, NE_EXPR, factor, 2567 build_zero_cst (TREE_TYPE (factor))); 2568 edge e = split_block (gsi_bb (*gsi), cond_stmt); 2569 basic_block bb0 = e->src; 2570 e->flags = EDGE_TRUE_VALUE; 2571 e->probability = profile_probability::likely (); 2572 bb_triang_dom = bb0; 2573 *gsi = gsi_after_labels (e->dest); 2574 tree slltype = long_long_integer_type_node; 2575 tree ulltype = long_long_unsigned_type_node; 2576 tree stopvalull = fold_convert (ulltype, stopval); 2577 stopvalull 2578 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE, 2579 false, GSI_CONTINUE_LINKING); 2580 first_inner_iterations 2581 = fold_convert (slltype, first_inner_iterations); 2582 first_inner_iterations 2583 = force_gimple_operand_gsi (gsi, first_inner_iterations, true, 2584 NULL_TREE, false, 2585 GSI_CONTINUE_LINKING); 2586 factor = fold_convert (slltype, factor); 2587 factor 2588 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE, 2589 false, GSI_CONTINUE_LINKING); 2590 tree first_inner_iterationsd 2591 = fold_build1 (FLOAT_EXPR, double_type_node, 2592 first_inner_iterations); 2593 first_inner_iterationsd 2594 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true, 2595 NULL_TREE, false, 2596 GSI_CONTINUE_LINKING); 2597 tree factord = fold_build1 (FLOAT_EXPR, double_type_node, 2598 factor); 2599 factord = force_gimple_operand_gsi (gsi, factord, true, 2600 NULL_TREE, false, 2601 GSI_CONTINUE_LINKING); 2602 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node, 
2603 stopvalull); 2604 stopvald = force_gimple_operand_gsi (gsi, stopvald, true, 2605 NULL_TREE, false, 2606 GSI_CONTINUE_LINKING); 2607 /* Temporarily disable flag_rounding_math, values will be 2608 decimal numbers divided by 2 and worst case imprecisions 2609 due to too large values ought to be caught later by the 2610 checks for fallback. */ 2611 int save_flag_rounding_math = flag_rounding_math; 2612 flag_rounding_math = 0; 2613 t = fold_build2 (RDIV_EXPR, double_type_node, factord, 2614 build_real (double_type_node, dconst2)); 2615 tree t3 = fold_build2 (MINUS_EXPR, double_type_node, 2616 first_inner_iterationsd, t); 2617 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false, 2618 GSI_CONTINUE_LINKING); 2619 t = fold_build2 (MULT_EXPR, double_type_node, factord, 2620 build_real (double_type_node, dconst2)); 2621 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald); 2622 t = fold_build2 (PLUS_EXPR, double_type_node, t, 2623 fold_build2 (MULT_EXPR, double_type_node, 2624 t3, t3)); 2625 flag_rounding_math = save_flag_rounding_math; 2626 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, 2627 GSI_CONTINUE_LINKING); 2628 if (flag_exceptions 2629 && cfun->can_throw_non_call_exceptions 2630 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE)) 2631 { 2632 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t, 2633 build_zero_cst (double_type_node)); 2634 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE, 2635 false, GSI_CONTINUE_LINKING); 2636 cond_stmt = gimple_build_cond (NE_EXPR, tem, 2637 boolean_false_node, 2638 NULL_TREE, NULL_TREE); 2639 } 2640 else 2641 cond_stmt 2642 = gimple_build_cond (LT_EXPR, t, 2643 build_zero_cst (double_type_node), 2644 NULL_TREE, NULL_TREE); 2645 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); 2646 e = split_block (gsi_bb (*gsi), cond_stmt); 2647 basic_block bb1 = e->src; 2648 e->flags = EDGE_FALSE_VALUE; 2649 e->probability = profile_probability::very_likely (); 2650 *gsi = 
gsi_after_labels (e->dest); 2651 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t); 2652 tree sqrtr = create_tmp_var (double_type_node); 2653 gimple_call_set_lhs (call, sqrtr); 2654 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING); 2655 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3); 2656 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord); 2657 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t); 2658 tree c = create_tmp_var (ulltype); 2659 tree d = create_tmp_var (ulltype); 2660 expand_omp_build_assign (gsi, c, t, true); 2661 t = fold_build2 (MINUS_EXPR, ulltype, c, 2662 build_one_cst (ulltype)); 2663 t = fold_build2 (MULT_EXPR, ulltype, c, t); 2664 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node); 2665 t = fold_build2 (MULT_EXPR, ulltype, 2666 fold_convert (ulltype, fd->factor), t); 2667 tree t2 2668 = fold_build2 (MULT_EXPR, ulltype, c, 2669 fold_convert (ulltype, 2670 fd->first_inner_iterations)); 2671 t = fold_build2 (PLUS_EXPR, ulltype, t, t2); 2672 expand_omp_build_assign (gsi, d, t, true); 2673 t = fold_build2 (MULT_EXPR, ulltype, 2674 fold_convert (ulltype, fd->factor), c); 2675 t = fold_build2 (PLUS_EXPR, ulltype, 2676 t, fold_convert (ulltype, 2677 fd->first_inner_iterations)); 2678 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, 2679 GSI_CONTINUE_LINKING); 2680 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d, 2681 NULL_TREE, NULL_TREE); 2682 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); 2683 e = split_block (gsi_bb (*gsi), cond_stmt); 2684 basic_block bb2 = e->src; 2685 e->flags = EDGE_TRUE_VALUE; 2686 e->probability = profile_probability::very_likely (); 2687 *gsi = gsi_after_labels (e->dest); 2688 t = fold_build2 (PLUS_EXPR, ulltype, d, t2); 2689 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, 2690 GSI_CONTINUE_LINKING); 2691 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t, 2692 NULL_TREE, NULL_TREE); 2693 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); 2694 e 
= split_block (gsi_bb (*gsi), cond_stmt); 2695 basic_block bb3 = e->src; 2696 e->flags = EDGE_FALSE_VALUE; 2697 e->probability = profile_probability::very_likely (); 2698 *gsi = gsi_after_labels (e->dest); 2699 t = fold_convert (itype, c); 2700 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step); 2701 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t); 2702 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, 2703 GSI_CONTINUE_LINKING); 2704 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true); 2705 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d); 2706 t2 = fold_convert (itype, t2); 2707 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step); 2708 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1); 2709 if (fd->loops[i].m1) 2710 { 2711 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1); 2712 t2 = fold_build2 (PLUS_EXPR, itype, t2, t); 2713 } 2714 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true); 2715 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi)); 2716 bb_triang = e->src; 2717 *gsi = gsi_after_labels (e->dest); 2718 remove_edge (e); 2719 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE); 2720 e->probability = profile_probability::very_unlikely (); 2721 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE); 2722 e->probability = profile_probability::very_unlikely (); 2723 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE); 2724 e->probability = profile_probability::very_unlikely (); 2725 2726 basic_block bb4 = create_empty_bb (bb0); 2727 add_bb_to_loop (bb4, bb0->loop_father); 2728 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE); 2729 e->probability = profile_probability::unlikely (); 2730 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU); 2731 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0); 2732 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0); 2733 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4); 2734 t2 = fold_build2 (TRUNC_DIV_EXPR, type, 2735 counts[i], counts[i - 1]); 2736 t2 = 
force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false, 2737 GSI_CONTINUE_LINKING); 2738 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2); 2739 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2); 2740 t = fold_convert (itype, t); 2741 t2 = fold_convert (itype, t2); 2742 t = fold_build2 (MULT_EXPR, itype, t, 2743 fold_convert (itype, fd->loops[i].step)); 2744 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 2745 t2 = fold_build2 (MULT_EXPR, itype, t2, 2746 fold_convert (itype, fd->loops[i - 1].step)); 2747 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2); 2748 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE, 2749 false, GSI_CONTINUE_LINKING); 2750 stmt = gimple_build_assign (fd->loops[i - 1].v, t2); 2751 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); 2752 if (fd->loops[i].m1) 2753 { 2754 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1, 2755 fd->loops[i - 1].v); 2756 t = fold_build2 (PLUS_EXPR, itype, t, t2); 2757 } 2758 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE, 2759 false, GSI_CONTINUE_LINKING); 2760 stmt = gimple_build_assign (fd->loops[i].v, t); 2761 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); 2762 } 2763 /* Fallback implementation. Evaluate the loops in between 2764 (inclusive) fd->first_nonrect and fd->last_nonrect at 2765 runtime using temporaries instead of the original iteration 2766 variables, in the body just bump the counter and compare 2767 with the desired value.
*/ 2768 gimple_stmt_iterator gsi2 = *gsi; 2769 basic_block entry_bb = gsi_bb (gsi2); 2770 edge e = split_block (entry_bb, gsi_stmt (gsi2)); 2771 e = split_block (e->dest, (gimple *) NULL); 2772 basic_block dom_bb = NULL; 2773 basic_block cur_bb = e->src; 2774 basic_block next_bb = e->dest; 2775 entry_bb = e->dest; 2776 *gsi = gsi_after_labels (entry_bb); 2777 2778 tree *vs = XALLOCAVEC (tree, fd->last_nonrect); 2779 tree n1 = NULL_TREE, n2 = NULL_TREE; 2780 memset (vs, 0, fd->last_nonrect * sizeof (tree)); 2781 2782 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++) 2783 { 2784 tree itype = TREE_TYPE (fd->loops[j].v); 2785 bool rect_p = (fd->loops[j].m1 == NULL_TREE 2786 && fd->loops[j].m2 == NULL_TREE 2787 && !fd->loops[j].non_rect_referenced); 2788 gsi2 = gsi_after_labels (cur_bb); 2789 t = fold_convert (itype, unshare_expr (fd->loops[j].n1)); 2790 if (fd->loops[j].m1 == NULL_TREE) 2791 n1 = rect_p ? build_zero_cst (type) : t; 2792 else if (POINTER_TYPE_P (itype)) 2793 { 2794 gcc_assert (integer_onep (fd->loops[j].m1)); 2795 t = fold_convert (sizetype, 2796 unshare_expr (fd->loops[j].n1)); 2797 n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t); 2798 } 2799 else 2800 { 2801 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1)); 2802 n1 = fold_build2 (MULT_EXPR, itype, 2803 vs[j - fd->loops[j].outer], n1); 2804 n1 = fold_build2 (PLUS_EXPR, itype, n1, t); 2805 } 2806 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, 2807 true, GSI_SAME_STMT); 2808 if (j < fd->last_nonrect) 2809 { 2810 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it"); 2811 expand_omp_build_assign (&gsi2, vs[j], n1); 2812 } 2813 t = fold_convert (itype, unshare_expr (fd->loops[j].n2)); 2814 if (fd->loops[j].m2 == NULL_TREE) 2815 n2 = rect_p ? 
counts[j] : t; 2816 else if (POINTER_TYPE_P (itype)) 2817 { 2818 gcc_assert (integer_onep (fd->loops[j].m2)); 2819 t = fold_convert (sizetype, 2820 unshare_expr (fd->loops[j].n2)); 2821 n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t); 2822 } 2823 else 2824 { 2825 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2)); 2826 n2 = fold_build2 (MULT_EXPR, itype, 2827 vs[j - fd->loops[j].outer], n2); 2828 n2 = fold_build2 (PLUS_EXPR, itype, n2, t); 2829 } 2830 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, 2831 true, GSI_SAME_STMT); 2832 if (POINTER_TYPE_P (itype)) 2833 itype = signed_type_for (itype); 2834 if (j == fd->last_nonrect) 2835 { 2836 gcond *cond_stmt 2837 = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, 2838 n1, n2); 2839 e = split_block (cur_bb, cond_stmt); 2840 e->flags = EDGE_TRUE_VALUE; 2841 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); 2842 e->probability = profile_probability::likely ().guessed (); 2843 ne->probability = e->probability.invert (); 2844 gsi2 = gsi_after_labels (e->dest); 2845 2846 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR 2847 ? 
-1 : 1)); 2848 t = fold_build2 (PLUS_EXPR, itype, 2849 fold_convert (itype, fd->loops[j].step), t); 2850 t = fold_build2 (PLUS_EXPR, itype, t, 2851 fold_convert (itype, n2)); 2852 t = fold_build2 (MINUS_EXPR, itype, t, 2853 fold_convert (itype, n1)); 2854 tree step = fold_convert (itype, fd->loops[j].step); 2855 if (TYPE_UNSIGNED (itype) 2856 && fd->loops[j].cond_code == GT_EXPR) 2857 t = fold_build2 (TRUNC_DIV_EXPR, itype, 2858 fold_build1 (NEGATE_EXPR, itype, t), 2859 fold_build1 (NEGATE_EXPR, itype, step)); 2860 else 2861 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 2862 t = fold_convert (type, t); 2863 t = fold_build2 (PLUS_EXPR, type, idx, t); 2864 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2865 true, GSI_SAME_STMT); 2866 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); 2867 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); 2868 cond_stmt 2869 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE, 2870 NULL_TREE); 2871 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); 2872 e = split_block (gsi_bb (gsi2), cond_stmt); 2873 e->flags = EDGE_TRUE_VALUE; 2874 e->probability = profile_probability::likely ().guessed (); 2875 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE); 2876 ne->probability = e->probability.invert (); 2877 gsi2 = gsi_after_labels (e->dest); 2878 expand_omp_build_assign (&gsi2, idx, t); 2879 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb); 2880 break; 2881 } 2882 e = split_block (cur_bb, last_stmt (cur_bb)); 2883 2884 basic_block new_cur_bb = create_empty_bb (cur_bb); 2885 add_bb_to_loop (new_cur_bb, cur_bb->loop_father); 2886 2887 gsi2 = gsi_after_labels (e->dest); 2888 if (rect_p) 2889 t = fold_build2 (PLUS_EXPR, type, vs[j], 2890 build_one_cst (type)); 2891 else 2892 { 2893 tree step 2894 = fold_convert (itype, unshare_expr (fd->loops[j].step)); 2895 if (POINTER_TYPE_P (vtype)) 2896 t = fold_build_pointer_plus (vs[j], fold_convert (sizetype, 2897 step)); 2898 else 2899 t = fold_build2 (PLUS_EXPR, itype, 
vs[j], step); 2900 } 2901 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2902 true, GSI_SAME_STMT); 2903 expand_omp_build_assign (&gsi2, vs[j], t); 2904 2905 edge ne = split_block (e->dest, last_stmt (e->dest)); 2906 gsi2 = gsi_after_labels (ne->dest); 2907 2908 gcond *cond_stmt; 2909 if (next_bb == entry_bb) 2910 /* No need to actually check the outermost condition. */ 2911 cond_stmt 2912 = gimple_build_cond (EQ_EXPR, boolean_true_node, 2913 boolean_true_node, 2914 NULL_TREE, NULL_TREE); 2915 else 2916 cond_stmt 2917 = gimple_build_cond (rect_p ? LT_EXPR 2918 : fd->loops[j].cond_code, 2919 vs[j], n2, NULL_TREE, NULL_TREE); 2920 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); 2921 edge e3, e4; 2922 if (next_bb == entry_bb) 2923 { 2924 e3 = find_edge (ne->dest, next_bb); 2925 e3->flags = EDGE_FALSE_VALUE; 2926 dom_bb = ne->dest; 2927 } 2928 else 2929 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE); 2930 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE); 2931 e4->probability = profile_probability::likely ().guessed (); 2932 e3->probability = e4->probability.invert (); 2933 basic_block esrc = e->src; 2934 make_edge (e->src, ne->dest, EDGE_FALLTHRU); 2935 cur_bb = new_cur_bb; 2936 basic_block latch_bb = next_bb; 2937 next_bb = e->dest; 2938 remove_edge (e); 2939 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc); 2940 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest); 2941 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest); 2942 } 2943 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--) 2944 { 2945 tree vtype = TREE_TYPE (fd->loops[j].v); 2946 tree itype = vtype; 2947 if (POINTER_TYPE_P (itype)) 2948 itype = signed_type_for (itype); 2949 bool rect_p = (fd->loops[j].m1 == NULL_TREE 2950 && fd->loops[j].m2 == NULL_TREE 2951 && !fd->loops[j].non_rect_referenced); 2952 if (j == fd->last_nonrect) 2953 { 2954 t = fold_build2 (MINUS_EXPR, type, stopval, idx); 2955 t = fold_convert (itype, t); 2956 tree t2 2957 = 
fold_convert (itype, unshare_expr (fd->loops[j].step)); 2958 t = fold_build2 (MULT_EXPR, itype, t, t2); 2959 if (POINTER_TYPE_P (vtype)) 2960 t = fold_build_pointer_plus (n1, 2961 fold_convert (sizetype, t)); 2962 else 2963 t = fold_build2 (PLUS_EXPR, itype, n1, t); 2964 } 2965 else if (rect_p) 2966 { 2967 t = fold_convert (itype, vs[j]); 2968 t = fold_build2 (MULT_EXPR, itype, t, 2969 fold_convert (itype, fd->loops[j].step)); 2970 if (POINTER_TYPE_P (vtype)) 2971 t = fold_build_pointer_plus (fd->loops[j].n1, 2972 fold_convert (sizetype, t)); 2973 else 2974 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t); 2975 } 2976 else 2977 t = vs[j]; 2978 t = force_gimple_operand_gsi (gsi, t, false, 2979 NULL_TREE, true, 2980 GSI_SAME_STMT); 2981 stmt = gimple_build_assign (fd->loops[j].v, t); 2982 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); 2983 } 2984 if (gsi_end_p (*gsi)) 2985 *gsi = gsi_last_bb (gsi_bb (*gsi)); 2986 else 2987 gsi_prev (gsi); 2988 if (bb_triang) 2989 { 2990 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi)); 2991 make_edge (bb_triang, e->dest, EDGE_FALLTHRU); 2992 *gsi = gsi_after_labels (e->dest); 2993 if (!gsi_end_p (*gsi)) 2994 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT); 2995 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom); 2996 } 2997 } 2998 else 2999 { 3000 t = fold_convert (itype, t); 3001 t = fold_build2 (MULT_EXPR, itype, t, 3002 fold_convert (itype, fd->loops[i].step)); 3003 if (POINTER_TYPE_P (vtype)) 3004 t = fold_build_pointer_plus (fd->loops[i].n1, t); 3005 else 3006 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 3007 t = force_gimple_operand_gsi (gsi, t, 3008 DECL_P (fd->loops[i].v) 3009 && TREE_ADDRESSABLE (fd->loops[i].v), 3010 NULL_TREE, false, 3011 GSI_CONTINUE_LINKING); 3012 stmt = gimple_build_assign (fd->loops[i].v, t); 3013 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 3014 } 3015 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect)) 3016 { 3017 t = fold_build2 
(TRUNC_DIV_EXPR, type, tem, counts[i]); 3018 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 3019 false, GSI_CONTINUE_LINKING); 3020 stmt = gimple_build_assign (tem, t); 3021 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 3022 } 3023 if (i == fd->last_nonrect) 3024 i = fd->first_nonrect; 3025 } 3026 if (fd->non_rect) 3027 for (i = 0; i <= fd->last_nonrect; i++) 3028 if (fd->loops[i].m2) 3029 { 3030 tree itype = TREE_TYPE (fd->loops[i].v); 3031 3032 tree t; 3033 if (POINTER_TYPE_P (itype)) 3034 { 3035 gcc_assert (integer_onep (fd->loops[i].m2)); 3036 t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2)); 3037 t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v, 3038 t); 3039 } 3040 else 3041 { 3042 t = fold_convert (itype, unshare_expr (fd->loops[i].m2)); 3043 t = fold_build2 (MULT_EXPR, itype, 3044 fd->loops[i - fd->loops[i].outer].v, t); 3045 t = fold_build2 (PLUS_EXPR, itype, t, 3046 fold_convert (itype, 3047 unshare_expr (fd->loops[i].n2))); 3048 } 3049 nonrect_bounds[i] = create_tmp_reg (itype, ".bound"); 3050 t = force_gimple_operand_gsi (gsi, t, false, 3051 NULL_TREE, false, 3052 GSI_CONTINUE_LINKING); 3053 stmt = gimple_build_assign (nonrect_bounds[i], t); 3054 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 3055 } 3056} 3057 3058/* Helper function for expand_omp_for_*. Generate code like: 3059 L10: 3060 V3 += STEP3; 3061 if (V3 cond3 N32) goto BODY_BB; else goto L11; 3062 L11: 3063 V3 = N31; 3064 V2 += STEP2; 3065 if (V2 cond2 N22) goto BODY_BB; else goto L12; 3066 L12: 3067 V2 = N21; 3068 V1 += STEP1; 3069 goto BODY_BB; 3070 For non-rectangular loops, use temporaries stored in nonrect_bounds 3071 for the upper bounds if M?2 multiplier is present. Given e.g. 
3072 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 3073 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 3074 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 3075 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4) 3076 do: 3077 L10: 3078 V4 += STEP4; 3079 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11; 3080 L11: 3081 V4 = N41 + M41 * V2; // This can be left out if the loop 3082 // refers to the immediate parent loop 3083 V3 += STEP3; 3084 if (V3 cond3 N32) goto BODY_BB; else goto L12; 3085 L12: 3086 V3 = N31; 3087 V2 += STEP2; 3088 if (V2 cond2 N22) goto L120; else goto L13; 3089 L120: 3090 V4 = N41 + M41 * V2; 3091 NONRECT_BOUND4 = N42 + M42 * V2; 3092 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12; 3093 L13: 3094 V2 = N21; 3095 V1 += STEP1; 3096 goto L120; */ 3097 3098static basic_block 3099extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds, 3100 basic_block cont_bb, basic_block body_bb) 3101{ 3102 basic_block last_bb, bb, collapse_bb = NULL; 3103 int i; 3104 gimple_stmt_iterator gsi; 3105 edge e; 3106 tree t; 3107 gimple *stmt; 3108 3109 last_bb = cont_bb; 3110 for (i = fd->collapse - 1; i >= 0; i--) 3111 { 3112 tree vtype = TREE_TYPE (fd->loops[i].v); 3113 3114 bb = create_empty_bb (last_bb); 3115 add_bb_to_loop (bb, last_bb->loop_father); 3116 gsi = gsi_start_bb (bb); 3117 3118 if (i < fd->collapse - 1) 3119 { 3120 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); 3121 e->probability 3122 = profile_probability::guessed_always ().apply_scale (1, 8); 3123 3124 struct omp_for_data_loop *l = &fd->loops[i + 1]; 3125 if (l->m1 == NULL_TREE || l->outer != 1) 3126 { 3127 t = l->n1; 3128 if (l->m1) 3129 { 3130 if (POINTER_TYPE_P (TREE_TYPE (l->v))) 3131 t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v, 3132 fold_convert (sizetype, t)); 3133 else 3134 { 3135 tree t2 3136 = fold_build2 (MULT_EXPR, TREE_TYPE (t), 3137 fd->loops[i + 1 - l->outer].v, l->m1); 3138 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t); 3139 } 3140 } 
3141 t = force_gimple_operand_gsi (&gsi, t, 3142 DECL_P (l->v) 3143 && TREE_ADDRESSABLE (l->v), 3144 NULL_TREE, false, 3145 GSI_CONTINUE_LINKING); 3146 stmt = gimple_build_assign (l->v, t); 3147 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 3148 } 3149 } 3150 else 3151 collapse_bb = bb; 3152 3153 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 3154 3155 if (POINTER_TYPE_P (vtype)) 3156 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); 3157 else 3158 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); 3159 t = force_gimple_operand_gsi (&gsi, t, 3160 DECL_P (fd->loops[i].v) 3161 && TREE_ADDRESSABLE (fd->loops[i].v), 3162 NULL_TREE, false, GSI_CONTINUE_LINKING); 3163 stmt = gimple_build_assign (fd->loops[i].v, t); 3164 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 3165 3166 if (fd->loops[i].non_rect_referenced) 3167 { 3168 basic_block update_bb = NULL, prev_bb = NULL; 3169 for (int j = i + 1; j <= fd->last_nonrect; j++) 3170 if (j - fd->loops[j].outer == i) 3171 { 3172 tree n1, n2; 3173 struct omp_for_data_loop *l = &fd->loops[j]; 3174 basic_block this_bb = create_empty_bb (last_bb); 3175 add_bb_to_loop (this_bb, last_bb->loop_father); 3176 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb); 3177 if (prev_bb) 3178 { 3179 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE); 3180 e->probability 3181 = profile_probability::guessed_always ().apply_scale (7, 3182 8); 3183 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb); 3184 } 3185 if (l->m1) 3186 { 3187 if (POINTER_TYPE_P (TREE_TYPE (l->v))) 3188 t = fold_build_pointer_plus (fd->loops[i].v, 3189 fold_convert (sizetype, 3190 l->n1)); 3191 else 3192 { 3193 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1, 3194 fd->loops[i].v); 3195 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), 3196 t, l->n1); 3197 } 3198 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3199 false, 3200 GSI_CONTINUE_LINKING); 3201 stmt = gimple_build_assign (l->v, n1); 3202 
gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); 3203 n1 = l->v; 3204 } 3205 else 3206 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true, 3207 NULL_TREE, false, 3208 GSI_CONTINUE_LINKING); 3209 if (l->m2) 3210 { 3211 if (POINTER_TYPE_P (TREE_TYPE (l->v))) 3212 t = fold_build_pointer_plus (fd->loops[i].v, 3213 fold_convert (sizetype, 3214 l->n2)); 3215 else 3216 { 3217 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2, 3218 fd->loops[i].v); 3219 t = fold_build2 (PLUS_EXPR, 3220 TREE_TYPE (nonrect_bounds[j]), 3221 t, unshare_expr (l->n2)); 3222 } 3223 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3224 false, 3225 GSI_CONTINUE_LINKING); 3226 stmt = gimple_build_assign (nonrect_bounds[j], n2); 3227 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); 3228 n2 = nonrect_bounds[j]; 3229 } 3230 else 3231 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2), 3232 true, NULL_TREE, false, 3233 GSI_CONTINUE_LINKING); 3234 gcond *cond_stmt 3235 = gimple_build_cond (l->cond_code, n1, n2, 3236 NULL_TREE, NULL_TREE); 3237 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING); 3238 if (update_bb == NULL) 3239 update_bb = this_bb; 3240 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE); 3241 e->probability 3242 = profile_probability::guessed_always ().apply_scale (1, 8); 3243 if (prev_bb == NULL) 3244 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb); 3245 prev_bb = this_bb; 3246 } 3247 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE); 3248 e->probability 3249 = profile_probability::guessed_always ().apply_scale (7, 8); 3250 body_bb = update_bb; 3251 } 3252 3253 if (i > 0) 3254 { 3255 if (fd->loops[i].m2) 3256 t = nonrect_bounds[i]; 3257 else 3258 t = unshare_expr (fd->loops[i].n2); 3259 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3260 false, GSI_CONTINUE_LINKING); 3261 tree v = fd->loops[i].v; 3262 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 3263 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 3264 false, GSI_CONTINUE_LINKING); 3265 t 
= fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 3266 stmt = gimple_build_cond_empty (t); 3267 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 3268 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)), 3269 expand_omp_regimplify_p, NULL, NULL) 3270 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)), 3271 expand_omp_regimplify_p, NULL, NULL)) 3272 gimple_regimplify_operands (stmt, &gsi); 3273 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); 3274 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 3275 } 3276 else 3277 make_edge (bb, body_bb, EDGE_FALLTHRU); 3278 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 3279 last_bb = bb; 3280 } 3281 3282 return collapse_bb; 3283} 3284 3285/* Expand #pragma omp ordered depend(source). */ 3286 3287static void 3288expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 3289 tree *counts, location_t loc) 3290{ 3291 enum built_in_function source_ix 3292 = fd->iter_type == long_integer_type_node 3293 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; 3294 gimple *g 3295 = gimple_build_call (builtin_decl_explicit (source_ix), 1, 3296 build_fold_addr_expr (counts[fd->ordered])); 3297 gimple_set_location (g, loc); 3298 gsi_insert_before (gsi, g, GSI_SAME_STMT); 3299} 3300 3301/* Expand a single depend from #pragma omp ordered depend(sink:...). */ 3302 3303static void 3304expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 3305 tree *counts, tree c, location_t loc) 3306{ 3307 auto_vec<tree, 10> args; 3308 enum built_in_function sink_ix 3309 = fd->iter_type == long_integer_type_node 3310 ? 
BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 3311 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 3312 int i; 3313 gimple_stmt_iterator gsi2 = *gsi; 3314 bool warned_step = false; 3315 3316 for (i = 0; i < fd->ordered; i++) 3317 { 3318 tree step = NULL_TREE; 3319 off = TREE_PURPOSE (deps); 3320 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 3321 { 3322 step = TREE_OPERAND (off, 1); 3323 off = TREE_OPERAND (off, 0); 3324 } 3325 if (!integer_zerop (off)) 3326 { 3327 gcc_assert (fd->loops[i].cond_code == LT_EXPR 3328 || fd->loops[i].cond_code == GT_EXPR); 3329 bool forward = fd->loops[i].cond_code == LT_EXPR; 3330 if (step) 3331 { 3332 /* Non-simple Fortran DO loops. If step is variable, 3333 we don't know at compile even the direction, so can't 3334 warn. */ 3335 if (TREE_CODE (step) != INTEGER_CST) 3336 break; 3337 forward = tree_int_cst_sgn (step) != -1; 3338 } 3339 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3340 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 3341 "waiting for lexically later iteration"); 3342 break; 3343 } 3344 deps = TREE_CHAIN (deps); 3345 } 3346 /* If all offsets corresponding to the collapsed loops are zero, 3347 this depend clause can be ignored. FIXME: but there is still a 3348 flush needed. We need to emit one __sync_synchronize () for it 3349 though (perhaps conditionally)? Solve this together with the 3350 conservative dependence folding optimization. 
3351 if (i >= fd->collapse) 3352 return; */ 3353 3354 deps = OMP_CLAUSE_DECL (c); 3355 gsi_prev (&gsi2); 3356 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 3357 edge e2 = split_block_after_labels (e1->dest); 3358 3359 gsi2 = gsi_after_labels (e1->dest); 3360 *gsi = gsi_last_bb (e1->src); 3361 for (i = 0; i < fd->ordered; i++) 3362 { 3363 tree itype = TREE_TYPE (fd->loops[i].v); 3364 tree step = NULL_TREE; 3365 tree orig_off = NULL_TREE; 3366 if (POINTER_TYPE_P (itype)) 3367 itype = sizetype; 3368 if (i) 3369 deps = TREE_CHAIN (deps); 3370 off = TREE_PURPOSE (deps); 3371 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 3372 { 3373 step = TREE_OPERAND (off, 1); 3374 off = TREE_OPERAND (off, 0); 3375 gcc_assert (fd->loops[i].cond_code == LT_EXPR 3376 && integer_onep (fd->loops[i].step) 3377 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 3378 } 3379 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step); 3380 if (step) 3381 { 3382 off = fold_convert_loc (loc, itype, off); 3383 orig_off = off; 3384 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 3385 } 3386 3387 if (integer_zerop (off)) 3388 t = boolean_true_node; 3389 else 3390 { 3391 tree a; 3392 tree co = fold_convert_loc (loc, itype, off); 3393 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 3394 { 3395 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3396 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 3397 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 3398 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 3399 co); 3400 } 3401 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3402 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 3403 fd->loops[i].v, co); 3404 else 3405 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 3406 fd->loops[i].v, co); 3407 if (step) 3408 { 3409 tree t1, t2; 3410 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3411 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 3412 fd->loops[i].n1); 3413 else 3414 t1 = fold_build2_loc (loc, 
LT_EXPR, boolean_type_node, a, 3415 fd->loops[i].n2); 3416 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3417 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 3418 fd->loops[i].n2); 3419 else 3420 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 3421 fd->loops[i].n1); 3422 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 3423 step, build_int_cst (TREE_TYPE (step), 0)); 3424 if (TREE_CODE (step) != INTEGER_CST) 3425 { 3426 t1 = unshare_expr (t1); 3427 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 3428 false, GSI_CONTINUE_LINKING); 3429 t2 = unshare_expr (t2); 3430 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 3431 false, GSI_CONTINUE_LINKING); 3432 } 3433 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 3434 t, t2, t1); 3435 } 3436 else if (fd->loops[i].cond_code == LT_EXPR) 3437 { 3438 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3439 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 3440 fd->loops[i].n1); 3441 else 3442 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 3443 fd->loops[i].n2); 3444 } 3445 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3446 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 3447 fd->loops[i].n2); 3448 else 3449 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 3450 fd->loops[i].n1); 3451 } 3452 if (cond) 3453 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 3454 else 3455 cond = t; 3456 3457 off = fold_convert_loc (loc, itype, off); 3458 3459 if (step 3460 || (fd->loops[i].cond_code == LT_EXPR 3461 ? !integer_onep (fd->loops[i].step) 3462 : !integer_minus_onep (fd->loops[i].step))) 3463 { 3464 if (step == NULL_TREE 3465 && TYPE_UNSIGNED (itype) 3466 && fd->loops[i].cond_code == GT_EXPR) 3467 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 3468 fold_build1_loc (loc, NEGATE_EXPR, itype, 3469 s)); 3470 else 3471 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 3472 orig_off ? 
orig_off : off, s); 3473 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 3474 build_int_cst (itype, 0)); 3475 if (integer_zerop (t) && !warned_step) 3476 { 3477 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 3478 "refers to iteration never in the iteration " 3479 "space"); 3480 warned_step = true; 3481 } 3482 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 3483 cond, t); 3484 } 3485 3486 if (i <= fd->collapse - 1 && fd->collapse > 1) 3487 t = fd->loop.v; 3488 else if (counts[i]) 3489 t = counts[i]; 3490 else 3491 { 3492 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 3493 fd->loops[i].v, fd->loops[i].n1); 3494 t = fold_convert_loc (loc, fd->iter_type, t); 3495 } 3496 if (step) 3497 /* We have divided off by step already earlier. */; 3498 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 3499 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 3500 fold_build1_loc (loc, NEGATE_EXPR, itype, 3501 s)); 3502 else 3503 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 3504 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 3505 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 3506 off = fold_convert_loc (loc, fd->iter_type, off); 3507 if (i <= fd->collapse - 1 && fd->collapse > 1) 3508 { 3509 if (i) 3510 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 3511 off); 3512 if (i < fd->collapse - 1) 3513 { 3514 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 3515 counts[i]); 3516 continue; 3517 } 3518 } 3519 off = unshare_expr (off); 3520 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 3521 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3522 true, GSI_SAME_STMT); 3523 args.safe_push (t); 3524 } 3525 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 3526 gimple_set_location (g, loc); 3527 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 3528 3529 cond = unshare_expr (cond); 3530 cond = force_gimple_operand_gsi (gsi, cond, 
true, NULL_TREE, false, 3531 GSI_CONTINUE_LINKING); 3532 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 3533 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 3534 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 3535 e1->probability = e3->probability.invert (); 3536 e1->flags = EDGE_TRUE_VALUE; 3537 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 3538 3539 *gsi = gsi_after_labels (e2->dest); 3540} 3541 3542/* Expand all #pragma omp ordered depend(source) and 3543 #pragma omp ordered depend(sink:...) constructs in the current 3544 #pragma omp for ordered(n) region. */ 3545 3546static void 3547expand_omp_ordered_source_sink (struct omp_region *region, 3548 struct omp_for_data *fd, tree *counts, 3549 basic_block cont_bb) 3550{ 3551 struct omp_region *inner; 3552 int i; 3553 for (i = fd->collapse - 1; i < fd->ordered; i++) 3554 if (i == fd->collapse - 1 && fd->collapse > 1) 3555 counts[i] = NULL_TREE; 3556 else if (i >= fd->collapse && !cont_bb) 3557 counts[i] = build_zero_cst (fd->iter_type); 3558 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 3559 && integer_onep (fd->loops[i].step)) 3560 counts[i] = NULL_TREE; 3561 else 3562 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 3563 tree atype 3564 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 3565 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 3566 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 3567 3568 for (inner = region->inner; inner; inner = inner->next) 3569 if (inner->type == GIMPLE_OMP_ORDERED) 3570 { 3571 gomp_ordered *ord_stmt = inner->ord_stmt; 3572 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 3573 location_t loc = gimple_location (ord_stmt); 3574 tree c; 3575 for (c = gimple_omp_ordered_clauses (ord_stmt); 3576 c; c = OMP_CLAUSE_CHAIN (c)) 3577 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 3578 break; 3579 if (c) 3580 expand_omp_ordered_source (&gsi, fd, counts, 
loc); 3581 for (c = gimple_omp_ordered_clauses (ord_stmt); 3582 c; c = OMP_CLAUSE_CHAIN (c)) 3583 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 3584 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 3585 gsi_remove (&gsi, true); 3586 } 3587} 3588 3589/* Wrap the body into fd->ordered - fd->collapse loops that aren't 3590 collapsed. */ 3591 3592static basic_block 3593expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 3594 basic_block cont_bb, basic_block body_bb, 3595 basic_block l0_bb, bool ordered_lastprivate) 3596{ 3597 if (fd->ordered == fd->collapse) 3598 return cont_bb; 3599 3600 if (!cont_bb) 3601 { 3602 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 3603 for (int i = fd->collapse; i < fd->ordered; i++) 3604 { 3605 tree type = TREE_TYPE (fd->loops[i].v); 3606 tree n1 = fold_convert (type, fd->loops[i].n1); 3607 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 3608 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3609 size_int (i - fd->collapse + 1), 3610 NULL_TREE, NULL_TREE); 3611 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 3612 } 3613 return NULL; 3614 } 3615 3616 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 3617 { 3618 tree t, type = TREE_TYPE (fd->loops[i].v); 3619 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 3620 expand_omp_build_assign (&gsi, fd->loops[i].v, 3621 fold_convert (type, fd->loops[i].n1)); 3622 if (counts[i]) 3623 expand_omp_build_assign (&gsi, counts[i], 3624 build_zero_cst (fd->iter_type)); 3625 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3626 size_int (i - fd->collapse + 1), 3627 NULL_TREE, NULL_TREE); 3628 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 3629 if (!gsi_end_p (gsi)) 3630 gsi_prev (&gsi); 3631 else 3632 gsi = gsi_last_bb (body_bb); 3633 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 3634 basic_block new_body = e1->dest; 3635 if (body_bb == cont_bb) 3636 cont_bb = new_body; 
3637 edge e2 = NULL; 3638 basic_block new_header; 3639 if (EDGE_COUNT (cont_bb->preds) > 0) 3640 { 3641 gsi = gsi_last_bb (cont_bb); 3642 if (POINTER_TYPE_P (type)) 3643 t = fold_build_pointer_plus (fd->loops[i].v, 3644 fold_convert (sizetype, 3645 fd->loops[i].step)); 3646 else 3647 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 3648 fold_convert (type, fd->loops[i].step)); 3649 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 3650 if (counts[i]) 3651 { 3652 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 3653 build_int_cst (fd->iter_type, 1)); 3654 expand_omp_build_assign (&gsi, counts[i], t); 3655 t = counts[i]; 3656 } 3657 else 3658 { 3659 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 3660 fd->loops[i].v, fd->loops[i].n1); 3661 t = fold_convert (fd->iter_type, t); 3662 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3663 true, GSI_SAME_STMT); 3664 } 3665 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3666 size_int (i - fd->collapse + 1), 3667 NULL_TREE, NULL_TREE); 3668 expand_omp_build_assign (&gsi, aref, t); 3669 gsi_prev (&gsi); 3670 e2 = split_block (cont_bb, gsi_stmt (gsi)); 3671 new_header = e2->dest; 3672 } 3673 else 3674 new_header = cont_bb; 3675 gsi = gsi_after_labels (new_header); 3676 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 3677 true, GSI_SAME_STMT); 3678 tree n2 3679 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 3680 true, NULL_TREE, true, GSI_SAME_STMT); 3681 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 3682 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 3683 edge e3 = split_block (new_header, gsi_stmt (gsi)); 3684 cont_bb = e3->dest; 3685 remove_edge (e1); 3686 make_edge (body_bb, new_header, EDGE_FALLTHRU); 3687 e3->flags = EDGE_FALSE_VALUE; 3688 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 3689 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 3690 e1->probability 
= e3->probability.invert (); 3691 3692 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 3693 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 3694 3695 if (e2) 3696 { 3697 class loop *loop = alloc_loop (); 3698 loop->header = new_header; 3699 loop->latch = e2->src; 3700 add_loop (loop, l0_bb->loop_father); 3701 } 3702 } 3703 3704 /* If there are any lastprivate clauses and it is possible some loops 3705 might have zero iterations, ensure all the decls are initialized, 3706 otherwise we could crash evaluating C++ class iterators with lastprivate 3707 clauses. */ 3708 bool need_inits = false; 3709 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 3710 if (need_inits) 3711 { 3712 tree type = TREE_TYPE (fd->loops[i].v); 3713 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 3714 expand_omp_build_assign (&gsi, fd->loops[i].v, 3715 fold_convert (type, fd->loops[i].n1)); 3716 } 3717 else 3718 { 3719 tree type = TREE_TYPE (fd->loops[i].v); 3720 tree this_cond = fold_build2 (fd->loops[i].cond_code, 3721 boolean_type_node, 3722 fold_convert (type, fd->loops[i].n1), 3723 fold_convert (type, fd->loops[i].n2)); 3724 if (!integer_onep (this_cond)) 3725 need_inits = true; 3726 } 3727 3728 return cont_bb; 3729} 3730 3731/* A subroutine of expand_omp_for. Generate code for a parallel 3732 loop with any schedule. Given parameters: 3733 3734 for (V = N1; V cond N2; V += STEP) BODY; 3735 3736 where COND is "<" or ">", we generate pseudocode 3737 3738 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 3739 if (more) goto L0; else goto L3; 3740 L0: 3741 V = istart0; 3742 iend = iend0; 3743 L1: 3744 BODY; 3745 V += STEP; 3746 if (V cond iend) goto L1; else goto L2; 3747 L2: 3748 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 3749 L3: 3750 3751 If this is a combined omp parallel loop, instead of the call to 3752 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 
3753 If this is gimple_omp_for_combined_p loop, then instead of assigning 3754 V and iend in L0 we assign the first two _looptemp_ clause decls of the 3755 inner GIMPLE_OMP_FOR and V += STEP; and 3756 if (V cond iend) goto L1; else goto L2; are removed. 3757 3758 For collapsed loops, given parameters: 3759 collapse(3) 3760 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 3761 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 3762 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 3763 BODY; 3764 3765 we generate pseudocode 3766 3767 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 3768 if (cond3 is <) 3769 adj = STEP3 - 1; 3770 else 3771 adj = STEP3 + 1; 3772 count3 = (adj + N32 - N31) / STEP3; 3773 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 3774 if (cond2 is <) 3775 adj = STEP2 - 1; 3776 else 3777 adj = STEP2 + 1; 3778 count2 = (adj + N22 - N21) / STEP2; 3779 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 3780 if (cond1 is <) 3781 adj = STEP1 - 1; 3782 else 3783 adj = STEP1 + 1; 3784 count1 = (adj + N12 - N11) / STEP1; 3785 count = count1 * count2 * count3; 3786 goto Z1; 3787 Z0: 3788 count = 0; 3789 Z1: 3790 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 3791 if (more) goto L0; else goto L3; 3792 L0: 3793 V = istart0; 3794 T = V; 3795 V3 = N31 + (T % count3) * STEP3; 3796 T = T / count3; 3797 V2 = N21 + (T % count2) * STEP2; 3798 T = T / count2; 3799 V1 = N11 + T * STEP1; 3800 iend = iend0; 3801 L1: 3802 BODY; 3803 V += 1; 3804 if (V < iend) goto L10; else goto L2; 3805 L10: 3806 V3 += STEP3; 3807 if (V3 cond3 N32) goto L1; else goto L11; 3808 L11: 3809 V3 = N31; 3810 V2 += STEP2; 3811 if (V2 cond2 N22) goto L1; else goto L12; 3812 L12: 3813 V2 = N21; 3814 V1 += STEP1; 3815 goto L1; 3816 L2: 3817 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 3818 L3: 3819 3820 */ 3821 3822static void 3823expand_omp_for_generic (struct omp_region *region, 3824 struct omp_for_data *fd, 3825 enum built_in_function start_fn, 3826 enum built_in_function 
next_fn, 3827 tree sched_arg, 3828 gimple *inner_stmt) 3829{ 3830 tree type, istart0, iend0, iend; 3831 tree t, vmain, vback, bias = NULL_TREE; 3832 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 3833 basic_block l2_bb = NULL, l3_bb = NULL; 3834 gimple_stmt_iterator gsi; 3835 gassign *assign_stmt; 3836 bool in_combined_parallel = is_combined_parallel (region); 3837 bool broken_loop = region->cont == NULL; 3838 edge e, ne; 3839 tree *counts = NULL; 3840 int i; 3841 bool ordered_lastprivate = false; 3842 3843 gcc_assert (!broken_loop || !in_combined_parallel); 3844 gcc_assert (fd->iter_type == long_integer_type_node 3845 || !in_combined_parallel); 3846 3847 entry_bb = region->entry; 3848 cont_bb = region->cont; 3849 collapse_bb = NULL; 3850 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 3851 gcc_assert (broken_loop 3852 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 3853 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 3854 l1_bb = single_succ (l0_bb); 3855 if (!broken_loop) 3856 { 3857 l2_bb = create_empty_bb (cont_bb); 3858 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb 3859 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest 3860 == l1_bb)); 3861 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3862 } 3863 else 3864 l2_bb = NULL; 3865 l3_bb = BRANCH_EDGE (entry_bb)->dest; 3866 exit_bb = region->exit; 3867 3868 gsi = gsi_last_nondebug_bb (entry_bb); 3869 3870 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3871 if (fd->ordered 3872 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3873 OMP_CLAUSE_LASTPRIVATE)) 3874 ordered_lastprivate = false; 3875 tree reductions = NULL_TREE; 3876 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE; 3877 tree memv = NULL_TREE; 3878 if (fd->lastprivate_conditional) 3879 { 3880 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3881 OMP_CLAUSE__CONDTEMP_); 3882 if (fd->have_pointer_condtemp) 3883 condtemp = OMP_CLAUSE_DECL (c); 3884 c = 
omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 3885 cond_var = OMP_CLAUSE_DECL (c); 3886 } 3887 if (sched_arg) 3888 { 3889 if (fd->have_reductemp) 3890 { 3891 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3892 OMP_CLAUSE__REDUCTEMP_); 3893 reductions = OMP_CLAUSE_DECL (c); 3894 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 3895 gimple *g = SSA_NAME_DEF_STMT (reductions); 3896 reductions = gimple_assign_rhs1 (g); 3897 OMP_CLAUSE_DECL (c) = reductions; 3898 entry_bb = gimple_bb (g); 3899 edge e = split_block (entry_bb, g); 3900 if (region->entry == entry_bb) 3901 region->entry = e->dest; 3902 gsi = gsi_last_bb (entry_bb); 3903 } 3904 else 3905 reductions = null_pointer_node; 3906 if (fd->have_pointer_condtemp) 3907 { 3908 tree type = TREE_TYPE (condtemp); 3909 memv = create_tmp_var (type); 3910 TREE_ADDRESSABLE (memv) = 1; 3911 unsigned HOST_WIDE_INT sz 3912 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 3913 sz *= fd->lastprivate_conditional; 3914 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), 3915 false); 3916 mem = build_fold_addr_expr (memv); 3917 } 3918 else 3919 mem = null_pointer_node; 3920 } 3921 if (fd->collapse > 1 || fd->ordered) 3922 { 3923 int first_zero_iter1 = -1, first_zero_iter2 = -1; 3924 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; 3925 3926 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); 3927 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3928 zero_iter1_bb, first_zero_iter1, 3929 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 3930 3931 if (zero_iter1_bb) 3932 { 3933 /* Some counts[i] vars might be uninitialized if 3934 some loop has zero iterations. But the body shouldn't 3935 be executed in that case, so just avoid uninit warnings. */ 3936 for (i = first_zero_iter1; 3937 i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) 3938 if (SSA_VAR_P (counts[i])) 3939 suppress_warning (counts[i], OPT_Wuninitialized); 3940 gsi_prev (&gsi); 3941 e = split_block (entry_bb, gsi_stmt (gsi)); 3942 entry_bb = e->dest; 3943 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 3944 gsi = gsi_last_nondebug_bb (entry_bb); 3945 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 3946 get_immediate_dominator (CDI_DOMINATORS, 3947 zero_iter1_bb)); 3948 } 3949 if (zero_iter2_bb) 3950 { 3951 /* Some counts[i] vars might be uninitialized if 3952 some loop has zero iterations. But the body shouldn't 3953 be executed in that case, so just avoid uninit warnings. */ 3954 for (i = first_zero_iter2; i < fd->ordered; i++) 3955 if (SSA_VAR_P (counts[i])) 3956 suppress_warning (counts[i], OPT_Wuninitialized); 3957 if (zero_iter1_bb) 3958 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 3959 else 3960 { 3961 gsi_prev (&gsi); 3962 e = split_block (entry_bb, gsi_stmt (gsi)); 3963 entry_bb = e->dest; 3964 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 3965 gsi = gsi_last_nondebug_bb (entry_bb); 3966 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 3967 get_immediate_dominator 3968 (CDI_DOMINATORS, zero_iter2_bb)); 3969 } 3970 } 3971 if (fd->collapse == 1) 3972 { 3973 counts[0] = fd->loop.n2; 3974 fd->loop = fd->loops[0]; 3975 } 3976 } 3977 3978 type = TREE_TYPE (fd->loop.v); 3979 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 3980 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 3981 TREE_ADDRESSABLE (istart0) = 1; 3982 TREE_ADDRESSABLE (iend0) = 1; 3983 3984 /* See if we need to bias by LLONG_MIN. 
*/ 3985 if (fd->iter_type == long_long_unsigned_type_node 3986 && TREE_CODE (type) == INTEGER_TYPE 3987 && !TYPE_UNSIGNED (type) 3988 && fd->ordered == 0) 3989 { 3990 tree n1, n2; 3991 3992 if (fd->loop.cond_code == LT_EXPR) 3993 { 3994 n1 = fd->loop.n1; 3995 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 3996 } 3997 else 3998 { 3999 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 4000 n2 = fd->loop.n1; 4001 } 4002 if (TREE_CODE (n1) != INTEGER_CST 4003 || TREE_CODE (n2) != INTEGER_CST 4004 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 4005 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 4006 } 4007 4008 gimple_stmt_iterator gsif = gsi; 4009 gsi_prev (&gsif); 4010 4011 tree arr = NULL_TREE; 4012 if (in_combined_parallel) 4013 { 4014 gcc_assert (fd->ordered == 0); 4015 /* In a combined parallel loop, emit a call to 4016 GOMP_loop_foo_next. */ 4017 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 4018 build_fold_addr_expr (istart0), 4019 build_fold_addr_expr (iend0)); 4020 } 4021 else 4022 { 4023 tree t0, t1, t2, t3, t4; 4024 /* If this is not a combined parallel loop, emit a call to 4025 GOMP_loop_foo_start in ENTRY_BB. 
*/ 4026 t4 = build_fold_addr_expr (iend0); 4027 t3 = build_fold_addr_expr (istart0); 4028 if (fd->ordered) 4029 { 4030 t0 = build_int_cst (unsigned_type_node, 4031 fd->ordered - fd->collapse + 1); 4032 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 4033 fd->ordered 4034 - fd->collapse + 1), 4035 ".omp_counts"); 4036 DECL_NAMELESS (arr) = 1; 4037 TREE_ADDRESSABLE (arr) = 1; 4038 TREE_STATIC (arr) = 1; 4039 vec<constructor_elt, va_gc> *v; 4040 vec_alloc (v, fd->ordered - fd->collapse + 1); 4041 int idx; 4042 4043 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 4044 { 4045 tree c; 4046 if (idx == 0 && fd->collapse > 1) 4047 c = fd->loop.n2; 4048 else 4049 c = counts[idx + fd->collapse - 1]; 4050 tree purpose = size_int (idx); 4051 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 4052 if (TREE_CODE (c) != INTEGER_CST) 4053 TREE_STATIC (arr) = 0; 4054 } 4055 4056 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 4057 if (!TREE_STATIC (arr)) 4058 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 4059 void_type_node, arr), 4060 true, NULL_TREE, true, GSI_SAME_STMT); 4061 t1 = build_fold_addr_expr (arr); 4062 t2 = NULL_TREE; 4063 } 4064 else 4065 { 4066 t2 = fold_convert (fd->iter_type, fd->loop.step); 4067 t1 = fd->loop.n2; 4068 t0 = fd->loop.n1; 4069 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4070 { 4071 tree innerc 4072 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4073 OMP_CLAUSE__LOOPTEMP_); 4074 gcc_assert (innerc); 4075 t0 = OMP_CLAUSE_DECL (innerc); 4076 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4077 OMP_CLAUSE__LOOPTEMP_); 4078 gcc_assert (innerc); 4079 t1 = OMP_CLAUSE_DECL (innerc); 4080 } 4081 if (POINTER_TYPE_P (TREE_TYPE (t0)) 4082 && TYPE_PRECISION (TREE_TYPE (t0)) 4083 != TYPE_PRECISION (fd->iter_type)) 4084 { 4085 /* Avoid casting pointers to integer of a different size. 
*/ 4086 tree itype = signed_type_for (type); 4087 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 4088 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 4089 } 4090 else 4091 { 4092 t1 = fold_convert (fd->iter_type, t1); 4093 t0 = fold_convert (fd->iter_type, t0); 4094 } 4095 if (bias) 4096 { 4097 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 4098 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 4099 } 4100 } 4101 if (fd->iter_type == long_integer_type_node || fd->ordered) 4102 { 4103 if (fd->chunk_size) 4104 { 4105 t = fold_convert (fd->iter_type, fd->chunk_size); 4106 t = omp_adjust_chunk_size (t, fd->simd_schedule); 4107 if (sched_arg) 4108 { 4109 if (fd->ordered) 4110 t = build_call_expr (builtin_decl_explicit (start_fn), 4111 8, t0, t1, sched_arg, t, t3, t4, 4112 reductions, mem); 4113 else 4114 t = build_call_expr (builtin_decl_explicit (start_fn), 4115 9, t0, t1, t2, sched_arg, t, t3, t4, 4116 reductions, mem); 4117 } 4118 else if (fd->ordered) 4119 t = build_call_expr (builtin_decl_explicit (start_fn), 4120 5, t0, t1, t, t3, t4); 4121 else 4122 t = build_call_expr (builtin_decl_explicit (start_fn), 4123 6, t0, t1, t2, t, t3, t4); 4124 } 4125 else if (fd->ordered) 4126 t = build_call_expr (builtin_decl_explicit (start_fn), 4127 4, t0, t1, t3, t4); 4128 else 4129 t = build_call_expr (builtin_decl_explicit (start_fn), 4130 5, t0, t1, t2, t3, t4); 4131 } 4132 else 4133 { 4134 tree t5; 4135 tree c_bool_type; 4136 tree bfn_decl; 4137 4138 /* The GOMP_loop_ull_*start functions have additional boolean 4139 argument, true for < loops and false for > loops. 4140 In Fortran, the C bool type can be different from 4141 boolean_type_node. */ 4142 bfn_decl = builtin_decl_explicit (start_fn); 4143 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 4144 t5 = build_int_cst (c_bool_type, 4145 fd->loop.cond_code == LT_EXPR ? 
1 : 0); 4146 if (fd->chunk_size) 4147 { 4148 tree bfn_decl = builtin_decl_explicit (start_fn); 4149 t = fold_convert (fd->iter_type, fd->chunk_size); 4150 t = omp_adjust_chunk_size (t, fd->simd_schedule); 4151 if (sched_arg) 4152 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, 4153 t, t3, t4, reductions, mem); 4154 else 4155 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 4156 } 4157 else 4158 t = build_call_expr (builtin_decl_explicit (start_fn), 4159 6, t5, t0, t1, t2, t3, t4); 4160 } 4161 } 4162 if (TREE_TYPE (t) != boolean_type_node) 4163 t = fold_build2 (NE_EXPR, boolean_type_node, 4164 t, build_int_cst (TREE_TYPE (t), 0)); 4165 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4166 true, GSI_SAME_STMT); 4167 if (arr && !TREE_STATIC (arr)) 4168 { 4169 tree clobber = build_clobber (TREE_TYPE (arr)); 4170 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 4171 GSI_SAME_STMT); 4172 } 4173 if (fd->have_pointer_condtemp) 4174 expand_omp_build_assign (&gsi, condtemp, memv, false); 4175 if (fd->have_reductemp) 4176 { 4177 gimple *g = gsi_stmt (gsi); 4178 gsi_remove (&gsi, true); 4179 release_ssa_name (gimple_assign_lhs (g)); 4180 4181 entry_bb = region->entry; 4182 gsi = gsi_last_nondebug_bb (entry_bb); 4183 4184 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4185 } 4186 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4187 4188 /* Remove the GIMPLE_OMP_FOR statement. */ 4189 gsi_remove (&gsi, true); 4190 4191 if (gsi_end_p (gsif)) 4192 gsif = gsi_after_labels (gsi_bb (gsif)); 4193 gsi_next (&gsif); 4194 4195 /* Iteration setup for sequential loop goes in L0_BB. 
*/ 4196 tree startvar = fd->loop.v; 4197 tree endvar = NULL_TREE; 4198 4199 if (gimple_omp_for_combined_p (fd->for_stmt)) 4200 { 4201 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 4202 && gimple_omp_for_kind (inner_stmt) 4203 == GF_OMP_FOR_KIND_SIMD); 4204 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 4205 OMP_CLAUSE__LOOPTEMP_); 4206 gcc_assert (innerc); 4207 startvar = OMP_CLAUSE_DECL (innerc); 4208 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4209 OMP_CLAUSE__LOOPTEMP_); 4210 gcc_assert (innerc); 4211 endvar = OMP_CLAUSE_DECL (innerc); 4212 } 4213 4214 gsi = gsi_start_bb (l0_bb); 4215 t = istart0; 4216 if (fd->ordered && fd->collapse == 1) 4217 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 4218 fold_convert (fd->iter_type, fd->loop.step)); 4219 else if (bias) 4220 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 4221 if (fd->ordered && fd->collapse == 1) 4222 { 4223 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 4224 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 4225 fd->loop.n1, fold_convert (sizetype, t)); 4226 else 4227 { 4228 t = fold_convert (TREE_TYPE (startvar), t); 4229 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 4230 fd->loop.n1, t); 4231 } 4232 } 4233 else 4234 { 4235 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 4236 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 4237 t = fold_convert (TREE_TYPE (startvar), t); 4238 } 4239 t = force_gimple_operand_gsi (&gsi, t, 4240 DECL_P (startvar) 4241 && TREE_ADDRESSABLE (startvar), 4242 NULL_TREE, false, GSI_CONTINUE_LINKING); 4243 assign_stmt = gimple_build_assign (startvar, t); 4244 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4245 if (cond_var) 4246 { 4247 tree itype = TREE_TYPE (cond_var); 4248 /* For lastprivate(conditional:) itervar, we need some iteration 4249 counter that starts at unsigned non-zero and increases. 
4250 Prefer as few IVs as possible, so if we can use startvar 4251 itself, use that, or startvar + constant (those would be 4252 incremented with step), and as last resort use the s0 + 1 4253 incremented by 1. */ 4254 if ((fd->ordered && fd->collapse == 1) 4255 || bias 4256 || POINTER_TYPE_P (type) 4257 || TREE_CODE (fd->loop.n1) != INTEGER_CST 4258 || fd->loop.cond_code != LT_EXPR) 4259 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0), 4260 build_int_cst (itype, 1)); 4261 else if (tree_int_cst_sgn (fd->loop.n1) == 1) 4262 t = fold_convert (itype, t); 4263 else 4264 { 4265 tree c = fold_convert (itype, fd->loop.n1); 4266 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 4267 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 4268 } 4269 t = force_gimple_operand_gsi (&gsi, t, false, 4270 NULL_TREE, false, GSI_CONTINUE_LINKING); 4271 assign_stmt = gimple_build_assign (cond_var, t); 4272 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4273 } 4274 4275 t = iend0; 4276 if (fd->ordered && fd->collapse == 1) 4277 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 4278 fold_convert (fd->iter_type, fd->loop.step)); 4279 else if (bias) 4280 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 4281 if (fd->ordered && fd->collapse == 1) 4282 { 4283 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 4284 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 4285 fd->loop.n1, fold_convert (sizetype, t)); 4286 else 4287 { 4288 t = fold_convert (TREE_TYPE (startvar), t); 4289 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 4290 fd->loop.n1, t); 4291 } 4292 } 4293 else 4294 { 4295 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 4296 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 4297 t = fold_convert (TREE_TYPE (startvar), t); 4298 } 4299 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4300 false, GSI_CONTINUE_LINKING); 4301 if (endvar) 4302 { 4303 assign_stmt = gimple_build_assign (endvar, iend); 4304 
gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4305 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 4306 assign_stmt = gimple_build_assign (fd->loop.v, iend); 4307 else 4308 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 4309 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4310 } 4311 /* Handle linear clause adjustments. */ 4312 tree itercnt = NULL_TREE; 4313 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4314 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4315 c; c = OMP_CLAUSE_CHAIN (c)) 4316 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4317 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4318 { 4319 tree d = OMP_CLAUSE_DECL (c); 4320 tree t = d, a, dest; 4321 if (omp_privatize_by_reference (t)) 4322 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4323 tree type = TREE_TYPE (t); 4324 if (POINTER_TYPE_P (type)) 4325 type = sizetype; 4326 dest = unshare_expr (t); 4327 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4328 expand_omp_build_assign (&gsif, v, t); 4329 if (itercnt == NULL_TREE) 4330 { 4331 itercnt = startvar; 4332 tree n1 = fd->loop.n1; 4333 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 4334 { 4335 itercnt 4336 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 4337 itercnt); 4338 n1 = fold_convert (TREE_TYPE (itercnt), n1); 4339 } 4340 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 4341 itercnt, n1); 4342 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 4343 itercnt, fd->loop.step); 4344 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4345 NULL_TREE, false, 4346 GSI_CONTINUE_LINKING); 4347 } 4348 a = fold_build2 (MULT_EXPR, type, 4349 fold_convert (type, itercnt), 4350 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4351 t = fold_build2 (type == TREE_TYPE (t) ? 
PLUS_EXPR 4352 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4353 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4354 false, GSI_CONTINUE_LINKING); 4355 expand_omp_build_assign (&gsi, dest, t, true); 4356 } 4357 if (fd->collapse > 1) 4358 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); 4359 4360 if (fd->ordered) 4361 { 4362 /* Until now, counts array contained number of iterations or 4363 variable containing it for ith loop. From now on, we need 4364 those counts only for collapsed loops, and only for the 2nd 4365 till the last collapsed one. Move those one element earlier, 4366 we'll use counts[fd->collapse - 1] for the first source/sink 4367 iteration counter and so on and counts[fd->ordered] 4368 as the array holding the current counter values for 4369 depend(source). */ 4370 if (fd->collapse > 1) 4371 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 4372 if (broken_loop) 4373 { 4374 int i; 4375 for (i = fd->collapse; i < fd->ordered; i++) 4376 { 4377 tree type = TREE_TYPE (fd->loops[i].v); 4378 tree this_cond 4379 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 4380 fold_convert (type, fd->loops[i].n1), 4381 fold_convert (type, fd->loops[i].n2)); 4382 if (!integer_onep (this_cond)) 4383 break; 4384 } 4385 if (i < fd->ordered) 4386 { 4387 if (entry_bb->loop_father != l0_bb->loop_father) 4388 { 4389 remove_bb_from_loops (l0_bb); 4390 add_bb_to_loop (l0_bb, entry_bb->loop_father); 4391 gcc_assert (single_succ (l0_bb) == l1_bb); 4392 } 4393 cont_bb 4394 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 4395 add_bb_to_loop (cont_bb, l0_bb->loop_father); 4396 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 4397 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 4398 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4399 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 4400 make_edge (cont_bb, l1_bb, 0); 4401 l2_bb = create_empty_bb (cont_bb); 4402 broken_loop = false; 4403 } 4404 } 4405 
expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 4406 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 4407 l0_bb, ordered_lastprivate); 4408 if (counts[fd->collapse - 1]) 4409 { 4410 gcc_assert (fd->collapse == 1); 4411 gsi = gsi_last_bb (l0_bb); 4412 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 4413 istart0, true); 4414 if (cont_bb) 4415 { 4416 gsi = gsi_last_bb (cont_bb); 4417 t = fold_build2 (PLUS_EXPR, fd->iter_type, 4418 counts[fd->collapse - 1], 4419 build_int_cst (fd->iter_type, 1)); 4420 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 4421 tree aref = build4 (ARRAY_REF, fd->iter_type, 4422 counts[fd->ordered], size_zero_node, 4423 NULL_TREE, NULL_TREE); 4424 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 4425 } 4426 t = counts[fd->collapse - 1]; 4427 } 4428 else if (fd->collapse > 1) 4429 t = fd->loop.v; 4430 else 4431 { 4432 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 4433 fd->loops[0].v, fd->loops[0].n1); 4434 t = fold_convert (fd->iter_type, t); 4435 } 4436 gsi = gsi_last_bb (l0_bb); 4437 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 4438 size_zero_node, NULL_TREE, NULL_TREE); 4439 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4440 false, GSI_CONTINUE_LINKING); 4441 expand_omp_build_assign (&gsi, aref, t, true); 4442 } 4443 4444 if (!broken_loop) 4445 { 4446 /* Code to control the increment and predicate for the sequential 4447 loop goes in the CONT_BB. 
*/ 4448 gsi = gsi_last_nondebug_bb (cont_bb); 4449 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4450 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 4451 vmain = gimple_omp_continue_control_use (cont_stmt); 4452 vback = gimple_omp_continue_control_def (cont_stmt); 4453 4454 if (cond_var) 4455 { 4456 tree itype = TREE_TYPE (cond_var); 4457 tree t2; 4458 if ((fd->ordered && fd->collapse == 1) 4459 || bias 4460 || POINTER_TYPE_P (type) 4461 || TREE_CODE (fd->loop.n1) != INTEGER_CST 4462 || fd->loop.cond_code != LT_EXPR) 4463 t2 = build_int_cst (itype, 1); 4464 else 4465 t2 = fold_convert (itype, fd->loop.step); 4466 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 4467 t2 = force_gimple_operand_gsi (&gsi, t2, false, 4468 NULL_TREE, true, GSI_SAME_STMT); 4469 assign_stmt = gimple_build_assign (cond_var, t2); 4470 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4471 } 4472 4473 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4474 { 4475 if (POINTER_TYPE_P (type)) 4476 t = fold_build_pointer_plus (vmain, fd->loop.step); 4477 else 4478 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 4479 t = force_gimple_operand_gsi (&gsi, t, 4480 DECL_P (vback) 4481 && TREE_ADDRESSABLE (vback), 4482 NULL_TREE, true, GSI_SAME_STMT); 4483 assign_stmt = gimple_build_assign (vback, t); 4484 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4485 4486 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 4487 { 4488 tree tem; 4489 if (fd->collapse > 1) 4490 tem = fd->loop.v; 4491 else 4492 { 4493 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 4494 fd->loops[0].v, fd->loops[0].n1); 4495 tem = fold_convert (fd->iter_type, tem); 4496 } 4497 tree aref = build4 (ARRAY_REF, fd->iter_type, 4498 counts[fd->ordered], size_zero_node, 4499 NULL_TREE, NULL_TREE); 4500 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 4501 true, GSI_SAME_STMT); 4502 expand_omp_build_assign (&gsi, aref, tem); 4503 } 4504 4505 t = build2 
(fd->loop.cond_code, boolean_type_node, 4506 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 4507 iend); 4508 gcond *cond_stmt = gimple_build_cond_empty (t); 4509 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 4510 } 4511 4512 /* Remove GIMPLE_OMP_CONTINUE. */ 4513 gsi_remove (&gsi, true); 4514 4515 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4516 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb); 4517 4518 /* Emit code to get the next parallel iteration in L2_BB. */ 4519 gsi = gsi_start_bb (l2_bb); 4520 4521 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 4522 build_fold_addr_expr (istart0), 4523 build_fold_addr_expr (iend0)); 4524 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4525 false, GSI_CONTINUE_LINKING); 4526 if (TREE_TYPE (t) != boolean_type_node) 4527 t = fold_build2 (NE_EXPR, boolean_type_node, 4528 t, build_int_cst (TREE_TYPE (t), 0)); 4529 gcond *cond_stmt = gimple_build_cond_empty (t); 4530 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 4531 } 4532 4533 /* Add the loop cleanup function. 
*/ 4534 gsi = gsi_last_nondebug_bb (exit_bb); 4535 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4536 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 4537 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 4538 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4539 else 4540 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4541 gcall *call_stmt = gimple_build_call (t, 0); 4542 if (fd->ordered) 4543 { 4544 tree arr = counts[fd->ordered]; 4545 tree clobber = build_clobber (TREE_TYPE (arr)); 4546 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 4547 GSI_SAME_STMT); 4548 } 4549 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 4550 { 4551 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 4552 if (fd->have_reductemp) 4553 { 4554 gimple *g = gimple_build_assign (reductions, NOP_EXPR, 4555 gimple_call_lhs (call_stmt)); 4556 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4557 } 4558 } 4559 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 4560 gsi_remove (&gsi, true); 4561 4562 /* Connect the new blocks. 
*/ 4563 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 4564 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 4565 4566 if (!broken_loop) 4567 { 4568 gimple_seq phis; 4569 4570 e = find_edge (cont_bb, l3_bb); 4571 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 4572 4573 phis = phi_nodes (l3_bb); 4574 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 4575 { 4576 gimple *phi = gsi_stmt (gsi); 4577 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 4578 PHI_ARG_DEF_FROM_EDGE (phi, e)); 4579 } 4580 remove_edge (e); 4581 4582 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 4583 e = find_edge (cont_bb, l1_bb); 4584 if (e == NULL) 4585 { 4586 e = BRANCH_EDGE (cont_bb); 4587 gcc_assert (single_succ (e->dest) == l1_bb); 4588 } 4589 if (gimple_omp_for_combined_p (fd->for_stmt)) 4590 { 4591 remove_edge (e); 4592 e = NULL; 4593 } 4594 else if (fd->collapse > 1) 4595 { 4596 remove_edge (e); 4597 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4598 } 4599 else 4600 e->flags = EDGE_TRUE_VALUE; 4601 if (e) 4602 { 4603 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 4604 find_edge (cont_bb, l2_bb)->probability = e->probability.invert (); 4605 } 4606 else 4607 { 4608 e = find_edge (cont_bb, l2_bb); 4609 e->flags = EDGE_FALLTHRU; 4610 } 4611 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 4612 4613 if (gimple_in_ssa_p (cfun)) 4614 { 4615 /* Add phis to the outer loop that connect to the phis in the inner, 4616 original loop, and move the loop entry value of the inner phi to 4617 the loop entry value of the outer phi. 
*/ 4618 gphi_iterator psi; 4619 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 4620 { 4621 location_t locus; 4622 gphi *nphi; 4623 gphi *exit_phi = psi.phi (); 4624 4625 if (virtual_operand_p (gimple_phi_result (exit_phi))) 4626 continue; 4627 4628 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 4629 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 4630 4631 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 4632 edge latch_to_l1 = find_edge (latch, l1_bb); 4633 gphi *inner_phi 4634 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 4635 4636 tree t = gimple_phi_result (exit_phi); 4637 tree new_res = copy_ssa_name (t, NULL); 4638 nphi = create_phi_node (new_res, l0_bb); 4639 4640 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 4641 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 4642 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 4643 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 4644 add_phi_arg (nphi, t, entry_to_l0, locus); 4645 4646 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 4647 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 4648 4649 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 4650 } 4651 } 4652 4653 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 4654 recompute_dominator (CDI_DOMINATORS, l2_bb)); 4655 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 4656 recompute_dominator (CDI_DOMINATORS, l3_bb)); 4657 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 4658 recompute_dominator (CDI_DOMINATORS, l0_bb)); 4659 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 4660 recompute_dominator (CDI_DOMINATORS, l1_bb)); 4661 4662 /* We enter expand_omp_for_generic with a loop. This original loop may 4663 have its own loop struct, or it may be part of an outer loop struct 4664 (which may be the fake loop). 
*/ 4665 class loop *outer_loop = entry_bb->loop_father; 4666 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 4667 4668 add_bb_to_loop (l2_bb, outer_loop); 4669 4670 /* We've added a new loop around the original loop. Allocate the 4671 corresponding loop struct. */ 4672 class loop *new_loop = alloc_loop (); 4673 new_loop->header = l0_bb; 4674 new_loop->latch = l2_bb; 4675 add_loop (new_loop, outer_loop); 4676 4677 /* Allocate a loop structure for the original loop unless we already 4678 had one. */ 4679 if (!orig_loop_has_loop_struct 4680 && !gimple_omp_for_combined_p (fd->for_stmt)) 4681 { 4682 class loop *orig_loop = alloc_loop (); 4683 orig_loop->header = l1_bb; 4684 /* The loop may have multiple latches. */ 4685 add_loop (orig_loop, new_loop); 4686 } 4687 } 4688} 4689 4690/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, 4691 compute needed allocation size. If !ALLOC of team allocations, 4692 if ALLOC of thread allocation. SZ is the initial needed size for 4693 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, 4694 CNT number of elements of each array, for !ALLOC this is 4695 omp_get_num_threads (), for ALLOC number of iterations handled by the 4696 current thread. If PTR is non-NULL, it is the start of the allocation 4697 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ 4698 clauses pointers to the corresponding arrays. 
*/

static tree
expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
			   gimple_stmt_iterator *gsi, bool alloc)
{
  /* Running sum of per-element sizes of all matching arrays (used only
     in the size-computation mode, i.e. when PTR is NULL).  */
  tree eltsz = NULL_TREE;
  /* Alignment known to be guaranteed after the arrays laid out so far;
     0 until the first array has been placed.  */
  unsigned HOST_WIDE_INT preval = 0;
  if (ptr && sz)
    /* Skip over the SZ bytes reserved at the start of the allocation
       for other purposes.  */
    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
		       ptr, size_int (sz));
  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
      {
	/* Element type of the array this _scantemp_ clause points to.  */
	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  {
	    /* The element size's lowest set bit bounds the alignment
	       actually needed between consecutive elements.  */
	    unsigned HOST_WIDE_INT szl
	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
	    szl = least_bit_hwi (szl);
	    if (szl)
	      al = MIN (al, szl);
	  }
	if (ptr == NULL_TREE)
	  {
	    /* Size-computation mode: accumulate per-element sizes.  */
	    if (eltsz == NULL_TREE)
	      eltsz = TYPE_SIZE_UNIT (pointee_type);
	    else
	      eltsz = size_binop (PLUS_EXPR, eltsz,
				  TYPE_SIZE_UNIT (pointee_type));
	  }
	if (preval == 0 && al <= alloc_align)
	  {
	    /* First array and the allocation's guaranteed alignment
	       suffices: just round the static prefix up to AL.  */
	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
	    sz += diff;
	    if (diff && ptr)
	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
				 ptr, size_int (diff));
	  }
	else if (al > preval)
	  {
	    /* Stricter alignment than what the previous arrays
	       guarantee: realign the runtime pointer (or budget the
	       worst-case padding when only computing the size).  */
	    if (ptr)
	      {
		ptr = fold_convert (pointer_sized_int_node, ptr);
		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  al - 1));
		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  -(HOST_WIDE_INT) al));
		ptr = fold_convert (ptr_type_node, ptr);
	      }
	    else
	      sz += al - 1;
	  }
	/* Record the alignment guaranteed after this array; if its
	   element size isn't a known constant, only 1 byte can be
	   assumed for whatever follows.  */
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  preval = al;
	else
	  preval = 1;
	if (ptr)
	  {
	    /* Assignment mode: point the clause decl at the current
	       position and advance past CNT elements.  */
	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
	    ptr = OMP_CLAUSE_DECL (c);
	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
			       size_binop (MULT_EXPR, cnt,
					   TYPE_SIZE_UNIT (pointee_type)));
	  }
      }

  if (ptr == NULL_TREE)
    {
      /* Total size: per-element sizes times CNT, plus the static
	 prefix (which already includes any alignment padding).  */
      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
      if (sz)
	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
      return eltsz;
    }
  else
    return ptr;
}

/* Return the last _looptemp_ clause if one has been created for
   lastprivate on distribute parallel for{, simd} or taskloop.
   FD is the loop data and INNERC should be the second _looptemp_
   clause (the one holding the end of the range).
   This is followed by collapse - 1 _looptemp_ clauses for the
   counts[1] and up, and for triangular loops followed by 4
   further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
   one factor and one adjn1).  After this there is optionally one
   _looptemp_ clause that this function returns.  */

static tree
find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
{
  gcc_assert (innerc);
  int count = fd->collapse - 1;
  /* Triangular (non-rectangular adjacent) loops carry 4 extra
     _looptemp_ clauses to skip over.  */
  if (fd->non_rect
      && fd->last_nonrect == fd->first_nonrect + 1
      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
    count += 4;
  for (int i = 0; i < count; i++)
    {
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
    }
  /* The optional trailing clause, or NULL_TREE if absent.  */
  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
			  OMP_CLAUSE__LOOPTEMP_);
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.
   Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  /* A broken loop is a region without a GIMPLE_OMP_CONTINUE block.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;
  tree reductions = NULL_TREE;
  tree cond_var = NULL_TREE, condtemp = NULL_TREE;

  /* All arithmetic on the iteration space is done in ITYPE, a signed
     integer type when the IV is a pointer.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* Identify the basic blocks of the region and verify its shape.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Remember the statement just before the GIMPLE_OMP_FOR; used later
     as an insertion point for GOMP_loop_start setup.  */
  gsip = gsi;
  gsi_prev (&gsip);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    t = integer_one_node;
  else
    /* T is a compile-time answer (or NULL) to "is the loop entered?".  */
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      /* Emit the "if (N2 cond N1) goto L2;" zero-iteration guard from
	 the pseudocode above, splitting ENTRY_BB around it.  */
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
						n1, n2);
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  /* The new edge into FIN_BB needs PHI arguments; reuse the
	     arguments of the pre-existing edge.  */
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  if (fd->lastprivate_conditional)
    {
      /* The first _condtemp_ clause (if pointer-based) holds the memory
	 block; the second holds the per-thread iteration counter.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
      if (fd->have_pointer_condtemp)
	condtemp = OMP_CLAUSE_DECL (c);
      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
      cond_var = OMP_CLAUSE_DECL (c);
    }
  if (fd->have_reductemp
      /* For scan, we don't want to reinitialize condtemp before the
	 second loop.  */
      || (fd->have_pointer_condtemp && !fd->have_scantemp)
      || fd->have_nonctrl_scantemp)
    {
      /* Emit a GOMP_loop_start call so the runtime can set up
	 reductions and/or the condtemp/scantemp memory.  */
      tree t1 = build_int_cst (long_integer_type_node, 0);
      tree t2 = build_int_cst (long_integer_type_node, 1);
      tree t3 = build_int_cstu (long_integer_type_node,
				(HOST_WIDE_INT_1U << 31) + 1);
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      gimple_stmt_iterator gsi2 = gsi_none ();
      gimple *g = NULL;
      tree mem = null_pointer_node, memv = NULL_TREE;
      unsigned HOST_WIDE_INT condtemp_sz = 0;
      unsigned HOST_WIDE_INT alloc_align = 0;
      if (fd->have_reductemp)
	{
	  gcc_assert (!fd->have_nonctrl_scantemp);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
	  reductions = OMP_CLAUSE_DECL (c);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (c) = reductions;
	  gsi2 = gsi_for_stmt (g);
	}
      else
	{
	  if (gsi_end_p (gsip))
	    gsi2 = gsi_after_labels (region->entry);
	  else
	    gsi2 = gsip;
	  reductions = null_pointer_node;
	}
      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
	{
	  tree type;
	  if (fd->have_pointer_condtemp)
	    type = TREE_TYPE (condtemp);
	  else
	    type = ptr_type_node;
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  unsigned HOST_WIDE_INT sz = 0;
	  tree size = NULL_TREE;
	  if (fd->have_pointer_condtemp)
	    {
	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
	      sz *= fd->lastprivate_conditional;
	      condtemp_sz = sz;
	    }
	  if (fd->have_nonctrl_scantemp)
	    {
	      /* Team allocation: size the scantemp arrays for
		 omp_get_num_threads () elements each.  */
	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
	      gimple *g = gimple_build_call (nthreads, 0);
	      nthreads = create_tmp_var (integer_type_node);
	      gimple_call_set_lhs (g, nthreads);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      nthreads = fold_convert (sizetype, nthreads);
	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
						alloc_align, nthreads, NULL,
						false);
	      size = fold_convert (type, size);
	    }
	  else
	    size = build_int_cst (type, sz);
	  expand_omp_build_assign (&gsi2, memv, size, false);
	  mem = build_fold_addr_expr (memv);
	}
      tree t
	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
			   9, t1, t2, t2, t3, t1, null_pointer_node,
			   null_pointer_node, reductions, mem);
      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				true, GSI_SAME_STMT);
      if (fd->have_pointer_condtemp)
	expand_omp_build_assign (&gsi2, condtemp, memv, false);
      if (fd->have_nonctrl_scantemp)
	{
	  /* Second pass over the _scantemp_ clauses assigns the actual
	     array pointers now that the base is known.  */
	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
				     alloc_align, nthreads, &gsi2, false);
	}
      if (fd->have_reductemp)
	{
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  /* Worksharing loops partition among threads; distribute partitions
     among teams.  */
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  /* For constructs combined into this one, the real bounds live in
     _looptemp_ clauses created by the outer construct.  */
  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  /* Compute the iteration count N = (adj + N2 - N1) / STEP as in the
     pseudocode above.  */
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* q = n / nthreads, tt = n % nthreads; the first TT threads each get
     one extra iteration (the L3 block below).  */
  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  /* SECOND_BB is the "tt = 0; q = q + 1;" block (L3 in the
     pseudocode).  */
  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->have_nonctrl_scantemp)
    {
      /* Thread allocations for scan temporaries: malloc if the total
	 size is large (> 16384 bytes), alloca otherwise; CONTROLB
	 records which path was taken so the exit code can free it.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      tree cnt = create_tmp_var (sizetype);
      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
					   alloc_align, cnt, NULL, true);
      tree size = create_tmp_var (sizetype);
      expand_omp_build_assign (&gsi, size, sz, false);
      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
			      size, size_int (16384));
      expand_omp_build_assign (&gsi, controlb, cmp);
      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
			     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      fourth_bb = split_block (third_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fourth_bb);
      /* FIXME: Once we have allocators, this should use allocator.  */
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
				 &gsi, true);
      gsi_prev (&gsi);
      g = gsi_stmt (gsi);
      fifth_bb = split_block (fourth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (fifth_bb);

      /* alloca path: save the stack pointer in CONTROLP so it can be
	 restored at exit.  */
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
      gimple_call_set_lhs (g, controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
	  {
	    tree tmp = create_tmp_var (sizetype);
	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
				     TYPE_SIZE_UNIT (pointee_type));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	    g = gimple_build_call (alloca_decl, 2, tmp,
				   size_int (TYPE_ALIGN (pointee_type)));
	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  }

      sixth_bb = split_block (fifth_bb, g)->dest;
      gsi = gsi_last_nondebug_bb (sixth_bb);
    }

  /* s0 = q * threadid + tt; e0 = s0 + q (L4 in the pseudocode).  */
  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Pass the computed bounds down to the inner construct through
	 its _looptemp_ clauses.  */
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  innerc = find_lastprivate_looptemp (fd, innerc);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  /* startvar = s0 * STEP + N1.  */
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (cond_var)
    {
      tree itype = TREE_TYPE (cond_var);
      /* For lastprivate(conditional:) itervar, we need some iteration
	 counter that starts at unsigned non-zero and increases.
	 Prefer as few IVs as possible, so if we can use startvar
	 itself, use that, or startvar + constant (those would be
	 incremented with step), and as last resort use the s0 + 1
	 incremented by 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR)
	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
			 build_int_cst (itype, 1));
      else if (tree_int_cst_sgn (n1) == 1)
	t = fold_convert (itype, t);
      else
	{
	  tree c = fold_convert (itype, n1);
	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
	}
      t = force_gimple_operand_gsi (&gsi, t, false,
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      assign_stmt = gimple_build_assign (cond_var, t);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  /* e = e0 * STEP + N1.  */
  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    {
      t = fold_build_pointer_plus (n1, t);
      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
	t = fold_convert (signed_type_for (type), t);
    }
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  tree *nonrect_bounds = NULL;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  tree t = d, a, dest;
	  if (omp_privatize_by_reference (t))
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  /* Recover the absolute iteration number from the
		     adjusted N1: (n1 - loop.n1) / step + s0.  */
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  expand_omp_build_assign (&gsi, dest, t, true);
	}
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (cond_var)
	{
	  /* Advance the conditional-lastprivate counter by the same
	     amount as the IV (see the choice made above).  */
	  tree itype = TREE_TYPE (cond_var);
	  tree t2;
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR)
	    t2 = build_int_cst (itype, 1);
	  else
	    t2 = fold_convert (itype, step);
	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
					 NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (cond_var, t2);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* V += STEP; if (V cond e) goto L1; (pseudocode above).  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      if (fd->have_reductemp
	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
	      && !fd->have_nonctrl_scantemp))
	{
	  /* A GOMP_loop_start was emitted; pair it with the matching
	     GOMP_loop_end{,_cancel} instead of a plain barrier.  */
	  tree fn;
	  if (t)
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
	  else
	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
	  gcall *g = gimple_build_call (fn, 0);
	  if (t)
	    {
	      gimple_call_set_lhs (g, t);
	      if (fd->have_reductemp)
		gsi_insert_after (&gsi, gimple_build_assign (reductions,
							     NOP_EXPR, t),
				  GSI_SAME_STMT);
	    }
	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
	}
      else
	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
	   && !fd->have_nonctrl_scantemp)
    {
      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
      gcall *g = gimple_build_call (fn, 0);
      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
    }
  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
    {
      /* Release the scan temporaries: free () on the malloc path,
	 stack restore on the alloca path, selected by CONTROLB.  */
      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
      tree controlp = NULL_TREE, controlb = NULL_TREE;
      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
	  {
	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
	      controlb = OMP_CLAUSE_DECL (c);
	    else
	      controlp = OMP_CLAUSE_DECL (c);
	    if (controlb && controlp)
	      break;
	  }
      gcc_assert (controlp && controlb);
      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
				     NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit1_bb = split_block (exit_bb, g)->dest;
      gsi = gsi_after_labels (exit1_bb);
      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit2_bb = split_block (exit1_bb, g)->dest;
      gsi = gsi_after_labels (exit2_bb);
      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
			     controlp);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      exit3_bb = split_block (exit2_bb, g)->dest;
      gsi = gsi_after_labels (exit3_bb);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  if (fourth_bb)
    {
      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (third_bb, fourth_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (fourth_bb, fifth_bb);
      redirect_edge_and_branch (ep, sixth_bb);
    }
  else
    sixth_bb = third_bb;
  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
  if (exit1_bb)
    {
      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit_bb, exit1_bb);
      ep->flags = EDGE_TRUE_VALUE;
      ep->probability
	= profile_probability::guessed_always ().apply_scale (1, 2);
      ep = find_edge (exit1_bb, exit2_bb);
      redirect_edge_and_branch (ep, exit3_bb);
    }

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  /* Fix up the dominator tree for everything created above.  */
  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  if (fourth_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
    }
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));
  if (exit1_bb)
    {
      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
    }

  class loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Allocate a loop structure for the sequential loop.  */
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in E->DEST with ARG on edge E.  */

static gphi *
find_phi_with_arg_on_edge (tree arg, edge e)
{
  basic_block bb = e->dest;

  for (gphi_iterator gpi = gsi_start_phis (bb);
       !gsi_end_p (gpi);
       gsi_next (&gpi))
    {
      gphi *phi = gpi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
	return phi;
    }

  return NULL;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and a specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	trip = 0;
	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
					      here so that V is defined
					      if the loop is not entered
    L0:
	s0 = (trip * nthreads + threadid) * CHUNK;
	e0 = min (s0 + CHUNK, n);
	if (s0 < n) goto L1; else goto L4;
    L1:
	V = s0 * STEP + N1;
	e = e0 * STEP + N1;
    L2:
	BODY;
	V += STEP;
	if (V cond e) goto L2; else goto L3;
    L3:
	trip += 1;
	goto L0;
    L4:
*/

static void
expand_omp_for_static_chunk (struct omp_region *region,
			     struct omp_for_data *fd, gimple *inner_stmt)
{
  tree n, s0, e0, e, t;
  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
  tree type, itype, vmain, vback, vextra;
  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
  basic_block trip_update_bb = NULL, cont_bb,
collapse_bb = NULL, fin_bb; 5650 gimple_stmt_iterator gsi, gsip; 5651 edge se; 5652 bool broken_loop = region->cont == NULL; 5653 tree *counts = NULL; 5654 tree n1, n2, step; 5655 tree reductions = NULL_TREE; 5656 tree cond_var = NULL_TREE, condtemp = NULL_TREE; 5657 5658 itype = type = TREE_TYPE (fd->loop.v); 5659 if (POINTER_TYPE_P (type)) 5660 itype = signed_type_for (type); 5661 5662 entry_bb = region->entry; 5663 se = split_block (entry_bb, last_stmt (entry_bb)); 5664 entry_bb = se->src; 5665 iter_part_bb = se->dest; 5666 cont_bb = region->cont; 5667 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 5668 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 5669 gcc_assert (broken_loop 5670 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 5671 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 5672 body_bb = single_succ (seq_start_bb); 5673 if (!broken_loop) 5674 { 5675 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 5676 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 5677 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5678 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 5679 } 5680 exit_bb = region->exit; 5681 5682 /* Trip and adjustment setup goes in ENTRY_BB. 
*/ 5683 gsi = gsi_last_nondebug_bb (entry_bb); 5684 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5685 gsip = gsi; 5686 gsi_prev (&gsip); 5687 5688 if (fd->collapse > 1) 5689 { 5690 int first_zero_iter = -1, dummy = -1; 5691 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 5692 5693 counts = XALLOCAVEC (tree, fd->collapse); 5694 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5695 fin_bb, first_zero_iter, 5696 dummy_bb, dummy, l2_dom_bb); 5697 t = NULL_TREE; 5698 } 5699 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 5700 t = integer_one_node; 5701 else 5702 t = fold_binary (fd->loop.cond_code, boolean_type_node, 5703 fold_convert (type, fd->loop.n1), 5704 fold_convert (type, fd->loop.n2)); 5705 if (fd->collapse == 1 5706 && TYPE_UNSIGNED (type) 5707 && (t == NULL_TREE || !integer_onep (t))) 5708 { 5709 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 5710 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 5711 true, GSI_SAME_STMT); 5712 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 5713 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 5714 true, GSI_SAME_STMT); 5715 gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code, 5716 n1, n2); 5717 se = split_block (entry_bb, cond_stmt); 5718 se->flags = EDGE_TRUE_VALUE; 5719 entry_bb = se->dest; 5720 se->probability = profile_probability::very_likely (); 5721 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 5722 se->probability = profile_probability::very_unlikely (); 5723 if (gimple_in_ssa_p (cfun)) 5724 { 5725 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 5726 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 5727 !gsi_end_p (gpi); gsi_next (&gpi)) 5728 { 5729 gphi *phi = gpi.phi (); 5730 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 5731 se, UNKNOWN_LOCATION); 5732 } 5733 } 5734 gsi = gsi_last_bb (entry_bb); 5735 } 5736 5737 if (fd->lastprivate_conditional) 5738 { 5739 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 5740 
tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 5741 if (fd->have_pointer_condtemp) 5742 condtemp = OMP_CLAUSE_DECL (c); 5743 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 5744 cond_var = OMP_CLAUSE_DECL (c); 5745 } 5746 if (fd->have_reductemp || fd->have_pointer_condtemp) 5747 { 5748 tree t1 = build_int_cst (long_integer_type_node, 0); 5749 tree t2 = build_int_cst (long_integer_type_node, 1); 5750 tree t3 = build_int_cstu (long_integer_type_node, 5751 (HOST_WIDE_INT_1U << 31) + 1); 5752 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 5753 gimple_stmt_iterator gsi2 = gsi_none (); 5754 gimple *g = NULL; 5755 tree mem = null_pointer_node, memv = NULL_TREE; 5756 if (fd->have_reductemp) 5757 { 5758 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 5759 reductions = OMP_CLAUSE_DECL (c); 5760 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 5761 g = SSA_NAME_DEF_STMT (reductions); 5762 reductions = gimple_assign_rhs1 (g); 5763 OMP_CLAUSE_DECL (c) = reductions; 5764 gsi2 = gsi_for_stmt (g); 5765 } 5766 else 5767 { 5768 if (gsi_end_p (gsip)) 5769 gsi2 = gsi_after_labels (region->entry); 5770 else 5771 gsi2 = gsip; 5772 reductions = null_pointer_node; 5773 } 5774 if (fd->have_pointer_condtemp) 5775 { 5776 tree type = TREE_TYPE (condtemp); 5777 memv = create_tmp_var (type); 5778 TREE_ADDRESSABLE (memv) = 1; 5779 unsigned HOST_WIDE_INT sz 5780 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 5781 sz *= fd->lastprivate_conditional; 5782 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), 5783 false); 5784 mem = build_fold_addr_expr (memv); 5785 } 5786 tree t 5787 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 5788 9, t1, t2, t2, t3, t1, null_pointer_node, 5789 null_pointer_node, reductions, mem); 5790 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 5791 true, GSI_SAME_STMT); 5792 if (fd->have_pointer_condtemp) 5793 expand_omp_build_assign (&gsi2, condtemp, memv, false); 5794 if 
(fd->have_reductemp) 5795 { 5796 gsi_remove (&gsi2, true); 5797 release_ssa_name (gimple_assign_lhs (g)); 5798 } 5799 } 5800 switch (gimple_omp_for_kind (fd->for_stmt)) 5801 { 5802 case GF_OMP_FOR_KIND_FOR: 5803 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 5804 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 5805 break; 5806 case GF_OMP_FOR_KIND_DISTRIBUTE: 5807 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 5808 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 5809 break; 5810 default: 5811 gcc_unreachable (); 5812 } 5813 nthreads = build_call_expr (nthreads, 0); 5814 nthreads = fold_convert (itype, nthreads); 5815 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 5816 true, GSI_SAME_STMT); 5817 threadid = build_call_expr (threadid, 0); 5818 threadid = fold_convert (itype, threadid); 5819 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 5820 true, GSI_SAME_STMT); 5821 5822 n1 = fd->loop.n1; 5823 n2 = fd->loop.n2; 5824 step = fd->loop.step; 5825 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 5826 { 5827 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5828 OMP_CLAUSE__LOOPTEMP_); 5829 gcc_assert (innerc); 5830 n1 = OMP_CLAUSE_DECL (innerc); 5831 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5832 OMP_CLAUSE__LOOPTEMP_); 5833 gcc_assert (innerc); 5834 n2 = OMP_CLAUSE_DECL (innerc); 5835 } 5836 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 5837 true, NULL_TREE, true, GSI_SAME_STMT); 5838 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 5839 true, NULL_TREE, true, GSI_SAME_STMT); 5840 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 5841 true, NULL_TREE, true, GSI_SAME_STMT); 5842 tree chunk_size = fold_convert (itype, fd->chunk_size); 5843 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 5844 chunk_size 5845 = force_gimple_operand_gsi (&gsi, chunk_size, true, 
NULL_TREE, true, 5846 GSI_SAME_STMT); 5847 5848 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 5849 t = fold_build2 (PLUS_EXPR, itype, step, t); 5850 t = fold_build2 (PLUS_EXPR, itype, t, n2); 5851 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 5852 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 5853 t = fold_build2 (TRUNC_DIV_EXPR, itype, 5854 fold_build1 (NEGATE_EXPR, itype, t), 5855 fold_build1 (NEGATE_EXPR, itype, step)); 5856 else 5857 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 5858 t = fold_convert (itype, t); 5859 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5860 true, GSI_SAME_STMT); 5861 5862 trip_var = create_tmp_reg (itype, ".trip"); 5863 if (gimple_in_ssa_p (cfun)) 5864 { 5865 trip_init = make_ssa_name (trip_var); 5866 trip_main = make_ssa_name (trip_var); 5867 trip_back = make_ssa_name (trip_var); 5868 } 5869 else 5870 { 5871 trip_init = trip_var; 5872 trip_main = trip_var; 5873 trip_back = trip_var; 5874 } 5875 5876 gassign *assign_stmt 5877 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 5878 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 5879 5880 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 5881 t = fold_build2 (MULT_EXPR, itype, t, step); 5882 if (POINTER_TYPE_P (type)) 5883 t = fold_build_pointer_plus (n1, t); 5884 else 5885 t = fold_build2 (PLUS_EXPR, type, t, n1); 5886 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5887 true, GSI_SAME_STMT); 5888 5889 /* Remove the GIMPLE_OMP_FOR. */ 5890 gsi_remove (&gsi, true); 5891 5892 gimple_stmt_iterator gsif = gsi; 5893 5894 /* Iteration space partitioning goes in ITER_PART_BB. 
*/ 5895 gsi = gsi_last_bb (iter_part_bb); 5896 5897 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 5898 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 5899 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 5900 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5901 false, GSI_CONTINUE_LINKING); 5902 5903 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 5904 t = fold_build2 (MIN_EXPR, itype, t, n); 5905 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5906 false, GSI_CONTINUE_LINKING); 5907 5908 t = build2 (LT_EXPR, boolean_type_node, s0, n); 5909 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 5910 5911 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 5912 gsi = gsi_start_bb (seq_start_bb); 5913 5914 tree startvar = fd->loop.v; 5915 tree endvar = NULL_TREE; 5916 5917 if (gimple_omp_for_combined_p (fd->for_stmt)) 5918 { 5919 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 5920 ? gimple_omp_parallel_clauses (inner_stmt) 5921 : gimple_omp_for_clauses (inner_stmt); 5922 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 5923 gcc_assert (innerc); 5924 startvar = OMP_CLAUSE_DECL (innerc); 5925 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5926 OMP_CLAUSE__LOOPTEMP_); 5927 gcc_assert (innerc); 5928 endvar = OMP_CLAUSE_DECL (innerc); 5929 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 5930 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 5931 { 5932 innerc = find_lastprivate_looptemp (fd, innerc); 5933 if (innerc) 5934 { 5935 /* If needed (distribute parallel for with lastprivate), 5936 propagate down the total number of iterations. 
*/ 5937 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 5938 fd->loop.n2); 5939 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 5940 GSI_CONTINUE_LINKING); 5941 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 5942 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5943 } 5944 } 5945 } 5946 5947 t = fold_convert (itype, s0); 5948 t = fold_build2 (MULT_EXPR, itype, t, step); 5949 if (POINTER_TYPE_P (type)) 5950 { 5951 t = fold_build_pointer_plus (n1, t); 5952 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 5953 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 5954 t = fold_convert (signed_type_for (type), t); 5955 } 5956 else 5957 t = fold_build2 (PLUS_EXPR, type, t, n1); 5958 t = fold_convert (TREE_TYPE (startvar), t); 5959 t = force_gimple_operand_gsi (&gsi, t, 5960 DECL_P (startvar) 5961 && TREE_ADDRESSABLE (startvar), 5962 NULL_TREE, false, GSI_CONTINUE_LINKING); 5963 assign_stmt = gimple_build_assign (startvar, t); 5964 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5965 if (cond_var) 5966 { 5967 tree itype = TREE_TYPE (cond_var); 5968 /* For lastprivate(conditional:) itervar, we need some iteration 5969 counter that starts at unsigned non-zero and increases. 5970 Prefer as few IVs as possible, so if we can use startvar 5971 itself, use that, or startvar + constant (those would be 5972 incremented with step), and as last resort use the s0 + 1 5973 incremented by 1. 
*/ 5974 if (POINTER_TYPE_P (type) 5975 || TREE_CODE (n1) != INTEGER_CST 5976 || fd->loop.cond_code != LT_EXPR) 5977 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), 5978 build_int_cst (itype, 1)); 5979 else if (tree_int_cst_sgn (n1) == 1) 5980 t = fold_convert (itype, t); 5981 else 5982 { 5983 tree c = fold_convert (itype, n1); 5984 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 5985 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 5986 } 5987 t = force_gimple_operand_gsi (&gsi, t, false, 5988 NULL_TREE, false, GSI_CONTINUE_LINKING); 5989 assign_stmt = gimple_build_assign (cond_var, t); 5990 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5991 } 5992 5993 t = fold_convert (itype, e0); 5994 t = fold_build2 (MULT_EXPR, itype, t, step); 5995 if (POINTER_TYPE_P (type)) 5996 { 5997 t = fold_build_pointer_plus (n1, t); 5998 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 5999 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 6000 t = fold_convert (signed_type_for (type), t); 6001 } 6002 else 6003 t = fold_build2 (PLUS_EXPR, type, t, n1); 6004 t = fold_convert (TREE_TYPE (startvar), t); 6005 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6006 false, GSI_CONTINUE_LINKING); 6007 if (endvar) 6008 { 6009 assign_stmt = gimple_build_assign (endvar, e); 6010 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 6011 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 6012 assign_stmt = gimple_build_assign (fd->loop.v, e); 6013 else 6014 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 6015 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 6016 } 6017 /* Handle linear clause adjustments. 
*/ 6018 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 6019 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 6020 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 6021 c; c = OMP_CLAUSE_CHAIN (c)) 6022 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 6023 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 6024 { 6025 tree d = OMP_CLAUSE_DECL (c); 6026 tree t = d, a, dest; 6027 if (omp_privatize_by_reference (t)) 6028 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 6029 tree type = TREE_TYPE (t); 6030 if (POINTER_TYPE_P (type)) 6031 type = sizetype; 6032 dest = unshare_expr (t); 6033 tree v = create_tmp_var (TREE_TYPE (t), NULL); 6034 expand_omp_build_assign (&gsif, v, t); 6035 if (itercnt == NULL_TREE) 6036 { 6037 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 6038 { 6039 itercntbias 6040 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 6041 fold_convert (itype, fd->loop.n1)); 6042 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 6043 itercntbias, step); 6044 itercntbias 6045 = force_gimple_operand_gsi (&gsif, itercntbias, true, 6046 NULL_TREE, true, 6047 GSI_SAME_STMT); 6048 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 6049 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 6050 NULL_TREE, false, 6051 GSI_CONTINUE_LINKING); 6052 } 6053 else 6054 itercnt = s0; 6055 } 6056 a = fold_build2 (MULT_EXPR, type, 6057 fold_convert (type, itercnt), 6058 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 6059 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 6060 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 6061 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6062 false, GSI_CONTINUE_LINKING); 6063 expand_omp_build_assign (&gsi, dest, t, true); 6064 } 6065 if (fd->collapse > 1) 6066 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); 6067 6068 if (!broken_loop) 6069 { 6070 /* The code controlling the sequential loop goes in CONT_BB, 6071 replacing the GIMPLE_OMP_CONTINUE. 
*/ 6072 gsi = gsi_last_nondebug_bb (cont_bb); 6073 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 6074 vmain = gimple_omp_continue_control_use (cont_stmt); 6075 vback = gimple_omp_continue_control_def (cont_stmt); 6076 6077 if (cond_var) 6078 { 6079 tree itype = TREE_TYPE (cond_var); 6080 tree t2; 6081 if (POINTER_TYPE_P (type) 6082 || TREE_CODE (n1) != INTEGER_CST 6083 || fd->loop.cond_code != LT_EXPR) 6084 t2 = build_int_cst (itype, 1); 6085 else 6086 t2 = fold_convert (itype, step); 6087 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 6088 t2 = force_gimple_operand_gsi (&gsi, t2, false, 6089 NULL_TREE, true, GSI_SAME_STMT); 6090 assign_stmt = gimple_build_assign (cond_var, t2); 6091 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 6092 } 6093 6094 if (!gimple_omp_for_combined_p (fd->for_stmt)) 6095 { 6096 if (POINTER_TYPE_P (type)) 6097 t = fold_build_pointer_plus (vmain, step); 6098 else 6099 t = fold_build2 (PLUS_EXPR, type, vmain, step); 6100 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 6101 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6102 true, GSI_SAME_STMT); 6103 assign_stmt = gimple_build_assign (vback, t); 6104 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 6105 6106 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 6107 t = build2 (EQ_EXPR, boolean_type_node, 6108 build_int_cst (itype, 0), 6109 build_int_cst (itype, 1)); 6110 else 6111 t = build2 (fd->loop.cond_code, boolean_type_node, 6112 DECL_P (vback) && TREE_ADDRESSABLE (vback) 6113 ? t : vback, e); 6114 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 6115 } 6116 6117 /* Remove GIMPLE_OMP_CONTINUE. */ 6118 gsi_remove (&gsi, true); 6119 6120 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 6121 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb); 6122 6123 /* Trip update code goes into TRIP_UPDATE_BB. 
*/ 6124 gsi = gsi_start_bb (trip_update_bb); 6125 6126 t = build_int_cst (itype, 1); 6127 t = build2 (PLUS_EXPR, itype, trip_main, t); 6128 assign_stmt = gimple_build_assign (trip_back, t); 6129 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 6130 } 6131 6132 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 6133 gsi = gsi_last_nondebug_bb (exit_bb); 6134 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 6135 { 6136 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 6137 if (fd->have_reductemp || fd->have_pointer_condtemp) 6138 { 6139 tree fn; 6140 if (t) 6141 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 6142 else 6143 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 6144 gcall *g = gimple_build_call (fn, 0); 6145 if (t) 6146 { 6147 gimple_call_set_lhs (g, t); 6148 if (fd->have_reductemp) 6149 gsi_insert_after (&gsi, gimple_build_assign (reductions, 6150 NOP_EXPR, t), 6151 GSI_SAME_STMT); 6152 } 6153 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 6154 } 6155 else 6156 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 6157 } 6158 else if (fd->have_pointer_condtemp) 6159 { 6160 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 6161 gcall *g = gimple_build_call (fn, 0); 6162 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 6163 } 6164 gsi_remove (&gsi, true); 6165 6166 /* Connect the new blocks. 
*/ 6167 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 6168 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 6169 6170 if (!broken_loop) 6171 { 6172 se = find_edge (cont_bb, body_bb); 6173 if (se == NULL) 6174 { 6175 se = BRANCH_EDGE (cont_bb); 6176 gcc_assert (single_succ (se->dest) == body_bb); 6177 } 6178 if (gimple_omp_for_combined_p (fd->for_stmt)) 6179 { 6180 remove_edge (se); 6181 se = NULL; 6182 } 6183 else if (fd->collapse > 1) 6184 { 6185 remove_edge (se); 6186 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 6187 } 6188 else 6189 se->flags = EDGE_TRUE_VALUE; 6190 find_edge (cont_bb, trip_update_bb)->flags 6191 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 6192 6193 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 6194 iter_part_bb); 6195 } 6196 6197 if (gimple_in_ssa_p (cfun)) 6198 { 6199 gphi_iterator psi; 6200 gphi *phi; 6201 edge re, ene; 6202 edge_var_map *vm; 6203 size_t i; 6204 6205 gcc_assert (fd->collapse == 1 && !broken_loop); 6206 6207 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 6208 remove arguments of the phi nodes in fin_bb. We need to create 6209 appropriate phi nodes in iter_part_bb instead. 
*/ 6210 se = find_edge (iter_part_bb, fin_bb); 6211 re = single_succ_edge (trip_update_bb); 6212 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 6213 ene = single_succ_edge (entry_bb); 6214 6215 psi = gsi_start_phis (fin_bb); 6216 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 6217 gsi_next (&psi), ++i) 6218 { 6219 gphi *nphi; 6220 location_t locus; 6221 6222 phi = psi.phi (); 6223 if (operand_equal_p (gimple_phi_arg_def (phi, 0), 6224 redirect_edge_var_map_def (vm), 0)) 6225 continue; 6226 6227 t = gimple_phi_result (phi); 6228 gcc_assert (t == redirect_edge_var_map_result (vm)); 6229 6230 if (!single_pred_p (fin_bb)) 6231 t = copy_ssa_name (t, phi); 6232 6233 nphi = create_phi_node (t, iter_part_bb); 6234 6235 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 6236 locus = gimple_phi_arg_location_from_edge (phi, se); 6237 6238 /* A special case -- fd->loop.v is not yet computed in 6239 iter_part_bb, we need to use vextra instead. */ 6240 if (t == fd->loop.v) 6241 t = vextra; 6242 add_phi_arg (nphi, t, ene, locus); 6243 locus = redirect_edge_var_map_location (vm); 6244 tree back_arg = redirect_edge_var_map_def (vm); 6245 add_phi_arg (nphi, back_arg, re, locus); 6246 edge ce = find_edge (cont_bb, body_bb); 6247 if (ce == NULL) 6248 { 6249 ce = BRANCH_EDGE (cont_bb); 6250 gcc_assert (single_succ (ce->dest) == body_bb); 6251 ce = single_succ_edge (ce->dest); 6252 } 6253 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 6254 gcc_assert (inner_loop_phi != NULL); 6255 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 6256 find_edge (seq_start_bb, body_bb), locus); 6257 6258 if (!single_pred_p (fin_bb)) 6259 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 6260 } 6261 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 6262 redirect_edge_var_map_clear (re); 6263 if (single_pred_p (fin_bb)) 6264 while (1) 6265 { 6266 psi = gsi_start_phis (fin_bb); 6267 if (gsi_end_p (psi)) 6268 break; 6269 remove_phi_node (&psi, 
false); 6270 } 6271 6272 /* Make phi node for trip. */ 6273 phi = create_phi_node (trip_main, iter_part_bb); 6274 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 6275 UNKNOWN_LOCATION); 6276 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 6277 UNKNOWN_LOCATION); 6278 } 6279 6280 if (!broken_loop) 6281 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 6282 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 6283 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 6284 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 6285 recompute_dominator (CDI_DOMINATORS, fin_bb)); 6286 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 6287 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 6288 set_immediate_dominator (CDI_DOMINATORS, body_bb, 6289 recompute_dominator (CDI_DOMINATORS, body_bb)); 6290 6291 if (!broken_loop) 6292 { 6293 class loop *loop = body_bb->loop_father; 6294 class loop *trip_loop = alloc_loop (); 6295 trip_loop->header = iter_part_bb; 6296 trip_loop->latch = trip_update_bb; 6297 add_loop (trip_loop, iter_part_bb->loop_father); 6298 6299 if (loop != entry_bb->loop_father) 6300 { 6301 gcc_assert (loop->header == body_bb); 6302 gcc_assert (loop->latch == region->cont 6303 || single_pred (loop->latch) == region->cont); 6304 trip_loop->inner = loop; 6305 return; 6306 } 6307 6308 if (!gimple_omp_for_combined_p (fd->for_stmt)) 6309 { 6310 loop = alloc_loop (); 6311 loop->header = body_bb; 6312 if (collapse_bb == NULL) 6313 loop->latch = cont_bb; 6314 add_loop (loop, trip_loop); 6315 } 6316 } 6317} 6318 6319/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 6320 loop. 
Given parameters: 6321 6322 for (V = N1; V cond N2; V += STEP) BODY; 6323 6324 where COND is "<" or ">", we generate pseudocode 6325 6326 V = N1; 6327 goto L1; 6328 L0: 6329 BODY; 6330 V += STEP; 6331 L1: 6332 if (V cond N2) goto L0; else goto L2; 6333 L2: 6334 6335 For collapsed loops, emit the outer loops as scalar 6336 and only try to vectorize the innermost loop. */ 6337 6338static void 6339expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 6340{ 6341 tree type, t; 6342 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 6343 gimple_stmt_iterator gsi; 6344 gimple *stmt; 6345 gcond *cond_stmt; 6346 bool broken_loop = region->cont == NULL; 6347 edge e, ne; 6348 tree *counts = NULL; 6349 int i; 6350 int safelen_int = INT_MAX; 6351 bool dont_vectorize = false; 6352 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6353 OMP_CLAUSE_SAFELEN); 6354 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6355 OMP_CLAUSE__SIMDUID_); 6356 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6357 OMP_CLAUSE_IF); 6358 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6359 OMP_CLAUSE_SIMDLEN); 6360 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6361 OMP_CLAUSE__CONDTEMP_); 6362 tree n1, n2; 6363 tree cond_var = condtemp ? 
OMP_CLAUSE_DECL (condtemp) : NULL_TREE; 6364 6365 if (safelen) 6366 { 6367 poly_uint64 val; 6368 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 6369 if (!poly_int_tree_p (safelen, &val)) 6370 safelen_int = 0; 6371 else 6372 safelen_int = MIN (constant_lower_bound (val), INT_MAX); 6373 if (safelen_int == 1) 6374 safelen_int = 0; 6375 } 6376 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc))) 6377 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))) 6378 { 6379 safelen_int = 0; 6380 dont_vectorize = true; 6381 } 6382 type = TREE_TYPE (fd->loop.v); 6383 entry_bb = region->entry; 6384 cont_bb = region->cont; 6385 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 6386 gcc_assert (broken_loop 6387 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 6388 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 6389 if (!broken_loop) 6390 { 6391 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 6392 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 6393 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 6394 l2_bb = BRANCH_EDGE (entry_bb)->dest; 6395 } 6396 else 6397 { 6398 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 6399 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 6400 l2_bb = single_succ (l1_bb); 6401 } 6402 exit_bb = region->exit; 6403 l2_dom_bb = NULL; 6404 6405 gsi = gsi_last_nondebug_bb (entry_bb); 6406 6407 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 6408 /* Not needed in SSA form right now. 
*/ 6409 gcc_assert (!gimple_in_ssa_p (cfun)); 6410 if (fd->collapse > 1 6411 && (gimple_omp_for_combined_into_p (fd->for_stmt) 6412 || broken_loop)) 6413 { 6414 int first_zero_iter = -1, dummy = -1; 6415 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 6416 6417 counts = XALLOCAVEC (tree, fd->collapse); 6418 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 6419 zero_iter_bb, first_zero_iter, 6420 dummy_bb, dummy, l2_dom_bb); 6421 } 6422 if (l2_dom_bb == NULL) 6423 l2_dom_bb = l1_bb; 6424 6425 n1 = fd->loop.n1; 6426 n2 = fd->loop.n2; 6427 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 6428 { 6429 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6430 OMP_CLAUSE__LOOPTEMP_); 6431 gcc_assert (innerc); 6432 n1 = OMP_CLAUSE_DECL (innerc); 6433 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 6434 OMP_CLAUSE__LOOPTEMP_); 6435 gcc_assert (innerc); 6436 n2 = OMP_CLAUSE_DECL (innerc); 6437 } 6438 tree step = fd->loop.step; 6439 tree orig_step = step; /* May be different from step if is_simt. */ 6440 6441 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 6442 OMP_CLAUSE__SIMT_); 6443 if (is_simt) 6444 { 6445 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 6446 is_simt = safelen_int > 1; 6447 } 6448 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 6449 if (is_simt) 6450 { 6451 simt_lane = create_tmp_var (unsigned_type_node); 6452 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 6453 gimple_call_set_lhs (g, simt_lane); 6454 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 6455 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 6456 fold_convert (TREE_TYPE (step), simt_lane)); 6457 n1 = fold_convert (type, n1); 6458 if (POINTER_TYPE_P (type)) 6459 n1 = fold_build_pointer_plus (n1, offset); 6460 else 6461 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 6462 6463 /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
*/ 6464 if (fd->collapse > 1) 6465 simt_maxlane = build_one_cst (unsigned_type_node); 6466 else if (safelen_int < omp_max_simt_vf ()) 6467 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 6468 tree vf 6469 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 6470 unsigned_type_node, 0); 6471 if (simt_maxlane) 6472 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 6473 vf = fold_convert (TREE_TYPE (step), vf); 6474 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 6475 } 6476 6477 tree n2var = NULL_TREE; 6478 tree n2v = NULL_TREE; 6479 tree *nonrect_bounds = NULL; 6480 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE; 6481 if (fd->collapse > 1) 6482 { 6483 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt)) 6484 { 6485 if (fd->non_rect) 6486 { 6487 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1); 6488 memset (nonrect_bounds, 0, 6489 sizeof (tree) * (fd->last_nonrect + 1)); 6490 } 6491 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 6492 gcc_assert (entry_bb == gsi_bb (gsi)); 6493 gcc_assert (fd->for_stmt == gsi_stmt (gsi)); 6494 gsi_prev (&gsi); 6495 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest; 6496 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, 6497 NULL, n1); 6498 gsi = gsi_for_stmt (fd->for_stmt); 6499 } 6500 if (broken_loop) 6501 ; 6502 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 6503 { 6504 /* Compute in n2var the limit for the first innermost loop, 6505 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt) 6506 where cnt is how many iterations would the loop have if 6507 all further iterations were assigned to the current task. */ 6508 n2var = create_tmp_var (type); 6509 i = fd->collapse - 1; 6510 tree itype = TREE_TYPE (fd->loops[i].v); 6511 if (POINTER_TYPE_P (itype)) 6512 itype = signed_type_for (itype); 6513 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR 6514 ? 
-1 : 1)); 6515 t = fold_build2 (PLUS_EXPR, itype, 6516 fold_convert (itype, fd->loops[i].step), t); 6517 t = fold_build2 (PLUS_EXPR, itype, t, 6518 fold_convert (itype, fd->loops[i].n2)); 6519 if (fd->loops[i].m2) 6520 { 6521 tree t2 = fold_convert (itype, 6522 fd->loops[i - fd->loops[i].outer].v); 6523 tree t3 = fold_convert (itype, fd->loops[i].m2); 6524 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); 6525 t = fold_build2 (PLUS_EXPR, itype, t, t2); 6526 } 6527 t = fold_build2 (MINUS_EXPR, itype, t, 6528 fold_convert (itype, fd->loops[i].v)); 6529 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 6530 t = fold_build2 (TRUNC_DIV_EXPR, itype, 6531 fold_build1 (NEGATE_EXPR, itype, t), 6532 fold_build1 (NEGATE_EXPR, itype, 6533 fold_convert (itype, 6534 fd->loops[i].step))); 6535 else 6536 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 6537 fold_convert (itype, fd->loops[i].step)); 6538 t = fold_convert (type, t); 6539 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1); 6540 min_arg1 = create_tmp_var (type); 6541 expand_omp_build_assign (&gsi, min_arg1, t2); 6542 min_arg2 = create_tmp_var (type); 6543 expand_omp_build_assign (&gsi, min_arg2, t); 6544 } 6545 else 6546 { 6547 if (TREE_CODE (n2) == INTEGER_CST) 6548 { 6549 /* Indicate for lastprivate handling that at least one iteration 6550 has been performed, without wasting runtime. */ 6551 if (integer_nonzerop (n2)) 6552 expand_omp_build_assign (&gsi, fd->loop.v, 6553 fold_convert (type, n2)); 6554 else 6555 /* Indicate that no iteration has been performed. 
*/ 6556 expand_omp_build_assign (&gsi, fd->loop.v, 6557 build_one_cst (type)); 6558 } 6559 else 6560 { 6561 expand_omp_build_assign (&gsi, fd->loop.v, 6562 build_zero_cst (type)); 6563 expand_omp_build_assign (&gsi, n2, build_one_cst (type)); 6564 } 6565 for (i = 0; i < fd->collapse; i++) 6566 { 6567 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 6568 if (fd->loops[i].m1) 6569 { 6570 tree t2 6571 = fold_convert (TREE_TYPE (t), 6572 fd->loops[i - fd->loops[i].outer].v); 6573 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1); 6574 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); 6575 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); 6576 } 6577 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 6578 /* For normal non-combined collapsed loops just initialize 6579 the outermost iterator in the entry_bb. */ 6580 if (!broken_loop) 6581 break; 6582 } 6583 } 6584 } 6585 else 6586 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 6587 tree altv = NULL_TREE, altn2 = NULL_TREE; 6588 if (fd->collapse == 1 6589 && !broken_loop 6590 && TREE_CODE (orig_step) != INTEGER_CST) 6591 { 6592 /* The vectorizer currently punts on loops with non-constant steps 6593 for the main IV (can't compute number of iterations and gives up 6594 because of that). As for OpenMP loops it is always possible to 6595 compute the number of iterations upfront, use an alternate IV 6596 as the loop iterator: 6597 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0; 6598 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */ 6599 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); 6600 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv))); 6601 tree itype = TREE_TYPE (fd->loop.v); 6602 if (POINTER_TYPE_P (itype)) 6603 itype = signed_type_for (itype); 6604 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 6605 t = fold_build2 (PLUS_EXPR, itype, 6606 fold_convert (itype, step), t); 6607 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2)); 6608 t = fold_build2 (MINUS_EXPR, itype, t, 6609 fold_convert (itype, fd->loop.v)); 6610 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 6611 t = fold_build2 (TRUNC_DIV_EXPR, itype, 6612 fold_build1 (NEGATE_EXPR, itype, t), 6613 fold_build1 (NEGATE_EXPR, itype, 6614 fold_convert (itype, step))); 6615 else 6616 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, 6617 fold_convert (itype, step)); 6618 t = fold_convert (TREE_TYPE (altv), t); 6619 altn2 = create_tmp_var (TREE_TYPE (altv)); 6620 expand_omp_build_assign (&gsi, altn2, t); 6621 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2); 6622 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, 6623 true, GSI_SAME_STMT); 6624 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2); 6625 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2, 6626 build_zero_cst (TREE_TYPE (altv))); 6627 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 6628 } 6629 else if (fd->collapse > 1 6630 && !broken_loop 6631 && !gimple_omp_for_combined_into_p (fd->for_stmt) 6632 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST) 6633 { 6634 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); 6635 altn2 = create_tmp_var (TREE_TYPE (altv)); 6636 } 6637 if (cond_var) 6638 { 6639 if (POINTER_TYPE_P (type) 6640 || TREE_CODE (n1) != INTEGER_CST 6641 || fd->loop.cond_code != LT_EXPR 6642 || tree_int_cst_sgn (n1) != 1) 6643 expand_omp_build_assign (&gsi, cond_var, 6644 build_one_cst (TREE_TYPE (cond_var))); 6645 else 6646 expand_omp_build_assign (&gsi, cond_var, 6647 fold_convert (TREE_TYPE (cond_var), n1)); 6648 } 6649 6650 /* Remove the GIMPLE_OMP_FOR statement. */ 6651 gsi_remove (&gsi, true); 6652 6653 if (!broken_loop) 6654 { 6655 /* Code to control the increment goes in the CONT_BB. 
*/ 6656 gsi = gsi_last_nondebug_bb (cont_bb); 6657 stmt = gsi_stmt (gsi); 6658 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 6659 6660 if (fd->collapse == 1 6661 || gimple_omp_for_combined_into_p (fd->for_stmt)) 6662 { 6663 if (POINTER_TYPE_P (type)) 6664 t = fold_build_pointer_plus (fd->loop.v, step); 6665 else 6666 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 6667 expand_omp_build_assign (&gsi, fd->loop.v, t); 6668 } 6669 else if (TREE_CODE (n2) != INTEGER_CST) 6670 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type)); 6671 if (altv) 6672 { 6673 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv, 6674 build_one_cst (TREE_TYPE (altv))); 6675 expand_omp_build_assign (&gsi, altv, t); 6676 } 6677 6678 if (fd->collapse > 1) 6679 { 6680 i = fd->collapse - 1; 6681 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 6682 { 6683 t = fold_convert (sizetype, fd->loops[i].step); 6684 t = fold_build_pointer_plus (fd->loops[i].v, t); 6685 } 6686 else 6687 { 6688 t = fold_convert (TREE_TYPE (fd->loops[i].v), 6689 fd->loops[i].step); 6690 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 6691 fd->loops[i].v, t); 6692 } 6693 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 6694 } 6695 if (cond_var) 6696 { 6697 if (POINTER_TYPE_P (type) 6698 || TREE_CODE (n1) != INTEGER_CST 6699 || fd->loop.cond_code != LT_EXPR 6700 || tree_int_cst_sgn (n1) != 1) 6701 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, 6702 build_one_cst (TREE_TYPE (cond_var))); 6703 else 6704 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var, 6705 fold_convert (TREE_TYPE (cond_var), step)); 6706 expand_omp_build_assign (&gsi, cond_var, t); 6707 } 6708 6709 /* Remove GIMPLE_OMP_CONTINUE. */ 6710 gsi_remove (&gsi, true); 6711 } 6712 6713 /* Emit the condition in L1_BB. 
*/ 6714 gsi = gsi_start_bb (l1_bb); 6715 6716 if (altv) 6717 t = build2 (LT_EXPR, boolean_type_node, altv, altn2); 6718 else if (fd->collapse > 1 6719 && !gimple_omp_for_combined_into_p (fd->for_stmt) 6720 && !broken_loop) 6721 { 6722 i = fd->collapse - 1; 6723 tree itype = TREE_TYPE (fd->loops[i].v); 6724 if (fd->loops[i].m2) 6725 t = n2v = create_tmp_var (itype); 6726 else 6727 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 6728 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6729 false, GSI_CONTINUE_LINKING); 6730 tree v = fd->loops[i].v; 6731 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 6732 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 6733 false, GSI_CONTINUE_LINKING); 6734 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 6735 } 6736 else 6737 { 6738 if (fd->collapse > 1 && !broken_loop) 6739 t = n2var; 6740 else 6741 t = fold_convert (type, unshare_expr (n2)); 6742 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6743 false, GSI_CONTINUE_LINKING); 6744 tree v = fd->loop.v; 6745 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 6746 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 6747 false, GSI_CONTINUE_LINKING); 6748 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 6749 } 6750 cond_stmt = gimple_build_cond_empty (t); 6751 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 6752 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 6753 NULL, NULL) 6754 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 6755 NULL, NULL)) 6756 { 6757 gsi = gsi_for_stmt (cond_stmt); 6758 gimple_regimplify_operands (cond_stmt, &gsi); 6759 } 6760 6761 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. 
*/ 6762 if (is_simt) 6763 { 6764 gsi = gsi_start_bb (l2_bb); 6765 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step); 6766 if (POINTER_TYPE_P (type)) 6767 t = fold_build_pointer_plus (fd->loop.v, step); 6768 else 6769 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 6770 expand_omp_build_assign (&gsi, fd->loop.v, t); 6771 } 6772 6773 /* Remove GIMPLE_OMP_RETURN. */ 6774 gsi = gsi_last_nondebug_bb (exit_bb); 6775 gsi_remove (&gsi, true); 6776 6777 /* Connect the new blocks. */ 6778 remove_edge (FALLTHRU_EDGE (entry_bb)); 6779 6780 if (!broken_loop) 6781 { 6782 remove_edge (BRANCH_EDGE (entry_bb)); 6783 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 6784 6785 e = BRANCH_EDGE (l1_bb); 6786 ne = FALLTHRU_EDGE (l1_bb); 6787 e->flags = EDGE_TRUE_VALUE; 6788 } 6789 else 6790 { 6791 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 6792 6793 ne = single_succ_edge (l1_bb); 6794 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 6795 6796 } 6797 ne->flags = EDGE_FALSE_VALUE; 6798 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 6799 ne->probability = e->probability.invert (); 6800 6801 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 6802 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 6803 6804 if (simt_maxlane) 6805 { 6806 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 6807 NULL_TREE, NULL_TREE); 6808 gsi = gsi_last_bb (entry_bb); 6809 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 6810 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 6811 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 6812 FALLTHRU_EDGE (entry_bb)->probability 6813 = profile_probability::guessed_always ().apply_scale (7, 8); 6814 BRANCH_EDGE (entry_bb)->probability 6815 = FALLTHRU_EDGE (entry_bb)->probability.invert (); 6816 l2_dom_bb = entry_bb; 6817 } 6818 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 6819 6820 if (!broken_loop && fd->collapse > 1) 6821 { 6822 basic_block last_bb = l1_bb; 6823 basic_block 
init_bb = NULL; 6824 for (i = fd->collapse - 2; i >= 0; i--) 6825 { 6826 tree nextn2v = NULL_TREE; 6827 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE) 6828 e = EDGE_SUCC (last_bb, 0); 6829 else 6830 e = EDGE_SUCC (last_bb, 1); 6831 basic_block bb = split_edge (e); 6832 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 6833 { 6834 t = fold_convert (sizetype, fd->loops[i].step); 6835 t = fold_build_pointer_plus (fd->loops[i].v, t); 6836 } 6837 else 6838 { 6839 t = fold_convert (TREE_TYPE (fd->loops[i].v), 6840 fd->loops[i].step); 6841 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 6842 fd->loops[i].v, t); 6843 } 6844 gsi = gsi_after_labels (bb); 6845 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 6846 6847 bb = split_block (bb, last_stmt (bb))->dest; 6848 gsi = gsi_start_bb (bb); 6849 tree itype = TREE_TYPE (fd->loops[i].v); 6850 if (fd->loops[i].m2) 6851 t = nextn2v = create_tmp_var (itype); 6852 else 6853 t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); 6854 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 6855 false, GSI_CONTINUE_LINKING); 6856 tree v = fd->loops[i].v; 6857 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 6858 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 6859 false, GSI_CONTINUE_LINKING); 6860 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 6861 cond_stmt = gimple_build_cond_empty (t); 6862 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 6863 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 6864 expand_omp_regimplify_p, NULL, NULL) 6865 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 6866 expand_omp_regimplify_p, NULL, NULL)) 6867 { 6868 gsi = gsi_for_stmt (cond_stmt); 6869 gimple_regimplify_operands (cond_stmt, &gsi); 6870 } 6871 ne = single_succ_edge (bb); 6872 ne->flags = EDGE_FALSE_VALUE; 6873 6874 init_bb = create_empty_bb (bb); 6875 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb); 6876 add_bb_to_loop (init_bb, bb->loop_father); 6877 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE); 
6878 e->probability 6879 = profile_probability::guessed_always ().apply_scale (7, 8); 6880 ne->probability = e->probability.invert (); 6881 6882 gsi = gsi_after_labels (init_bb); 6883 if (fd->loops[i + 1].m1) 6884 { 6885 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 6886 fd->loops[i + 1 6887 - fd->loops[i + 1].outer].v); 6888 if (POINTER_TYPE_P (TREE_TYPE (t2))) 6889 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1); 6890 else 6891 { 6892 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 6893 fd->loops[i + 1].n1); 6894 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1); 6895 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); 6896 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); 6897 } 6898 } 6899 else 6900 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 6901 fd->loops[i + 1].n1); 6902 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t); 6903 if (fd->loops[i + 1].m2) 6904 { 6905 if (i + 2 == fd->collapse && (n2var || altv)) 6906 { 6907 gcc_assert (n2v == NULL_TREE); 6908 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v)); 6909 } 6910 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 6911 fd->loops[i + 1 6912 - fd->loops[i + 1].outer].v); 6913 if (POINTER_TYPE_P (TREE_TYPE (t2))) 6914 t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2); 6915 else 6916 { 6917 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 6918 fd->loops[i + 1].n2); 6919 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2); 6920 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); 6921 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); 6922 } 6923 expand_omp_build_assign (&gsi, n2v, t); 6924 } 6925 if (i + 2 == fd->collapse && n2var) 6926 { 6927 /* For composite simd, n2 is the first iteration the current 6928 task shouldn't already handle, so we effectively want to use 6929 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3) 6930 as the vectorized loop. 
Except the vectorizer will not 6931 vectorize that, so instead compute N2VAR as 6932 N2VAR = V + MIN (N2 - V, COUNTS3) and use 6933 for (V3 = N31; V < N2VAR; V++, V3 += STEP3) 6934 as the loop to vectorize. */ 6935 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v); 6936 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2) 6937 { 6938 tree itype = TREE_TYPE (fd->loops[i].v); 6939 if (POINTER_TYPE_P (itype)) 6940 itype = signed_type_for (itype); 6941 t = build_int_cst (itype, (fd->loops[i + 1].cond_code 6942 == LT_EXPR ? -1 : 1)); 6943 t = fold_build2 (PLUS_EXPR, itype, 6944 fold_convert (itype, 6945 fd->loops[i + 1].step), t); 6946 if (fd->loops[i + 1].m2 == NULL_TREE) 6947 t = fold_build2 (PLUS_EXPR, itype, t, 6948 fold_convert (itype, 6949 fd->loops[i + 1].n2)); 6950 else if (POINTER_TYPE_P (TREE_TYPE (n2v))) 6951 { 6952 t = fold_build_pointer_plus (n2v, t); 6953 t = fold_convert (itype, t); 6954 } 6955 else 6956 t = fold_build2 (PLUS_EXPR, itype, t, n2v); 6957 t = fold_build2 (MINUS_EXPR, itype, t, 6958 fold_convert (itype, fd->loops[i + 1].v)); 6959 tree step = fold_convert (itype, fd->loops[i + 1].step); 6960 if (TYPE_UNSIGNED (itype) 6961 && fd->loops[i + 1].cond_code == GT_EXPR) 6962 t = fold_build2 (TRUNC_DIV_EXPR, itype, 6963 fold_build1 (NEGATE_EXPR, itype, t), 6964 fold_build1 (NEGATE_EXPR, itype, step)); 6965 else 6966 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 6967 t = fold_convert (type, t); 6968 } 6969 else 6970 t = counts[i + 1]; 6971 expand_omp_build_assign (&gsi, min_arg1, t2); 6972 expand_omp_build_assign (&gsi, min_arg2, t); 6973 e = split_block (init_bb, last_stmt (init_bb)); 6974 gsi = gsi_after_labels (e->dest); 6975 init_bb = e->dest; 6976 remove_edge (FALLTHRU_EDGE (entry_bb)); 6977 make_edge (entry_bb, init_bb, EDGE_FALLTHRU); 6978 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb); 6979 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb); 6980 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2); 6981 t = fold_build2 
(PLUS_EXPR, type, fd->loop.v, t); 6982 expand_omp_build_assign (&gsi, n2var, t); 6983 } 6984 if (i + 2 == fd->collapse && altv) 6985 { 6986 /* The vectorizer currently punts on loops with non-constant 6987 steps for the main IV (can't compute number of iterations 6988 and gives up because of that). As for OpenMP loops it is 6989 always possible to compute the number of iterations upfront, 6990 use an alternate IV as the loop iterator. */ 6991 expand_omp_build_assign (&gsi, altv, 6992 build_zero_cst (TREE_TYPE (altv))); 6993 tree itype = TREE_TYPE (fd->loops[i + 1].v); 6994 if (POINTER_TYPE_P (itype)) 6995 itype = signed_type_for (itype); 6996 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR 6997 ? -1 : 1)); 6998 t = fold_build2 (PLUS_EXPR, itype, 6999 fold_convert (itype, fd->loops[i + 1].step), t); 7000 t = fold_build2 (PLUS_EXPR, itype, t, 7001 fold_convert (itype, 7002 fd->loops[i + 1].m2 7003 ? n2v : fd->loops[i + 1].n2)); 7004 t = fold_build2 (MINUS_EXPR, itype, t, 7005 fold_convert (itype, fd->loops[i + 1].v)); 7006 tree step = fold_convert (itype, fd->loops[i + 1].step); 7007 if (TYPE_UNSIGNED (itype) 7008 && fd->loops[i + 1].cond_code == GT_EXPR) 7009 t = fold_build2 (TRUNC_DIV_EXPR, itype, 7010 fold_build1 (NEGATE_EXPR, itype, t), 7011 fold_build1 (NEGATE_EXPR, itype, step)); 7012 else 7013 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 7014 t = fold_convert (TREE_TYPE (altv), t); 7015 expand_omp_build_assign (&gsi, altn2, t); 7016 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), 7017 fd->loops[i + 1].m2 7018 ? 
n2v : fd->loops[i + 1].n2); 7019 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, 7020 true, GSI_SAME_STMT); 7021 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node, 7022 fd->loops[i + 1].v, t2); 7023 gassign *g 7024 = gimple_build_assign (altn2, COND_EXPR, t2, altn2, 7025 build_zero_cst (TREE_TYPE (altv))); 7026 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 7027 } 7028 n2v = nextn2v; 7029 7030 make_edge (init_bb, last_bb, EDGE_FALLTHRU); 7031 if (!gimple_omp_for_combined_into_p (fd->for_stmt)) 7032 { 7033 e = find_edge (entry_bb, last_bb); 7034 redirect_edge_succ (e, bb); 7035 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb); 7036 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb); 7037 } 7038 7039 last_bb = bb; 7040 } 7041 } 7042 if (!broken_loop) 7043 { 7044 class loop *loop = alloc_loop (); 7045 loop->header = l1_bb; 7046 loop->latch = cont_bb; 7047 add_loop (loop, l1_bb->loop_father); 7048 loop->safelen = safelen_int; 7049 if (simduid) 7050 { 7051 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 7052 cfun->has_simduid_loops = true; 7053 } 7054 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 7055 the loop. */ 7056 if ((flag_tree_loop_vectorize 7057 || !OPTION_SET_P (flag_tree_loop_vectorize)) 7058 && flag_tree_loop_optimize 7059 && loop->safelen > 1) 7060 { 7061 loop->force_vectorize = true; 7062 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))) 7063 { 7064 unsigned HOST_WIDE_INT v 7065 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)); 7066 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen) 7067 loop->simdlen = v; 7068 } 7069 cfun->has_force_vectorize_loops = true; 7070 } 7071 else if (dont_vectorize) 7072 loop->dont_vectorize = true; 7073 } 7074 else if (simduid) 7075 cfun->has_simduid_loops = true; 7076} 7077 7078/* Taskloop construct is represented after gimplification with 7079 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched 7080 in between them. 
This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  /* The sandwiched GIMPLE_OMP_TASK must be present and carry the
     taskloop flag; this expansion only feeds its _looptemp_ clauses.  */
  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  The runtime iterates in
     long long unsigned; if the original IV is signed and its bounds
     may straddle zero, shift everything by TYPE_MIN_VALUE so that the
     unsigned comparison order matches the signed one.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      suppress_warning (counts[i], OPT_Wuninitialized);
	  /* Splice the zero-iterations path back in just before the
	     GIMPLE_OMP_FOR and fix up dominators accordingly.  */
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  /* The first two _looptemp_ clauses on the task are the start and end
     temporaries this routine must initialize.  */
  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      innerc = find_lastprivate_looptemp (fd, innerc);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  /* Store the (possibly biased) bounds into the task's temporaries.  */
  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement ending CONT_BB.  */
  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement ending EXIT_BB.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* The outer construct never loops: keep only the fall-through paths
     so the region reduces to straight-line setup code, then repair the
     dominator tree.  */
  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS, region->entry));
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.  */

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  Mirrors the computation in
     expand_omp_taskloop_for_outer so the biased bounds received via the
     _looptemp_ clauses can be un-biased consistently.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  /* The task's iteration range arrives through the first two
     _looptemp_ clauses on this GIMPLE_OMP_FOR.  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      /* Un-bias the received bounds (see LLONG_MIN handling above).  */
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* For a combined construct, initialize the inner construct's
	 _looptemp_ temporaries instead of the IV itself.  */
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }

  tree *nonrect_bounds = NULL;
  if (fd->collapse > 1)
    {
      if (fd->non_rect)
	{
	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
	}
      gcc_assert (gsi_bb (gsi) == entry_bb);
      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
				startvar);
      entry_bb = gsi_bb (gsi);
    }

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Emit V += STEP and the back-edge condition V cond E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
						   cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* The inner construct provides the back edge; drop ours.  */
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* Loop back through the per-dimension update block.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* Register the natural loop we just created with the loop tree.  */
      class loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.  The loop is of the form:

   for (V = B; V LTGT E; V += S) {BODY}

   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
   (constant 0 for no chunking) and we will have a GWV partitioning
   mask, specifying dimensions over which the loop is to be
   partitioned (see note below).  We generate code that looks like
   (this ignores tiling):

   <entry_bb> [incoming FALL->body, BRANCH->exit]
     typedef signedintify (typeof (V)) T;  // underlying signed integral type
     T range = E - B;
     T chunk_no = 0;
     T DIR = LTGT == '<' ?
+1 : -1; 7497 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 7498 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 7499 7500 <head_bb> [created by splitting end of entry_bb] 7501 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 7502 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 7503 if (!(offset LTGT bound)) goto bottom_bb; 7504 7505 <body_bb> [incoming] 7506 V = B + offset; 7507 {BODY} 7508 7509 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 7510 offset += step; 7511 if (offset LTGT bound) goto body_bb; [*] 7512 7513 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 7514 chunk_no++; 7515 if (chunk < chunk_max) goto head_bb; 7516 7517 <exit_bb> [incoming] 7518 V = B + ((range -/+ 1) / S +/- 1) * S [*] 7519 7520 [*] Needed if V live at end of loop. */ 7521 7522static void 7523expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) 7524{ 7525 bool is_oacc_kernels_parallelized 7526 = (lookup_attribute ("oacc kernels parallelized", 7527 DECL_ATTRIBUTES (current_function_decl)) != NULL); 7528 { 7529 bool is_oacc_kernels 7530 = (lookup_attribute ("oacc kernels", 7531 DECL_ATTRIBUTES (current_function_decl)) != NULL); 7532 if (is_oacc_kernels_parallelized) 7533 gcc_checking_assert (is_oacc_kernels); 7534 } 7535 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized); 7536 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are 7537 for SSA specifics, and some are for 'parloops' OpenACC 7538 'kernels'-parallelized specifics. 
*/ 7539 7540 tree v = fd->loop.v; 7541 enum tree_code cond_code = fd->loop.cond_code; 7542 enum tree_code plus_code = PLUS_EXPR; 7543 7544 tree chunk_size = integer_minus_one_node; 7545 tree gwv = integer_zero_node; 7546 tree iter_type = TREE_TYPE (v); 7547 tree diff_type = iter_type; 7548 tree plus_type = iter_type; 7549 struct oacc_collapse *counts = NULL; 7550 7551 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) 7552 == GF_OMP_FOR_KIND_OACC_LOOP); 7553 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); 7554 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); 7555 7556 if (POINTER_TYPE_P (iter_type)) 7557 { 7558 plus_code = POINTER_PLUS_EXPR; 7559 plus_type = sizetype; 7560 } 7561 for (int ix = fd->collapse; ix--;) 7562 { 7563 tree diff_type2 = TREE_TYPE (fd->loops[ix].step); 7564 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2)) 7565 diff_type = diff_type2; 7566 } 7567 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 7568 diff_type = signed_type_for (diff_type); 7569 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) 7570 diff_type = integer_type_node; 7571 7572 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ 7573 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ 7574 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ 7575 basic_block bottom_bb = NULL; 7576 7577 /* entry_bb has two successors; the branch edge is to the exit 7578 block, fallthrough edge to body. */ 7579 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 7580 && BRANCH_EDGE (entry_bb)->dest == exit_bb); 7581 7582 /* If cont_bb non-NULL, it has 2 successors. The branch successor is 7583 body_bb, or to a block whose only successor is the body_bb. Its 7584 fallthrough successor is the final block (same as the branch 7585 successor of the entry_bb). 
*/ 7586 if (cont_bb) 7587 { 7588 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; 7589 basic_block bed = BRANCH_EDGE (cont_bb)->dest; 7590 7591 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); 7592 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); 7593 } 7594 else 7595 gcc_assert (!gimple_in_ssa_p (cfun)); 7596 7597 /* The exit block only has entry_bb and cont_bb as predecessors. */ 7598 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); 7599 7600 tree chunk_no; 7601 tree chunk_max = NULL_TREE; 7602 tree bound, offset; 7603 tree step = create_tmp_var (diff_type, ".step"); 7604 bool up = cond_code == LT_EXPR; 7605 tree dir = build_int_cst (diff_type, up ? +1 : -1); 7606 bool chunking = !gimple_in_ssa_p (cfun); 7607 bool negating; 7608 7609 /* Tiling vars. */ 7610 tree tile_size = NULL_TREE; 7611 tree element_s = NULL_TREE; 7612 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE; 7613 basic_block elem_body_bb = NULL; 7614 basic_block elem_cont_bb = NULL; 7615 7616 /* SSA instances. */ 7617 tree offset_incr = NULL_TREE; 7618 tree offset_init = NULL_TREE; 7619 7620 gimple_stmt_iterator gsi; 7621 gassign *ass; 7622 gcall *call; 7623 gimple *stmt; 7624 tree expr; 7625 location_t loc; 7626 edge split, be, fte; 7627 7628 /* Split the end of entry_bb to create head_bb. */ 7629 split = split_block (entry_bb, last_stmt (entry_bb)); 7630 basic_block head_bb = split->dest; 7631 entry_bb = split->src; 7632 7633 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ 7634 gsi = gsi_last_nondebug_bb (entry_bb); 7635 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); 7636 loc = gimple_location (for_stmt); 7637 7638 if (gimple_in_ssa_p (cfun)) 7639 { 7640 offset_init = gimple_omp_for_index (for_stmt, 0); 7641 gcc_assert (integer_zerop (fd->loop.n1)); 7642 /* The SSA parallelizer does gang parallelism. 
*/ 7643 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); 7644 } 7645 7646 if (fd->collapse > 1 || fd->tiling) 7647 { 7648 gcc_assert (!gimple_in_ssa_p (cfun) && up); 7649 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); 7650 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type, 7651 TREE_TYPE (fd->loop.n2), loc); 7652 7653 if (SSA_VAR_P (fd->loop.n2)) 7654 { 7655 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, 7656 true, GSI_SAME_STMT); 7657 ass = gimple_build_assign (fd->loop.n2, total); 7658 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7659 } 7660 } 7661 7662 tree b = fd->loop.n1; 7663 tree e = fd->loop.n2; 7664 tree s = fd->loop.step; 7665 7666 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); 7667 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); 7668 7669 /* Convert the step, avoiding possible unsigned->signed overflow. */ 7670 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 7671 if (negating) 7672 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 7673 s = fold_convert (diff_type, s); 7674 if (negating) 7675 s = fold_build1 (NEGATE_EXPR, diff_type, s); 7676 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); 7677 7678 if (!chunking) 7679 chunk_size = integer_zero_node; 7680 expr = fold_convert (diff_type, chunk_size); 7681 chunk_size = force_gimple_operand_gsi (&gsi, expr, true, 7682 NULL_TREE, true, GSI_SAME_STMT); 7683 7684 if (fd->tiling) 7685 { 7686 /* Determine the tile size and element step, 7687 modify the outer loop step size. 
*/ 7688 tile_size = create_tmp_var (diff_type, ".tile_size"); 7689 expr = build_int_cst (diff_type, 1); 7690 for (int ix = 0; ix < fd->collapse; ix++) 7691 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr); 7692 expr = force_gimple_operand_gsi (&gsi, expr, true, 7693 NULL_TREE, true, GSI_SAME_STMT); 7694 ass = gimple_build_assign (tile_size, expr); 7695 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7696 7697 element_s = create_tmp_var (diff_type, ".element_s"); 7698 ass = gimple_build_assign (element_s, s); 7699 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7700 7701 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size); 7702 s = force_gimple_operand_gsi (&gsi, expr, true, 7703 NULL_TREE, true, GSI_SAME_STMT); 7704 } 7705 7706 /* Determine the range, avoiding possible unsigned->signed overflow. */ 7707 negating = !up && TYPE_UNSIGNED (iter_type); 7708 expr = fold_build2 (MINUS_EXPR, plus_type, 7709 fold_convert (plus_type, negating ? b : e), 7710 fold_convert (plus_type, negating ? 
e : b)); 7711 expr = fold_convert (diff_type, expr); 7712 if (negating) 7713 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 7714 tree range = force_gimple_operand_gsi (&gsi, expr, true, 7715 NULL_TREE, true, GSI_SAME_STMT); 7716 7717 chunk_no = build_int_cst (diff_type, 0); 7718 if (chunking) 7719 { 7720 gcc_assert (!gimple_in_ssa_p (cfun)); 7721 7722 expr = chunk_no; 7723 chunk_max = create_tmp_var (diff_type, ".chunk_max"); 7724 chunk_no = create_tmp_var (diff_type, ".chunk_no"); 7725 7726 ass = gimple_build_assign (chunk_no, expr); 7727 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7728 7729 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 7730 build_int_cst (integer_type_node, 7731 IFN_GOACC_LOOP_CHUNKS), 7732 dir, range, s, chunk_size, gwv); 7733 gimple_call_set_lhs (call, chunk_max); 7734 gimple_set_location (call, loc); 7735 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 7736 } 7737 else 7738 chunk_size = chunk_no; 7739 7740 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 7741 build_int_cst (integer_type_node, 7742 IFN_GOACC_LOOP_STEP), 7743 dir, range, s, chunk_size, gwv); 7744 gimple_call_set_lhs (call, step); 7745 gimple_set_location (call, loc); 7746 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 7747 7748 /* Remove the GIMPLE_OMP_FOR. */ 7749 gsi_remove (&gsi, true); 7750 7751 /* Fixup edges from head_bb. 
*/ 7752 be = BRANCH_EDGE (head_bb); 7753 fte = FALLTHRU_EDGE (head_bb); 7754 be->flags |= EDGE_FALSE_VALUE; 7755 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 7756 7757 basic_block body_bb = fte->dest; 7758 7759 if (gimple_in_ssa_p (cfun)) 7760 { 7761 gsi = gsi_last_nondebug_bb (cont_bb); 7762 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 7763 7764 offset = gimple_omp_continue_control_use (cont_stmt); 7765 offset_incr = gimple_omp_continue_control_def (cont_stmt); 7766 } 7767 else 7768 { 7769 offset = create_tmp_var (diff_type, ".offset"); 7770 offset_init = offset_incr = offset; 7771 } 7772 bound = create_tmp_var (TREE_TYPE (offset), ".bound"); 7773 7774 /* Loop offset & bound go into head_bb. */ 7775 gsi = gsi_start_bb (head_bb); 7776 7777 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 7778 build_int_cst (integer_type_node, 7779 IFN_GOACC_LOOP_OFFSET), 7780 dir, range, s, 7781 chunk_size, gwv, chunk_no); 7782 gimple_call_set_lhs (call, offset_init); 7783 gimple_set_location (call, loc); 7784 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 7785 7786 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 7787 build_int_cst (integer_type_node, 7788 IFN_GOACC_LOOP_BOUND), 7789 dir, range, s, 7790 chunk_size, gwv, offset_init); 7791 gimple_call_set_lhs (call, bound); 7792 gimple_set_location (call, loc); 7793 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 7794 7795 expr = build2 (cond_code, boolean_type_node, offset_init, bound); 7796 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 7797 GSI_CONTINUE_LINKING); 7798 7799 /* V assignment goes into body_bb. 
*/ 7800 if (!gimple_in_ssa_p (cfun)) 7801 { 7802 gsi = gsi_start_bb (body_bb); 7803 7804 expr = build2 (plus_code, iter_type, b, 7805 fold_convert (plus_type, offset)); 7806 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 7807 true, GSI_SAME_STMT); 7808 ass = gimple_build_assign (v, expr); 7809 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7810 7811 if (fd->collapse > 1 || fd->tiling) 7812 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type); 7813 7814 if (fd->tiling) 7815 { 7816 /* Determine the range of the element loop -- usually simply 7817 the tile_size, but could be smaller if the final 7818 iteration of the outer loop is a partial tile. */ 7819 tree e_range = create_tmp_var (diff_type, ".e_range"); 7820 7821 expr = build2 (MIN_EXPR, diff_type, 7822 build2 (MINUS_EXPR, diff_type, bound, offset), 7823 build2 (MULT_EXPR, diff_type, tile_size, 7824 element_s)); 7825 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 7826 true, GSI_SAME_STMT); 7827 ass = gimple_build_assign (e_range, expr); 7828 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7829 7830 /* Determine bound, offset & step of inner loop. */ 7831 e_bound = create_tmp_var (diff_type, ".e_bound"); 7832 e_offset = create_tmp_var (diff_type, ".e_offset"); 7833 e_step = create_tmp_var (diff_type, ".e_step"); 7834 7835 /* Mark these as element loops. */ 7836 tree t, e_gwv = integer_minus_one_node; 7837 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. 
*/ 7838 7839 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); 7840 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 7841 element_s, chunk, e_gwv, chunk); 7842 gimple_call_set_lhs (call, e_offset); 7843 gimple_set_location (call, loc); 7844 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 7845 7846 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); 7847 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 7848 element_s, chunk, e_gwv, e_offset); 7849 gimple_call_set_lhs (call, e_bound); 7850 gimple_set_location (call, loc); 7851 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 7852 7853 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP); 7854 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range, 7855 element_s, chunk, e_gwv); 7856 gimple_call_set_lhs (call, e_step); 7857 gimple_set_location (call, loc); 7858 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 7859 7860 /* Add test and split block. */ 7861 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 7862 stmt = gimple_build_cond_empty (expr); 7863 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 7864 split = split_block (body_bb, stmt); 7865 elem_body_bb = split->dest; 7866 if (cont_bb == body_bb) 7867 cont_bb = elem_body_bb; 7868 body_bb = split->src; 7869 7870 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 7871 7872 /* Add a dummy exit for the tiled block when cont_bb is missing. */ 7873 if (cont_bb == NULL) 7874 { 7875 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE); 7876 e->probability = profile_probability::even (); 7877 split->probability = profile_probability::even (); 7878 } 7879 7880 /* Initialize the user's loop vars. */ 7881 gsi = gsi_start_bb (elem_body_bb); 7882 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset, 7883 diff_type); 7884 } 7885 } 7886 7887 /* Loop increment goes into cont_bb. 
If this is not a loop, we 7888 will have spawned threads as if it was, and each one will 7889 execute one iteration. The specification is not explicit about 7890 whether such constructs are ill-formed or not, and they can 7891 occur, especially when noreturn routines are involved. */ 7892 if (cont_bb) 7893 { 7894 gsi = gsi_last_nondebug_bb (cont_bb); 7895 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 7896 loc = gimple_location (cont_stmt); 7897 7898 if (fd->tiling) 7899 { 7900 /* Insert element loop increment and test. */ 7901 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step); 7902 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 7903 true, GSI_SAME_STMT); 7904 ass = gimple_build_assign (e_offset, expr); 7905 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7906 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 7907 7908 stmt = gimple_build_cond_empty (expr); 7909 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 7910 split = split_block (cont_bb, stmt); 7911 elem_cont_bb = split->src; 7912 cont_bb = split->dest; 7913 7914 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 7915 split->probability = profile_probability::unlikely ().guessed (); 7916 edge latch_edge 7917 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE); 7918 latch_edge->probability = profile_probability::likely ().guessed (); 7919 7920 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE); 7921 skip_edge->probability = profile_probability::unlikely ().guessed (); 7922 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx); 7923 loop_entry_edge->probability 7924 = profile_probability::likely ().guessed (); 7925 7926 gsi = gsi_for_stmt (cont_stmt); 7927 } 7928 7929 /* Increment offset. 
*/ 7930 if (gimple_in_ssa_p (cfun)) 7931 expr = build2 (plus_code, iter_type, offset, 7932 fold_convert (plus_type, step)); 7933 else 7934 expr = build2 (PLUS_EXPR, diff_type, offset, step); 7935 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 7936 true, GSI_SAME_STMT); 7937 ass = gimple_build_assign (offset_incr, expr); 7938 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 7939 expr = build2 (cond_code, boolean_type_node, offset_incr, bound); 7940 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); 7941 7942 /* Remove the GIMPLE_OMP_CONTINUE. */ 7943 gsi_remove (&gsi, true); 7944 7945 /* Fixup edges from cont_bb. */ 7946 be = BRANCH_EDGE (cont_bb); 7947 fte = FALLTHRU_EDGE (cont_bb); 7948 be->flags |= EDGE_TRUE_VALUE; 7949 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 7950 7951 if (chunking) 7952 { 7953 /* Split the beginning of exit_bb to make bottom_bb. We 7954 need to insert a nop at the start, because splitting is 7955 after a stmt, not before. */ 7956 gsi = gsi_start_bb (exit_bb); 7957 stmt = gimple_build_nop (); 7958 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 7959 split = split_block (exit_bb, stmt); 7960 bottom_bb = split->src; 7961 exit_bb = split->dest; 7962 gsi = gsi_last_bb (bottom_bb); 7963 7964 /* Chunk increment and test goes into bottom_bb. */ 7965 expr = build2 (PLUS_EXPR, diff_type, chunk_no, 7966 build_int_cst (diff_type, 1)); 7967 ass = gimple_build_assign (chunk_no, expr); 7968 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); 7969 7970 /* Chunk test at end of bottom_bb. */ 7971 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); 7972 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 7973 GSI_CONTINUE_LINKING); 7974 7975 /* Fixup edges from bottom_bb. 
*/ 7976 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 7977 split->probability = profile_probability::unlikely ().guessed (); 7978 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); 7979 latch_edge->probability = profile_probability::likely ().guessed (); 7980 } 7981 } 7982 7983 gsi = gsi_last_nondebug_bb (exit_bb); 7984 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 7985 loc = gimple_location (gsi_stmt (gsi)); 7986 7987 if (!gimple_in_ssa_p (cfun)) 7988 { 7989 /* Insert the final value of V, in case it is live. This is the 7990 value for the only thread that survives past the join. */ 7991 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 7992 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 7993 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 7994 expr = fold_build2 (MULT_EXPR, diff_type, expr, s); 7995 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); 7996 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 7997 true, GSI_SAME_STMT); 7998 ass = gimple_build_assign (v, expr); 7999 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 8000 } 8001 8002 /* Remove the OMP_RETURN. */ 8003 gsi_remove (&gsi, true); 8004 8005 if (cont_bb) 8006 { 8007 /* We now have one, two or three nested loops. Update the loop 8008 structures. 
	 */
      class loop *parent = entry_bb->loop_father;
      class loop *body = body_bb->loop_father;

      if (chunking)
	{
	  class loop *chunk_loop = alloc_loop ();
	  chunk_loop->header = head_bb;
	  chunk_loop->latch = bottom_bb;
	  add_loop (chunk_loop, parent);
	  parent = chunk_loop;
	}
      else if (parent != body)
	{
	  gcc_assert (body->header == body_bb);
	  gcc_assert (body->latch == cont_bb
		      || single_pred (body->latch) == cont_bb);
	  parent = NULL;
	}

      if (parent)
	{
	  class loop *body_loop = alloc_loop ();
	  body_loop->header = body_bb;
	  body_loop->latch = cont_bb;
	  add_loop (body_loop, parent);

	  if (fd->tiling)
	    {
	      /* Insert tiling's element loop.  */
	      class loop *inner_loop = alloc_loop ();
	      inner_loop->header = elem_body_bb;
	      inner_loop->latch = elem_cont_bb;
	      add_loop (inner_loop, body_loop);
	    }
	}
    }
}

/* Expand the OMP loop defined by REGION.  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  loops = XALLOCAVEC (struct omp_for_data_loop,
		      gimple_omp_for_collapse (last_stmt (region->entry)));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;
  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
  if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
    {
      /* For non-rectangular loops whose m1/m2 factors and steps are all
	 compile-time constants, diagnose now when the scaled outer step
	 is not a multiple of this loop's step.  */
      for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
	if ((loops[i].m1 || loops[i].m2)
	    && (loops[i].m1 == NULL_TREE
		|| TREE_CODE (loops[i].m1) == INTEGER_CST)
	    && (loops[i].m2 == NULL_TREE
		|| TREE_CODE (loops[i].m2) == INTEGER_CST)
	    && TREE_CODE (loops[i].step) == INTEGER_CST
	    && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
	  {
	    tree t;
	    tree itype = TREE_TYPE (loops[i].v);
	    if (loops[i].m1 && loops[i].m2)
	      t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
	    else if (loops[i].m1)
	      t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
	    else
	      t = loops[i].m2;
	    t = fold_build2 (MULT_EXPR, itype, t,
			     fold_convert (itype,
					   loops[i - loops[i].outer].step));
	    if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
	      t = fold_build2 (TRUNC_MOD_EXPR, itype,
			       fold_build1 (NEGATE_EXPR, itype, t),
			       fold_build1 (NEGATE_EXPR, itype,
					    fold_convert (itype,
							  loops[i].step)));
	    else
	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
			       fold_convert (itype, loops[i].step));
	    if (integer_nonzerop (t))
	      error_at (gimple_location (fd.for_stmt),
			"invalid OpenMP non-rectangular loop step; "
			"%<(%E - %E) * %E%> is not a multiple of loop %d "
			"step %qE",
			loops[i].m2 ? loops[i].m2 : integer_zero_node,
			loops[i].m1 ? loops[i].m1 : integer_zero_node,
			loops[i - loops[i].outer].step, i + 1,
			loops[i].step);
	  }
    }

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt && !fd.non_rect);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Dynamic/guided/runtime/ordered schedules: pick the libgomp
	 GOMP_loop_*_start / GOMP_loop_*_next entry points by computing
	 offsets (fn_index) from the builtin enum values.  NOTE(review):
	 (HOST_WIDE_INT_1U << 31) in SCHED is set exactly on the paths
	 where the nonmonotonic conditions do not hold — presumably the
	 monotonic flag of the GOMP_loop_start ABI; confirm against
	 libgomp.  */
      int fn_index, start_ix, next_ix;
      unsigned HOST_WIDE_INT sched = 0;
      tree sched_arg = NULL_TREE;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
	      && fd.lastprivate_conditional == 0)
	    {
	      gcc_assert (!fd.have_ordered);
	      fn_index = 6;
	      sched = 4;
	    }
	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		   && !fd.have_ordered
		   && fd.lastprivate_conditional == 0)
	    fn_index = 7;
	  else
	    {
	      fn_index = 3;
	      sched = (HOST_WIDE_INT_1U << 31);
	    }
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
	      && !fd.have_ordered
	      && fd.lastprivate_conditional == 0)
	    {
	      fn_index = 3 + fd.sched_kind;
	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	      break;
	    }
	  fn_index = fd.sched_kind;
	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
	  sched += (HOST_WIDE_INT_1U << 31);
	  break;
	case OMP_CLAUSE_SCHEDULE_STATIC:
	  gcc_assert (fd.have_ordered);
	  fn_index = 0;
	  sched = (HOST_WIDE_INT_1U << 31) + 1;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 8;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.have_reductemp || fd.have_pointer_condtemp)
	{
	  if (fd.ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
	  else if (fd.have_ordered)
	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
	  else
	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
	  sched_arg = build_int_cstu (long_integer_type_node, sched);
	  if (!fd.chunk_size)
	    fd.chunk_size = integer_zero_node;
	}
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  /* Shift to the _ull variants of the same entry points.  */
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, sched_arg,
			      inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	  ...
	  case n:
	  ...
8245 default: 8246 abort (); 8247 } 8248 L1: 8249 v = GOMP_sections_next (); 8250 goto L0; 8251 L2: 8252 reduction; 8253 8254 If this is a combined parallel sections, replace the call to 8255 GOMP_sections_start with call to GOMP_sections_next. */ 8256 8257static void 8258expand_omp_sections (struct omp_region *region) 8259{ 8260 tree t, u, vin = NULL, vmain, vnext, l2; 8261 unsigned len; 8262 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; 8263 gimple_stmt_iterator si, switch_si; 8264 gomp_sections *sections_stmt; 8265 gimple *stmt; 8266 gomp_continue *cont; 8267 edge_iterator ei; 8268 edge e; 8269 struct omp_region *inner; 8270 unsigned i, casei; 8271 bool exit_reachable = region->cont != NULL; 8272 8273 gcc_assert (region->exit != NULL); 8274 entry_bb = region->entry; 8275 l0_bb = single_succ (entry_bb); 8276 l1_bb = region->cont; 8277 l2_bb = region->exit; 8278 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) 8279 l2 = gimple_block_label (l2_bb); 8280 else 8281 { 8282 /* This can happen if there are reductions. */ 8283 len = EDGE_COUNT (l0_bb->succs); 8284 gcc_assert (len > 0); 8285 e = EDGE_SUCC (l0_bb, len - 1); 8286 si = gsi_last_nondebug_bb (e->dest); 8287 l2 = NULL_TREE; 8288 if (gsi_end_p (si) 8289 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 8290 l2 = gimple_block_label (e->dest); 8291 else 8292 FOR_EACH_EDGE (e, ei, l0_bb->succs) 8293 { 8294 si = gsi_last_nondebug_bb (e->dest); 8295 if (gsi_end_p (si) 8296 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) 8297 { 8298 l2 = gimple_block_label (e->dest); 8299 break; 8300 } 8301 } 8302 } 8303 if (exit_reachable) 8304 default_bb = create_empty_bb (l1_bb->prev_bb); 8305 else 8306 default_bb = create_empty_bb (l0_bb); 8307 8308 /* We will build a switch() with enough cases for all the 8309 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work 8310 and a default case to abort if something goes wrong. 
*/ 8311 len = EDGE_COUNT (l0_bb->succs); 8312 8313 /* Use vec::quick_push on label_vec throughout, since we know the size 8314 in advance. */ 8315 auto_vec<tree> label_vec (len); 8316 8317 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the 8318 GIMPLE_OMP_SECTIONS statement. */ 8319 si = gsi_last_nondebug_bb (entry_bb); 8320 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); 8321 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); 8322 vin = gimple_omp_sections_control (sections_stmt); 8323 tree clauses = gimple_omp_sections_clauses (sections_stmt); 8324 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 8325 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 8326 tree cond_var = NULL_TREE; 8327 if (reductmp || condtmp) 8328 { 8329 tree reductions = null_pointer_node, mem = null_pointer_node; 8330 tree memv = NULL_TREE, condtemp = NULL_TREE; 8331 gimple_stmt_iterator gsi = gsi_none (); 8332 gimple *g = NULL; 8333 if (reductmp) 8334 { 8335 reductions = OMP_CLAUSE_DECL (reductmp); 8336 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 8337 g = SSA_NAME_DEF_STMT (reductions); 8338 reductions = gimple_assign_rhs1 (g); 8339 OMP_CLAUSE_DECL (reductmp) = reductions; 8340 gsi = gsi_for_stmt (g); 8341 } 8342 else 8343 gsi = si; 8344 if (condtmp) 8345 { 8346 condtemp = OMP_CLAUSE_DECL (condtmp); 8347 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp), 8348 OMP_CLAUSE__CONDTEMP_); 8349 cond_var = OMP_CLAUSE_DECL (c); 8350 tree type = TREE_TYPE (condtemp); 8351 memv = create_tmp_var (type); 8352 TREE_ADDRESSABLE (memv) = 1; 8353 unsigned cnt = 0; 8354 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 8355 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE 8356 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c)) 8357 ++cnt; 8358 unsigned HOST_WIDE_INT sz 8359 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt; 8360 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), 8361 false); 8362 mem = build_fold_addr_expr 
(memv); 8363 } 8364 t = build_int_cst (unsigned_type_node, len - 1); 8365 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START); 8366 stmt = gimple_build_call (u, 3, t, reductions, mem); 8367 gimple_call_set_lhs (stmt, vin); 8368 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 8369 if (condtmp) 8370 { 8371 expand_omp_build_assign (&gsi, condtemp, memv, false); 8372 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), 8373 vin, build_one_cst (TREE_TYPE (cond_var))); 8374 expand_omp_build_assign (&gsi, cond_var, t, false); 8375 } 8376 if (reductmp) 8377 { 8378 gsi_remove (&gsi, true); 8379 release_ssa_name (gimple_assign_lhs (g)); 8380 } 8381 } 8382 else if (!is_combined_parallel (region)) 8383 { 8384 /* If we are not inside a combined parallel+sections region, 8385 call GOMP_sections_start. */ 8386 t = build_int_cst (unsigned_type_node, len - 1); 8387 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); 8388 stmt = gimple_build_call (u, 1, t); 8389 } 8390 else 8391 { 8392 /* Otherwise, call GOMP_sections_next. */ 8393 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 8394 stmt = gimple_build_call (u, 0); 8395 } 8396 if (!reductmp && !condtmp) 8397 { 8398 gimple_call_set_lhs (stmt, vin); 8399 gsi_insert_after (&si, stmt, GSI_SAME_STMT); 8400 } 8401 gsi_remove (&si, true); 8402 8403 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in 8404 L0_BB. 
*/ 8405 switch_si = gsi_last_nondebug_bb (l0_bb); 8406 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); 8407 if (exit_reachable) 8408 { 8409 cont = as_a <gomp_continue *> (last_stmt (l1_bb)); 8410 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); 8411 vmain = gimple_omp_continue_control_use (cont); 8412 vnext = gimple_omp_continue_control_def (cont); 8413 } 8414 else 8415 { 8416 vmain = vin; 8417 vnext = NULL_TREE; 8418 } 8419 8420 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); 8421 label_vec.quick_push (t); 8422 i = 1; 8423 8424 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ 8425 for (inner = region->inner, casei = 1; 8426 inner; 8427 inner = inner->next, i++, casei++) 8428 { 8429 basic_block s_entry_bb, s_exit_bb; 8430 8431 /* Skip optional reduction region. */ 8432 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) 8433 { 8434 --i; 8435 --casei; 8436 continue; 8437 } 8438 8439 s_entry_bb = inner->entry; 8440 s_exit_bb = inner->exit; 8441 8442 t = gimple_block_label (s_entry_bb); 8443 u = build_int_cst (unsigned_type_node, casei); 8444 u = build_case_label (u, NULL, t); 8445 label_vec.quick_push (u); 8446 8447 si = gsi_last_nondebug_bb (s_entry_bb); 8448 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); 8449 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); 8450 gsi_remove (&si, true); 8451 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; 8452 8453 if (s_exit_bb == NULL) 8454 continue; 8455 8456 si = gsi_last_nondebug_bb (s_exit_bb); 8457 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); 8458 gsi_remove (&si, true); 8459 8460 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; 8461 } 8462 8463 /* Error handling code goes in DEFAULT_BB. 
*/ 8464 t = gimple_block_label (default_bb); 8465 u = build_case_label (NULL, NULL, t); 8466 make_edge (l0_bb, default_bb, 0); 8467 add_bb_to_loop (default_bb, current_loops->tree_root); 8468 8469 stmt = gimple_build_switch (vmain, u, label_vec); 8470 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); 8471 gsi_remove (&switch_si, true); 8472 8473 si = gsi_start_bb (default_bb); 8474 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); 8475 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); 8476 8477 if (exit_reachable) 8478 { 8479 tree bfn_decl; 8480 8481 /* Code to get the next section goes in L1_BB. */ 8482 si = gsi_last_nondebug_bb (l1_bb); 8483 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); 8484 8485 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); 8486 stmt = gimple_build_call (bfn_decl, 0); 8487 gimple_call_set_lhs (stmt, vnext); 8488 gsi_insert_before (&si, stmt, GSI_SAME_STMT); 8489 if (cond_var) 8490 { 8491 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var), 8492 vnext, build_one_cst (TREE_TYPE (cond_var))); 8493 expand_omp_build_assign (&si, cond_var, t, false); 8494 } 8495 gsi_remove (&si, true); 8496 8497 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; 8498 } 8499 8500 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. 
*/
  si = gsi_last_nondebug_bb (l2_bb);
  /* Pick the matching GOMP_sections_end* entry point: nowait variant,
     cancellable variant (the GIMPLE_OMP_RETURN has an lhs that receives
     the call's result), or the plain synchronizing form.  */
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single or scope directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.  */

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* Drop the GIMPLE_OMP_SINGLE/GIMPLE_OMP_SCOPE marker ending the entry
     block and turn its successor edge into a plain fallthru edge.  */
  si = gsi_last_nondebug_bb (entry_bb);
  enum gimple_code code = gimple_code (gsi_stmt (si));
  gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* Only a scope region may lack an exit block; nothing more to do then.  */
  if (exit_bb == NULL)
    {
      gcc_assert (code == GIMPLE_OMP_SCOPE);
      return;
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier unless nowait was
     specified; the return's lhs (if any) is forwarded to the barrier.  */
  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: single,
   master, masked, taskgroup, ordered, critical and (non-host) teams.
   All we need to do here is remove the entry and exit markers for
   REGION.
*/

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* The entry block must end in one of the directive markers handled by
     this generic expansion.  */
  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  /* A host teams construct is outlined like a parallel/task region
     instead of being expanded here.  */
  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
    {
      expand_omp_taskreg (region);
      return;
    }
  /* Remove the entry marker and normalize the edge out of the entry block.  */
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* Likewise remove the GIMPLE_OMP_RETURN exit marker, if present.  */
  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* Translate enum omp_memory_order to enum memmodel for the embedded
   fail clause in there.
*/

static enum memmodel
omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
{
  switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
    {
    case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
      /* No explicit fail clause: derive the fail ordering from the
	 success ordering.  A compare-exchange failure ordering cannot
	 include release semantics, so RELEASE maps to RELAXED and
	 ACQ_REL maps to ACQUIRE here.  */
      switch (mo & OMP_MEMORY_ORDER_MASK)
	{
	case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
	case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
	case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
	case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
	case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
	default: break;
	}
      gcc_unreachable ();
    case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
    case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
    case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
    default: gcc_unreachable ();
    }
}

/* Translate enum omp_memory_order to enum memmodel.  The two enums
   are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
   is 0 and omp_memory_order has the fail mode encoded in it too.  */

static enum memmodel
omp_memory_order_to_memmodel (enum omp_memory_order mo)
{
  enum memmodel ret, fail_ret;
  switch (mo & OMP_MEMORY_ORDER_MASK)
    {
    case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
    case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
    case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
    case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
    case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
    default: gcc_unreachable ();
    }
  /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
     we can just return ret here unconditionally.  Otherwise, work around
     it here and make sure fail memmodel is not stronger.  */
  if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
    return ret;
  fail_ret = omp_memory_order_to_fail_memmodel (mo);
  if (fail_ret > ret)
    return fail_ret;
  return ret;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  /* LOAD_BB must end in the GIMPLE_OMP_ATOMIC_LOAD being expanded.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.
*/

  /* INDEX is log2 of the access size; +1 skips the generic _N entry to
     select the correspondingly sized __atomic_load_<n> builtin.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  /* Build LOADED_VAL = __atomic_load_<n> (ADDR, MO), view-converting the
     result if the builtin's return type differs from TYPE.  */
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 2, addr, mo);
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Also drop the matching GIMPLE_OMP_ATOMIC_STORE in the successor.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* Select __atomic_exchange_<n> or __atomic_store_<n> by INDEX
     (log2 of the access size).  */
  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
  if (exchange)
    {
      /* For an exchange the builtin's result is the previous value;
	 assign it to LOADED_VAL, view-converting if necessary.  */
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.
*/

  /* Locate the single assignment in STORE_BB that feeds the
     GIMPLE_OMP_ATOMIC_STORE; bail out on any other shape.  */
  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  /* need_new/need_old: whether the post- or pre-operation value of the
     location is consumed (decides between OP_FETCH and FETCH_OP).  */
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operaton optab also implements compare-and-swap.
     Let optabs.cc take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ?
loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Drop the GIMPLE_OMP_ATOMIC_STORE and the assignment feeding it.  */
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
   Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_cas (basic_block load_bb, tree addr,
		       tree loaded_val, tree stored_val, int index)
{
  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp == e ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   or in store_bb instead:
       tmp2 = tmp == e;
       val = tmp2 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   or:
       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
       val = e == tmp3 ? d : tmp;
       GIMPLE_OMP_ATOMIC_STORE (val)

   etc.  */

  /* Walk STORE_BB backwards from the GIMPLE_OMP_ATOMIC_STORE,
     pattern-matching the COND_EXPR, the optional comparison statement
     (COND_STMT) and the optional VIEW_CONVERT (VCE_STMT, used for
     floating-point operands compared as same-sized integers).  */
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
  gimple *store_stmt = gsi_stmt (gsi);
  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return false;
  gimple *condexpr_stmt = gsi_stmt (gsi);
  if (!is_gimple_assign (condexpr_stmt)
      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
    return false;
  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
    return false;
  gimple *cond_stmt = NULL;
  gimple *vce_stmt = NULL;
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi))
    {
      cond_stmt = gsi_stmt (gsi);
      if (!is_gimple_assign (cond_stmt))
	return false;
      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
	{
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    {
	      vce_stmt = gsi_stmt (gsi);
	      if (!is_gimple_assign (vce_stmt)
		  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
		return false;
	    }
	}
      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
	std::swap (vce_stmt, cond_stmt);
      else
	return false;
      if (vce_stmt)
	{
	  /* The view-convert must turn the floating-point LOADED_VAL into
	     a same-sized integral value; no earlier statements allowed.  */
	  tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
	  if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
	      || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
	    return false;
	  if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
	      || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
	      || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
				      TYPE_SIZE (TREE_TYPE (loaded_val))))
	    return false;
	  gsi_prev_nondebug (&gsi);
	  if (!gsi_end_p (gsi))
	    return false;
	}
    }
  /* Extract the comparison operands, either from COND_STMT or from a
     comparison embedded directly in the COND_EXPR.  */
  tree cond = gimple_assign_rhs1 (condexpr_stmt);
  tree cond_op1, cond_op2;
  if (cond_stmt)
    {
      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
	return false;
      cond_op1 = gimple_assign_rhs1 (cond_stmt);
      cond_op2 = gimple_assign_rhs2 (cond_stmt);
    }
  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
    return false;
  else
    {
      cond_op1 = TREE_OPERAND (cond, 0);
      cond_op2 = TREE_OPERAND (cond, 1);
    }
  /* D is the desired (new) value; for NE_EXPR the COND_EXPR arms are
     swapped relative to EQ_EXPR.  */
  tree d;
  if (TREE_CODE (cond) == NE_EXPR)
    {
      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
	return false;
      d = gimple_assign_rhs3 (condexpr_stmt);
    }
  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
    return false;
  else
    d = gimple_assign_rhs2 (condexpr_stmt);
  /* E is the expected value: whichever comparison operand is not the
     (possibly view-converted) loaded value.  */
  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
  if (operand_equal_p (e, cond_op1))
    e = cond_op2;
  else if (operand_equal_p (e, cond_op2))
    e = cond_op1;
  else
    return false;

  location_t loc = gimple_location (store_stmt);
  gimple *load_stmt = last_stmt (load_bb);
  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
  bool weak = gimple_omp_atomic_weak_p (load_stmt);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
  gcc_checking_assert (!need_old || !need_new);

  enum built_in_function fncode
    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				+ index + 1);
  tree cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Floating-point compare-exchange is only handled through the
     view-converted integral form matched above.  */
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
    return false;

  /* Convert E and D to the builtin's integral type if needed.  */
  gsi = gsi_for_stmt (store_stmt);
  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
    {
      tree ne = create_tmp_reg (itype);
      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      e = ne;
    }
  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
    {
      tree nd = create_tmp_reg (itype);
      enum tree_code code;
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
	{
	  code = VIEW_CONVERT_EXPR;
	  d = build1 (VIEW_CONVERT_EXPR, itype, d);
	}
      else
	code = NOP_EXPR;
      gimple *g = gimple_build_assign (nd, code, d);
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      d = nd;
    }

  /* Emit the ATOMIC_COMPARE_EXCHANGE internal call.  Its complex result
     packs the old value (real part) and success flag (imag part); FLAG
     encodes the access size plus 256 for a weak compare-exchange.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
  gimple *g
    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
				  build_int_cst (integer_type_node, flag),
				  mo, fmo);
  tree cres = create_tmp_reg (ctype);
  gimple_call_set_lhs (g, cres);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  if (cond_stmt || need_old || need_new)
    {
      tree im = create_tmp_reg (itype);
      g = gimple_build_assign (im, IMAGPART_EXPR,
			       build1 (IMAGPART_EXPR, itype, cres));
      gimple_set_location (g, loc);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

      tree re = NULL_TREE;
      if (need_old || need_new)
	{
	  re = create_tmp_reg (itype);
	  g = gimple_build_assign (re, REALPART_EXPR,
				   build1 (REALPART_EXPR, itype, cres));
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}

      if (cond_stmt)
	{
	  /* Preserve the comparison result for any remaining uses of
	     COND_STMT's lhs.  */
	  g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
				   NOP_EXPR, im);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}
      else if (need_new)
	{
	  /* The new value is D on success, the old value otherwise.  */
	  g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
				   build2 (NE_EXPR, boolean_type_node,
					   im, build_zero_cst (itype)),
				   d, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  re = gimple_assign_lhs (g);
	}

      if (need_old || need_new)
	{
	  tree v = need_old ? loaded_val : stored_val;
	  enum tree_code code;
	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
	    {
	      code = VIEW_CONVERT_EXPR;
	      re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
	    }
	  else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
	    code = NOP_EXPR;
	  else
	    code = TREE_CODE (re);
	  g = gimple_build_assign (v, code, re);
	  gimple_set_location (g, loc);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	}
    }

  /* Finally remove all the pattern-matched statements; GSI still points
     at the GIMPLE_OMP_ATOMIC_STORE here.  */
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (load_stmt);
  gsi_remove (&gsi, true);
  gsi = gsi_for_stmt (condexpr_stmt);
  gsi_remove (&gsi, true);
  if (cond_stmt)
    {
      gsi = gsi_for_stmt (cond_stmt);
      gsi_remove (&gsi, true);
    }
  if (vce_stmt)
    {
      gsi = gsi_for_stmt (vce_stmt);
      gsi_remove (&gsi, true);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.
*/

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  fncode = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				     + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
  location_t loc = gimple_location (gsi_stmt (si));
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer the sized __atomic_load_<n> builtin for the initial read;
     fall back to a plain MEM_REF if it is unavailable.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  In SSA form this becomes a
     PHI in the loop header (second argument filled in below once the back
     edge exists); otherwise a plain assignment suffices.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      /* Keep LOADED_VAL (the original, e.g. floating-point, view) in sync
	 with the integral LOADEDI at the top of the loop.  */
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  tree ctype = build_complex_type (itype);
  int flag = int_size_in_bytes (itype);
  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
					      ctype, 6, iaddr, loadedi,
					      storedi,
					      build_int_cst (integer_type_node,
							     flag),
					      mo, fmo);
  /* The real part of the complex result is the value observed in memory.  */
  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.
*/
  gsi_remove (&si, true);

  /* Register the compare-and-swap retry loop with the loop tree.  */
  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

			GOMP_atomic_start ();
			*addr = rhs;
			GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Build *ADDR with LOADED_VAL's type, adjusting the MEM_REF's offset
     operand so its pointer type agrees with that type.  */
  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}

/* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  INDEX becomes
     log2 of the byte size; 0..4 covers 1- through 16-byte accesses.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  Only usable when the region is a pure read,
	     i.e. the load and store values are the same temporary.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
	  if (store_bb == single_succ (load_bb)
	      && !gimple_in_ssa_p (cfun)
	      && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
					index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.
*/ 9524 9525static void 9526mark_loops_in_oacc_kernels_region (basic_block region_entry, 9527 basic_block region_exit) 9528{ 9529 class loop *outer = region_entry->loop_father; 9530 gcc_assert (region_exit == NULL || outer == region_exit->loop_father); 9531 9532 /* Don't parallelize the kernels region if it contains more than one outer 9533 loop. */ 9534 unsigned int nr_outer_loops = 0; 9535 class loop *single_outer = NULL; 9536 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next) 9537 { 9538 gcc_assert (loop_outer (loop) == outer); 9539 9540 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) 9541 continue; 9542 9543 if (region_exit != NULL 9544 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit)) 9545 continue; 9546 9547 nr_outer_loops++; 9548 single_outer = loop; 9549 } 9550 if (nr_outer_loops != 1) 9551 return; 9552 9553 for (class loop *loop = single_outer->inner; 9554 loop != NULL; 9555 loop = loop->inner) 9556 if (loop->next) 9557 return; 9558 9559 /* Mark the loops in the region. */ 9560 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner) 9561 loop->in_oacc_kernels_region = true; 9562} 9563 9564/* Build target argument identifier from the DEVICE identifier, value 9565 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ 9566 9567static tree 9568get_target_argument_identifier_1 (int device, bool subseqent_param, int id) 9569{ 9570 tree t = build_int_cst (integer_type_node, device); 9571 if (subseqent_param) 9572 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 9573 build_int_cst (integer_type_node, 9574 GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); 9575 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 9576 build_int_cst (integer_type_node, id)); 9577 return t; 9578} 9579 9580/* Like above but return it in type that can be directly stored as an element 9581 of the argument array. 
*/ 9582 9583static tree 9584get_target_argument_identifier (int device, bool subseqent_param, int id) 9585{ 9586 tree t = get_target_argument_identifier_1 (device, subseqent_param, id); 9587 return fold_convert (ptr_type_node, t); 9588} 9589 9590/* Return a target argument consisting of DEVICE identifier, value identifier 9591 ID, and the actual VALUE. */ 9592 9593static tree 9594get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, 9595 tree value) 9596{ 9597 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, 9598 fold_convert (integer_type_node, value), 9599 build_int_cst (unsigned_type_node, 9600 GOMP_TARGET_ARG_VALUE_SHIFT)); 9601 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 9602 get_target_argument_identifier_1 (device, false, id)); 9603 t = fold_convert (ptr_type_node, t); 9604 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); 9605} 9606 9607/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, 9608 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, 9609 otherwise push an identifier (with DEVICE and ID) and the VALUE in two 9610 arguments. */ 9611 9612static void 9613push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, 9614 int id, tree value, vec <tree> *args) 9615{ 9616 if (tree_fits_shwi_p (value) 9617 && tree_to_shwi (value) > -(1 << 15) 9618 && tree_to_shwi (value) < (1 << 15)) 9619 args->quick_push (get_target_argument_value (gsi, device, id, value)); 9620 else 9621 { 9622 args->quick_push (get_target_argument_identifier (device, true, id)); 9623 value = fold_convert (ptr_type_node, value); 9624 value = force_gimple_operand_gsi (gsi, value, true, NULL, true, 9625 GSI_SAME_STMT); 9626 args->quick_push (value); 9627 } 9628} 9629 9630/* Create an array of arguments that is then passed to GOMP_target. 
*/ 9631 9632static tree 9633get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) 9634{ 9635 auto_vec <tree, 6> args; 9636 tree clauses = gimple_omp_target_clauses (tgt_stmt); 9637 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); 9638 if (c) 9639 t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c); 9640 else 9641 t = integer_minus_one_node; 9642 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 9643 GOMP_TARGET_ARG_NUM_TEAMS, t, &args); 9644 9645 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); 9646 if (c) 9647 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); 9648 else 9649 t = integer_minus_one_node; 9650 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, 9651 GOMP_TARGET_ARG_THREAD_LIMIT, t, 9652 &args); 9653 9654 /* Produce more, perhaps device specific, arguments here. */ 9655 9656 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, 9657 args.length () + 1), 9658 ".omp_target_args"); 9659 for (unsigned i = 0; i < args.length (); i++) 9660 { 9661 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 9662 build_int_cst (integer_type_node, i), 9663 NULL_TREE, NULL_TREE); 9664 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), 9665 GSI_SAME_STMT); 9666 } 9667 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, 9668 build_int_cst (integer_type_node, args.length ()), 9669 NULL_TREE, NULL_TREE); 9670 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), 9671 GSI_SAME_STMT); 9672 TREE_ADDRESSABLE (argarray) = 1; 9673 return build_fold_addr_expr (argarray); 9674} 9675 9676/* Expand the GIMPLE_OMP_TARGET starting at REGION. 
*/

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  /* Sanity check: only the target kinds listed here are expected.  */
  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      break;
    default:
      gcc_unreachable ();
    }

  /* Only offloaded kinds have an outlined child function.  */
  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* Going on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
			     && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  /* Scan forward for the assignment whose RHS is &SENDER; the
	     loop asserts it must exist before the block ends.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.
*/
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  /* Select the libgomp/libgomp-OACC entry point for this target kind.  */
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  /* OpenACC constructs pass a flags word; OpenMP constructs pass a device
     id.  Only one of GOACC_FLAGS/DEVICE is used below, per branch.  */
  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
	    sorry_at (device_loc, "%<ancestor%> not yet supported");
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
	     library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      /* FIXME: in_reduction(...) nowait is unimplemented yet, pretend
	 nowait doesn't appear.  */
      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
	c = NULL;
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
			     : flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
	tp = &goacc_flags;
      else
	{
	  /* Ensure 'device' is of the correct type.  */
	  device = fold_convert_loc (device_loc, integer_type_node, device);

	  tp = &device;
	}

      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      /* Build a diamond: COND_BB tests COND, THEN_BB keeps *TP as-is,
	 ELSE_BB substitutes the host-fallback value; both feed TMP_VAR.  */
      tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
	stmt = gimple_build_assign (tmp_var,
				    BIT_IOR_EXPR,
				    *tp,
				    build_int_cst (integer_type_node,
						   GOACC_FLAG_HOST_FALLBACK));
      else
	stmt = gimple_build_assign (tmp_var,
				    build_int_cst (integer_type_node,
						   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
    }

  /* T1..T4 are the mapping triplet (count, addresses, sizes, kinds), or
     zeroes when the construct maps nothing.  */
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_DATA:
    case BUILT_IN_GOACC_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.
*/
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  /* Emit the launch/data call in place of (or before) the target stmt.  */
  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      /* Non-offloaded kinds keep the GIMPLE_OMP_TARGET stmt in place up to
	 here; remove it now that the library call has been emitted.  */
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.  This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);

      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      /* Expand children before this region (depth-first).  */
      if (region->inner)
	expand_omp (region->inner);

      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	case GIMPLE_OMP_SCOPE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_MASKED:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  /* Only the last non-debug stmt of a block can open/close a region.  */
  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.
 */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but
		     gimplifier put the end API call into try finally block
		     for it, so omp expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but
	       gimplifier put the end API call into try finally block
	       for it, so omp expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  /* When building a single tree, stop once we have left the outermost
     region (PARENT dropped back to NULL).  */
  if (single_tree && !parent)
    return;

  /* Recurse into the blocks dominated by BB.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.
 */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions.  Return the root of
   the OMP region tree.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  /* Nothing to do if the function contains no OMP directives.  */
  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.
 */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
  {
    bool gate = ((flag_openacc != 0 || flag_openmp != 0
		  || flag_openmp_simd != 0)
		 && !seen_error ());

    /* This pass always runs, to provide PROP_gimple_eomp.
       But often, there is nothing to do.  */
    if (!gate)
      return 0;

    return execute_expand_omp ();
  }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
  {
    /* Only run if OMP expansion has not already been performed.  */
    return !(fun->curr_properties & PROP_gimple_eomp);
  }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_MASKED:
    case GIMPLE_OMP_SCOPE:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* Taskgroup opens no region for edge purposes; pop right away.  */
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* A taskwait-with-depend task is stand-alone; pop right away.  */
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* #pragma omp ordered depend is stand-alone; pop right away.  */
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  /* These target kinds open no region for edge purposes;
	     pop right away.  */
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Report the (possibly changed) current region back to the caller
     in tree-cfg, keyed by its entry block index.  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}