/* omp-expand.c, revision 1.6 (VCS metadata).  */
1/* Expansion pass for OMP directives. Outlines regions of certain OMP 2 directives to separate functions, converts others into explicit calls to the 3 runtime library (libgomp) and so forth 4 5Copyright (C) 2005-2020 Free Software Foundation, Inc. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it under 10the terms of the GNU General Public License as published by the Free 11Software Foundation; either version 3, or (at your option) any later 12version. 13 14GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15WARRANTY; without even the implied warranty of MERCHANTABILITY or 16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "memmodel.h" 27#include "backend.h" 28#include "target.h" 29#include "rtl.h" 30#include "tree.h" 31#include "gimple.h" 32#include "cfghooks.h" 33#include "tree-pass.h" 34#include "ssa.h" 35#include "optabs.h" 36#include "cgraph.h" 37#include "pretty-print.h" 38#include "diagnostic-core.h" 39#include "fold-const.h" 40#include "stor-layout.h" 41#include "cfganal.h" 42#include "internal-fn.h" 43#include "gimplify.h" 44#include "gimple-iterator.h" 45#include "gimplify-me.h" 46#include "gimple-walk.h" 47#include "tree-cfg.h" 48#include "tree-into-ssa.h" 49#include "tree-ssa.h" 50#include "splay-tree.h" 51#include "cfgloop.h" 52#include "omp-general.h" 53#include "omp-offload.h" 54#include "tree-cfgcleanup.h" 55#include "alloc-pool.h" 56#include "symbol-summary.h" 57#include "gomp-constants.h" 58#include "gimple-pretty-print.h" 59#include "hsa-common.h" 60#include "stringpool.h" 61#include "attribs.h" 62 63/* OMP region information. 
Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* Copy of fd.lastprivate_conditional != 0.  */
  bool has_lastprivate_conditional;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Root of the tree of omp_region structures for the current function;
   toplevel regions are chained through their NEXT field (see
   new_omp_region) and released by omp_free_regions.  */
static struct omp_region *root_omp_region;
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.
*/

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header and check whether they appear on the LHS of
   any statement in WS_ENTRY_BB.  If so, then we cannot emit the
   combined call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.
*/

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  struct omp_for_data fd;
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;
  /* The combined library call passes long bounds (see
     get_ws_args_for), so give up for other iteration types.  */
  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  We give up too easily here.  If any of these arguments
     are not constants, they will likely involve variables that have
     been mapped into fields of .omp_data_s for sharing with the child
     function.  With appropriate data flow, it would be possible to
     see through this.  */
  if (!is_gimple_min_invariant (fd.loop.n1)
      || !is_gimple_min_invariant (fd.loop.n2)
      || !is_gimple_min_invariant (fd.loop.step)
      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
    return false;

  return true;
}

/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
   presence (SIMD_SCHEDULE).  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (!simd_schedule || integer_zerop (chunk_size))
    return chunk_size;

  poly_uint64 vf = omp_max_vf ();
  if (known_eq (vf, 1U))
    return chunk_size;

  /* Round CHUNK_SIZE up to the next multiple of the vectorization
     factor: (chunk_size + vf - 1) & -vf.  */
  tree type = TREE_TYPE (chunk_size);
  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
			    build_int_cst (type, vf - 1));
  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
		      build_int_cst (type, -vf));
}

/* Collect additional arguments needed to emit a combined
   parallel+workshare call.  WS_STMT is the workshare directive being
   expanded.
*/

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a loop combined into the parallel, the real bounds
	     are carried by the first two _looptemp_ clauses on the
	     enclosing parallel PAR_STMT.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.
*/

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  /* Give up for task reductions on the parallel, while it is implementable,
     adding another big set of APIs or slowing down the normal paths is
     not acceptable.  */
  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
    return;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
	    return;
	}
      else if (region->inner->type == GIMPLE_OMP_SECTIONS
	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				    OMP_CLAUSE__REDUCTEMP_)
		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
				       OMP_CLAUSE__CONDTEMP_)))
	return;

      /* Mark both the parallel region and the nested workshare region
	 combined, and record the extra library-call arguments.  */
      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.
*/

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  /* FILE is the dump stream; INDENT is the current indentation level,
     increased by 4 for each nesting level of inner regions.  */
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  /* Peer regions are dumped at the same indentation level.  */
  if (region->next)
    dump_omp_region (file, region->next, indent);
}

/* Dump REGION to stderr, for use from the debugger.  */

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

/* Dump the whole region tree of the current function to stderr.  */

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
		struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  region->outer = parent;
  region->entry = bb;
  region->type = type;

  if (parent)
    {
      /* This is a nested region.  Add it to the list of inner
	 regions in PARENT.  */
      region->next = parent->inner;
      parent->inner = region;
    }
  else
    {
      /* This is a toplevel region.  Add it to the list of toplevel
	 regions in ROOT_OMP_REGION.  */
      region->next = root_omp_region;
      root_omp_region = region;
    }

  return region;
}

/* Release the memory associated with the region tree rooted at REGION.
*/

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *i, *n;

  /* Recursively free the children first; N caches the sibling link
     before I is freed.  */
  for (i = region->inner; i ; i = n)
    {
      n = i->next;
      free_omp_region_1 (i);
    }

  free (region);
}

/* Release the memory for the entire omp region tree.  */

void
omp_free_regions (void)
{
  struct omp_region *r, *n;
  for (r = root_omp_region; r ; r = n)
    {
      n = r->next;
      free_omp_region_1 (r);
    }
  root_omp_region = NULL;
}

/* A convenience function to build an empty GIMPLE_COND with just the
   condition.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  enum tree_code pred_code;
  tree lhs, rhs;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
}

/* Return true if a parallel REGION is within a declare target function or
   within a target region and is not a part of a gridified target.  */

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  /* ENTRY_STMT is only read in the switch cases that set it first,
     so it needs no initializer.  */
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
      case GIMPLE_OMP_TEAMS:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.
*/

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  if (rtmp)
    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
  else if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      /* For lastprivate(conditional:), our implementation
		 requires monotonic behavior.  */
	      if (region->inner->has_lastprivate_conditional != 0)
		start_ix2 = 3;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
		start_ix2 = 6;
	      else if ((region->inner->sched_modifiers
			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
		start_ix2 = 7;
	      else
		start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if ((region->inner->sched_modifiers
		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
		  && !region->inner->has_lastprivate_conditional)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  /* NOTE(review): this offset arithmetic assumes the
	     BUILT_IN_GOMP_PARALLEL_LOOP_* builtins are declared
	     consecutively in schedule-kind order.  */
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Build a diamond: cond_bb branches to then_bb (val) and
	     else_bb (1u), which both fall through to BB where the
	     result is merged (via a PHI in SSA form).  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  if (rtmp)
    {
      /* GOMP_parallel_reductions returns a value; store it (via a
	 pointer-sized integer) into the _reductemp_ decl.  */
      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
		  fold_convert (type,
				fold_convert (pointer_sized_int_node, t)));
    }
  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.
*/

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  /* Accumulate the GOMP_TASK_FLAG_* bits passed to the runtime.  */
  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      /* For a taskloop, the enclosing region's entry is the
	 GIMPLE_OMP_FOR carrying the loop bounds and clauses.  */
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
	iflags |= GOMP_TASK_FLAG_REDUCTION;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* Taskloop has no separate COND argument; fold the if
	     clause into the flags word at run time instead.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  /* GOMP_taskloop{,_ull} takes 11 arguments; GOMP_task takes 9.  */
  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_taskwait_depend to actually
   generate the taskwait operation.  BB is the block where to insert the
   code.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  /* Without a depend clause there is nothing to wait for.  */
  if (depend == NULL_TREE)
    return;

  depend = OMP_CLAUSE_DECL (depend);

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t
    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
		       1, depend);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Build the function call to GOMP_teams_reg to actually
   generate the host teams operation.  REGION is the teams region
   being expanded.  BB is the block where to insert the code.
*/

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);
  /* Zero means "let the runtime choose" for both NUM_TEAMS and
     THREAD_LIMIT.  */
  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (num_teams == NULL_TREE)
    num_teams = build_int_cst (unsigned_type_node, 0);
  else
    {
      num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
      num_teams = fold_convert (unsigned_type_node, num_teams);
    }
  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (thread_limit == NULL_TREE)
    thread_limit = build_int_cst (unsigned_type_node, 0);
  else
    {
      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
      thread_limit = fold_convert (unsigned_type_node, thread_limit);
    }

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree t2 = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
			       args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  /* Walk V backwards so the resulting chain preserves the vector's
     original order.  */
  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.
   Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* Lazily computed tri-state: -1 = not yet determined, 0 = no
     addressable locals found, 1 = at least one found.  Computed at most
     once per region, on the first candidate GIMPLE_OMP_RETURN.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  /* Look at each predecessor of the exit block: if it ends in a
     workshare's GIMPLE_OMP_RETURN that still waits, try to mark it
     nowait, since the parallel's own implicit barrier follows.  */
  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* Scan the outlined child function's locals ...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ... and the BLOCK vars from the GIMPLE_OMP_RETURN's
		 block outwards, up to (and including) the parallel's
		 own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

/* Walk the region tree rooted at REGION and remove redundant exit
   barriers from every GIMPLE_OMP_PARALLEL region found.  */

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.  Similarly for task body, except
   that in untied task omp_get_thread_num () can change at any task
   scheduling point.
*/ 1131 1132static void 1133optimize_omp_library_calls (gimple *entry_stmt) 1134{ 1135 basic_block bb; 1136 gimple_stmt_iterator gsi; 1137 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1138 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); 1139 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1140 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); 1141 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK 1142 && omp_find_clause (gimple_omp_task_clauses (entry_stmt), 1143 OMP_CLAUSE_UNTIED) != NULL); 1144 1145 FOR_EACH_BB_FN (bb, cfun) 1146 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1147 { 1148 gimple *call = gsi_stmt (gsi); 1149 tree decl; 1150 1151 if (is_gimple_call (call) 1152 && (decl = gimple_call_fndecl (call)) 1153 && DECL_EXTERNAL (decl) 1154 && TREE_PUBLIC (decl) 1155 && DECL_INITIAL (decl) == NULL) 1156 { 1157 tree built_in; 1158 1159 if (DECL_NAME (decl) == thr_num_id) 1160 { 1161 /* In #pragma omp task untied omp_get_thread_num () can change 1162 during the execution of the task region. */ 1163 if (untied_task) 1164 continue; 1165 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1166 } 1167 else if (DECL_NAME (decl) == num_thr_id) 1168 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1169 else 1170 continue; 1171 1172 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) 1173 || gimple_call_num_args (call) != 0) 1174 continue; 1175 1176 if (flag_exceptions && !TREE_NOTHROW (decl)) 1177 continue; 1178 1179 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE 1180 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), 1181 TREE_TYPE (TREE_TYPE (built_in)))) 1182 continue; 1183 1184 gimple_call_set_fndecl (call, built_in); 1185 } 1186 } 1187} 1188 1189/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be 1190 regimplified. 
*/ 1191 1192static tree 1193expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) 1194{ 1195 tree t = *tp; 1196 1197 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ 1198 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) 1199 return t; 1200 1201 if (TREE_CODE (t) == ADDR_EXPR) 1202 recompute_tree_invariant_for_addr_expr (t); 1203 1204 *walk_subtrees = !TYPE_P (t) && !DECL_P (t); 1205 return NULL_TREE; 1206} 1207 1208/* Prepend or append TO = FROM assignment before or after *GSI_P. */ 1209 1210static void 1211expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, 1212 bool after) 1213{ 1214 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); 1215 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, 1216 !after, after ? GSI_CONTINUE_LINKING 1217 : GSI_SAME_STMT); 1218 gimple *stmt = gimple_build_assign (to, from); 1219 if (after) 1220 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); 1221 else 1222 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); 1223 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) 1224 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) 1225 { 1226 gimple_stmt_iterator gsi = gsi_for_stmt (stmt); 1227 gimple_regimplify_operands (stmt, &gsi); 1228 } 1229} 1230 1231/* Expand the OpenMP parallel or task directive starting at REGION. 
 */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);

  /* A GIMPLE_OMP_TASK carrying the taskwait_p flag represents a
     taskwait with depend clauses rather than a real task body; expand
     it directly into a GOMP_taskwait_depend call and we are done.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
      && gimple_omp_task_taskwait_p (entry_stmt))
    {
      new_bb = region->entry;
      gsi = gsi_last_nondebug_bb (region->entry);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);
      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
      return;
    }

  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  /* NOTE(review): for tasks the region's cont block appears to play the
     role of the exit of the outlined body here -- confirm against the
     region-building code, which is outside this chunk.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  /* Find the data-copy assignment; it must exist, hence the
	     assert inside the unbounded loop.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && (TREE_OPERAND (arg, 0)
			  == gimple_omp_taskreg_data_arg (entry_stmt)))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to it's default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK
			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      edge e2 = NULL;
      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  /* For tasks, redirect control around the outlined body with an
	     abnormal edge to the region exit; the branch edge into the
	     body disappears along with the outlined region.  */
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  /* Finally emit the library call that launches the outlined body.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.
 */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree diff_type,
			   tree bound_type, location_t loc)
{
  tree tiling = fd->tiling;
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);

      /* Pointer iteration variables advance by sizetype offsets.  */
      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  /* Not tiled: the loop's own iteration variable is the outer
	     variable and there is no tile increment.  */
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations:
	 iters = (range - dir + step) / step, with truncating division.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      /* The collapsed loop's bound is the product of the member
	 iteration counts.  */
      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.  IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_inits.
 */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar,
			   tree diff_type)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      /* Pointer iteration variables advance via POINTER_PLUS_EXPR with
	 a sizetype offset.  */
      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      /* Peel this member's index off IVAR: the remainder modulo this
	 member's iteration count is its index, the quotient is carried
	 to the next-outer member.  The outermost member (ix == 0) uses
	 the remaining quotient directly.  */
      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      /* v = base + index * step (base is the tile iterator when
	 emitting the element loop of a tile).  */
      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  fold_convert (diff_type, collapse->step));
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.
If this is the outermost 1708 of the combined collapse > 1 loop constructs, generate code like: 1709 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; 1710 if (cond3 is <) 1711 adj = STEP3 - 1; 1712 else 1713 adj = STEP3 + 1; 1714 count3 = (adj + N32 - N31) / STEP3; 1715 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; 1716 if (cond2 is <) 1717 adj = STEP2 - 1; 1718 else 1719 adj = STEP2 + 1; 1720 count2 = (adj + N22 - N21) / STEP2; 1721 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; 1722 if (cond1 is <) 1723 adj = STEP1 - 1; 1724 else 1725 adj = STEP1 + 1; 1726 count1 = (adj + N12 - N11) / STEP1; 1727 count = count1 * count2 * count3; 1728 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: 1729 count = 0; 1730 and set ZERO_ITER_BB to that bb. If this isn't the outermost 1731 of the combined loop constructs, just initialize COUNTS array 1732 from the _looptemp_ clauses. */ 1733 1734/* NOTE: It *could* be better to moosh all of the BBs together, 1735 creating one larger BB with all the computation and the unexpected 1736 jump at the end. I.e. 1737 1738 bool zero3, zero2, zero1, zero; 1739 1740 zero3 = N32 c3 N31; 1741 count3 = (N32 - N31) /[cl] STEP3; 1742 zero2 = N22 c2 N21; 1743 count2 = (N22 - N21) /[cl] STEP2; 1744 zero1 = N12 c1 N11; 1745 count1 = (N12 - N11) /[cl] STEP1; 1746 zero = zero3 || zero2 || zero1; 1747 count = count1 * count2 * count3; 1748 if (__builtin_expect(zero, false)) goto zero_iter_bb; 1749 1750 After all, we expect the zero=false, and thus we expect to have to 1751 evaluate all of the comparison expressions, so short-circuiting 1752 oughtn't be a win. Since the condition isn't protecting a 1753 denominator, we're not concerned about divide-by-zero, so we can 1754 fully evaluate count even if a numerator turned out to be wrong. 1755 1756 It seems like putting this all together would create much better 1757 scheduling opportunities, and less pressure on the chip's branch 1758 predictor. 
 */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  /* If this isn't the outermost of the combined loop constructs, just
     pull the counts out of the _looptemp_ clauses instead of
     recomputing them.  */
  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  /* For the non-collapsed loops of an ordered clause, if any bound pair
     can be folded to "no iterations", all their counts are zero.  */
  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      /* Unless the condition is provably always true, emit a runtime
	 check that branches to the (possibly shared) zero-iteration
	 block when this member loop runs zero times.  */
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  /* Collapsed members share zero_iter1_bb; ordered-only members
	     share zero_iter2_bb.  */
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      /* count = (step - 1 + n2 - n1) / step for LT, and with +1 for GT;
	 see the adj computation in the function comment.  */
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      /* Accumulate the total iteration count into fd->loop.n2.  */
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ?
gimple_omp_taskreg_clauses (inner_stmt) 1947 : gimple_omp_for_clauses (inner_stmt); 1948 /* First two _looptemp_ clauses are for istart/iend, counts[0] 1949 isn't supposed to be handled, as the inner loop doesn't 1950 use it. */ 1951 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 1952 gcc_assert (innerc); 1953 for (i = 0; i < fd->collapse; i++) 1954 { 1955 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 1956 OMP_CLAUSE__LOOPTEMP_); 1957 gcc_assert (innerc); 1958 if (i) 1959 { 1960 tree tem = OMP_CLAUSE_DECL (innerc); 1961 tree t = fold_convert (TREE_TYPE (tem), counts[i]); 1962 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 1963 false, GSI_CONTINUE_LINKING); 1964 gassign *stmt = gimple_build_assign (tem, t); 1965 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1966 } 1967 } 1968 return; 1969 } 1970 1971 tree type = TREE_TYPE (fd->loop.v); 1972 tree tem = create_tmp_reg (type, ".tem"); 1973 gassign *stmt = gimple_build_assign (tem, startvar); 1974 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 1975 1976 for (i = fd->collapse - 1; i >= 0; i--) 1977 { 1978 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; 1979 itype = vtype; 1980 if (POINTER_TYPE_P (vtype)) 1981 itype = signed_type_for (vtype); 1982 if (i != 0) 1983 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); 1984 else 1985 t = tem; 1986 t = fold_convert (itype, t); 1987 t = fold_build2 (MULT_EXPR, itype, t, 1988 fold_convert (itype, fd->loops[i].step)); 1989 if (POINTER_TYPE_P (vtype)) 1990 t = fold_build_pointer_plus (fd->loops[i].n1, t); 1991 else 1992 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); 1993 t = force_gimple_operand_gsi (gsi, t, 1994 DECL_P (fd->loops[i].v) 1995 && TREE_ADDRESSABLE (fd->loops[i].v), 1996 NULL_TREE, false, 1997 GSI_CONTINUE_LINKING); 1998 stmt = gimple_build_assign (fd->loops[i].v, t); 1999 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2000 if (i != 0) 2001 { 2002 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); 
2003 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, 2004 false, GSI_CONTINUE_LINKING); 2005 stmt = gimple_build_assign (tem, t); 2006 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); 2007 } 2008 } 2009} 2010 2011/* Helper function for expand_omp_for_*. Generate code like: 2012 L10: 2013 V3 += STEP3; 2014 if (V3 cond3 N32) goto BODY_BB; else goto L11; 2015 L11: 2016 V3 = N31; 2017 V2 += STEP2; 2018 if (V2 cond2 N22) goto BODY_BB; else goto L12; 2019 L12: 2020 V2 = N21; 2021 V1 += STEP1; 2022 goto BODY_BB; */ 2023 2024static basic_block 2025extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, 2026 basic_block body_bb) 2027{ 2028 basic_block last_bb, bb, collapse_bb = NULL; 2029 int i; 2030 gimple_stmt_iterator gsi; 2031 edge e; 2032 tree t; 2033 gimple *stmt; 2034 2035 last_bb = cont_bb; 2036 for (i = fd->collapse - 1; i >= 0; i--) 2037 { 2038 tree vtype = TREE_TYPE (fd->loops[i].v); 2039 2040 bb = create_empty_bb (last_bb); 2041 add_bb_to_loop (bb, last_bb->loop_father); 2042 gsi = gsi_start_bb (bb); 2043 2044 if (i < fd->collapse - 1) 2045 { 2046 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); 2047 e->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2048 2049 t = fd->loops[i + 1].n1; 2050 t = force_gimple_operand_gsi (&gsi, t, 2051 DECL_P (fd->loops[i + 1].v) 2052 && TREE_ADDRESSABLE (fd->loops[i 2053 + 1].v), 2054 NULL_TREE, false, 2055 GSI_CONTINUE_LINKING); 2056 stmt = gimple_build_assign (fd->loops[i + 1].v, t); 2057 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2058 } 2059 else 2060 collapse_bb = bb; 2061 2062 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); 2063 2064 if (POINTER_TYPE_P (vtype)) 2065 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); 2066 else 2067 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); 2068 t = force_gimple_operand_gsi (&gsi, t, 2069 DECL_P (fd->loops[i].v) 2070 && TREE_ADDRESSABLE (fd->loops[i].v), 2071 NULL_TREE, false, 
GSI_CONTINUE_LINKING); 2072 stmt = gimple_build_assign (fd->loops[i].v, t); 2073 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2074 2075 if (i > 0) 2076 { 2077 t = fd->loops[i].n2; 2078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2079 false, GSI_CONTINUE_LINKING); 2080 tree v = fd->loops[i].v; 2081 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 2082 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 2083 false, GSI_CONTINUE_LINKING); 2084 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); 2085 stmt = gimple_build_cond_empty (t); 2086 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); 2087 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)), 2088 expand_omp_regimplify_p, NULL, NULL) 2089 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)), 2090 expand_omp_regimplify_p, NULL, NULL)) 2091 gimple_regimplify_operands (stmt, &gsi); 2092 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); 2093 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 2094 } 2095 else 2096 make_edge (bb, body_bb, EDGE_FALLTHRU); 2097 last_bb = bb; 2098 } 2099 2100 return collapse_bb; 2101} 2102 2103/* Expand #pragma omp ordered depend(source). */ 2104 2105static void 2106expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2107 tree *counts, location_t loc) 2108{ 2109 enum built_in_function source_ix 2110 = fd->iter_type == long_integer_type_node 2111 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; 2112 gimple *g 2113 = gimple_build_call (builtin_decl_explicit (source_ix), 1, 2114 build_fold_addr_expr (counts[fd->ordered])); 2115 gimple_set_location (g, loc); 2116 gsi_insert_before (gsi, g, GSI_SAME_STMT); 2117} 2118 2119/* Expand a single depend from #pragma omp ordered depend(sink:...). 
*/ 2120 2121static void 2122expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, 2123 tree *counts, tree c, location_t loc) 2124{ 2125 auto_vec<tree, 10> args; 2126 enum built_in_function sink_ix 2127 = fd->iter_type == long_integer_type_node 2128 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; 2129 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; 2130 int i; 2131 gimple_stmt_iterator gsi2 = *gsi; 2132 bool warned_step = false; 2133 2134 for (i = 0; i < fd->ordered; i++) 2135 { 2136 tree step = NULL_TREE; 2137 off = TREE_PURPOSE (deps); 2138 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2139 { 2140 step = TREE_OPERAND (off, 1); 2141 off = TREE_OPERAND (off, 0); 2142 } 2143 if (!integer_zerop (off)) 2144 { 2145 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2146 || fd->loops[i].cond_code == GT_EXPR); 2147 bool forward = fd->loops[i].cond_code == LT_EXPR; 2148 if (step) 2149 { 2150 /* Non-simple Fortran DO loops. If step is variable, 2151 we don't know at compile even the direction, so can't 2152 warn. */ 2153 if (TREE_CODE (step) != INTEGER_CST) 2154 break; 2155 forward = tree_int_cst_sgn (step) != -1; 2156 } 2157 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2158 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2159 "waiting for lexically later iteration"); 2160 break; 2161 } 2162 deps = TREE_CHAIN (deps); 2163 } 2164 /* If all offsets corresponding to the collapsed loops are zero, 2165 this depend clause can be ignored. FIXME: but there is still a 2166 flush needed. We need to emit one __sync_synchronize () for it 2167 though (perhaps conditionally)? Solve this together with the 2168 conservative dependence folding optimization.
2169 if (i >= fd->collapse) 2170 return; */ 2171 2172 deps = OMP_CLAUSE_DECL (c); 2173 gsi_prev (&gsi2); 2174 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); 2175 edge e2 = split_block_after_labels (e1->dest); 2176 2177 gsi2 = gsi_after_labels (e1->dest); 2178 *gsi = gsi_last_bb (e1->src); 2179 for (i = 0; i < fd->ordered; i++) 2180 { 2181 tree itype = TREE_TYPE (fd->loops[i].v); 2182 tree step = NULL_TREE; 2183 tree orig_off = NULL_TREE; 2184 if (POINTER_TYPE_P (itype)) 2185 itype = sizetype; 2186 if (i) 2187 deps = TREE_CHAIN (deps); 2188 off = TREE_PURPOSE (deps); 2189 if (TREE_CODE (off) == TRUNC_DIV_EXPR) 2190 { 2191 step = TREE_OPERAND (off, 1); 2192 off = TREE_OPERAND (off, 0); 2193 gcc_assert (fd->loops[i].cond_code == LT_EXPR 2194 && integer_onep (fd->loops[i].step) 2195 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); 2196 } 2197 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step); 2198 if (step) 2199 { 2200 off = fold_convert_loc (loc, itype, off); 2201 orig_off = off; 2202 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2203 } 2204 2205 if (integer_zerop (off)) 2206 t = boolean_true_node; 2207 else 2208 { 2209 tree a; 2210 tree co = fold_convert_loc (loc, itype, off); 2211 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 2212 { 2213 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2214 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); 2215 a = fold_build2_loc (loc, POINTER_PLUS_EXPR, 2216 TREE_TYPE (fd->loops[i].v), fd->loops[i].v, 2217 co); 2218 } 2219 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2220 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2221 fd->loops[i].v, co); 2222 else 2223 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 2224 fd->loops[i].v, co); 2225 if (step) 2226 { 2227 tree t1, t2; 2228 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2229 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2230 fd->loops[i].n1); 2231 else 2232 t1 = fold_build2_loc (loc,
LT_EXPR, boolean_type_node, a, 2233 fd->loops[i].n2); 2234 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2235 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2236 fd->loops[i].n2); 2237 else 2238 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2239 fd->loops[i].n1); 2240 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, 2241 step, build_int_cst (TREE_TYPE (step), 0)); 2242 if (TREE_CODE (step) != INTEGER_CST) 2243 { 2244 t1 = unshare_expr (t1); 2245 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, 2246 false, GSI_CONTINUE_LINKING); 2247 t2 = unshare_expr (t2); 2248 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, 2249 false, GSI_CONTINUE_LINKING); 2250 } 2251 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, 2252 t, t2, t1); 2253 } 2254 else if (fd->loops[i].cond_code == LT_EXPR) 2255 { 2256 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2257 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, 2258 fd->loops[i].n1); 2259 else 2260 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, 2261 fd->loops[i].n2); 2262 } 2263 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2264 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, 2265 fd->loops[i].n2); 2266 else 2267 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, 2268 fd->loops[i].n1); 2269 } 2270 if (cond) 2271 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); 2272 else 2273 cond = t; 2274 2275 off = fold_convert_loc (loc, itype, off); 2276 2277 if (step 2278 || (fd->loops[i].cond_code == LT_EXPR 2279 ? !integer_onep (fd->loops[i].step) 2280 : !integer_minus_onep (fd->loops[i].step))) 2281 { 2282 if (step == NULL_TREE 2283 && TYPE_UNSIGNED (itype) 2284 && fd->loops[i].cond_code == GT_EXPR) 2285 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, 2286 fold_build1_loc (loc, NEGATE_EXPR, itype, 2287 s)); 2288 else 2289 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, 2290 orig_off ?
orig_off : off, s); 2291 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, 2292 build_int_cst (itype, 0)); 2293 if (integer_zerop (t) && !warned_step) 2294 { 2295 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier " 2296 "refers to iteration never in the iteration " 2297 "space"); 2298 warned_step = true; 2299 } 2300 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, 2301 cond, t); 2302 } 2303 2304 if (i <= fd->collapse - 1 && fd->collapse > 1) 2305 t = fd->loop.v; 2306 else if (counts[i]) 2307 t = counts[i]; 2308 else 2309 { 2310 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2311 fd->loops[i].v, fd->loops[i].n1); 2312 t = fold_convert_loc (loc, fd->iter_type, t); 2313 } 2314 if (step) 2315 /* We have divided off by step already earlier. */; 2316 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) 2317 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, 2318 fold_build1_loc (loc, NEGATE_EXPR, itype, 2319 s)); 2320 else 2321 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); 2322 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) 2323 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); 2324 off = fold_convert_loc (loc, fd->iter_type, off); 2325 if (i <= fd->collapse - 1 && fd->collapse > 1) 2326 { 2327 if (i) 2328 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, 2329 off); 2330 if (i < fd->collapse - 1) 2331 { 2332 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, 2333 counts[i]); 2334 continue; 2335 } 2336 } 2337 off = unshare_expr (off); 2338 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); 2339 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 2340 true, GSI_SAME_STMT); 2341 args.safe_push (t); 2342 } 2343 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); 2344 gimple_set_location (g, loc); 2345 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 2346 2347 cond = unshare_expr (cond); 2348 cond = force_gimple_operand_gsi (gsi, cond,
true, NULL_TREE, false, 2349 GSI_CONTINUE_LINKING); 2350 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2351 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2352 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2353 e1->probability = e3->probability.invert (); 2354 e1->flags = EDGE_TRUE_VALUE; 2355 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2356 2357 *gsi = gsi_after_labels (e2->dest); 2358} 2359 2360/* Expand all #pragma omp ordered depend(source) and 2361 #pragma omp ordered depend(sink:...) constructs in the current 2362 #pragma omp for ordered(n) region. */ 2363 2364static void 2365expand_omp_ordered_source_sink (struct omp_region *region, 2366 struct omp_for_data *fd, tree *counts, 2367 basic_block cont_bb) 2368{ 2369 struct omp_region *inner; 2370 int i; 2371 for (i = fd->collapse - 1; i < fd->ordered; i++) 2372 if (i == fd->collapse - 1 && fd->collapse > 1) 2373 counts[i] = NULL_TREE; 2374 else if (i >= fd->collapse && !cont_bb) 2375 counts[i] = build_zero_cst (fd->iter_type); 2376 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2377 && integer_onep (fd->loops[i].step)) 2378 counts[i] = NULL_TREE; 2379 else 2380 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2381 tree atype 2382 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2383 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2384 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2385 2386 for (inner = region->inner; inner; inner = inner->next) 2387 if (inner->type == GIMPLE_OMP_ORDERED) 2388 { 2389 gomp_ordered *ord_stmt = inner->ord_stmt; 2390 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2391 location_t loc = gimple_location (ord_stmt); 2392 tree c; 2393 for (c = gimple_omp_ordered_clauses (ord_stmt); 2394 c; c = OMP_CLAUSE_CHAIN (c)) 2395 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2396 break; 2397 if (c) 2398 expand_omp_ordered_source (&gsi, fd, counts,
loc); 2399 for (c = gimple_omp_ordered_clauses (ord_stmt); 2400 c; c = OMP_CLAUSE_CHAIN (c)) 2401 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2402 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2403 gsi_remove (&gsi, true); 2404 } 2405} 2406 2407/* Wrap the body into fd->ordered - fd->collapse loops that aren't 2408 collapsed. */ 2409 2410static basic_block 2411expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2412 basic_block cont_bb, basic_block body_bb, 2413 basic_block l0_bb, bool ordered_lastprivate) 2414{ 2415 if (fd->ordered == fd->collapse) 2416 return cont_bb; 2417 2418 if (!cont_bb) 2419 { 2420 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2421 for (int i = fd->collapse; i < fd->ordered; i++) 2422 { 2423 tree type = TREE_TYPE (fd->loops[i].v); 2424 tree n1 = fold_convert (type, fd->loops[i].n1); 2425 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2426 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2427 size_int (i - fd->collapse + 1), 2428 NULL_TREE, NULL_TREE); 2429 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2430 } 2431 return NULL; 2432 } 2433 2434 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2435 { 2436 tree t, type = TREE_TYPE (fd->loops[i].v); 2437 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2438 expand_omp_build_assign (&gsi, fd->loops[i].v, 2439 fold_convert (type, fd->loops[i].n1)); 2440 if (counts[i]) 2441 expand_omp_build_assign (&gsi, counts[i], 2442 build_zero_cst (fd->iter_type)); 2443 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2444 size_int (i - fd->collapse + 1), 2445 NULL_TREE, NULL_TREE); 2446 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2447 if (!gsi_end_p (gsi)) 2448 gsi_prev (&gsi); 2449 else 2450 gsi = gsi_last_bb (body_bb); 2451 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2452 basic_block new_body = e1->dest; 2453 if (body_bb == cont_bb) 2454 cont_bb = new_body;
/* Below: create the increment/latch code in CONT_BB and a new header
   block with the exit test, turning BODY_BB..CONT_BB into a loop for
   this non-collapsed ordered dimension.  */
2455 edge e2 = NULL; 2456 basic_block new_header; 2457 if (EDGE_COUNT (cont_bb->preds) > 0) 2458 { 2459 gsi = gsi_last_bb (cont_bb); 2460 if (POINTER_TYPE_P (type)) 2461 t = fold_build_pointer_plus (fd->loops[i].v, 2462 fold_convert (sizetype, 2463 fd->loops[i].step)); 2464 else 2465 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2466 fold_convert (type, fd->loops[i].step)); 2467 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2468 if (counts[i]) 2469 { 2470 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2471 build_int_cst (fd->iter_type, 1)); 2472 expand_omp_build_assign (&gsi, counts[i], t); 2473 t = counts[i]; 2474 } 2475 else 2476 { 2477 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2478 fd->loops[i].v, fd->loops[i].n1); 2479 t = fold_convert (fd->iter_type, t); 2480 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2481 true, GSI_SAME_STMT); 2482 } 2483 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2484 size_int (i - fd->collapse + 1), 2485 NULL_TREE, NULL_TREE); 2486 expand_omp_build_assign (&gsi, aref, t); 2487 gsi_prev (&gsi); 2488 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2489 new_header = e2->dest; 2490 } 2491 else 2492 new_header = cont_bb; 2493 gsi = gsi_after_labels (new_header); 2494 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2495 true, GSI_SAME_STMT); 2496 tree n2 2497 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2498 true, NULL_TREE, true, GSI_SAME_STMT); 2499 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2500 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2501 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2502 cont_bb = e3->dest; 2503 remove_edge (e1); 2504 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2505 e3->flags = EDGE_FALSE_VALUE; 2506 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2507 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2508 e1->probability
= e3->probability.invert (); 2509 2510 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 2511 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2512 2513 if (e2) 2514 { 2515 class loop *loop = alloc_loop (); 2516 loop->header = new_header; 2517 loop->latch = e2->src; 2518 add_loop (loop, l0_bb->loop_father); 2519 } 2520 } 2521 2522 /* If there are any lastprivate clauses and it is possible some loops 2523 might have zero iterations, ensure all the decls are initialized, 2524 otherwise we could crash evaluating C++ class iterators with lastprivate 2525 clauses. */ 2526 bool need_inits = false; 2527 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2528 if (need_inits) 2529 { 2530 tree type = TREE_TYPE (fd->loops[i].v); 2531 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2532 expand_omp_build_assign (&gsi, fd->loops[i].v, 2533 fold_convert (type, fd->loops[i].n1)); 2534 } 2535 else 2536 { 2537 tree type = TREE_TYPE (fd->loops[i].v); 2538 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2539 boolean_type_node, 2540 fold_convert (type, fd->loops[i].n1), 2541 fold_convert (type, fd->loops[i].n2)); 2542 if (!integer_onep (this_cond)) 2543 need_inits = true; 2544 } 2545 2546 return cont_bb; 2547} 2548 2549/* A subroutine of expand_omp_for. Generate code for a parallel 2550 loop with any schedule. Given parameters: 2551 2552 for (V = N1; V cond N2; V += STEP) BODY; 2553 2554 where COND is "<" or ">", we generate pseudocode 2555 2556 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2557 if (more) goto L0; else goto L3; 2558 L0: 2559 V = istart0; 2560 iend = iend0; 2561 L1: 2562 BODY; 2563 V += STEP; 2564 if (V cond iend) goto L1; else goto L2; 2565 L2: 2566 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2567 L3: 2568 2569 If this is a combined omp parallel loop, instead of the call to 2570 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2571 If this is gimple_omp_for_combined_p loop, then instead of assigning 2572 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2573 inner GIMPLE_OMP_FOR and V += STEP; and 2574 if (V cond iend) goto L1; else goto L2; are removed. 2575 2576 For collapsed loops, given parameters: 2577 collapse(3) 2578 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2579 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2580 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2581 BODY; 2582 2583 we generate pseudocode 2584 2585 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2586 if (cond3 is <) 2587 adj = STEP3 - 1; 2588 else 2589 adj = STEP3 + 1; 2590 count3 = (adj + N32 - N31) / STEP3; 2591 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2592 if (cond2 is <) 2593 adj = STEP2 - 1; 2594 else 2595 adj = STEP2 + 1; 2596 count2 = (adj + N22 - N21) / STEP2; 2597 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2598 if (cond1 is <) 2599 adj = STEP1 - 1; 2600 else 2601 adj = STEP1 + 1; 2602 count1 = (adj + N12 - N11) / STEP1; 2603 count = count1 * count2 * count3; 2604 goto Z1; 2605 Z0: 2606 count = 0; 2607 Z1: 2608 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2609 if (more) goto L0; else goto L3; 2610 L0: 2611 V = istart0; 2612 T = V; 2613 V3 = N31 + (T % count3) * STEP3; 2614 T = T / count3; 2615 V2 = N21 + (T % count2) * STEP2; 2616 T = T / count2; 2617 V1 = N11 + T * STEP1; 2618 iend = iend0; 2619 L1: 2620 BODY; 2621 V += 1; 2622 if (V < iend) goto L10; else goto L2; 2623 L10: 2624 V3 += STEP3; 2625 if (V3 cond3 N32) goto L1; else goto L11; 2626 L11: 2627 V3 = N31; 2628 V2 += STEP2; 2629 if (V2 cond2 N22) goto L1; else goto L12; 2630 L12: 2631 V2 = N21; 2632 V1 += STEP1; 2633 goto L1; 2634 L2: 2635 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2636 L3: 2637 2638 */ 2639 2640static void 2641expand_omp_for_generic (struct omp_region *region, 2642 struct omp_for_data *fd, 2643 enum built_in_function start_fn, 2644 enum built_in_function 
next_fn, 2645 tree sched_arg, 2646 gimple *inner_stmt) 2647{ 2648 tree type, istart0, iend0, iend; 2649 tree t, vmain, vback, bias = NULL_TREE; 2650 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; 2651 basic_block l2_bb = NULL, l3_bb = NULL; 2652 gimple_stmt_iterator gsi; 2653 gassign *assign_stmt; 2654 bool in_combined_parallel = is_combined_parallel (region); 2655 bool broken_loop = region->cont == NULL; 2656 edge e, ne; 2657 tree *counts = NULL; 2658 int i; 2659 bool ordered_lastprivate = false; 2660 2661 gcc_assert (!broken_loop || !in_combined_parallel); 2662 gcc_assert (fd->iter_type == long_integer_type_node 2663 || !in_combined_parallel); 2664 2665 entry_bb = region->entry; 2666 cont_bb = region->cont; 2667 collapse_bb = NULL; 2668 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 2669 gcc_assert (broken_loop 2670 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 2671 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 2672 l1_bb = single_succ (l0_bb); 2673 if (!broken_loop) 2674 { 2675 l2_bb = create_empty_bb (cont_bb); 2676 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb 2677 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest 2678 == l1_bb)); 2679 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 2680 } 2681 else 2682 l2_bb = NULL; 2683 l3_bb = BRANCH_EDGE (entry_bb)->dest; 2684 exit_bb = region->exit; 2685 2686 gsi = gsi_last_nondebug_bb (entry_bb); 2687 2688 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); /* Doacross loop with a lastprivate clause: record this so that expand_omp_for_ordered_loops initializes the non-collapsed loop vars even when some loop may have zero iterations. The previous code assigned false here, a no-op given the initializer above, so the flag could never be set. */ 2689 if (fd->ordered 2690 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2691 OMP_CLAUSE_LASTPRIVATE)) 2692 ordered_lastprivate = true; 2693 tree reductions = NULL_TREE; 2694 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE; 2695 tree memv = NULL_TREE; 2696 if (fd->lastprivate_conditional) 2697 { 2698 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2699 OMP_CLAUSE__CONDTEMP_); 2700 if (fd->have_pointer_condtemp) 2701 condtemp = OMP_CLAUSE_DECL (c); 2702 c =
omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 2703 cond_var = OMP_CLAUSE_DECL (c); 2704 } 2705 if (sched_arg) 2706 { 2707 if (fd->have_reductemp) 2708 { 2709 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2710 OMP_CLAUSE__REDUCTEMP_); 2711 reductions = OMP_CLAUSE_DECL (c); 2712 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 2713 gimple *g = SSA_NAME_DEF_STMT (reductions); 2714 reductions = gimple_assign_rhs1 (g); 2715 OMP_CLAUSE_DECL (c) = reductions; 2716 entry_bb = gimple_bb (g); 2717 edge e = split_block (entry_bb, g); 2718 if (region->entry == entry_bb) 2719 region->entry = e->dest; 2720 gsi = gsi_last_bb (entry_bb); 2721 } 2722 else 2723 reductions = null_pointer_node; 2724 if (fd->have_pointer_condtemp) 2725 { 2726 tree type = TREE_TYPE (condtemp); 2727 memv = create_tmp_var (type); 2728 TREE_ADDRESSABLE (memv) = 1; 2729 unsigned HOST_WIDE_INT sz 2730 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 2731 sz *= fd->lastprivate_conditional; 2732 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz), 2733 false); 2734 mem = build_fold_addr_expr (memv); 2735 } 2736 else 2737 mem = null_pointer_node; 2738 } 2739 if (fd->collapse > 1 || fd->ordered) 2740 { 2741 int first_zero_iter1 = -1, first_zero_iter2 = -1; 2742 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; 2743 2744 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); 2745 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 2746 zero_iter1_bb, first_zero_iter1, 2747 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 2748 2749 if (zero_iter1_bb) 2750 { 2751 /* Some counts[i] vars might be uninitialized if 2752 some loop has zero iterations. But the body shouldn't 2753 be executed in that case, so just avoid uninit warnings. */ 2754 for (i = first_zero_iter1; 2755 i < (fd->ordered ? 
fd->ordered : fd->collapse); i++) 2756 if (SSA_VAR_P (counts[i])) 2757 TREE_NO_WARNING (counts[i]) = 1; 2758 gsi_prev (&gsi); 2759 e = split_block (entry_bb, gsi_stmt (gsi)); 2760 entry_bb = e->dest; 2761 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 2762 gsi = gsi_last_nondebug_bb (entry_bb); 2763 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2764 get_immediate_dominator (CDI_DOMINATORS, 2765 zero_iter1_bb)); 2766 } 2767 if (zero_iter2_bb) 2768 { 2769 /* Some counts[i] vars might be uninitialized if 2770 some loop has zero iterations. But the body shouldn't 2771 be executed in that case, so just avoid uninit warnings. */ 2772 for (i = first_zero_iter2; i < fd->ordered; i++) 2773 if (SSA_VAR_P (counts[i])) 2774 TREE_NO_WARNING (counts[i]) = 1; 2775 if (zero_iter1_bb) 2776 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2777 else 2778 { 2779 gsi_prev (&gsi); 2780 e = split_block (entry_bb, gsi_stmt (gsi)); 2781 entry_bb = e->dest; 2782 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2783 gsi = gsi_last_nondebug_bb (entry_bb); 2784 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2785 get_immediate_dominator 2786 (CDI_DOMINATORS, zero_iter2_bb)); 2787 } 2788 } 2789 if (fd->collapse == 1) 2790 { 2791 counts[0] = fd->loop.n2; 2792 fd->loop = fd->loops[0]; 2793 } 2794 } 2795 2796 type = TREE_TYPE (fd->loop.v); 2797 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 2798 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 2799 TREE_ADDRESSABLE (istart0) = 1; 2800 TREE_ADDRESSABLE (iend0) = 1; 2801 2802 /* See if we need to bias by LLONG_MIN. 
*/ 2803 if (fd->iter_type == long_long_unsigned_type_node 2804 && TREE_CODE (type) == INTEGER_TYPE 2805 && !TYPE_UNSIGNED (type) 2806 && fd->ordered == 0) 2807 { 2808 tree n1, n2; 2809 2810 if (fd->loop.cond_code == LT_EXPR) 2811 { 2812 n1 = fd->loop.n1; 2813 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2814 } 2815 else 2816 { 2817 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2818 n2 = fd->loop.n1; 2819 } 2820 if (TREE_CODE (n1) != INTEGER_CST 2821 || TREE_CODE (n2) != INTEGER_CST 2822 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2823 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2824 } 2825 2826 gimple_stmt_iterator gsif = gsi; 2827 gsi_prev (&gsif); 2828 2829 tree arr = NULL_TREE; 2830 if (in_combined_parallel) 2831 { 2832 gcc_assert (fd->ordered == 0); 2833 /* In a combined parallel loop, emit a call to 2834 GOMP_loop_foo_next. */ 2835 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2836 build_fold_addr_expr (istart0), 2837 build_fold_addr_expr (iend0)); 2838 } 2839 else 2840 { 2841 tree t0, t1, t2, t3, t4; 2842 /* If this is not a combined parallel loop, emit a call to 2843 GOMP_loop_foo_start in ENTRY_BB. 
*/ 2844 t4 = build_fold_addr_expr (iend0); 2845 t3 = build_fold_addr_expr (istart0); 2846 if (fd->ordered) 2847 { 2848 t0 = build_int_cst (unsigned_type_node, 2849 fd->ordered - fd->collapse + 1); 2850 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2851 fd->ordered 2852 - fd->collapse + 1), 2853 ".omp_counts"); 2854 DECL_NAMELESS (arr) = 1; 2855 TREE_ADDRESSABLE (arr) = 1; 2856 TREE_STATIC (arr) = 1; 2857 vec<constructor_elt, va_gc> *v; 2858 vec_alloc (v, fd->ordered - fd->collapse + 1); 2859 int idx; 2860 2861 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2862 { 2863 tree c; 2864 if (idx == 0 && fd->collapse > 1) 2865 c = fd->loop.n2; 2866 else 2867 c = counts[idx + fd->collapse - 1]; 2868 tree purpose = size_int (idx); 2869 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2870 if (TREE_CODE (c) != INTEGER_CST) 2871 TREE_STATIC (arr) = 0; 2872 } 2873 2874 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2875 if (!TREE_STATIC (arr)) 2876 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2877 void_type_node, arr), 2878 true, NULL_TREE, true, GSI_SAME_STMT); 2879 t1 = build_fold_addr_expr (arr); 2880 t2 = NULL_TREE; 2881 } 2882 else 2883 { 2884 t2 = fold_convert (fd->iter_type, fd->loop.step); 2885 t1 = fd->loop.n2; 2886 t0 = fd->loop.n1; 2887 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2888 { 2889 tree innerc 2890 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2891 OMP_CLAUSE__LOOPTEMP_); 2892 gcc_assert (innerc); 2893 t0 = OMP_CLAUSE_DECL (innerc); 2894 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2895 OMP_CLAUSE__LOOPTEMP_); 2896 gcc_assert (innerc); 2897 t1 = OMP_CLAUSE_DECL (innerc); 2898 } 2899 if (POINTER_TYPE_P (TREE_TYPE (t0)) 2900 && TYPE_PRECISION (TREE_TYPE (t0)) 2901 != TYPE_PRECISION (fd->iter_type)) 2902 { 2903 /* Avoid casting pointers to integer of a different size. 
*/ 2904 tree itype = signed_type_for (type); 2905 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2906 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2907 } 2908 else 2909 { 2910 t1 = fold_convert (fd->iter_type, t1); 2911 t0 = fold_convert (fd->iter_type, t0); 2912 } 2913 if (bias) 2914 { 2915 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2916 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2917 } 2918 } 2919 if (fd->iter_type == long_integer_type_node || fd->ordered) 2920 { 2921 if (fd->chunk_size) 2922 { 2923 t = fold_convert (fd->iter_type, fd->chunk_size); 2924 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2925 if (sched_arg) 2926 { 2927 if (fd->ordered) 2928 t = build_call_expr (builtin_decl_explicit (start_fn), 2929 8, t0, t1, sched_arg, t, t3, t4, 2930 reductions, mem); 2931 else 2932 t = build_call_expr (builtin_decl_explicit (start_fn), 2933 9, t0, t1, t2, sched_arg, t, t3, t4, 2934 reductions, mem); 2935 } 2936 else if (fd->ordered) 2937 t = build_call_expr (builtin_decl_explicit (start_fn), 2938 5, t0, t1, t, t3, t4); 2939 else 2940 t = build_call_expr (builtin_decl_explicit (start_fn), 2941 6, t0, t1, t2, t, t3, t4); 2942 } 2943 else if (fd->ordered) 2944 t = build_call_expr (builtin_decl_explicit (start_fn), 2945 4, t0, t1, t3, t4); 2946 else 2947 t = build_call_expr (builtin_decl_explicit (start_fn), 2948 5, t0, t1, t2, t3, t4); 2949 } 2950 else 2951 { 2952 tree t5; 2953 tree c_bool_type; 2954 tree bfn_decl; 2955 2956 /* The GOMP_loop_ull_*start functions have additional boolean 2957 argument, true for < loops and false for > loops. 2958 In Fortran, the C bool type can be different from 2959 boolean_type_node. */ 2960 bfn_decl = builtin_decl_explicit (start_fn); 2961 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2962 t5 = build_int_cst (c_bool_type, 2963 fd->loop.cond_code == LT_EXPR ? 
1 : 0); 2964 if (fd->chunk_size) 2965 { 2966 tree bfn_decl = builtin_decl_explicit (start_fn); 2967 t = fold_convert (fd->iter_type, fd->chunk_size); 2968 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2969 if (sched_arg) 2970 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg, 2971 t, t3, t4, reductions, mem); 2972 else 2973 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2974 } 2975 else 2976 t = build_call_expr (builtin_decl_explicit (start_fn), 2977 6, t5, t0, t1, t2, t3, t4); 2978 } 2979 } 2980 if (TREE_TYPE (t) != boolean_type_node) 2981 t = fold_build2 (NE_EXPR, boolean_type_node, 2982 t, build_int_cst (TREE_TYPE (t), 0)); 2983 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2984 true, GSI_SAME_STMT); 2985 if (arr && !TREE_STATIC (arr)) 2986 { 2987 tree clobber = build_clobber (TREE_TYPE (arr)); 2988 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2989 GSI_SAME_STMT); 2990 } 2991 if (fd->have_pointer_condtemp) 2992 expand_omp_build_assign (&gsi, condtemp, memv, false); 2993 if (fd->have_reductemp) 2994 { 2995 gimple *g = gsi_stmt (gsi); 2996 gsi_remove (&gsi, true); 2997 release_ssa_name (gimple_assign_lhs (g)); 2998 2999 entry_bb = region->entry; 3000 gsi = gsi_last_nondebug_bb (entry_bb); 3001 3002 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3003 } 3004 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 3005 3006 /* Remove the GIMPLE_OMP_FOR statement. */ 3007 gsi_remove (&gsi, true); 3008 3009 if (gsi_end_p (gsif)) 3010 gsif = gsi_after_labels (gsi_bb (gsif)); 3011 gsi_next (&gsif); 3012 3013 /* Iteration setup for sequential loop goes in L0_BB. 
*/ 3014 tree startvar = fd->loop.v; 3015 tree endvar = NULL_TREE; 3016 3017 if (gimple_omp_for_combined_p (fd->for_stmt)) 3018 { 3019 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 3020 && gimple_omp_for_kind (inner_stmt) 3021 == GF_OMP_FOR_KIND_SIMD); 3022 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 3023 OMP_CLAUSE__LOOPTEMP_); 3024 gcc_assert (innerc); 3025 startvar = OMP_CLAUSE_DECL (innerc); 3026 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3027 OMP_CLAUSE__LOOPTEMP_); 3028 gcc_assert (innerc); 3029 endvar = OMP_CLAUSE_DECL (innerc); 3030 } 3031 3032 gsi = gsi_start_bb (l0_bb); 3033 t = istart0; 3034 if (fd->ordered && fd->collapse == 1) 3035 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3036 fold_convert (fd->iter_type, fd->loop.step)); 3037 else if (bias) 3038 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3039 if (fd->ordered && fd->collapse == 1) 3040 { 3041 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3042 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3043 fd->loop.n1, fold_convert (sizetype, t)); 3044 else 3045 { 3046 t = fold_convert (TREE_TYPE (startvar), t); 3047 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3048 fd->loop.n1, t); 3049 } 3050 } 3051 else 3052 { 3053 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3054 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3055 t = fold_convert (TREE_TYPE (startvar), t); 3056 } 3057 t = force_gimple_operand_gsi (&gsi, t, 3058 DECL_P (startvar) 3059 && TREE_ADDRESSABLE (startvar), 3060 NULL_TREE, false, GSI_CONTINUE_LINKING); 3061 assign_stmt = gimple_build_assign (startvar, t); 3062 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3063 if (cond_var) 3064 { 3065 tree itype = TREE_TYPE (cond_var); 3066 /* For lastprivate(conditional:) itervar, we need some iteration 3067 counter that starts at unsigned non-zero and increases. 
3068 Prefer as few IVs as possible, so if we can use startvar 3069 itself, use that, or startvar + constant (those would be 3070 incremented with step), and as last resort use the s0 + 1 3071 incremented by 1. */ 3072 if ((fd->ordered && fd->collapse == 1) 3073 || bias 3074 || POINTER_TYPE_P (type) 3075 || TREE_CODE (fd->loop.n1) != INTEGER_CST 3076 || fd->loop.cond_code != LT_EXPR) 3077 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0), 3078 build_int_cst (itype, 1)); 3079 else if (tree_int_cst_sgn (fd->loop.n1) == 1) 3080 t = fold_convert (itype, t); 3081 else 3082 { 3083 tree c = fold_convert (itype, fd->loop.n1); 3084 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 3085 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 3086 } 3087 t = force_gimple_operand_gsi (&gsi, t, false, 3088 NULL_TREE, false, GSI_CONTINUE_LINKING); 3089 assign_stmt = gimple_build_assign (cond_var, t); 3090 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3091 } 3092 3093 t = iend0; 3094 if (fd->ordered && fd->collapse == 1) 3095 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 3096 fold_convert (fd->iter_type, fd->loop.step)); 3097 else if (bias) 3098 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 3099 if (fd->ordered && fd->collapse == 1) 3100 { 3101 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3102 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 3103 fd->loop.n1, fold_convert (sizetype, t)); 3104 else 3105 { 3106 t = fold_convert (TREE_TYPE (startvar), t); 3107 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 3108 fd->loop.n1, t); 3109 } 3110 } 3111 else 3112 { 3113 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 3114 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 3115 t = fold_convert (TREE_TYPE (startvar), t); 3116 } 3117 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3118 false, GSI_CONTINUE_LINKING); 3119 if (endvar) 3120 { 3121 assign_stmt = gimple_build_assign (endvar, iend); 3122 
gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3123 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 3124 assign_stmt = gimple_build_assign (fd->loop.v, iend); 3125 else 3126 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 3127 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3128 } 3129 /* Handle linear clause adjustments. */ 3130 tree itercnt = NULL_TREE; 3131 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 3132 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 3133 c; c = OMP_CLAUSE_CHAIN (c)) 3134 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 3135 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 3136 { 3137 tree d = OMP_CLAUSE_DECL (c); 3138 bool is_ref = omp_is_reference (d); 3139 tree t = d, a, dest; 3140 if (is_ref) 3141 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 3142 tree type = TREE_TYPE (t); 3143 if (POINTER_TYPE_P (type)) 3144 type = sizetype; 3145 dest = unshare_expr (t); 3146 tree v = create_tmp_var (TREE_TYPE (t), NULL); 3147 expand_omp_build_assign (&gsif, v, t); 3148 if (itercnt == NULL_TREE) 3149 { 3150 itercnt = startvar; 3151 tree n1 = fd->loop.n1; 3152 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 3153 { 3154 itercnt 3155 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 3156 itercnt); 3157 n1 = fold_convert (TREE_TYPE (itercnt), n1); 3158 } 3159 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 3160 itercnt, n1); 3161 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 3162 itercnt, fd->loop.step); 3163 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 3164 NULL_TREE, false, 3165 GSI_CONTINUE_LINKING); 3166 } 3167 a = fold_build2 (MULT_EXPR, type, 3168 fold_convert (type, itercnt), 3169 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 3170 t = fold_build2 (type == TREE_TYPE (t) ? 
PLUS_EXPR 3171 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 3172 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3173 false, GSI_CONTINUE_LINKING); 3174 assign_stmt = gimple_build_assign (dest, t); 3175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 3176 } 3177 if (fd->collapse > 1) 3178 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 3179 3180 if (fd->ordered) 3181 { 3182 /* Until now, counts array contained number of iterations or 3183 variable containing it for ith loop. From now on, we need 3184 those counts only for collapsed loops, and only for the 2nd 3185 till the last collapsed one. Move those one element earlier, 3186 we'll use counts[fd->collapse - 1] for the first source/sink 3187 iteration counter and so on and counts[fd->ordered] 3188 as the array holding the current counter values for 3189 depend(source). */ 3190 if (fd->collapse > 1) 3191 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 3192 if (broken_loop) 3193 { 3194 int i; 3195 for (i = fd->collapse; i < fd->ordered; i++) 3196 { 3197 tree type = TREE_TYPE (fd->loops[i].v); 3198 tree this_cond 3199 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 3200 fold_convert (type, fd->loops[i].n1), 3201 fold_convert (type, fd->loops[i].n2)); 3202 if (!integer_onep (this_cond)) 3203 break; 3204 } 3205 if (i < fd->ordered) 3206 { 3207 if (entry_bb->loop_father != l0_bb->loop_father) 3208 { 3209 remove_bb_from_loops (l0_bb); 3210 add_bb_to_loop (l0_bb, entry_bb->loop_father); 3211 gcc_assert (single_succ (l0_bb) == l1_bb); 3212 } 3213 cont_bb 3214 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 3215 add_bb_to_loop (cont_bb, l0_bb->loop_father); 3216 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 3217 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 3218 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3219 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 3220 make_edge (cont_bb, l1_bb, 0); 3221 l2_bb = create_empty_bb 
(cont_bb); 3222 broken_loop = false; 3223 } 3224 } 3225 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 3226 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 3227 l0_bb, ordered_lastprivate); 3228 if (counts[fd->collapse - 1]) 3229 { 3230 gcc_assert (fd->collapse == 1); 3231 gsi = gsi_last_bb (l0_bb); 3232 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 3233 istart0, true); 3234 if (cont_bb) 3235 { 3236 gsi = gsi_last_bb (cont_bb); 3237 t = fold_build2 (PLUS_EXPR, fd->iter_type, 3238 counts[fd->collapse - 1], 3239 build_int_cst (fd->iter_type, 1)); 3240 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 3241 tree aref = build4 (ARRAY_REF, fd->iter_type, 3242 counts[fd->ordered], size_zero_node, 3243 NULL_TREE, NULL_TREE); 3244 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3245 } 3246 t = counts[fd->collapse - 1]; 3247 } 3248 else if (fd->collapse > 1) 3249 t = fd->loop.v; 3250 else 3251 { 3252 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3253 fd->loops[0].v, fd->loops[0].n1); 3254 t = fold_convert (fd->iter_type, t); 3255 } 3256 gsi = gsi_last_bb (l0_bb); 3257 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3258 size_zero_node, NULL_TREE, NULL_TREE); 3259 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3260 false, GSI_CONTINUE_LINKING); 3261 expand_omp_build_assign (&gsi, aref, t, true); 3262 } 3263 3264 if (!broken_loop) 3265 { 3266 /* Code to control the increment and predicate for the sequential 3267 loop goes in the CONT_BB. 
*/ 3268 gsi = gsi_last_nondebug_bb (cont_bb); 3269 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3270 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3271 vmain = gimple_omp_continue_control_use (cont_stmt); 3272 vback = gimple_omp_continue_control_def (cont_stmt); 3273 3274 if (cond_var) 3275 { 3276 tree itype = TREE_TYPE (cond_var); 3277 tree t2; 3278 if ((fd->ordered && fd->collapse == 1) 3279 || bias 3280 || POINTER_TYPE_P (type) 3281 || TREE_CODE (fd->loop.n1) != INTEGER_CST 3282 || fd->loop.cond_code != LT_EXPR) 3283 t2 = build_int_cst (itype, 1); 3284 else 3285 t2 = fold_convert (itype, fd->loop.step); 3286 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 3287 t2 = force_gimple_operand_gsi (&gsi, t2, false, 3288 NULL_TREE, true, GSI_SAME_STMT); 3289 assign_stmt = gimple_build_assign (cond_var, t2); 3290 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3291 } 3292 3293 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3294 { 3295 if (POINTER_TYPE_P (type)) 3296 t = fold_build_pointer_plus (vmain, fd->loop.step); 3297 else 3298 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3299 t = force_gimple_operand_gsi (&gsi, t, 3300 DECL_P (vback) 3301 && TREE_ADDRESSABLE (vback), 3302 NULL_TREE, true, GSI_SAME_STMT); 3303 assign_stmt = gimple_build_assign (vback, t); 3304 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3305 3306 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3307 { 3308 tree tem; 3309 if (fd->collapse > 1) 3310 tem = fd->loop.v; 3311 else 3312 { 3313 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3314 fd->loops[0].v, fd->loops[0].n1); 3315 tem = fold_convert (fd->iter_type, tem); 3316 } 3317 tree aref = build4 (ARRAY_REF, fd->iter_type, 3318 counts[fd->ordered], size_zero_node, 3319 NULL_TREE, NULL_TREE); 3320 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 3321 true, GSI_SAME_STMT); 3322 expand_omp_build_assign (&gsi, aref, tem); 3323 } 3324 3325 t = build2 
(fd->loop.cond_code, boolean_type_node, 3326 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3327 iend); 3328 gcond *cond_stmt = gimple_build_cond_empty (t); 3329 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3330 } 3331 3332 /* Remove GIMPLE_OMP_CONTINUE. */ 3333 gsi_remove (&gsi, true); 3334 3335 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3336 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3337 3338 /* Emit code to get the next parallel iteration in L2_BB. */ 3339 gsi = gsi_start_bb (l2_bb); 3340 3341 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3342 build_fold_addr_expr (istart0), 3343 build_fold_addr_expr (iend0)); 3344 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3345 false, GSI_CONTINUE_LINKING); 3346 if (TREE_TYPE (t) != boolean_type_node) 3347 t = fold_build2 (NE_EXPR, boolean_type_node, 3348 t, build_int_cst (TREE_TYPE (t), 0)); 3349 gcond *cond_stmt = gimple_build_cond_empty (t); 3350 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3351 } 3352 3353 /* Add the loop cleanup function. 
*/ 3354 gsi = gsi_last_nondebug_bb (exit_bb); 3355 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3356 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3357 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3358 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3359 else 3360 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3361 gcall *call_stmt = gimple_build_call (t, 0); 3362 if (fd->ordered) 3363 { 3364 tree arr = counts[fd->ordered]; 3365 tree clobber = build_clobber (TREE_TYPE (arr)); 3366 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3367 GSI_SAME_STMT); 3368 } 3369 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3370 { 3371 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3372 if (fd->have_reductemp) 3373 { 3374 gimple *g = gimple_build_assign (reductions, NOP_EXPR, 3375 gimple_call_lhs (call_stmt)); 3376 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 3377 } 3378 } 3379 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3380 gsi_remove (&gsi, true); 3381 3382 /* Connect the new blocks. 
*/ 3383 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3384 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3385 3386 if (!broken_loop) 3387 { 3388 gimple_seq phis; 3389 3390 e = find_edge (cont_bb, l3_bb); 3391 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3392 3393 phis = phi_nodes (l3_bb); 3394 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3395 { 3396 gimple *phi = gsi_stmt (gsi); 3397 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3398 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3399 } 3400 remove_edge (e); 3401 3402 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3403 e = find_edge (cont_bb, l1_bb); 3404 if (e == NULL) 3405 { 3406 e = BRANCH_EDGE (cont_bb); 3407 gcc_assert (single_succ (e->dest) == l1_bb); 3408 } 3409 if (gimple_omp_for_combined_p (fd->for_stmt)) 3410 { 3411 remove_edge (e); 3412 e = NULL; 3413 } 3414 else if (fd->collapse > 1) 3415 { 3416 remove_edge (e); 3417 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3418 } 3419 else 3420 e->flags = EDGE_TRUE_VALUE; 3421 if (e) 3422 { 3423 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 3424 find_edge (cont_bb, l2_bb)->probability = e->probability.invert (); 3425 } 3426 else 3427 { 3428 e = find_edge (cont_bb, l2_bb); 3429 e->flags = EDGE_FALLTHRU; 3430 } 3431 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3432 3433 if (gimple_in_ssa_p (cfun)) 3434 { 3435 /* Add phis to the outer loop that connect to the phis in the inner, 3436 original loop, and move the loop entry value of the inner phi to 3437 the loop entry value of the outer phi. 
*/ 3438 gphi_iterator psi; 3439 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3440 { 3441 location_t locus; 3442 gphi *nphi; 3443 gphi *exit_phi = psi.phi (); 3444 3445 if (virtual_operand_p (gimple_phi_result (exit_phi))) 3446 continue; 3447 3448 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3449 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3450 3451 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3452 edge latch_to_l1 = find_edge (latch, l1_bb); 3453 gphi *inner_phi 3454 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3455 3456 tree t = gimple_phi_result (exit_phi); 3457 tree new_res = copy_ssa_name (t, NULL); 3458 nphi = create_phi_node (new_res, l0_bb); 3459 3460 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3461 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3462 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3463 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3464 add_phi_arg (nphi, t, entry_to_l0, locus); 3465 3466 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3467 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3468 3469 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3470 } 3471 } 3472 3473 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3474 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3475 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3476 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3477 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3478 recompute_dominator (CDI_DOMINATORS, l0_bb)); 3479 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3480 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3481 3482 /* We enter expand_omp_for_generic with a loop. This original loop may 3483 have its own loop struct, or it may be part of an outer loop struct 3484 (which may be the fake loop). 
*/ 3485 class loop *outer_loop = entry_bb->loop_father; 3486 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3487 3488 add_bb_to_loop (l2_bb, outer_loop); 3489 3490 /* We've added a new loop around the original loop. Allocate the 3491 corresponding loop struct. */ 3492 class loop *new_loop = alloc_loop (); 3493 new_loop->header = l0_bb; 3494 new_loop->latch = l2_bb; 3495 add_loop (new_loop, outer_loop); 3496 3497 /* Allocate a loop structure for the original loop unless we already 3498 had one. */ 3499 if (!orig_loop_has_loop_struct 3500 && !gimple_omp_for_combined_p (fd->for_stmt)) 3501 { 3502 class loop *orig_loop = alloc_loop (); 3503 orig_loop->header = l1_bb; 3504 /* The loop may have multiple latches. */ 3505 add_loop (orig_loop, new_loop); 3506 } 3507 } 3508} 3509 3510/* Helper function for expand_omp_for_static_nochunk. If PTR is NULL, 3511 compute needed allocation size. If !ALLOC of team allocations, 3512 if ALLOC of thread allocation. SZ is the initial needed size for 3513 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes, 3514 CNT number of elements of each array, for !ALLOC this is 3515 omp_get_num_threads (), for ALLOC number of iterations handled by the 3516 current thread. If PTR is non-NULL, it is the start of the allocation 3517 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_ 3518 clauses pointers to the corresponding arrays. 
*/

static tree
expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
			   gimple_stmt_iterator *gsi, bool alloc)
{
  /* Running symbolic per-thread element size, only used in the
     sizing pass (PTR == NULL).  */
  tree eltsz = NULL_TREE;
  /* Alignment known to hold at the current layout position:
     0 before the first array has been laid out, 1 after an array whose
     element size isn't a compile time constant, otherwise the previous
     array's element alignment.  */
  unsigned HOST_WIDE_INT preval = 0;
  /* Skip over the SZ bytes reserved at the start of the allocation for
     other purposes.  */
  if (ptr && sz)
    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
		       ptr, size_int (sz));
  /* Walk the _scantemp_ clauses, skipping control temporaries, and pick
     only those whose allocation kind (thread vs. team) matches ALLOC.  */
  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
      {
	/* Each clause decl is a pointer; the array elements have the
	   pointed-to type.  */
	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  {
	    /* The required start alignment can be capped by the lowest
	       power of two dividing the element size: elements after the
	       first one can't be aligned more strictly than that anyway.  */
	    unsigned HOST_WIDE_INT szl
	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
	    szl = least_bit_hwi (szl);
	    if (szl)
	      al = MIN (al, szl);
	  }
	if (ptr == NULL_TREE)
	  {
	    /* Sizing pass: accumulate the per-element sizes; the CNT
	       multiplication is applied once at the end.  */
	    if (eltsz == NULL_TREE)
	      eltsz = TYPE_SIZE_UNIT (pointee_type);
	    else
	      eltsz = size_binop (PLUS_EXPR, eltsz,
				  TYPE_SIZE_UNIT (pointee_type));
	  }
	if (preval == 0 && al <= alloc_align)
	  {
	    /* First array and the allocation itself is aligned enough:
	       just pad SZ statically up to the needed alignment.  */
	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
	    sz += diff;
	    if (diff && ptr)
	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
				 ptr, size_int (diff));
	  }
	else if (al > preval)
	  {
	    /* Otherwise the current position may be misaligned at runtime.  */
	    if (ptr)
	      {
		/* Round the pointer up to AL at runtime:
		   (ptr + al - 1) & -al, done in an integer type.  */
		ptr = fold_convert (pointer_sized_int_node, ptr);
		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  al - 1));
		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
				   build_int_cst (pointer_sized_int_node,
						  -(HOST_WIDE_INT) al));
		ptr = fold_convert (ptr_type_node, ptr);
	      }
	    else
	      /* Sizing pass: reserve worst-case padding for the runtime
		 alignment above.  */
	      sz += al - 1;
	  }
	/* Record what alignment is guaranteed after this array; a
	   variable-sized element type guarantees nothing (1).  */
	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
	  preval = al;
	else
	  preval = 1;
	if (ptr)
	  {
	    /* Assign this array's start to the clause decl, then advance
	       past its CNT elements.  */
	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
	    ptr = OMP_CLAUSE_DECL (c);
	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
			       size_binop (MULT_EXPR, cnt,
					   TYPE_SIZE_UNIT (pointee_type)));
	  }
      }

  if (ptr == NULL_TREE)
    {
      /* Sizing pass result: CNT elements of each array plus the SZ bytes
	 of other data and padding accumulated above.  */
      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
      if (sz)
	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
      return eltsz;
    }
  else
    return ptr;
}

/* A subroutine of expand_omp_for.  Generate code for a parallel
   loop with static schedule and no specified chunk size.  Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:
*/

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
  gimple_stmt_iterator gsi, gsip;
  edge ep;
  /* No GIMPLE_OMP_CONTINUE region means the loop body never branches
     back.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
3654 tree n1, n2, step; 3655 tree reductions = NULL_TREE; 3656 tree cond_var = NULL_TREE, condtemp = NULL_TREE; 3657 3658 itype = type = TREE_TYPE (fd->loop.v); 3659 if (POINTER_TYPE_P (type)) 3660 itype = signed_type_for (type); 3661 3662 entry_bb = region->entry; 3663 cont_bb = region->cont; 3664 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 3665 fin_bb = BRANCH_EDGE (entry_bb)->dest; 3666 gcc_assert (broken_loop 3667 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 3668 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); 3669 body_bb = single_succ (seq_start_bb); 3670 if (!broken_loop) 3671 { 3672 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3673 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3674 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3675 } 3676 exit_bb = region->exit; 3677 3678 /* Iteration space partitioning goes in ENTRY_BB. */ 3679 gsi = gsi_last_nondebug_bb (entry_bb); 3680 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3681 gsip = gsi; 3682 gsi_prev (&gsip); 3683 3684 if (fd->collapse > 1) 3685 { 3686 int first_zero_iter = -1, dummy = -1; 3687 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3688 3689 counts = XALLOCAVEC (tree, fd->collapse); 3690 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3691 fin_bb, first_zero_iter, 3692 dummy_bb, dummy, l2_dom_bb); 3693 t = NULL_TREE; 3694 } 3695 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3696 t = integer_one_node; 3697 else 3698 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3699 fold_convert (type, fd->loop.n1), 3700 fold_convert (type, fd->loop.n2)); 3701 if (fd->collapse == 1 3702 && TYPE_UNSIGNED (type) 3703 && (t == NULL_TREE || !integer_onep (t))) 3704 { 3705 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3706 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3707 true, GSI_SAME_STMT); 3708 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3709 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3710 true, 
GSI_SAME_STMT); 3711 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 3712 NULL_TREE, NULL_TREE); 3713 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3714 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3715 expand_omp_regimplify_p, NULL, NULL) 3716 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3717 expand_omp_regimplify_p, NULL, NULL)) 3718 { 3719 gsi = gsi_for_stmt (cond_stmt); 3720 gimple_regimplify_operands (cond_stmt, &gsi); 3721 } 3722 ep = split_block (entry_bb, cond_stmt); 3723 ep->flags = EDGE_TRUE_VALUE; 3724 entry_bb = ep->dest; 3725 ep->probability = profile_probability::very_likely (); 3726 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); 3727 ep->probability = profile_probability::very_unlikely (); 3728 if (gimple_in_ssa_p (cfun)) 3729 { 3730 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; 3731 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3732 !gsi_end_p (gpi); gsi_next (&gpi)) 3733 { 3734 gphi *phi = gpi.phi (); 3735 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3736 ep, UNKNOWN_LOCATION); 3737 } 3738 } 3739 gsi = gsi_last_bb (entry_bb); 3740 } 3741 3742 if (fd->lastprivate_conditional) 3743 { 3744 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3745 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 3746 if (fd->have_pointer_condtemp) 3747 condtemp = OMP_CLAUSE_DECL (c); 3748 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 3749 cond_var = OMP_CLAUSE_DECL (c); 3750 } 3751 if (fd->have_reductemp 3752 /* For scan, we don't want to reinitialize condtemp before the 3753 second loop. 
*/ 3754 || (fd->have_pointer_condtemp && !fd->have_scantemp) 3755 || fd->have_nonctrl_scantemp) 3756 { 3757 tree t1 = build_int_cst (long_integer_type_node, 0); 3758 tree t2 = build_int_cst (long_integer_type_node, 1); 3759 tree t3 = build_int_cstu (long_integer_type_node, 3760 (HOST_WIDE_INT_1U << 31) + 1); 3761 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3762 gimple_stmt_iterator gsi2 = gsi_none (); 3763 gimple *g = NULL; 3764 tree mem = null_pointer_node, memv = NULL_TREE; 3765 unsigned HOST_WIDE_INT condtemp_sz = 0; 3766 unsigned HOST_WIDE_INT alloc_align = 0; 3767 if (fd->have_reductemp) 3768 { 3769 gcc_assert (!fd->have_nonctrl_scantemp); 3770 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 3771 reductions = OMP_CLAUSE_DECL (c); 3772 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 3773 g = SSA_NAME_DEF_STMT (reductions); 3774 reductions = gimple_assign_rhs1 (g); 3775 OMP_CLAUSE_DECL (c) = reductions; 3776 gsi2 = gsi_for_stmt (g); 3777 } 3778 else 3779 { 3780 if (gsi_end_p (gsip)) 3781 gsi2 = gsi_after_labels (region->entry); 3782 else 3783 gsi2 = gsip; 3784 reductions = null_pointer_node; 3785 } 3786 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp) 3787 { 3788 tree type; 3789 if (fd->have_pointer_condtemp) 3790 type = TREE_TYPE (condtemp); 3791 else 3792 type = ptr_type_node; 3793 memv = create_tmp_var (type); 3794 TREE_ADDRESSABLE (memv) = 1; 3795 unsigned HOST_WIDE_INT sz = 0; 3796 tree size = NULL_TREE; 3797 if (fd->have_pointer_condtemp) 3798 { 3799 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 3800 sz *= fd->lastprivate_conditional; 3801 condtemp_sz = sz; 3802 } 3803 if (fd->have_nonctrl_scantemp) 3804 { 3805 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3806 gimple *g = gimple_build_call (nthreads, 0); 3807 nthreads = create_tmp_var (integer_type_node); 3808 gimple_call_set_lhs (g, nthreads); 3809 gsi_insert_before (&gsi2, g, GSI_SAME_STMT); 3810 nthreads = fold_convert (sizetype, nthreads); 
3811 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node); 3812 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz, 3813 alloc_align, nthreads, NULL, 3814 false); 3815 size = fold_convert (type, size); 3816 } 3817 else 3818 size = build_int_cst (type, sz); 3819 expand_omp_build_assign (&gsi2, memv, size, false); 3820 mem = build_fold_addr_expr (memv); 3821 } 3822 tree t 3823 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 3824 9, t1, t2, t2, t3, t1, null_pointer_node, 3825 null_pointer_node, reductions, mem); 3826 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 3827 true, GSI_SAME_STMT); 3828 if (fd->have_pointer_condtemp) 3829 expand_omp_build_assign (&gsi2, condtemp, memv, false); 3830 if (fd->have_nonctrl_scantemp) 3831 { 3832 tree ptr = fd->have_pointer_condtemp ? condtemp : memv; 3833 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz, 3834 alloc_align, nthreads, &gsi2, false); 3835 } 3836 if (fd->have_reductemp) 3837 { 3838 gsi_remove (&gsi2, true); 3839 release_ssa_name (gimple_assign_lhs (g)); 3840 } 3841 } 3842 switch (gimple_omp_for_kind (fd->for_stmt)) 3843 { 3844 case GF_OMP_FOR_KIND_FOR: 3845 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3846 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3847 break; 3848 case GF_OMP_FOR_KIND_DISTRIBUTE: 3849 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3850 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3851 break; 3852 default: 3853 gcc_unreachable (); 3854 } 3855 nthreads = build_call_expr (nthreads, 0); 3856 nthreads = fold_convert (itype, nthreads); 3857 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3858 true, GSI_SAME_STMT); 3859 threadid = build_call_expr (threadid, 0); 3860 threadid = fold_convert (itype, threadid); 3861 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3862 true, GSI_SAME_STMT); 3863 3864 n1 = fd->loop.n1; 3865 n2 = fd->loop.n2; 3866 step = 
fd->loop.step; 3867 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3868 { 3869 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3870 OMP_CLAUSE__LOOPTEMP_); 3871 gcc_assert (innerc); 3872 n1 = OMP_CLAUSE_DECL (innerc); 3873 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3874 OMP_CLAUSE__LOOPTEMP_); 3875 gcc_assert (innerc); 3876 n2 = OMP_CLAUSE_DECL (innerc); 3877 } 3878 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3879 true, NULL_TREE, true, GSI_SAME_STMT); 3880 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3881 true, NULL_TREE, true, GSI_SAME_STMT); 3882 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3883 true, NULL_TREE, true, GSI_SAME_STMT); 3884 3885 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); 3886 t = fold_build2 (PLUS_EXPR, itype, step, t); 3887 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3888 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3889 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3890 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3891 fold_build1 (NEGATE_EXPR, itype, t), 3892 fold_build1 (NEGATE_EXPR, itype, step)); 3893 else 3894 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3895 t = fold_convert (itype, t); 3896 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3897 3898 q = create_tmp_reg (itype, "q"); 3899 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); 3900 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3901 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); 3902 3903 tt = create_tmp_reg (itype, "tt"); 3904 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); 3905 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); 3906 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); 3907 3908 t = build2 (LT_EXPR, boolean_type_node, threadid, tt); 3909 gcond *cond_stmt = 
gimple_build_cond_empty (t); 3910 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3911 3912 second_bb = split_block (entry_bb, cond_stmt)->dest; 3913 gsi = gsi_last_nondebug_bb (second_bb); 3914 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3915 3916 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), 3917 GSI_SAME_STMT); 3918 gassign *assign_stmt 3919 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); 3920 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3921 3922 third_bb = split_block (second_bb, assign_stmt)->dest; 3923 gsi = gsi_last_nondebug_bb (third_bb); 3924 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3925 3926 if (fd->have_nonctrl_scantemp) 3927 { 3928 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 3929 tree controlp = NULL_TREE, controlb = NULL_TREE; 3930 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 3931 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 3932 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) 3933 { 3934 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) 3935 controlb = OMP_CLAUSE_DECL (c); 3936 else 3937 controlp = OMP_CLAUSE_DECL (c); 3938 if (controlb && controlp) 3939 break; 3940 } 3941 gcc_assert (controlp && controlb); 3942 tree cnt = create_tmp_var (sizetype); 3943 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q); 3944 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3945 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node); 3946 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0, 3947 alloc_align, cnt, NULL, true); 3948 tree size = create_tmp_var (sizetype); 3949 expand_omp_build_assign (&gsi, size, sz, false); 3950 tree cmp = fold_build2 (GT_EXPR, boolean_type_node, 3951 size, size_int (16384)); 3952 expand_omp_build_assign (&gsi, controlb, cmp); 3953 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, 3954 NULL_TREE, NULL_TREE); 3955 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3956 fourth_bb = split_block 
(third_bb, g)->dest; 3957 gsi = gsi_last_nondebug_bb (fourth_bb); 3958 /* FIXME: Once we have allocators, this should use allocator. */ 3959 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size); 3960 gimple_call_set_lhs (g, controlp); 3961 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3962 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt, 3963 &gsi, true); 3964 gsi_prev (&gsi); 3965 g = gsi_stmt (gsi); 3966 fifth_bb = split_block (fourth_bb, g)->dest; 3967 gsi = gsi_last_nondebug_bb (fifth_bb); 3968 3969 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0); 3970 gimple_call_set_lhs (g, controlp); 3971 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3972 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN); 3973 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 3974 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 3975 && OMP_CLAUSE__SCANTEMP__ALLOC (c)) 3976 { 3977 tree tmp = create_tmp_var (sizetype); 3978 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c))); 3979 g = gimple_build_assign (tmp, MULT_EXPR, cnt, 3980 TYPE_SIZE_UNIT (pointee_type)); 3981 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3982 g = gimple_build_call (alloca_decl, 2, tmp, 3983 size_int (TYPE_ALIGN (pointee_type))); 3984 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c)); 3985 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 3986 } 3987 3988 sixth_bb = split_block (fifth_bb, g)->dest; 3989 gsi = gsi_last_nondebug_bb (sixth_bb); 3990 } 3991 3992 t = build2 (MULT_EXPR, itype, q, threadid); 3993 t = build2 (PLUS_EXPR, itype, t, tt); 3994 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3995 3996 t = fold_build2 (PLUS_EXPR, itype, s0, q); 3997 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); 3998 3999 t = build2 (GE_EXPR, boolean_type_node, s0, e0); 4000 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4001 4002 /* Remove the GIMPLE_OMP_FOR statement. 
*/ 4003 gsi_remove (&gsi, true); 4004 4005 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 4006 gsi = gsi_start_bb (seq_start_bb); 4007 4008 tree startvar = fd->loop.v; 4009 tree endvar = NULL_TREE; 4010 4011 if (gimple_omp_for_combined_p (fd->for_stmt)) 4012 { 4013 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 4014 ? gimple_omp_parallel_clauses (inner_stmt) 4015 : gimple_omp_for_clauses (inner_stmt); 4016 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 4017 gcc_assert (innerc); 4018 startvar = OMP_CLAUSE_DECL (innerc); 4019 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4020 OMP_CLAUSE__LOOPTEMP_); 4021 gcc_assert (innerc); 4022 endvar = OMP_CLAUSE_DECL (innerc); 4023 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 4024 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 4025 { 4026 int i; 4027 for (i = 1; i < fd->collapse; i++) 4028 { 4029 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4030 OMP_CLAUSE__LOOPTEMP_); 4031 gcc_assert (innerc); 4032 } 4033 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4034 OMP_CLAUSE__LOOPTEMP_); 4035 if (innerc) 4036 { 4037 /* If needed (distribute parallel for with lastprivate), 4038 propagate down the total number of iterations. 
*/ 4039 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4040 fd->loop.n2); 4041 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4042 GSI_CONTINUE_LINKING); 4043 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4044 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4045 } 4046 } 4047 } 4048 t = fold_convert (itype, s0); 4049 t = fold_build2 (MULT_EXPR, itype, t, step); 4050 if (POINTER_TYPE_P (type)) 4051 { 4052 t = fold_build_pointer_plus (n1, t); 4053 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4054 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4055 t = fold_convert (signed_type_for (type), t); 4056 } 4057 else 4058 t = fold_build2 (PLUS_EXPR, type, t, n1); 4059 t = fold_convert (TREE_TYPE (startvar), t); 4060 t = force_gimple_operand_gsi (&gsi, t, 4061 DECL_P (startvar) 4062 && TREE_ADDRESSABLE (startvar), 4063 NULL_TREE, false, GSI_CONTINUE_LINKING); 4064 assign_stmt = gimple_build_assign (startvar, t); 4065 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4066 if (cond_var) 4067 { 4068 tree itype = TREE_TYPE (cond_var); 4069 /* For lastprivate(conditional:) itervar, we need some iteration 4070 counter that starts at unsigned non-zero and increases. 4071 Prefer as few IVs as possible, so if we can use startvar 4072 itself, use that, or startvar + constant (those would be 4073 incremented with step), and as last resort use the s0 + 1 4074 incremented by 1. 
*/ 4075 if (POINTER_TYPE_P (type) 4076 || TREE_CODE (n1) != INTEGER_CST 4077 || fd->loop.cond_code != LT_EXPR) 4078 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), 4079 build_int_cst (itype, 1)); 4080 else if (tree_int_cst_sgn (n1) == 1) 4081 t = fold_convert (itype, t); 4082 else 4083 { 4084 tree c = fold_convert (itype, n1); 4085 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 4086 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 4087 } 4088 t = force_gimple_operand_gsi (&gsi, t, false, 4089 NULL_TREE, false, GSI_CONTINUE_LINKING); 4090 assign_stmt = gimple_build_assign (cond_var, t); 4091 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4092 } 4093 4094 t = fold_convert (itype, e0); 4095 t = fold_build2 (MULT_EXPR, itype, t, step); 4096 if (POINTER_TYPE_P (type)) 4097 { 4098 t = fold_build_pointer_plus (n1, t); 4099 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4100 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4101 t = fold_convert (signed_type_for (type), t); 4102 } 4103 else 4104 t = fold_build2 (PLUS_EXPR, type, t, n1); 4105 t = fold_convert (TREE_TYPE (startvar), t); 4106 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4107 false, GSI_CONTINUE_LINKING); 4108 if (endvar) 4109 { 4110 assign_stmt = gimple_build_assign (endvar, e); 4111 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4112 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4113 assign_stmt = gimple_build_assign (fd->loop.v, e); 4114 else 4115 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4116 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4117 } 4118 /* Handle linear clause adjustments. 
*/ 4119 tree itercnt = NULL_TREE; 4120 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4121 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4122 c; c = OMP_CLAUSE_CHAIN (c)) 4123 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4124 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4125 { 4126 tree d = OMP_CLAUSE_DECL (c); 4127 bool is_ref = omp_is_reference (d); 4128 tree t = d, a, dest; 4129 if (is_ref) 4130 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4131 if (itercnt == NULL_TREE) 4132 { 4133 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4134 { 4135 itercnt = fold_build2 (MINUS_EXPR, itype, 4136 fold_convert (itype, n1), 4137 fold_convert (itype, fd->loop.n1)); 4138 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); 4139 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); 4140 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4141 NULL_TREE, false, 4142 GSI_CONTINUE_LINKING); 4143 } 4144 else 4145 itercnt = s0; 4146 } 4147 tree type = TREE_TYPE (t); 4148 if (POINTER_TYPE_P (type)) 4149 type = sizetype; 4150 a = fold_build2 (MULT_EXPR, type, 4151 fold_convert (type, itercnt), 4152 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4153 dest = unshare_expr (t); 4154 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 4155 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); 4156 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4157 false, GSI_CONTINUE_LINKING); 4158 assign_stmt = gimple_build_assign (dest, t); 4159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4160 } 4161 if (fd->collapse > 1) 4162 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4163 4164 if (!broken_loop) 4165 { 4166 /* The code controlling the sequential loop replaces the 4167 GIMPLE_OMP_CONTINUE. 
*/ 4168 gsi = gsi_last_nondebug_bb (cont_bb); 4169 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4170 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 4171 vmain = gimple_omp_continue_control_use (cont_stmt); 4172 vback = gimple_omp_continue_control_def (cont_stmt); 4173 4174 if (cond_var) 4175 { 4176 tree itype = TREE_TYPE (cond_var); 4177 tree t2; 4178 if (POINTER_TYPE_P (type) 4179 || TREE_CODE (n1) != INTEGER_CST 4180 || fd->loop.cond_code != LT_EXPR) 4181 t2 = build_int_cst (itype, 1); 4182 else 4183 t2 = fold_convert (itype, step); 4184 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 4185 t2 = force_gimple_operand_gsi (&gsi, t2, false, 4186 NULL_TREE, true, GSI_SAME_STMT); 4187 assign_stmt = gimple_build_assign (cond_var, t2); 4188 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4189 } 4190 4191 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4192 { 4193 if (POINTER_TYPE_P (type)) 4194 t = fold_build_pointer_plus (vmain, step); 4195 else 4196 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4197 t = force_gimple_operand_gsi (&gsi, t, 4198 DECL_P (vback) 4199 && TREE_ADDRESSABLE (vback), 4200 NULL_TREE, true, GSI_SAME_STMT); 4201 assign_stmt = gimple_build_assign (vback, t); 4202 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4203 4204 t = build2 (fd->loop.cond_code, boolean_type_node, 4205 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4206 ? t : vback, e); 4207 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4208 } 4209 4210 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 4211 gsi_remove (&gsi, true); 4212 4213 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4214 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4215 } 4216 4217 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. 
*/ 4218 gsi = gsi_last_nondebug_bb (exit_bb); 4219 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4220 { 4221 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4222 if (fd->have_reductemp 4223 || ((fd->have_pointer_condtemp || fd->have_scantemp) 4224 && !fd->have_nonctrl_scantemp)) 4225 { 4226 tree fn; 4227 if (t) 4228 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4229 else 4230 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4231 gcall *g = gimple_build_call (fn, 0); 4232 if (t) 4233 { 4234 gimple_call_set_lhs (g, t); 4235 if (fd->have_reductemp) 4236 gsi_insert_after (&gsi, gimple_build_assign (reductions, 4237 NOP_EXPR, t), 4238 GSI_SAME_STMT); 4239 } 4240 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4241 } 4242 else 4243 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4244 } 4245 else if ((fd->have_pointer_condtemp || fd->have_scantemp) 4246 && !fd->have_nonctrl_scantemp) 4247 { 4248 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 4249 gcall *g = gimple_build_call (fn, 0); 4250 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4251 } 4252 if (fd->have_scantemp && !fd->have_nonctrl_scantemp) 4253 { 4254 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4255 tree controlp = NULL_TREE, controlb = NULL_TREE; 4256 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) 4257 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_ 4258 && OMP_CLAUSE__SCANTEMP__CONTROL (c)) 4259 { 4260 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node) 4261 controlb = OMP_CLAUSE_DECL (c); 4262 else 4263 controlp = OMP_CLAUSE_DECL (c); 4264 if (controlb && controlp) 4265 break; 4266 } 4267 gcc_assert (controlp && controlb); 4268 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node, 4269 NULL_TREE, NULL_TREE); 4270 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4271 exit1_bb = split_block (exit_bb, g)->dest; 4272 gsi = gsi_after_labels (exit1_bb); 4273 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1, 4274 controlp); 4275 
gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4276 exit2_bb = split_block (exit1_bb, g)->dest; 4277 gsi = gsi_after_labels (exit2_bb); 4278 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1, 4279 controlp); 4280 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4281 exit3_bb = split_block (exit2_bb, g)->dest; 4282 gsi = gsi_after_labels (exit3_bb); 4283 } 4284 gsi_remove (&gsi, true); 4285 4286 /* Connect all the blocks. */ 4287 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); 4288 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4); 4289 ep = find_edge (entry_bb, second_bb); 4290 ep->flags = EDGE_TRUE_VALUE; 4291 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4); 4292 if (fourth_bb) 4293 { 4294 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE); 4295 ep->probability 4296 = profile_probability::guessed_always ().apply_scale (1, 2); 4297 ep = find_edge (third_bb, fourth_bb); 4298 ep->flags = EDGE_TRUE_VALUE; 4299 ep->probability 4300 = profile_probability::guessed_always ().apply_scale (1, 2); 4301 ep = find_edge (fourth_bb, fifth_bb); 4302 redirect_edge_and_branch (ep, sixth_bb); 4303 } 4304 else 4305 sixth_bb = third_bb; 4306 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; 4307 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE; 4308 if (exit1_bb) 4309 { 4310 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE); 4311 ep->probability 4312 = profile_probability::guessed_always ().apply_scale (1, 2); 4313 ep = find_edge (exit_bb, exit1_bb); 4314 ep->flags = EDGE_TRUE_VALUE; 4315 ep->probability 4316 = profile_probability::guessed_always ().apply_scale (1, 2); 4317 ep = find_edge (exit1_bb, exit2_bb); 4318 redirect_edge_and_branch (ep, exit3_bb); 4319 } 4320 4321 if (!broken_loop) 4322 { 4323 ep = find_edge (cont_bb, body_bb); 4324 if (ep == NULL) 4325 { 4326 ep = BRANCH_EDGE (cont_bb); 4327 gcc_assert (single_succ (ep->dest) == body_bb); 4328 } 4329 if 
(gimple_omp_for_combined_p (fd->for_stmt)) 4330 { 4331 remove_edge (ep); 4332 ep = NULL; 4333 } 4334 else if (fd->collapse > 1) 4335 { 4336 remove_edge (ep); 4337 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4338 } 4339 else 4340 ep->flags = EDGE_TRUE_VALUE; 4341 find_edge (cont_bb, fin_bb)->flags 4342 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4343 } 4344 4345 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); 4346 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); 4347 if (fourth_bb) 4348 { 4349 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb); 4350 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb); 4351 } 4352 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb); 4353 4354 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4355 recompute_dominator (CDI_DOMINATORS, body_bb)); 4356 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4357 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4358 if (exit1_bb) 4359 { 4360 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb); 4361 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb); 4362 } 4363 4364 class loop *loop = body_bb->loop_father; 4365 if (loop != entry_bb->loop_father) 4366 { 4367 gcc_assert (broken_loop || loop->header == body_bb); 4368 gcc_assert (broken_loop 4369 || loop->latch == region->cont 4370 || single_pred (loop->latch) == region->cont); 4371 return; 4372 } 4373 4374 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 4375 { 4376 loop = alloc_loop (); 4377 loop->header = body_bb; 4378 if (collapse_bb == NULL) 4379 loop->latch = cont_bb; 4380 add_loop (loop, body_bb->loop_father); 4381 } 4382} 4383 4384/* Return phi in E->DEST with ARG on edge E. 
*/ 4385 4386 /* Walk the PHI nodes of E's destination block and return the first PHI whose argument incoming over edge E is exactly ARG (compared by pointer identity, not operand_equal_p); return NULL when no PHI matches.  */ static gphi * 4387 find_phi_with_arg_on_edge (tree arg, edge e) 4388 { 4389 basic_block bb = e->dest; 4390 4391 for (gphi_iterator gpi = gsi_start_phis (bb); 4392 !gsi_end_p (gpi); 4393 gsi_next (&gpi)) 4394 { 4395 gphi *phi = gpi.phi (); 4396 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 4397 return phi; 4398 } 4399 4400 /* No PHI in E->DEST uses ARG on this edge.  */ return NULL; 4401} 4402 4403 /* A subroutine of expand_omp_for. Generate code for a parallel 4404 loop with static schedule and a specified chunk size. Given 4405 parameters: 4406 4407 for (V = N1; V cond N2; V += STEP) BODY; 4408 4409 where COND is "<" or ">", we generate pseudocode 4410 4411 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 4412 if (cond is <) 4413 adj = STEP - 1; 4414 else 4415 adj = STEP + 1; 4416 if ((__typeof (V)) -1 > 0 && cond is >) 4417 n = -(adj + N2 - N1) / -STEP; 4418 else 4419 n = (adj + N2 - N1) / STEP; 4420 trip = 0; 4421 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 4422 here so that V is defined 4423 if the loop is not entered 4424 L0: 4425 s0 = (trip * nthreads + threadid) * CHUNK; 4426 e0 = min (s0 + CHUNK, n); 4427 if (s0 < n) goto L1; else goto L4; 4428 L1: 4429 V = s0 * STEP + N1; 4430 e = e0 * STEP + N1; 4431 L2: 4432 BODY; 4433 V += STEP; 4434 if (V cond e) goto L2; else goto L3; 4435 L3: 4436 trip += 1; 4437 goto L0; 4438 L4: 4439*/ 4440 4441 static void 4442 expand_omp_for_static_chunk (struct omp_region *region, 4443 struct omp_for_data *fd, gimple *inner_stmt) 4444 { 4445 tree n, s0, e0, e, t; 4446 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 4447 tree type, itype, vmain, vback, vextra; 4448 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 4449 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 4450 gimple_stmt_iterator gsi, gsip; 4451 edge se; 4452 bool broken_loop = region->cont == NULL; 4453 tree *counts = NULL; 4454 tree n1, n2, step; 4455 tree reductions = NULL_TREE; 4456 tree cond_var = NULL_TREE, condtemp =
NULL_TREE; 4457 4458 itype = type = TREE_TYPE (fd->loop.v); 4459 if (POINTER_TYPE_P (type)) 4460 itype = signed_type_for (type); 4461 4462 entry_bb = region->entry; 4463 se = split_block (entry_bb, last_stmt (entry_bb)); 4464 entry_bb = se->src; 4465 iter_part_bb = se->dest; 4466 cont_bb = region->cont; 4467 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 4468 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 4469 gcc_assert (broken_loop 4470 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 4471 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 4472 body_bb = single_succ (seq_start_bb); 4473 if (!broken_loop) 4474 { 4475 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 4476 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 4477 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4478 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 4479 } 4480 exit_bb = region->exit; 4481 4482 /* Trip and adjustment setup goes in ENTRY_BB. */ 4483 gsi = gsi_last_nondebug_bb (entry_bb); 4484 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4485 gsip = gsi; 4486 gsi_prev (&gsip); 4487 4488 if (fd->collapse > 1) 4489 { 4490 int first_zero_iter = -1, dummy = -1; 4491 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 4492 4493 counts = XALLOCAVEC (tree, fd->collapse); 4494 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4495 fin_bb, first_zero_iter, 4496 dummy_bb, dummy, l2_dom_bb); 4497 t = NULL_TREE; 4498 } 4499 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4500 t = integer_one_node; 4501 else 4502 t = fold_binary (fd->loop.cond_code, boolean_type_node, 4503 fold_convert (type, fd->loop.n1), 4504 fold_convert (type, fd->loop.n2)); 4505 if (fd->collapse == 1 4506 && TYPE_UNSIGNED (type) 4507 && (t == NULL_TREE || !integer_onep (t))) 4508 { 4509 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 4510 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 4511 true, GSI_SAME_STMT); 4512 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 
4513 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 4514 true, GSI_SAME_STMT); 4515 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, 4516 NULL_TREE, NULL_TREE); 4517 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 4518 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 4519 expand_omp_regimplify_p, NULL, NULL) 4520 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 4521 expand_omp_regimplify_p, NULL, NULL)) 4522 { 4523 gsi = gsi_for_stmt (cond_stmt); 4524 gimple_regimplify_operands (cond_stmt, &gsi); 4525 } 4526 se = split_block (entry_bb, cond_stmt); 4527 se->flags = EDGE_TRUE_VALUE; 4528 entry_bb = se->dest; 4529 se->probability = profile_probability::very_likely (); 4530 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 4531 se->probability = profile_probability::very_unlikely (); 4532 if (gimple_in_ssa_p (cfun)) 4533 { 4534 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 4535 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 4536 !gsi_end_p (gpi); gsi_next (&gpi)) 4537 { 4538 gphi *phi = gpi.phi (); 4539 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 4540 se, UNKNOWN_LOCATION); 4541 } 4542 } 4543 gsi = gsi_last_bb (entry_bb); 4544 } 4545 4546 if (fd->lastprivate_conditional) 4547 { 4548 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4549 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_); 4550 if (fd->have_pointer_condtemp) 4551 condtemp = OMP_CLAUSE_DECL (c); 4552 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_); 4553 cond_var = OMP_CLAUSE_DECL (c); 4554 } 4555 if (fd->have_reductemp || fd->have_pointer_condtemp) 4556 { 4557 tree t1 = build_int_cst (long_integer_type_node, 0); 4558 tree t2 = build_int_cst (long_integer_type_node, 1); 4559 tree t3 = build_int_cstu (long_integer_type_node, 4560 (HOST_WIDE_INT_1U << 31) + 1); 4561 tree clauses = gimple_omp_for_clauses (fd->for_stmt); 4562 gimple_stmt_iterator gsi2 = gsi_none (); 4563 gimple *g = NULL; 4564 tree mem = null_pointer_node, 
memv = NULL_TREE; 4565 if (fd->have_reductemp) 4566 { 4567 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); 4568 reductions = OMP_CLAUSE_DECL (c); 4569 gcc_assert (TREE_CODE (reductions) == SSA_NAME); 4570 g = SSA_NAME_DEF_STMT (reductions); 4571 reductions = gimple_assign_rhs1 (g); 4572 OMP_CLAUSE_DECL (c) = reductions; 4573 gsi2 = gsi_for_stmt (g); 4574 } 4575 else 4576 { 4577 if (gsi_end_p (gsip)) 4578 gsi2 = gsi_after_labels (region->entry); 4579 else 4580 gsi2 = gsip; 4581 reductions = null_pointer_node; 4582 } 4583 if (fd->have_pointer_condtemp) 4584 { 4585 tree type = TREE_TYPE (condtemp); 4586 memv = create_tmp_var (type); 4587 TREE_ADDRESSABLE (memv) = 1; 4588 unsigned HOST_WIDE_INT sz 4589 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); 4590 sz *= fd->lastprivate_conditional; 4591 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz), 4592 false); 4593 mem = build_fold_addr_expr (memv); 4594 } 4595 tree t 4596 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START), 4597 9, t1, t2, t2, t3, t1, null_pointer_node, 4598 null_pointer_node, reductions, mem); 4599 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, 4600 true, GSI_SAME_STMT); 4601 if (fd->have_pointer_condtemp) 4602 expand_omp_build_assign (&gsi2, condtemp, memv, false); 4603 if (fd->have_reductemp) 4604 { 4605 gsi_remove (&gsi2, true); 4606 release_ssa_name (gimple_assign_lhs (g)); 4607 } 4608 } 4609 switch (gimple_omp_for_kind (fd->for_stmt)) 4610 { 4611 case GF_OMP_FOR_KIND_FOR: 4612 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 4613 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 4614 break; 4615 case GF_OMP_FOR_KIND_DISTRIBUTE: 4616 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 4617 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 4618 break; 4619 default: 4620 gcc_unreachable (); 4621 } 4622 nthreads = build_call_expr (nthreads, 0); 4623 nthreads = fold_convert (itype, nthreads); 4624 
nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 4625 true, GSI_SAME_STMT); 4626 threadid = build_call_expr (threadid, 0); 4627 threadid = fold_convert (itype, threadid); 4628 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 4629 true, GSI_SAME_STMT); 4630 4631 n1 = fd->loop.n1; 4632 n2 = fd->loop.n2; 4633 step = fd->loop.step; 4634 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4635 { 4636 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4637 OMP_CLAUSE__LOOPTEMP_); 4638 gcc_assert (innerc); 4639 n1 = OMP_CLAUSE_DECL (innerc); 4640 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4641 OMP_CLAUSE__LOOPTEMP_); 4642 gcc_assert (innerc); 4643 n2 = OMP_CLAUSE_DECL (innerc); 4644 } 4645 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 4646 true, NULL_TREE, true, GSI_SAME_STMT); 4647 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 4648 true, NULL_TREE, true, GSI_SAME_STMT); 4649 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 4650 true, NULL_TREE, true, GSI_SAME_STMT); 4651 tree chunk_size = fold_convert (itype, fd->chunk_size); 4652 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 4653 chunk_size 4654 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 4655 GSI_SAME_STMT); 4656 4657 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 4658 t = fold_build2 (PLUS_EXPR, itype, step, t); 4659 t = fold_build2 (PLUS_EXPR, itype, t, n2); 4660 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 4661 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 4662 t = fold_build2 (TRUNC_DIV_EXPR, itype, 4663 fold_build1 (NEGATE_EXPR, itype, t), 4664 fold_build1 (NEGATE_EXPR, itype, step)); 4665 else 4666 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 4667 t = fold_convert (itype, t); 4668 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4669 true, GSI_SAME_STMT); 4670 4671 trip_var = create_tmp_reg (itype, ".trip"); 4672 if (gimple_in_ssa_p (cfun)) 4673 { 4674 trip_init = make_ssa_name (trip_var); 4675 trip_main = make_ssa_name (trip_var); 4676 trip_back = make_ssa_name (trip_var); 4677 } 4678 else 4679 { 4680 trip_init = trip_var; 4681 trip_main = trip_var; 4682 trip_back = trip_var; 4683 } 4684 4685 gassign *assign_stmt 4686 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 4687 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4688 4689 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 4690 t = fold_build2 (MULT_EXPR, itype, t, step); 4691 if (POINTER_TYPE_P (type)) 4692 t = fold_build_pointer_plus (n1, t); 4693 else 4694 t = fold_build2 (PLUS_EXPR, type, t, n1); 4695 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4696 true, GSI_SAME_STMT); 4697 4698 /* Remove the GIMPLE_OMP_FOR. */ 4699 gsi_remove (&gsi, true); 4700 4701 gimple_stmt_iterator gsif = gsi; 4702 4703 /* Iteration space partitioning goes in ITER_PART_BB. 
*/ 4704 gsi = gsi_last_bb (iter_part_bb); 4705 4706 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 4707 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 4708 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 4709 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4710 false, GSI_CONTINUE_LINKING); 4711 4712 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 4713 t = fold_build2 (MIN_EXPR, itype, t, n); 4714 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4715 false, GSI_CONTINUE_LINKING); 4716 4717 t = build2 (LT_EXPR, boolean_type_node, s0, n); 4718 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 4719 4720 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 4721 gsi = gsi_start_bb (seq_start_bb); 4722 4723 tree startvar = fd->loop.v; 4724 tree endvar = NULL_TREE; 4725 4726 if (gimple_omp_for_combined_p (fd->for_stmt)) 4727 { 4728 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 4729 ? gimple_omp_parallel_clauses (inner_stmt) 4730 : gimple_omp_for_clauses (inner_stmt); 4731 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 4732 gcc_assert (innerc); 4733 startvar = OMP_CLAUSE_DECL (innerc); 4734 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4735 OMP_CLAUSE__LOOPTEMP_); 4736 gcc_assert (innerc); 4737 endvar = OMP_CLAUSE_DECL (innerc); 4738 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 4739 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 4740 { 4741 int i; 4742 for (i = 1; i < fd->collapse; i++) 4743 { 4744 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4745 OMP_CLAUSE__LOOPTEMP_); 4746 gcc_assert (innerc); 4747 } 4748 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4749 OMP_CLAUSE__LOOPTEMP_); 4750 if (innerc) 4751 { 4752 /* If needed (distribute parallel for with lastprivate), 4753 propagate down the total number of iterations. 
*/ 4754 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4755 fd->loop.n2); 4756 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4757 GSI_CONTINUE_LINKING); 4758 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4759 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4760 } 4761 } 4762 } 4763 4764 t = fold_convert (itype, s0); 4765 t = fold_build2 (MULT_EXPR, itype, t, step); 4766 if (POINTER_TYPE_P (type)) 4767 { 4768 t = fold_build_pointer_plus (n1, t); 4769 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4770 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4771 t = fold_convert (signed_type_for (type), t); 4772 } 4773 else 4774 t = fold_build2 (PLUS_EXPR, type, t, n1); 4775 t = fold_convert (TREE_TYPE (startvar), t); 4776 t = force_gimple_operand_gsi (&gsi, t, 4777 DECL_P (startvar) 4778 && TREE_ADDRESSABLE (startvar), 4779 NULL_TREE, false, GSI_CONTINUE_LINKING); 4780 assign_stmt = gimple_build_assign (startvar, t); 4781 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4782 if (cond_var) 4783 { 4784 tree itype = TREE_TYPE (cond_var); 4785 /* For lastprivate(conditional:) itervar, we need some iteration 4786 counter that starts at unsigned non-zero and increases. 4787 Prefer as few IVs as possible, so if we can use startvar 4788 itself, use that, or startvar + constant (those would be 4789 incremented with step), and as last resort use the s0 + 1 4790 incremented by 1. 
*/ 4791 if (POINTER_TYPE_P (type) 4792 || TREE_CODE (n1) != INTEGER_CST 4793 || fd->loop.cond_code != LT_EXPR) 4794 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0), 4795 build_int_cst (itype, 1)); 4796 else if (tree_int_cst_sgn (n1) == 1) 4797 t = fold_convert (itype, t); 4798 else 4799 { 4800 tree c = fold_convert (itype, n1); 4801 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c); 4802 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c); 4803 } 4804 t = force_gimple_operand_gsi (&gsi, t, false, 4805 NULL_TREE, false, GSI_CONTINUE_LINKING); 4806 assign_stmt = gimple_build_assign (cond_var, t); 4807 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4808 } 4809 4810 t = fold_convert (itype, e0); 4811 t = fold_build2 (MULT_EXPR, itype, t, step); 4812 if (POINTER_TYPE_P (type)) 4813 { 4814 t = fold_build_pointer_plus (n1, t); 4815 if (!POINTER_TYPE_P (TREE_TYPE (startvar)) 4816 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type)) 4817 t = fold_convert (signed_type_for (type), t); 4818 } 4819 else 4820 t = fold_build2 (PLUS_EXPR, type, t, n1); 4821 t = fold_convert (TREE_TYPE (startvar), t); 4822 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4823 false, GSI_CONTINUE_LINKING); 4824 if (endvar) 4825 { 4826 assign_stmt = gimple_build_assign (endvar, e); 4827 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4828 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4829 assign_stmt = gimple_build_assign (fd->loop.v, e); 4830 else 4831 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4832 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4833 } 4834 /* Handle linear clause adjustments. 
*/ 4835 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4836 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4837 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4838 c; c = OMP_CLAUSE_CHAIN (c)) 4839 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4840 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4841 { 4842 tree d = OMP_CLAUSE_DECL (c); 4843 bool is_ref = omp_is_reference (d); 4844 tree t = d, a, dest; 4845 if (is_ref) 4846 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4847 tree type = TREE_TYPE (t); 4848 if (POINTER_TYPE_P (type)) 4849 type = sizetype; 4850 dest = unshare_expr (t); 4851 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4852 expand_omp_build_assign (&gsif, v, t); 4853 if (itercnt == NULL_TREE) 4854 { 4855 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4856 { 4857 itercntbias 4858 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4859 fold_convert (itype, fd->loop.n1)); 4860 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4861 itercntbias, step); 4862 itercntbias 4863 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4864 NULL_TREE, true, 4865 GSI_SAME_STMT); 4866 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4867 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4868 NULL_TREE, false, 4869 GSI_CONTINUE_LINKING); 4870 } 4871 else 4872 itercnt = s0; 4873 } 4874 a = fold_build2 (MULT_EXPR, type, 4875 fold_convert (type, itercnt), 4876 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4877 t = fold_build2 (type == TREE_TYPE (t) ? 
PLUS_EXPR 4878 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4879 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4880 false, GSI_CONTINUE_LINKING); 4881 assign_stmt = gimple_build_assign (dest, t); 4882 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4883 } 4884 if (fd->collapse > 1) 4885 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4886 4887 if (!broken_loop) 4888 { 4889 /* The code controlling the sequential loop goes in CONT_BB, 4890 replacing the GIMPLE_OMP_CONTINUE. */ 4891 gsi = gsi_last_nondebug_bb (cont_bb); 4892 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4893 vmain = gimple_omp_continue_control_use (cont_stmt); 4894 vback = gimple_omp_continue_control_def (cont_stmt); 4895 4896 if (cond_var) 4897 { 4898 tree itype = TREE_TYPE (cond_var); 4899 tree t2; 4900 if (POINTER_TYPE_P (type) 4901 || TREE_CODE (n1) != INTEGER_CST 4902 || fd->loop.cond_code != LT_EXPR) 4903 t2 = build_int_cst (itype, 1); 4904 else 4905 t2 = fold_convert (itype, step); 4906 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2); 4907 t2 = force_gimple_operand_gsi (&gsi, t2, false, 4908 NULL_TREE, true, GSI_SAME_STMT); 4909 assign_stmt = gimple_build_assign (cond_var, t2); 4910 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4911 } 4912 4913 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4914 { 4915 if (POINTER_TYPE_P (type)) 4916 t = fold_build_pointer_plus (vmain, step); 4917 else 4918 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4919 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4920 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4921 true, GSI_SAME_STMT); 4922 assign_stmt = gimple_build_assign (vback, t); 4923 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4924 4925 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4926 t = build2 (EQ_EXPR, boolean_type_node, 4927 build_int_cst (itype, 0), 4928 build_int_cst (itype, 1)); 4929 else 4930 t = build2 (fd->loop.cond_code, boolean_type_node, 
4931 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4932 ? t : vback, e); 4933 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4934 } 4935 4936 /* Remove GIMPLE_OMP_CONTINUE. */ 4937 gsi_remove (&gsi, true); 4938 4939 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4940 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4941 4942 /* Trip update code goes into TRIP_UPDATE_BB. */ 4943 gsi = gsi_start_bb (trip_update_bb); 4944 4945 t = build_int_cst (itype, 1); 4946 t = build2 (PLUS_EXPR, itype, trip_main, t); 4947 assign_stmt = gimple_build_assign (trip_back, t); 4948 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4949 } 4950 4951 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4952 gsi = gsi_last_nondebug_bb (exit_bb); 4953 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4954 { 4955 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4956 if (fd->have_reductemp || fd->have_pointer_condtemp) 4957 { 4958 tree fn; 4959 if (t) 4960 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 4961 else 4962 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 4963 gcall *g = gimple_build_call (fn, 0); 4964 if (t) 4965 { 4966 gimple_call_set_lhs (g, t); 4967 if (fd->have_reductemp) 4968 gsi_insert_after (&gsi, gimple_build_assign (reductions, 4969 NOP_EXPR, t), 4970 GSI_SAME_STMT); 4971 } 4972 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4973 } 4974 else 4975 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4976 } 4977 else if (fd->have_pointer_condtemp) 4978 { 4979 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 4980 gcall *g = gimple_build_call (fn, 0); 4981 gsi_insert_after (&gsi, g, GSI_SAME_STMT); 4982 } 4983 gsi_remove (&gsi, true); 4984 4985 /* Connect the new blocks. 
*/ 4986 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4987 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4988 4989 if (!broken_loop) 4990 { 4991 se = find_edge (cont_bb, body_bb); 4992 if (se == NULL) 4993 { 4994 se = BRANCH_EDGE (cont_bb); 4995 gcc_assert (single_succ (se->dest) == body_bb); 4996 } 4997 if (gimple_omp_for_combined_p (fd->for_stmt)) 4998 { 4999 remove_edge (se); 5000 se = NULL; 5001 } 5002 else if (fd->collapse > 1) 5003 { 5004 remove_edge (se); 5005 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5006 } 5007 else 5008 se->flags = EDGE_TRUE_VALUE; 5009 find_edge (cont_bb, trip_update_bb)->flags 5010 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5011 5012 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 5013 iter_part_bb); 5014 } 5015 5016 if (gimple_in_ssa_p (cfun)) 5017 { 5018 gphi_iterator psi; 5019 gphi *phi; 5020 edge re, ene; 5021 edge_var_map *vm; 5022 size_t i; 5023 5024 gcc_assert (fd->collapse == 1 && !broken_loop); 5025 5026 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 5027 remove arguments of the phi nodes in fin_bb. We need to create 5028 appropriate phi nodes in iter_part_bb instead. 
*/ 5029 se = find_edge (iter_part_bb, fin_bb); 5030 re = single_succ_edge (trip_update_bb); 5031 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 5032 ene = single_succ_edge (entry_bb); 5033 5034 psi = gsi_start_phis (fin_bb); 5035 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 5036 gsi_next (&psi), ++i) 5037 { 5038 gphi *nphi; 5039 location_t locus; 5040 5041 phi = psi.phi (); 5042 if (operand_equal_p (gimple_phi_arg_def (phi, 0), 5043 redirect_edge_var_map_def (vm), 0)) 5044 continue; 5045 5046 t = gimple_phi_result (phi); 5047 gcc_assert (t == redirect_edge_var_map_result (vm)); 5048 5049 if (!single_pred_p (fin_bb)) 5050 t = copy_ssa_name (t, phi); 5051 5052 nphi = create_phi_node (t, iter_part_bb); 5053 5054 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 5055 locus = gimple_phi_arg_location_from_edge (phi, se); 5056 5057 /* A special case -- fd->loop.v is not yet computed in 5058 iter_part_bb, we need to use vextra instead. */ 5059 if (t == fd->loop.v) 5060 t = vextra; 5061 add_phi_arg (nphi, t, ene, locus); 5062 locus = redirect_edge_var_map_location (vm); 5063 tree back_arg = redirect_edge_var_map_def (vm); 5064 add_phi_arg (nphi, back_arg, re, locus); 5065 edge ce = find_edge (cont_bb, body_bb); 5066 if (ce == NULL) 5067 { 5068 ce = BRANCH_EDGE (cont_bb); 5069 gcc_assert (single_succ (ce->dest) == body_bb); 5070 ce = single_succ_edge (ce->dest); 5071 } 5072 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 5073 gcc_assert (inner_loop_phi != NULL); 5074 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 5075 find_edge (seq_start_bb, body_bb), locus); 5076 5077 if (!single_pred_p (fin_bb)) 5078 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 5079 } 5080 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 5081 redirect_edge_var_map_clear (re); 5082 if (single_pred_p (fin_bb)) 5083 while (1) 5084 { 5085 psi = gsi_start_phis (fin_bb); 5086 if (gsi_end_p (psi)) 5087 break; 5088 remove_phi_node (&psi, 
false); 5089 } 5090 5091 /* Make phi node for trip. */ 5092 phi = create_phi_node (trip_main, iter_part_bb); 5093 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 5094 UNKNOWN_LOCATION); 5095 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 5096 UNKNOWN_LOCATION); 5097 } 5098 5099 if (!broken_loop) 5100 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 5101 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 5102 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 5103 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5104 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5105 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 5106 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 5107 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5108 recompute_dominator (CDI_DOMINATORS, body_bb)); 5109 5110 if (!broken_loop) 5111 { 5112 class loop *loop = body_bb->loop_father; 5113 class loop *trip_loop = alloc_loop (); 5114 trip_loop->header = iter_part_bb; 5115 trip_loop->latch = trip_update_bb; 5116 add_loop (trip_loop, iter_part_bb->loop_father); 5117 5118 if (loop != entry_bb->loop_father) 5119 { 5120 gcc_assert (loop->header == body_bb); 5121 gcc_assert (loop->latch == region->cont 5122 || single_pred (loop->latch) == region->cont); 5123 trip_loop->inner = loop; 5124 return; 5125 } 5126 5127 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5128 { 5129 loop = alloc_loop (); 5130 loop->header = body_bb; 5131 if (collapse_bb == NULL) 5132 loop->latch = cont_bb; 5133 add_loop (loop, trip_loop); 5134 } 5135 } 5136} 5137 5138/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 5139 loop. 
   Given parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	V = N1;
	goto L1;
    L0:
	BODY;
	V += STEP;
    L1:
	if (V cond N2) goto L0; else goto L2;
    L2:

    For collapsed loops, given parameters:
      collapse(3)
      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
	    BODY;

    we generate pseudocode

	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
	V = 0;
	V1 = N11;
	V2 = N21;
	V3 = N31;
	goto L1;
    L0:
	BODY;
	V += 1;
	V3 += STEP3;
	V2 += (V3 cond3 N32) ? 0 : STEP2;
	V3 = (V3 cond3 N32) ? V3 : N31;
	V1 += (V2 cond2 N22) ? 0 : STEP1;
	V2 = (V2 cond2 N22) ? V2 : N21;
    L1:
	if (V < count) goto L0; else goto L2;
    L2:

      */

static void
expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
  tree type, t;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  gcond *cond_stmt;
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  /* Effective safelen: INT_MAX when no (usable) safelen clause, 0 when the
     loop should not be treated as vectorizable at all.  */
  int safelen_int = INT_MAX;
  bool dont_vectorize = false;
  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SAFELEN);
  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMDUID_);
  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
			      OMP_CLAUSE_IF);
  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE_SIMDLEN);
  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				   OMP_CLAUSE__CONDTEMP_);
  tree n1, n2;
  /* Iteration counter used for lastprivate(conditional:), if any.  */
  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;

  if (safelen)
    {
      poly_uint64 val;
      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
      if (!poly_int_tree_p (safelen, &val))
	safelen_int = 0;
      else
	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
      /* safelen (1) allows no concurrent iterations, so there is nothing
	 to vectorize.  */
      if (safelen_int == 1)
	safelen_int = 0;
    }
  /* if (false) or simdlen (1) means the loop must not be vectorized.  */
  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
    {
      safelen_int = 0;
      dont_vectorize = true;
    }
  type = TREE_TYPE (fd->loop.v);
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
      l2_bb = BRANCH_EDGE (entry_bb)->dest;
    }
  else
    {
      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
      l2_bb = single_succ (l1_bb);
    }
  exit_bb = region->exit;
  l2_dom_bb = NULL;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  /* Not needed in SSA form right now.  */
  gcc_assert (!gimple_in_ssa_p (cfun));
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
    }
  if (l2_dom_bb == NULL)
    l2_dom_bb = l1_bb;

  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      /* For combined constructs the bounds are passed in via the first two
	 _looptemp_ clauses instead of fd->loop.{n1,n2}.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  tree step = fd->loop.step;

  /* _SIMT_ is an internal clause; SIMT expansion is only worthwhile when
     more than one lane may run (safelen > 1).  */
  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				  OMP_CLAUSE__SIMT_);
  if (is_simt)
    {
      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
      is_simt = safelen_int > 1;
    }
  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
  if (is_simt)
    {
      /* Each SIMT lane starts offset by lane * step and advances by
	 vf * step per iteration.  */
      simt_lane = create_tmp_var (unsigned_type_node);
      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
      gimple_call_set_lhs (g, simt_lane);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
				 fold_convert (TREE_TYPE (step), simt_lane));
      n1 = fold_convert (type, n1);
      if (POINTER_TYPE_P (type))
	n1 = fold_build_pointer_plus (n1, offset);
      else
	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));

      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
      if (fd->collapse > 1)
	simt_maxlane = build_one_cst (unsigned_type_node);
      else if (safelen_int < omp_max_simt_vf ())
	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
      tree vf
	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
					unsigned_type_node, 0);
      if (simt_maxlane)
	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
      vf = fold_convert (TREE_TYPE (step), vf);
      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
    }

  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
  if (fd->collapse > 1)
    {
      if (gimple_omp_for_combined_into_p (fd->for_stmt))
	{
	  gsi_prev (&gsi);
	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
	  gsi_next (&gsi);
	}
      else
	for (i = 0; i < fd->collapse; i++)
	  {
	    tree itype = TREE_TYPE (fd->loops[i].v);
	    if (POINTER_TYPE_P (itype))
	      itype = signed_type_for (itype);
	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	  }
    }
  if (cond_var)
    {
      /* Initialize the lastprivate(conditional:) counter; reuse n1 when it
	 is a known positive constant and the loop counts upwards, else
	 start at 1.  */
      if (POINTER_TYPE_P (type)
	  || TREE_CODE (n1) != INTEGER_CST
	  || fd->loop.cond_code != LT_EXPR
	  || tree_int_cst_sgn (n1) != 1)
	expand_omp_build_assign (&gsi, cond_var,
				 build_one_cst (TREE_TYPE (cond_var)));
      else
	expand_omp_build_assign (&gsi, cond_var,
				 fold_convert (TREE_TYPE (cond_var), n1));
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  if (!broken_loop)
    {
      /* Code to control the increment goes in the CONT_BB.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);

      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);

      if (fd->collapse > 1)
	{
	  /* Advance the innermost iteration variable, then ripple carries
	     outwards as in the pseudocode above.  */
	  i = fd->collapse - 1;
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      t = fold_convert (sizetype, fd->loops[i].step);
	      t = fold_build_pointer_plus (fd->loops[i].v, t);
	    }
	  else
	    {
	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
				fd->loops[i].step);
	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, t);
	    }
	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);

	  for (i = fd->collapse - 1; i > 0; i--)
	    {
	      tree itype = TREE_TYPE (fd->loops[i].v);
	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
	      if (POINTER_TYPE_P (itype2))
		itype2 = signed_type_for (itype2);
	      /* V1 += (V2 cond2 N22) ? 0 : STEP1;  */
	      t = fold_convert (itype2, fd->loops[i - 1].step);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype2,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  build_int_cst (itype2, 0), t);
	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
	      else
		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);

	      /* V2 = (V2 cond2 N22) ? V2 : N21;  */
	      t = fold_convert (itype, fd->loops[i].n1);
	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      t = build3 (COND_EXPR, itype,
			  build2 (fd->loops[i].cond_code, boolean_type_node,
				  fd->loops[i].v,
				  fold_convert (itype, fd->loops[i].n2)),
			  fd->loops[i].v, t);
	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
	    }
	}
      if (cond_var)
	{
	  /* Increment the conditional-lastprivate counter, mirroring the
	     initialization choice made above.  */
	  if (POINTER_TYPE_P (type)
	      || TREE_CODE (n1) != INTEGER_CST
	      || fd->loop.cond_code != LT_EXPR
	      || tree_int_cst_sgn (n1) != 1)
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     build_one_cst (TREE_TYPE (cond_var)));
	  else
	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
			     fold_convert (TREE_TYPE (cond_var), step));
	  expand_omp_build_assign (&gsi, cond_var, t);
	}

      /* Remove GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);
    }

  /* Emit the condition in L1_BB.  */
  gsi = gsi_start_bb (l1_bb);

  t = fold_convert (type, n2);
  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  tree v = fd->loop.v;
  if (DECL_P (v) && TREE_ADDRESSABLE (v))
    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
  cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
		 NULL, NULL)
      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
		    NULL, NULL))
    {
      gsi = gsi_for_stmt (cond_stmt);
      gimple_regimplify_operands (cond_stmt, &gsi);
    }

  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
  if (is_simt)
    {
      gsi = gsi_start_bb (l2_bb);
      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (fd->loop.v, step);
      else
	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
      expand_omp_build_assign (&gsi, fd->loop.v, t);
    }

  /* Remove GIMPLE_OMP_RETURN.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  /* Connect the new blocks.  */
  remove_edge (FALLTHRU_EDGE (entry_bb));

  if (!broken_loop)
    {
      remove_edge (BRANCH_EDGE (entry_bb));
      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);

      e = BRANCH_EDGE (l1_bb);
      ne = FALLTHRU_EDGE (l1_bb);
      e->flags = EDGE_TRUE_VALUE;
    }
  else
    {
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      ne = single_succ_edge (l1_bb);
      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);

    }
  ne->flags = EDGE_FALSE_VALUE;
  /* Guess the back edge is taken 7 times out of 8.  */
  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
  ne->probability = e->probability.invert ();

  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);

  if (simt_maxlane)
    {
      /* Skip the loop body entirely on lanes >= simt_maxlane.  */
      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
				     NULL_TREE, NULL_TREE);
      gsi = gsi_last_bb (entry_bb);
      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
      FALLTHRU_EDGE (entry_bb)->probability
	= profile_probability::guessed_always ().apply_scale (7, 8);
      BRANCH_EDGE (entry_bb)->probability
	= FALLTHRU_EDGE (entry_bb)->probability.invert ();
      l2_dom_bb = entry_bb;
    }
  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);

  if (!broken_loop)
    {
      /* Register the new natural loop and annotate it for the
	 vectorizer.  */
      class loop *loop = alloc_loop ();
      loop->header = l1_bb;
      loop->latch = cont_bb;
      add_loop (loop, l1_bb->loop_father);
      loop->safelen = safelen_int;
      if (simduid)
	{
	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
	  cfun->has_simduid_loops = true;
	}
      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	 the loop.  */
      if ((flag_tree_loop_vectorize
	   || !global_options_set.x_flag_tree_loop_vectorize)
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
	    {
	      unsigned HOST_WIDE_INT v
		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
		loop->simdlen = v;
	    }
	  cfun->has_force_vectorize_loops = true;
	}
      else if (dont_vectorize)
	loop->dont_vectorize = true;
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.
 */
  /* When the iteration type is unsigned long long but the original loop
     variable is signed, and the sign of the bounds is mixed or unknown,
     bias everything by LLONG_MIN so the unsigned runtime interface still
     sees a monotone range.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  /* The first two _looptemp_ clauses on the inner GIMPLE_OMP_TASK carry
     the start and end variables the task expects.  */
  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE and GIMPLE_OMP_RETURN markers too;
     this outer construct emits no loop of its own.  */
  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS, region->entry));
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.
*/ 5739 5740static void 5741expand_omp_taskloop_for_inner (struct omp_region *region, 5742 struct omp_for_data *fd, 5743 gimple *inner_stmt) 5744{ 5745 tree e, t, type, itype, vmain, vback, bias = NULL_TREE; 5746 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; 5747 basic_block fin_bb; 5748 gimple_stmt_iterator gsi; 5749 edge ep; 5750 bool broken_loop = region->cont == NULL; 5751 tree *counts = NULL; 5752 tree n1, n2, step; 5753 5754 itype = type = TREE_TYPE (fd->loop.v); 5755 if (POINTER_TYPE_P (type)) 5756 itype = signed_type_for (type); 5757 5758 /* See if we need to bias by LLONG_MIN. */ 5759 if (fd->iter_type == long_long_unsigned_type_node 5760 && TREE_CODE (type) == INTEGER_TYPE 5761 && !TYPE_UNSIGNED (type)) 5762 { 5763 tree n1, n2; 5764 5765 if (fd->loop.cond_code == LT_EXPR) 5766 { 5767 n1 = fd->loop.n1; 5768 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 5769 } 5770 else 5771 { 5772 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 5773 n2 = fd->loop.n1; 5774 } 5775 if (TREE_CODE (n1) != INTEGER_CST 5776 || TREE_CODE (n2) != INTEGER_CST 5777 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 5778 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 5779 } 5780 5781 entry_bb = region->entry; 5782 cont_bb = region->cont; 5783 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 5784 fin_bb = BRANCH_EDGE (entry_bb)->dest; 5785 gcc_assert (broken_loop 5786 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); 5787 body_bb = FALLTHRU_EDGE (entry_bb)->dest; 5788 if (!broken_loop) 5789 { 5790 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); 5791 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 5792 } 5793 exit_bb = region->exit; 5794 5795 /* Iteration space partitioning goes in ENTRY_BB. 
*/ 5796 gsi = gsi_last_nondebug_bb (entry_bb); 5797 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 5798 5799 if (fd->collapse > 1) 5800 { 5801 int first_zero_iter = -1, dummy = -1; 5802 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 5803 5804 counts = XALLOCAVEC (tree, fd->collapse); 5805 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 5806 fin_bb, first_zero_iter, 5807 dummy_bb, dummy, l2_dom_bb); 5808 t = NULL_TREE; 5809 } 5810 else 5811 t = integer_one_node; 5812 5813 step = fd->loop.step; 5814 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 5815 OMP_CLAUSE__LOOPTEMP_); 5816 gcc_assert (innerc); 5817 n1 = OMP_CLAUSE_DECL (innerc); 5818 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); 5819 gcc_assert (innerc); 5820 n2 = OMP_CLAUSE_DECL (innerc); 5821 if (bias) 5822 { 5823 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); 5824 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); 5825 } 5826 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 5827 true, NULL_TREE, true, GSI_SAME_STMT); 5828 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 5829 true, NULL_TREE, true, GSI_SAME_STMT); 5830 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 5831 true, NULL_TREE, true, GSI_SAME_STMT); 5832 5833 tree startvar = fd->loop.v; 5834 tree endvar = NULL_TREE; 5835 5836 if (gimple_omp_for_combined_p (fd->for_stmt)) 5837 { 5838 tree clauses = gimple_omp_for_clauses (inner_stmt); 5839 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 5840 gcc_assert (innerc); 5841 startvar = OMP_CLAUSE_DECL (innerc); 5842 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 5843 OMP_CLAUSE__LOOPTEMP_); 5844 gcc_assert (innerc); 5845 endvar = OMP_CLAUSE_DECL (innerc); 5846 } 5847 t = fold_convert (TREE_TYPE (startvar), n1); 5848 t = force_gimple_operand_gsi (&gsi, t, 5849 DECL_P (startvar) 5850 && TREE_ADDRESSABLE (startvar), 5851 NULL_TREE, false, 
GSI_CONTINUE_LINKING); 5852 gimple *assign_stmt = gimple_build_assign (startvar, t); 5853 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5854 5855 t = fold_convert (TREE_TYPE (startvar), n2); 5856 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 5857 false, GSI_CONTINUE_LINKING); 5858 if (endvar) 5859 { 5860 assign_stmt = gimple_build_assign (endvar, e); 5861 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5862 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 5863 assign_stmt = gimple_build_assign (fd->loop.v, e); 5864 else 5865 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 5866 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 5867 } 5868 if (fd->collapse > 1) 5869 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 5870 5871 if (!broken_loop) 5872 { 5873 /* The code controlling the sequential loop replaces the 5874 GIMPLE_OMP_CONTINUE. */ 5875 gsi = gsi_last_nondebug_bb (cont_bb); 5876 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 5877 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 5878 vmain = gimple_omp_continue_control_use (cont_stmt); 5879 vback = gimple_omp_continue_control_def (cont_stmt); 5880 5881 if (!gimple_omp_for_combined_p (fd->for_stmt)) 5882 { 5883 if (POINTER_TYPE_P (type)) 5884 t = fold_build_pointer_plus (vmain, step); 5885 else 5886 t = fold_build2 (PLUS_EXPR, type, vmain, step); 5887 t = force_gimple_operand_gsi (&gsi, t, 5888 DECL_P (vback) 5889 && TREE_ADDRESSABLE (vback), 5890 NULL_TREE, true, GSI_SAME_STMT); 5891 assign_stmt = gimple_build_assign (vback, t); 5892 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 5893 5894 t = build2 (fd->loop.cond_code, boolean_type_node, 5895 DECL_P (vback) && TREE_ADDRESSABLE (vback) 5896 ? t : vback, e); 5897 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 5898 } 5899 5900 /* Remove the GIMPLE_OMP_CONTINUE statement. 
*/ 5901 gsi_remove (&gsi, true); 5902 5903 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 5904 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 5905 } 5906 5907 /* Remove the GIMPLE_OMP_FOR statement. */ 5908 gsi = gsi_for_stmt (fd->for_stmt); 5909 gsi_remove (&gsi, true); 5910 5911 /* Remove the GIMPLE_OMP_RETURN statement. */ 5912 gsi = gsi_last_nondebug_bb (exit_bb); 5913 gsi_remove (&gsi, true); 5914 5915 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always (); 5916 if (!broken_loop) 5917 remove_edge (BRANCH_EDGE (entry_bb)); 5918 else 5919 { 5920 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); 5921 region->outer->cont = NULL; 5922 } 5923 5924 /* Connect all the blocks. */ 5925 if (!broken_loop) 5926 { 5927 ep = find_edge (cont_bb, body_bb); 5928 if (gimple_omp_for_combined_p (fd->for_stmt)) 5929 { 5930 remove_edge (ep); 5931 ep = NULL; 5932 } 5933 else if (fd->collapse > 1) 5934 { 5935 remove_edge (ep); 5936 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 5937 } 5938 else 5939 ep->flags = EDGE_TRUE_VALUE; 5940 find_edge (cont_bb, fin_bb)->flags 5941 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 5942 } 5943 5944 set_immediate_dominator (CDI_DOMINATORS, body_bb, 5945 recompute_dominator (CDI_DOMINATORS, body_bb)); 5946 if (!broken_loop) 5947 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 5948 recompute_dominator (CDI_DOMINATORS, fin_bb)); 5949 5950 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) 5951 { 5952 class loop *loop = alloc_loop (); 5953 loop->header = body_bb; 5954 if (collapse_bb == NULL) 5955 loop->latch = cont_bb; 5956 add_loop (loop, body_bb->loop_father); 5957 } 5958} 5959 5960/* A subroutine of expand_omp_for. Generate code for an OpenACC 5961 partitioned loop. The lowering here is abstracted, in that the 5962 loop parameters are passed through internal functions, which are 5963 further lowered by oacc_device_lower, once we get to the target 5964 compiler. 
The loop is of the form: 5965 5966 for (V = B; V LTGT E; V += S) {BODY} 5967 5968 where LTGT is < or >. We may have a specified chunking size, CHUNKING 5969 (constant 0 for no chunking) and we will have a GWV partitioning 5970 mask, specifying dimensions over which the loop is to be 5971 partitioned (see note below). We generate code that looks like 5972 (this ignores tiling): 5973 5974 <entry_bb> [incoming FALL->body, BRANCH->exit] 5975 typedef signedintify (typeof (V)) T; // underlying signed integral type 5976 T range = E - B; 5977 T chunk_no = 0; 5978 T DIR = LTGT == '<' ? +1 : -1; 5979 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 5980 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 5981 5982 <head_bb> [created by splitting end of entry_bb] 5983 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 5984 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 5985 if (!(offset LTGT bound)) goto bottom_bb; 5986 5987 <body_bb> [incoming] 5988 V = B + offset; 5989 {BODY} 5990 5991 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 5992 offset += step; 5993 if (offset LTGT bound) goto body_bb; [*] 5994 5995 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 5996 chunk_no++; 5997 if (chunk < chunk_max) goto head_bb; 5998 5999 <exit_bb> [incoming] 6000 V = B + ((range -/+ 1) / S +/- 1) * S [*] 6001 6002 [*] Needed if V live at end of loop. 
*/ 6003 6004static void 6005expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) 6006{ 6007 bool is_oacc_kernels_parallelized 6008 = (lookup_attribute ("oacc kernels parallelized", 6009 DECL_ATTRIBUTES (current_function_decl)) != NULL); 6010 { 6011 bool is_oacc_kernels 6012 = (lookup_attribute ("oacc kernels", 6013 DECL_ATTRIBUTES (current_function_decl)) != NULL); 6014 if (is_oacc_kernels_parallelized) 6015 gcc_checking_assert (is_oacc_kernels); 6016 } 6017 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized); 6018 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are 6019 for SSA specifics, and some are for 'parloops' OpenACC 6020 'kernels'-parallelized specifics. */ 6021 6022 tree v = fd->loop.v; 6023 enum tree_code cond_code = fd->loop.cond_code; 6024 enum tree_code plus_code = PLUS_EXPR; 6025 6026 tree chunk_size = integer_minus_one_node; 6027 tree gwv = integer_zero_node; 6028 tree iter_type = TREE_TYPE (v); 6029 tree diff_type = iter_type; 6030 tree plus_type = iter_type; 6031 struct oacc_collapse *counts = NULL; 6032 6033 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) 6034 == GF_OMP_FOR_KIND_OACC_LOOP); 6035 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); 6036 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); 6037 6038 if (POINTER_TYPE_P (iter_type)) 6039 { 6040 plus_code = POINTER_PLUS_EXPR; 6041 plus_type = sizetype; 6042 } 6043 for (int ix = fd->collapse; ix--;) 6044 { 6045 tree diff_type2 = TREE_TYPE (fd->loops[ix].step); 6046 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2)) 6047 diff_type = diff_type2; 6048 } 6049 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) 6050 diff_type = signed_type_for (diff_type); 6051 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) 6052 diff_type = integer_type_node; 6053 6054 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ 6055 basic_block exit_bb = region->exit; /* BB ending in 
OMP_RETURN */ 6056 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ 6057 basic_block bottom_bb = NULL; 6058 6059 /* entry_bb has two successors; the branch edge is to the exit 6060 block, fallthrough edge to body. */ 6061 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 6062 && BRANCH_EDGE (entry_bb)->dest == exit_bb); 6063 6064 /* If cont_bb non-NULL, it has 2 successors. The branch successor is 6065 body_bb, or to a block whose only successor is the body_bb. Its 6066 fallthrough successor is the final block (same as the branch 6067 successor of the entry_bb). */ 6068 if (cont_bb) 6069 { 6070 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; 6071 basic_block bed = BRANCH_EDGE (cont_bb)->dest; 6072 6073 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); 6074 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); 6075 } 6076 else 6077 gcc_assert (!gimple_in_ssa_p (cfun)); 6078 6079 /* The exit block only has entry_bb and cont_bb as predecessors. */ 6080 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); 6081 6082 tree chunk_no; 6083 tree chunk_max = NULL_TREE; 6084 tree bound, offset; 6085 tree step = create_tmp_var (diff_type, ".step"); 6086 bool up = cond_code == LT_EXPR; 6087 tree dir = build_int_cst (diff_type, up ? +1 : -1); 6088 bool chunking = !gimple_in_ssa_p (cfun); 6089 bool negating; 6090 6091 /* Tiling vars. */ 6092 tree tile_size = NULL_TREE; 6093 tree element_s = NULL_TREE; 6094 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE; 6095 basic_block elem_body_bb = NULL; 6096 basic_block elem_cont_bb = NULL; 6097 6098 /* SSA instances. */ 6099 tree offset_incr = NULL_TREE; 6100 tree offset_init = NULL_TREE; 6101 6102 gimple_stmt_iterator gsi; 6103 gassign *ass; 6104 gcall *call; 6105 gimple *stmt; 6106 tree expr; 6107 location_t loc; 6108 edge split, be, fte; 6109 6110 /* Split the end of entry_bb to create head_bb. 
*/ 6111 split = split_block (entry_bb, last_stmt (entry_bb)); 6112 basic_block head_bb = split->dest; 6113 entry_bb = split->src; 6114 6115 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ 6116 gsi = gsi_last_nondebug_bb (entry_bb); 6117 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); 6118 loc = gimple_location (for_stmt); 6119 6120 if (gimple_in_ssa_p (cfun)) 6121 { 6122 offset_init = gimple_omp_for_index (for_stmt, 0); 6123 gcc_assert (integer_zerop (fd->loop.n1)); 6124 /* The SSA parallelizer does gang parallelism. */ 6125 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); 6126 } 6127 6128 if (fd->collapse > 1 || fd->tiling) 6129 { 6130 gcc_assert (!gimple_in_ssa_p (cfun) && up); 6131 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); 6132 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type, 6133 TREE_TYPE (fd->loop.n2), loc); 6134 6135 if (SSA_VAR_P (fd->loop.n2)) 6136 { 6137 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, 6138 true, GSI_SAME_STMT); 6139 ass = gimple_build_assign (fd->loop.n2, total); 6140 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6141 } 6142 } 6143 6144 tree b = fd->loop.n1; 6145 tree e = fd->loop.n2; 6146 tree s = fd->loop.step; 6147 6148 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); 6149 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); 6150 6151 /* Convert the step, avoiding possible unsigned->signed overflow. 
*/ 6152 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); 6153 if (negating) 6154 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); 6155 s = fold_convert (diff_type, s); 6156 if (negating) 6157 s = fold_build1 (NEGATE_EXPR, diff_type, s); 6158 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); 6159 6160 if (!chunking) 6161 chunk_size = integer_zero_node; 6162 expr = fold_convert (diff_type, chunk_size); 6163 chunk_size = force_gimple_operand_gsi (&gsi, expr, true, 6164 NULL_TREE, true, GSI_SAME_STMT); 6165 6166 if (fd->tiling) 6167 { 6168 /* Determine the tile size and element step, 6169 modify the outer loop step size. */ 6170 tile_size = create_tmp_var (diff_type, ".tile_size"); 6171 expr = build_int_cst (diff_type, 1); 6172 for (int ix = 0; ix < fd->collapse; ix++) 6173 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr); 6174 expr = force_gimple_operand_gsi (&gsi, expr, true, 6175 NULL_TREE, true, GSI_SAME_STMT); 6176 ass = gimple_build_assign (tile_size, expr); 6177 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6178 6179 element_s = create_tmp_var (diff_type, ".element_s"); 6180 ass = gimple_build_assign (element_s, s); 6181 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6182 6183 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size); 6184 s = force_gimple_operand_gsi (&gsi, expr, true, 6185 NULL_TREE, true, GSI_SAME_STMT); 6186 } 6187 6188 /* Determine the range, avoiding possible unsigned->signed overflow. */ 6189 negating = !up && TYPE_UNSIGNED (iter_type); 6190 expr = fold_build2 (MINUS_EXPR, plus_type, 6191 fold_convert (plus_type, negating ? b : e), 6192 fold_convert (plus_type, negating ? 
e : b)); 6193 expr = fold_convert (diff_type, expr); 6194 if (negating) 6195 expr = fold_build1 (NEGATE_EXPR, diff_type, expr); 6196 tree range = force_gimple_operand_gsi (&gsi, expr, true, 6197 NULL_TREE, true, GSI_SAME_STMT); 6198 6199 chunk_no = build_int_cst (diff_type, 0); 6200 if (chunking) 6201 { 6202 gcc_assert (!gimple_in_ssa_p (cfun)); 6203 6204 expr = chunk_no; 6205 chunk_max = create_tmp_var (diff_type, ".chunk_max"); 6206 chunk_no = create_tmp_var (diff_type, ".chunk_no"); 6207 6208 ass = gimple_build_assign (chunk_no, expr); 6209 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6210 6211 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 6212 build_int_cst (integer_type_node, 6213 IFN_GOACC_LOOP_CHUNKS), 6214 dir, range, s, chunk_size, gwv); 6215 gimple_call_set_lhs (call, chunk_max); 6216 gimple_set_location (call, loc); 6217 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6218 } 6219 else 6220 chunk_size = chunk_no; 6221 6222 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, 6223 build_int_cst (integer_type_node, 6224 IFN_GOACC_LOOP_STEP), 6225 dir, range, s, chunk_size, gwv); 6226 gimple_call_set_lhs (call, step); 6227 gimple_set_location (call, loc); 6228 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6229 6230 /* Remove the GIMPLE_OMP_FOR. */ 6231 gsi_remove (&gsi, true); 6232 6233 /* Fixup edges from head_bb. 
*/ 6234 be = BRANCH_EDGE (head_bb); 6235 fte = FALLTHRU_EDGE (head_bb); 6236 be->flags |= EDGE_FALSE_VALUE; 6237 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 6238 6239 basic_block body_bb = fte->dest; 6240 6241 if (gimple_in_ssa_p (cfun)) 6242 { 6243 gsi = gsi_last_nondebug_bb (cont_bb); 6244 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 6245 6246 offset = gimple_omp_continue_control_use (cont_stmt); 6247 offset_incr = gimple_omp_continue_control_def (cont_stmt); 6248 } 6249 else 6250 { 6251 offset = create_tmp_var (diff_type, ".offset"); 6252 offset_init = offset_incr = offset; 6253 } 6254 bound = create_tmp_var (TREE_TYPE (offset), ".bound"); 6255 6256 /* Loop offset & bound go into head_bb. */ 6257 gsi = gsi_start_bb (head_bb); 6258 6259 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 6260 build_int_cst (integer_type_node, 6261 IFN_GOACC_LOOP_OFFSET), 6262 dir, range, s, 6263 chunk_size, gwv, chunk_no); 6264 gimple_call_set_lhs (call, offset_init); 6265 gimple_set_location (call, loc); 6266 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 6267 6268 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, 6269 build_int_cst (integer_type_node, 6270 IFN_GOACC_LOOP_BOUND), 6271 dir, range, s, 6272 chunk_size, gwv, offset_init); 6273 gimple_call_set_lhs (call, bound); 6274 gimple_set_location (call, loc); 6275 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); 6276 6277 expr = build2 (cond_code, boolean_type_node, offset_init, bound); 6278 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 6279 GSI_CONTINUE_LINKING); 6280 6281 /* V assignment goes into body_bb. 
*/ 6282 if (!gimple_in_ssa_p (cfun)) 6283 { 6284 gsi = gsi_start_bb (body_bb); 6285 6286 expr = build2 (plus_code, iter_type, b, 6287 fold_convert (plus_type, offset)); 6288 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6289 true, GSI_SAME_STMT); 6290 ass = gimple_build_assign (v, expr); 6291 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6292 6293 if (fd->collapse > 1 || fd->tiling) 6294 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type); 6295 6296 if (fd->tiling) 6297 { 6298 /* Determine the range of the element loop -- usually simply 6299 the tile_size, but could be smaller if the final 6300 iteration of the outer loop is a partial tile. */ 6301 tree e_range = create_tmp_var (diff_type, ".e_range"); 6302 6303 expr = build2 (MIN_EXPR, diff_type, 6304 build2 (MINUS_EXPR, diff_type, bound, offset), 6305 build2 (MULT_EXPR, diff_type, tile_size, 6306 element_s)); 6307 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6308 true, GSI_SAME_STMT); 6309 ass = gimple_build_assign (e_range, expr); 6310 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6311 6312 /* Determine bound, offset & step of inner loop. */ 6313 e_bound = create_tmp_var (diff_type, ".e_bound"); 6314 e_offset = create_tmp_var (diff_type, ".e_offset"); 6315 e_step = create_tmp_var (diff_type, ".e_step"); 6316 6317 /* Mark these as element loops. */ 6318 tree t, e_gwv = integer_minus_one_node; 6319 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. 
*/ 6320 6321 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); 6322 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 6323 element_s, chunk, e_gwv, chunk); 6324 gimple_call_set_lhs (call, e_offset); 6325 gimple_set_location (call, loc); 6326 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6327 6328 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); 6329 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range, 6330 element_s, chunk, e_gwv, e_offset); 6331 gimple_call_set_lhs (call, e_bound); 6332 gimple_set_location (call, loc); 6333 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6334 6335 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP); 6336 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range, 6337 element_s, chunk, e_gwv); 6338 gimple_call_set_lhs (call, e_step); 6339 gimple_set_location (call, loc); 6340 gsi_insert_before (&gsi, call, GSI_SAME_STMT); 6341 6342 /* Add test and split block. */ 6343 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 6344 stmt = gimple_build_cond_empty (expr); 6345 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6346 split = split_block (body_bb, stmt); 6347 elem_body_bb = split->dest; 6348 if (cont_bb == body_bb) 6349 cont_bb = elem_body_bb; 6350 body_bb = split->src; 6351 6352 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; 6353 6354 /* Add a dummy exit for the tiled block when cont_bb is missing. */ 6355 if (cont_bb == NULL) 6356 { 6357 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE); 6358 e->probability = profile_probability::even (); 6359 split->probability = profile_probability::even (); 6360 } 6361 6362 /* Initialize the user's loop vars. */ 6363 gsi = gsi_start_bb (elem_body_bb); 6364 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset, 6365 diff_type); 6366 } 6367 } 6368 6369 /* Loop increment goes into cont_bb. 
If this is not a loop, we 6370 will have spawned threads as if it was, and each one will 6371 execute one iteration. The specification is not explicit about 6372 whether such constructs are ill-formed or not, and they can 6373 occur, especially when noreturn routines are involved. */ 6374 if (cont_bb) 6375 { 6376 gsi = gsi_last_nondebug_bb (cont_bb); 6377 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 6378 loc = gimple_location (cont_stmt); 6379 6380 if (fd->tiling) 6381 { 6382 /* Insert element loop increment and test. */ 6383 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step); 6384 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6385 true, GSI_SAME_STMT); 6386 ass = gimple_build_assign (e_offset, expr); 6387 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6388 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound); 6389 6390 stmt = gimple_build_cond_empty (expr); 6391 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6392 split = split_block (cont_bb, stmt); 6393 elem_cont_bb = split->src; 6394 cont_bb = split->dest; 6395 6396 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6397 split->probability = profile_probability::unlikely ().guessed (); 6398 edge latch_edge 6399 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE); 6400 latch_edge->probability = profile_probability::likely ().guessed (); 6401 6402 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE); 6403 skip_edge->probability = profile_probability::unlikely ().guessed (); 6404 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx); 6405 loop_entry_edge->probability 6406 = profile_probability::likely ().guessed (); 6407 6408 gsi = gsi_for_stmt (cont_stmt); 6409 } 6410 6411 /* Increment offset. 
*/ 6412 if (gimple_in_ssa_p (cfun)) 6413 expr = build2 (plus_code, iter_type, offset, 6414 fold_convert (plus_type, step)); 6415 else 6416 expr = build2 (PLUS_EXPR, diff_type, offset, step); 6417 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6418 true, GSI_SAME_STMT); 6419 ass = gimple_build_assign (offset_incr, expr); 6420 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6421 expr = build2 (cond_code, boolean_type_node, offset_incr, bound); 6422 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); 6423 6424 /* Remove the GIMPLE_OMP_CONTINUE. */ 6425 gsi_remove (&gsi, true); 6426 6427 /* Fixup edges from cont_bb. */ 6428 be = BRANCH_EDGE (cont_bb); 6429 fte = FALLTHRU_EDGE (cont_bb); 6430 be->flags |= EDGE_TRUE_VALUE; 6431 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6432 6433 if (chunking) 6434 { 6435 /* Split the beginning of exit_bb to make bottom_bb. We 6436 need to insert a nop at the start, because splitting is 6437 after a stmt, not before. */ 6438 gsi = gsi_start_bb (exit_bb); 6439 stmt = gimple_build_nop (); 6440 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); 6441 split = split_block (exit_bb, stmt); 6442 bottom_bb = split->src; 6443 exit_bb = split->dest; 6444 gsi = gsi_last_bb (bottom_bb); 6445 6446 /* Chunk increment and test goes into bottom_bb. */ 6447 expr = build2 (PLUS_EXPR, diff_type, chunk_no, 6448 build_int_cst (diff_type, 1)); 6449 ass = gimple_build_assign (chunk_no, expr); 6450 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); 6451 6452 /* Chunk test at end of bottom_bb. */ 6453 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); 6454 gsi_insert_after (&gsi, gimple_build_cond_empty (expr), 6455 GSI_CONTINUE_LINKING); 6456 6457 /* Fixup edges from bottom_bb. 
*/ 6458 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; 6459 split->probability = profile_probability::unlikely ().guessed (); 6460 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); 6461 latch_edge->probability = profile_probability::likely ().guessed (); 6462 } 6463 } 6464 6465 gsi = gsi_last_nondebug_bb (exit_bb); 6466 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 6467 loc = gimple_location (gsi_stmt (gsi)); 6468 6469 if (!gimple_in_ssa_p (cfun)) 6470 { 6471 /* Insert the final value of V, in case it is live. This is the 6472 value for the only thread that survives past the join. */ 6473 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); 6474 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); 6475 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); 6476 expr = fold_build2 (MULT_EXPR, diff_type, expr, s); 6477 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); 6478 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, 6479 true, GSI_SAME_STMT); 6480 ass = gimple_build_assign (v, expr); 6481 gsi_insert_before (&gsi, ass, GSI_SAME_STMT); 6482 } 6483 6484 /* Remove the OMP_RETURN. */ 6485 gsi_remove (&gsi, true); 6486 6487 if (cont_bb) 6488 { 6489 /* We now have one, two or three nested loops. Update the loop 6490 structures. 
*/ 6491 class loop *parent = entry_bb->loop_father; 6492 class loop *body = body_bb->loop_father; 6493 6494 if (chunking) 6495 { 6496 class loop *chunk_loop = alloc_loop (); 6497 chunk_loop->header = head_bb; 6498 chunk_loop->latch = bottom_bb; 6499 add_loop (chunk_loop, parent); 6500 parent = chunk_loop; 6501 } 6502 else if (parent != body) 6503 { 6504 gcc_assert (body->header == body_bb); 6505 gcc_assert (body->latch == cont_bb 6506 || single_pred (body->latch) == cont_bb); 6507 parent = NULL; 6508 } 6509 6510 if (parent) 6511 { 6512 class loop *body_loop = alloc_loop (); 6513 body_loop->header = body_bb; 6514 body_loop->latch = cont_bb; 6515 add_loop (body_loop, parent); 6516 6517 if (fd->tiling) 6518 { 6519 /* Insert tiling's element loop. */ 6520 class loop *inner_loop = alloc_loop (); 6521 inner_loop->header = elem_body_bb; 6522 inner_loop->latch = elem_cont_bb; 6523 add_loop (inner_loop, body_loop); 6524 } 6525 } 6526 } 6527} 6528 6529/* Expand the OMP loop defined by REGION. */ 6530 6531static void 6532expand_omp_for (struct omp_region *region, gimple *inner_stmt) 6533{ 6534 struct omp_for_data fd; 6535 struct omp_for_data_loop *loops; 6536 6537 loops 6538 = (struct omp_for_data_loop *) 6539 alloca (gimple_omp_for_collapse (last_stmt (region->entry)) 6540 * sizeof (struct omp_for_data_loop)); 6541 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), 6542 &fd, loops); 6543 region->sched_kind = fd.sched_kind; 6544 region->sched_modifiers = fd.sched_modifiers; 6545 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0; 6546 6547 gcc_assert (EDGE_COUNT (region->entry->succs) == 2); 6548 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 6549 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; 6550 if (region->cont) 6551 { 6552 gcc_assert (EDGE_COUNT (region->cont->succs) == 2); 6553 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 6554 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; 6555 } 6556 else 6557 /* If 
there isn't a continue then this is a degerate case where 6558 the introduction of abnormal edges during lowering will prevent 6559 original loops from being detected. Fix that up. */ 6560 loops_state_set (LOOPS_NEED_FIXUP); 6561 6562 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD) 6563 expand_omp_simd (region, &fd); 6564 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) 6565 { 6566 gcc_assert (!inner_stmt); 6567 expand_oacc_for (region, &fd); 6568 } 6569 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) 6570 { 6571 if (gimple_omp_for_combined_into_p (fd.for_stmt)) 6572 expand_omp_taskloop_for_inner (region, &fd, inner_stmt); 6573 else 6574 expand_omp_taskloop_for_outer (region, &fd, inner_stmt); 6575 } 6576 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC 6577 && !fd.have_ordered) 6578 { 6579 if (fd.chunk_size == NULL) 6580 expand_omp_for_static_nochunk (region, &fd, inner_stmt); 6581 else 6582 expand_omp_for_static_chunk (region, &fd, inner_stmt); 6583 } 6584 else 6585 { 6586 int fn_index, start_ix, next_ix; 6587 unsigned HOST_WIDE_INT sched = 0; 6588 tree sched_arg = NULL_TREE; 6589 6590 gcc_assert (gimple_omp_for_kind (fd.for_stmt) 6591 == GF_OMP_FOR_KIND_FOR); 6592 if (fd.chunk_size == NULL 6593 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) 6594 fd.chunk_size = integer_zero_node; 6595 switch (fd.sched_kind) 6596 { 6597 case OMP_CLAUSE_SCHEDULE_RUNTIME: 6598 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0 6599 && fd.lastprivate_conditional == 0) 6600 { 6601 gcc_assert (!fd.have_ordered); 6602 fn_index = 6; 6603 sched = 4; 6604 } 6605 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6606 && !fd.have_ordered 6607 && fd.lastprivate_conditional == 0) 6608 fn_index = 7; 6609 else 6610 { 6611 fn_index = 3; 6612 sched = (HOST_WIDE_INT_1U << 31); 6613 } 6614 break; 6615 case OMP_CLAUSE_SCHEDULE_DYNAMIC: 6616 case OMP_CLAUSE_SCHEDULE_GUIDED: 6617 if ((fd.sched_modifiers 
& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 6618 && !fd.have_ordered 6619 && fd.lastprivate_conditional == 0) 6620 { 6621 fn_index = 3 + fd.sched_kind; 6622 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6623 break; 6624 } 6625 fn_index = fd.sched_kind; 6626 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2; 6627 sched += (HOST_WIDE_INT_1U << 31); 6628 break; 6629 case OMP_CLAUSE_SCHEDULE_STATIC: 6630 gcc_assert (fd.have_ordered); 6631 fn_index = 0; 6632 sched = (HOST_WIDE_INT_1U << 31) + 1; 6633 break; 6634 default: 6635 gcc_unreachable (); 6636 } 6637 if (!fd.ordered) 6638 fn_index += fd.have_ordered * 8; 6639 if (fd.ordered) 6640 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; 6641 else 6642 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; 6643 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; 6644 if (fd.have_reductemp || fd.have_pointer_condtemp) 6645 { 6646 if (fd.ordered) 6647 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START; 6648 else if (fd.have_ordered) 6649 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START; 6650 else 6651 start_ix = (int)BUILT_IN_GOMP_LOOP_START; 6652 sched_arg = build_int_cstu (long_integer_type_node, sched); 6653 if (!fd.chunk_size) 6654 fd.chunk_size = integer_zero_node; 6655 } 6656 if (fd.iter_type == long_long_unsigned_type_node) 6657 { 6658 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START 6659 - (int)BUILT_IN_GOMP_LOOP_STATIC_START); 6660 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT 6661 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); 6662 } 6663 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, 6664 (enum built_in_function) next_ix, sched_arg, 6665 inner_stmt); 6666 } 6667 6668 if (gimple_in_ssa_p (cfun)) 6669 update_ssa (TODO_update_ssa_only_virtuals); 6670} 6671 6672/* Expand code for an OpenMP sections directive. 
   In pseudo code, we generate

	v = GOMP_sections_start (n);
    L0:
	switch (v)
	  {
	  case 0:
	    goto L2;
	  case 1:
	    section 1;
	    goto L1;
	  case 2:
	    ...
	  case n:
	    ...
	  default:
	    abort ();
	  }
    L1:
	v = GOMP_sections_next ();
	goto L0;
    L2:
	reduction;

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.  Pick the first successor
	 block of L0 that is not a GIMPLE_OMP_SECTION as the "done" label.  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_nondebug_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  tree clauses = gimple_omp_sections_clauses (sections_stmt);
  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
  tree cond_var = NULL_TREE;
  if (reductmp || condtmp)
    {
      /* With task reductions and/or conditional lastprivate we must use
	 GOMP_sections2_start, which additionally takes the reduction
	 array and the conditional lastprivate buffer.  */
      tree reductions = null_pointer_node, mem = null_pointer_node;
      tree memv = NULL_TREE, condtemp = NULL_TREE;
      gimple_stmt_iterator gsi = gsi_none ();
      gimple *g = NULL;
      if (reductmp)
	{
	  reductions = OMP_CLAUSE_DECL (reductmp);
	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
	  g = SSA_NAME_DEF_STMT (reductions);
	  reductions = gimple_assign_rhs1 (g);
	  OMP_CLAUSE_DECL (reductmp) = reductions;
	  gsi = gsi_for_stmt (g);
	}
      else
	gsi = si;
      if (condtmp)
	{
	  condtemp = OMP_CLAUSE_DECL (condtmp);
	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
				    OMP_CLAUSE__CONDTEMP_);
	  cond_var = OMP_CLAUSE_DECL (c);
	  tree type = TREE_TYPE (condtemp);
	  memv = create_tmp_var (type);
	  TREE_ADDRESSABLE (memv) = 1;
	  /* Count the conditional lastprivate clauses to size the
	     per-section flag buffer.  */
	  unsigned cnt = 0;
	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
	      ++cnt;
	  unsigned HOST_WIDE_INT sz
	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
				   false);
	  mem = build_fold_addr_expr (memv);
	}
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
      stmt = gimple_build_call (u, 3, t, reductions, mem);
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      if (condtmp)
	{
	  expand_omp_build_assign (&gsi, condtemp, memv, false);
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vin, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&gsi, cond_var, t, false);
	}
      if (reductmp)
	{
	  gsi_remove (&gsi, true);
	  release_ssa_name (gimple_assign_lhs (g));
	}
    }
  else if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  if (!reductmp && !condtmp)
    {
      gimple_call_set_lhs (stmt, vin);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
    }
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      if (cond_var)
	{
	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
			   vnext, build_one_cst (TREE_TYPE (cond_var)));
	  expand_omp_build_assign (&si, cond_var, t, false);
	}
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.
*/

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      /* A barrier is emitted at the exit unless a nowait clause was
	 present on the directive.  */
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
    {
      /* A host teams construct is outlined like parallel/task regions
	 instead of being a plain marker removal.  */
      expand_omp_taskreg (region);
      return;
    }
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* Translate enum omp_memory_order to enum memmodel.  The two enums
   are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
   is 0.  */

static enum memmodel
omp_memory_order_to_memmodel (enum omp_memory_order mo)
{
  switch (mo)
    {
    case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
    case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
    case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
    case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
    case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
    default: gcc_unreachable ();
    }
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.  */

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.
     */

  /* Map the element size INDEX to the matching __atomic_load_N builtin.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 2, addr, mo);
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.  */

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.
     */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  enum omp_memory_order omo
    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
  enum memmodel mo = omp_memory_order_to_memmodel (omo);
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL, mo));

  if (need_old || need_new)
    {
      lhs = need_old ? loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  /* Also remove the now-dead fetch-op assignment statement.  */
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer an __atomic_load_N for the initial read; fall back to a plain
     MEM_REF if the builtin is unavailable.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  /* Register the retry loop in the loop tree.  */
  class loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.
   Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;
*/

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Build *ADDR with the type of LOADED_VAL, converting the pointer
     operand so the MEM_REF is well-typed.  */
  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}

/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes (1 to 16 bytes).  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.
	     */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  class loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.  */
  unsigned int nr_outer_loops = 0;
  class loop *single_outer = NULL;
  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      /* Only count loops whose header lies within the region.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Give up if any nested loop has a sibling; only a single spine of
     nested loops is handled.  */
  for (class loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.
*/ 7704 7705static void 7706grid_create_kernel_launch_attr_types (void) 7707{ 7708 if (grid_attr_trees) 7709 return; 7710 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); 7711 7712 tree dim_arr_index_type 7713 = build_index_type (build_int_cst (integer_type_node, 2)); 7714 grid_attr_trees->kernel_dim_array_type 7715 = build_array_type (uint32_type_node, dim_arr_index_type); 7716 7717 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); 7718 grid_attr_trees->kernel_lattrs_dimnum_decl 7719 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), 7720 uint32_type_node); 7721 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; 7722 7723 grid_attr_trees->kernel_lattrs_grid_decl 7724 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), 7725 grid_attr_trees->kernel_dim_array_type); 7726 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) 7727 = grid_attr_trees->kernel_lattrs_dimnum_decl; 7728 grid_attr_trees->kernel_lattrs_group_decl 7729 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), 7730 grid_attr_trees->kernel_dim_array_type); 7731 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) 7732 = grid_attr_trees->kernel_lattrs_grid_decl; 7733 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, 7734 "__gomp_kernel_launch_attributes", 7735 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); 7736} 7737 7738/* Insert before the current statement in GSI a store of VALUE to INDEX of 7739 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be 7740 of type uint32_type_node. 
 */

static void
grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
			     tree fld_decl, int index, tree value)
{
  /* Build RANGE_VAR.FLD_DECL[INDEX] and assign VALUE to it.  */
  tree ref = build4 (ARRAY_REF, uint32_type_node,
		     build3 (COMPONENT_REF,
			     grid_attr_trees->kernel_dim_array_type,
			     range_var, fld_decl, NULL_TREE),
		     build_int_cst (integer_type_node, index),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
}

/* Return a tree representation of a pointer to a structure with grid and
   work-group size information.  Statements filling that information will be
   inserted before GSI, TGT_STMT is the target statement which has the
   necessary information in it.  */

static tree
grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
				   gomp_target *tgt_stmt)
{
  grid_create_kernel_launch_attr_types ();
  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
				"__kernel_launch_attrs");

  /* Walk the _GRIDDIM_ clauses, storing each dimension's grid and group
     size into the corresponding array slot and tracking the highest
     dimension index seen.  */
  unsigned max_dim = 0;
  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
       clause;
       clause = OMP_CLAUSE_CHAIN (clause))
    {
      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
	continue;

      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
      max_dim = MAX (dim, max_dim);

      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_grid_decl,
				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
      grid_insert_store_range_dim (gsi, lattrs,
				   grid_attr_trees->kernel_lattrs_group_decl,
				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
    }

  /* Store the dimension count; at most 3 dimensions (indices 0..2) are
     supported, matching the uint32_t[3] arrays built above.  */
  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
  gcc_checking_assert (max_dim <= 2);
  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
		     GSI_SAME_STMT);
  /* The structure's address escapes into the argument array.  */
  TREE_ADDRESSABLE (lattrs) = 1;
  return build_fold_addr_expr (lattrs);
}

/* Build target argument identifier from the DEVICE identifier, value
   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */

static tree
get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
{
  /* OR together the device number, the subsequent-param flag and the value
     identifier into one integer constant.  */
  tree t = build_int_cst (integer_type_node, device);
  if (subseqent_param)
    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		     build_int_cst (integer_type_node,
				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   build_int_cst (integer_type_node, id));
  return t;
}

/* Like above but return it in type that can be directly stored as an element
   of the argument array.  */

static tree
get_target_argument_identifier (int device, bool subseqent_param, int id)
{
  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
  return fold_convert (ptr_type_node, t);
}

/* Return a target argument consisting of DEVICE identifier, value identifier
   ID, and the actual VALUE.  */

static tree
get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
			   tree value)
{
  /* Pack VALUE into the high bits (above GOMP_TARGET_ARG_VALUE_SHIFT) and
     the identifier into the low bits, then hand back a gimple value of
     pointer type suitable for the argument array.  */
  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
			fold_convert (integer_type_node, value),
			build_int_cst (unsigned_type_node,
				       GOMP_TARGET_ARG_VALUE_SHIFT));
  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
		   get_target_argument_identifier_1 (device, false, id));
  t = fold_convert (ptr_type_node, t);
  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
}

/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
   arguments.
 */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  /* Small compile-time constants fit into the packed one-element form;
     anything else needs the identifier + subsequent-parameter pair.  */
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);

  /* num_teams: clause value, or -1 meaning "not specified".  */
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  /* thread_limit: clause value, or -1 meaning "not specified".  */
  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.  */

  /* Materialize the collected arguments into a NULL-terminated array of
     pointers and return its address.  */
  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  For offloaded regions
   this outlines the body into the child function and moves its CFG there;
   in all cases it replaces the directive with a call into libgomp.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded;
  int target_kind;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  target_kind = gimple_omp_target_kind (entry_stmt);
  new_bb = region->entry;

  offloaded = is_gimple_omp_offloaded (entry_stmt);
  /* Sanity check: only the known target kinds are expected here.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
    mark_loops_in_oacc_kernels_region (region->entry, region->exit);

  /* Going on, all OpenACC compute constructs are mapped to
     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
     To distinguish between them, we attach attributes.  */
  switch (target_kind)
    {
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc parallel"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc serial"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
      break;
    default:
      /* Make sure we don't miss any.  */
      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
			     && is_gimple_omp_offloaded (entry_stmt)));
      break;
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, depend, c, clauses;
  enum built_in_function start_ix;
  unsigned int flags_i = 0;

  /* Select the libgomp/libgoacc entry point for this construct.  */
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_SERIAL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  tree device = NULL_TREE;
  location_t device_loc = UNKNOWN_LOCATION;
  tree goacc_flags = NULL_TREE;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      /* By default, no GOACC_FLAGs are set.  */
      goacc_flags = integer_zero_node;
    }
  else
    {
      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
      if (c)
	{
	  device = OMP_CLAUSE_DEVICE_ID (c);
	  device_loc = OMP_CLAUSE_LOCATION (c);
	}
      else
	{
	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
	     library choose).  */
	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
	  device_loc = gimple_location (entry_stmt);
	}

      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
      if (c)
	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
    }

  /* By default, there is no conditional.  */
  tree cond = NULL_TREE;
  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);
  /* If we found the clause 'if (cond)', build:
     OpenACC: goacc_flags = (cond ? goacc_flags
			     : flags | GOACC_FLAG_HOST_FALLBACK)
     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
  if (cond)
    {
      tree *tp;
      if (is_gimple_omp_oacc (entry_stmt))
	tp = &goacc_flags;
      else
	{
	  /* Ensure 'device' is of the correct type.  */
	  device = fold_convert_loc (device_loc, integer_type_node, device);

	  tp = &device;
	}

      cond = gimple_boolify (cond);

      /* Build a diamond: cond_bb branches to then_bb (keep *TP) or else_bb
	 (host fallback), both falling through to NEW_BB where the launch
	 call is emitted.  */
      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      tmp_var = create_tmp_var (TREE_TYPE (*tp));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, *tp);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      if (is_gimple_omp_oacc (entry_stmt))
	stmt = gimple_build_assign (tmp_var,
				    BIT_IOR_EXPR,
				    *tp,
				    build_int_cst (integer_type_node,
						   GOACC_FLAG_HOST_FALLBACK));
      else
	stmt = gimple_build_assign (tmp_var,
				    build_int_cst (integer_type_node,
						   GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      *tp = tmp_var;

      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);

      if (device != NULL_TREE)
	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					   true, GSI_SAME_STMT);
    }

  /* T1..T4: mapping count, address/size/kind vectors — or all-zero when
     there is no data to map.  */
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  if (is_gimple_omp_oacc (entry_stmt))
    {
      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
					TREE_TYPE (goacc_flags), goacc_flags);
      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
						NULL_TREE, true,
						GSI_SAME_STMT);
      args.quick_push (goacc_flags_m);
    }
  else
    args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
	{
	  tree dims = NULL_TREE;
	  unsigned int ix;

	  /* For serial constructs we set all dimensions to 1.  */
	  for (ix = GOMP_DIM_MAX; ix--;)
	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
	  oacc_replace_fn_attrib (child_fn, dims);
	}
      else
	oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
						    NULL_TREE, true,
						    GSI_SAME_STMT));

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
					   integer_type_node,
					   OMP_CLAUSE_WAIT_EXPR (c));
	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
					      GSI_SAME_STMT);
	      args.safe_push (arg);
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  if (!offloaded)
    {
      /* Non-offloading constructs keep their body in place; just delete
	 the directive itself now that the library call is in.  */
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
}

/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.
*/ 8513 8514static void 8515grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) 8516{ 8517 gimple_stmt_iterator gsi; 8518 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); 8519 gcc_checking_assert (gimple_omp_for_kind (for_stmt) 8520 == GF_OMP_FOR_KIND_GRID_LOOP); 8521 size_t collapse = gimple_omp_for_collapse (for_stmt); 8522 struct omp_for_data_loop *loops 8523 = XALLOCAVEC (struct omp_for_data_loop, 8524 gimple_omp_for_collapse (for_stmt)); 8525 struct omp_for_data fd; 8526 8527 remove_edge (BRANCH_EDGE (kfor->entry)); 8528 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; 8529 8530 gcc_assert (kfor->cont); 8531 omp_extract_for_data (for_stmt, &fd, loops); 8532 8533 gsi = gsi_start_bb (body_bb); 8534 8535 for (size_t dim = 0; dim < collapse; dim++) 8536 { 8537 tree type, itype; 8538 itype = type = TREE_TYPE (fd.loops[dim].v); 8539 if (POINTER_TYPE_P (type)) 8540 itype = signed_type_for (type); 8541 8542 tree n1 = fd.loops[dim].n1; 8543 tree step = fd.loops[dim].step; 8544 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 8545 true, NULL_TREE, true, GSI_SAME_STMT); 8546 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 8547 true, NULL_TREE, true, GSI_SAME_STMT); 8548 tree threadid; 8549 if (gimple_omp_for_grid_group_iter (for_stmt)) 8550 { 8551 gcc_checking_assert (!intra_group); 8552 threadid = build_call_expr (builtin_decl_explicit 8553 (BUILT_IN_HSA_WORKGROUPID), 1, 8554 build_int_cstu (unsigned_type_node, dim)); 8555 } 8556 else if (intra_group) 8557 threadid = build_call_expr (builtin_decl_explicit 8558 (BUILT_IN_HSA_WORKITEMID), 1, 8559 build_int_cstu (unsigned_type_node, dim)); 8560 else 8561 threadid = build_call_expr (builtin_decl_explicit 8562 (BUILT_IN_HSA_WORKITEMABSID), 1, 8563 build_int_cstu (unsigned_type_node, dim)); 8564 threadid = fold_convert (itype, threadid); 8565 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 8566 true, GSI_SAME_STMT); 8567 8568 tree 
startvar = fd.loops[dim].v; 8569 tree t = fold_build2 (MULT_EXPR, itype, threadid, step); 8570 if (POINTER_TYPE_P (type)) 8571 t = fold_build_pointer_plus (n1, t); 8572 else 8573 t = fold_build2 (PLUS_EXPR, type, t, n1); 8574 t = fold_convert (type, t); 8575 t = force_gimple_operand_gsi (&gsi, t, 8576 DECL_P (startvar) 8577 && TREE_ADDRESSABLE (startvar), 8578 NULL_TREE, true, GSI_SAME_STMT); 8579 gassign *assign_stmt = gimple_build_assign (startvar, t); 8580 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 8581 } 8582 /* Remove the omp for statement. */ 8583 gsi = gsi_last_nondebug_bb (kfor->entry); 8584 gsi_remove (&gsi, true); 8585 8586 /* Remove the GIMPLE_OMP_CONTINUE statement. */ 8587 gsi = gsi_last_nondebug_bb (kfor->cont); 8588 gcc_assert (!gsi_end_p (gsi) 8589 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); 8590 gsi_remove (&gsi, true); 8591 8592 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ 8593 gsi = gsi_last_nondebug_bb (kfor->exit); 8594 gcc_assert (!gsi_end_p (gsi) 8595 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); 8596 if (intra_group) 8597 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); 8598 gsi_remove (&gsi, true); 8599 8600 /* Fixup the much simpler CFG. */ 8601 remove_edge (find_edge (kfor->cont, body_bb)); 8602 8603 if (kfor->cont != body_bb) 8604 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); 8605 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); 8606} 8607 8608/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap 8609 argument_decls. */ 8610 8611struct grid_arg_decl_map 8612{ 8613 tree old_arg; 8614 tree new_arg; 8615}; 8616 8617/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones 8618 pertaining to kernel function. 
 */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  /* Do not walk into types or declarations; they cannot contain a use of
     the PARM_DECL being replaced.  */
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  /* Look for a GIMPLE_OMP_GRID_BODY region directly nested in TARGET.  */
  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.  */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      /* Non-gridified target: register the child function itself as the
	 HSA kernel.  */
      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  /* Unlink the kernel body region from TARGET and locate the gridified
     top-level loop region within it.  */
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  /* KFOR iterates over work-groups; first expand and unlink all
	     nested intra-group grid loops.  */
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      /* Unlink the just-expanded region from the region list.  */
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  /* Create the kernel function decl as a numbered "kernel" clone of the
     original target child function.  */
  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
							  "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* The child function has exactly one argument (asserted below); give the
     kernel its own copies of the PARM_DECL and the (void) RESULT_DECL.  */
  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      /* Skip the assignment taking the address of the sender variable.  */
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  /* Move the kernel body into the new function.  */
  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();

  return;
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.
   This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	grid_expand_target_grid_body (region);

      /* For a combined worksharing loop, remember the innermost directive
	 before the children are expanded away.  */
      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      if (region->inner)
	expand_omp (region->inner);

      /* Point diagnostics at the directive being expanded.  */
      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_SERIAL:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...
		     To be precise, target data isn't stand-alone, but
		     gimplifier put the end API call into try finally block
		     for it, so omp expansion can treat it as such.  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASK
		   && gimple_omp_task_taskwait_p (stmt))
	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
	    region = NULL;
	  else if (code == GIMPLE_OMP_TASKGROUP)
	    /* #pragma omp taskgroup isn't a stand-alone directive, but
	       gimplifier put the end API call into try finally block
	       for it, so omp expansion can treat it as such.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  if (single_tree && !parent)
    return;

  /* Recurse over all blocks dominated by BB.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.  */

static void
build_omp_regions_root (basic_block root)
{
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a tree of OMP regions.  Return the root of
   the OMP region tree.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.
 */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  /* Nothing to do if the function contains no OMP directives.  */
  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.  */

namespace {

const pass_data pass_data_expand_omp =
{
  GIMPLE_PASS, /* type */
  "ompexp", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_gimple_any, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_expand_omp : public gimple_opt_pass
{
public:
  pass_expand_omp (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *)
    {
      bool gate = ((flag_openacc != 0 || flag_openmp != 0
		    || flag_openmp_simd != 0)
		   && !seen_error ());

      /* This pass always runs, to provide PROP_gimple_eomp.
	 But often, there is nothing to do.  */
      if (!gate)
	return 0;

      return execute_expand_omp ();
    }

}; // class pass_expand_omp

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp (gcc::context *ctxt)
{
  return new pass_expand_omp (ctxt);
}

/* OMP expansion in SSA form, for operation on offloaded function bodies
   that were not expanded by the default pass.  */

namespace {

const pass_data pass_data_expand_omp_ssa =
{
  GIMPLE_PASS, /* type */
  "ompexpssa", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg | PROP_ssa, /* properties_required */
  PROP_gimple_eomp, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
};

class pass_expand_omp_ssa : public gimple_opt_pass
{
public:
  pass_expand_omp_ssa (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      /* Run only if the default ompexp pass has not already provided
	 PROP_gimple_eomp for this function.  */
      return !(fun->curr_properties & PROP_gimple_eomp);
    }
  virtual unsigned int execute (function *) { return execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.
 */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      /* Directive opens a new region; its matching GIMPLE_OMP_RETURN
	 will close it below.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_TASKGROUP:
      /* Treated as stand-alone here; pop the region immediately.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TASK:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* taskwait with depend clauses is a stand-alone directive.  */
      if (gimple_omp_task_taskwait_p (last))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* ordered depend(...) is stand-alone as well.  */
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_SERIAL:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  /* Stand-alone target kinds: pop the region right away.  */
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      /* Its edges are wired up when the enclosing sections region's
	 GIMPLE_OMP_CONTINUE is reached.  */
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Publish the (possibly changed) current region and the index of its
     entry block to the caller in tree-cfg.c.  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"