omp-expand.c revision 1.3
1/* Expansion pass for OMP directives. Outlines regions of certain OMP 2 directives to separate functions, converts others into explicit calls to the 3 runtime library (libgomp) and so forth 4 5Copyright (C) 2005-2018 Free Software Foundation, Inc. 6 7This file is part of GCC. 8 9GCC is free software; you can redistribute it and/or modify it under 10the terms of the GNU General Public License as published by the Free 11Software Foundation; either version 3, or (at your option) any later 12version. 13 14GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15WARRANTY; without even the implied warranty of MERCHANTABILITY or 16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17for more details. 18 19You should have received a copy of the GNU General Public License 20along with GCC; see the file COPYING3. If not see 21<http://www.gnu.org/licenses/>. */ 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "memmodel.h" 27#include "backend.h" 28#include "target.h" 29#include "rtl.h" 30#include "tree.h" 31#include "gimple.h" 32#include "cfghooks.h" 33#include "tree-pass.h" 34#include "ssa.h" 35#include "optabs.h" 36#include "cgraph.h" 37#include "pretty-print.h" 38#include "diagnostic-core.h" 39#include "fold-const.h" 40#include "stor-layout.h" 41#include "cfganal.h" 42#include "internal-fn.h" 43#include "gimplify.h" 44#include "gimple-iterator.h" 45#include "gimplify-me.h" 46#include "gimple-walk.h" 47#include "tree-cfg.h" 48#include "tree-into-ssa.h" 49#include "tree-ssa.h" 50#include "splay-tree.h" 51#include "cfgloop.h" 52#include "omp-general.h" 53#include "omp-offload.h" 54#include "tree-cfgcleanup.h" 55#include "symbol-summary.h" 56#include "gomp-constants.h" 57#include "gimple-pretty-print.h" 58#include "hsa-common.h" 59#include "stringpool.h" 60#include "attribs.h" 61 62/* OMP region information. 
Every parallel and workshare
   directive is enclosed between two markers, the OMP_* directive
   and a corresponding GIMPLE_OMP_RETURN statement.  */

struct omp_region
{
  /* The enclosing region.  */
  struct omp_region *outer;

  /* First child region.  */
  struct omp_region *inner;

  /* Next peer region.  */
  struct omp_region *next;

  /* Block containing the omp directive as its last stmt.  */
  basic_block entry;

  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
  basic_block exit;

  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
  basic_block cont;

  /* If this is a combined parallel+workshare region, this is a list
     of additional arguments needed by the combined parallel+workshare
     library call.  */
  vec<tree, va_gc> *ws_args;

  /* The code for the omp directive of this region.  */
  enum gimple_code type;

  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
  enum omp_clause_schedule_kind sched_kind;

  /* Schedule modifiers.  */
  unsigned char sched_modifiers;

  /* True if this is a combined parallel+workshare region.  */
  bool is_combined_parallel;

  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
     a depend clause.  */
  gomp_ordered *ord_stmt;
};

/* Root of the omp_region tree built for the current function; NULL
   until the first region is recorded.  */
static struct omp_region *root_omp_region;

/* True once the body of at least one outlined child function has been
   dumped to the dump file.  */
static bool omp_any_child_fn_dumped;

static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
				     bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);

/* Return true if REGION is a combined parallel+workshare region.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  return region->is_combined_parallel;
}

/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
   is the immediate dominator of PAR_ENTRY_BB, return true if there
   are no data dependencies that would prevent expanding the parallel
   directive at PAR_ENTRY_BB as a combined parallel+workshare region.

   When expanding a combined parallel+workshare region, the call to
   the child function may need additional arguments in the case of
   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
   computed out of variables passed in from the parent to the child
   via 'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   Is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   When we outline the parallel region, the call to the child function
   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
   that value is computed *after* the call site.  So, in principle we
   cannot do the transformation.

   To see whether the code in WS_ENTRY_BB blocks the combined
   parallel+workshare call, we collect all the variables used in the
   GIMPLE_OMP_FOR header check whether they appear on the LHS of any
   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
   call.

   FIXME.  If we had the SSA form built at this point, we could merely
   hoist the code in block 3 into block 2 and be done with it.  But at
   this point we don't have dataflow information and though we could
   hack something up here, it is really not worth the aggravation.
*/ 165 166static bool 167workshare_safe_to_combine_p (basic_block ws_entry_bb) 168{ 169 struct omp_for_data fd; 170 gimple *ws_stmt = last_stmt (ws_entry_bb); 171 172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) 173 return true; 174 175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); 176 177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); 178 179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) 180 return false; 181 if (fd.iter_type != long_integer_type_node) 182 return false; 183 184 /* FIXME. We give up too easily here. If any of these arguments 185 are not constants, they will likely involve variables that have 186 been mapped into fields of .omp_data_s for sharing with the child 187 function. With appropriate data flow, it would be possible to 188 see through this. */ 189 if (!is_gimple_min_invariant (fd.loop.n1) 190 || !is_gimple_min_invariant (fd.loop.n2) 191 || !is_gimple_min_invariant (fd.loop.step) 192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) 193 return false; 194 195 return true; 196} 197 198/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier 199 presence (SIMD_SCHEDULE). */ 200 201static tree 202omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) 203{ 204 if (!simd_schedule) 205 return chunk_size; 206 207 poly_uint64 vf = omp_max_vf (); 208 if (known_eq (vf, 1U)) 209 return chunk_size; 210 211 tree type = TREE_TYPE (chunk_size); 212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, 213 build_int_cst (type, vf - 1)); 214 return fold_build2 (BIT_AND_EXPR, type, chunk_size, 215 build_int_cst (type, -vf)); 216} 217 218/* Collect additional arguments needed to emit a combined 219 parallel+workshare call. WS_STMT is the workshare directive being 220 expanded. 
*/

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  tree t;
  location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
    {
      struct omp_for_data fd;
      tree n1, n2;

      omp_extract_for_data (for_stmt, &fd, NULL);
      n1 = fd.loop.n1;
      n2 = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
	{
	  /* For a combined construct the loop bounds are carried by
	     the first two _looptemp_ clauses on the enclosing
	     parallel, in order: n1 first, then n2.  */
	  tree innerc
	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
			       OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n1 = OMP_CLAUSE_DECL (innerc);
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  n2 = OMP_CLAUSE_DECL (innerc);
	}

      /* Three fixed arguments (n1, n2, step) plus an optional chunk
	 size; the allocation must match the quick_push count below.  */
      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      t = fold_convert_loc (loc, long_integer_type_node, n1);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, n2);
      ws_args->quick_push (t);

      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
      ws_args->quick_push (t);

      if (fd.chunk_size)
	{
	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
	  ws_args->quick_push (t);
	}

      return ws_args;
    }
  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* Number of sections is equal to the number of edges from the
	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
	 the exit of the sections region.  */
      basic_block bb = single_succ (gimple_bb (ws_stmt));
      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (t);
      return ws_args;
    }

  gcc_unreachable ();
}

/* Discover whether REGION is a combined parallel+workshare region.
*/

static void
determine_parallel_type (struct omp_region *region)
{
  basic_block par_entry_bb, par_exit_bb;
  basic_block ws_entry_bb, ws_exit_bb;

  /* Nothing to do unless the region and its single inner region are
     fully formed (entry, exit and continue blocks all present).  */
  if (region == NULL || region->inner == NULL
      || region->exit == NULL || region->inner->exit == NULL
      || region->inner->cont == NULL)
    return;

  /* We only support parallel+for and parallel+sections.  */
  if (region->type != GIMPLE_OMP_PARALLEL
      || (region->inner->type != GIMPLE_OMP_FOR
	  && region->inner->type != GIMPLE_OMP_SECTIONS))
    return;

  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
     WS_EXIT_BB -> PAR_EXIT_BB.  */
  par_entry_bb = region->entry;
  par_exit_bb = region->exit;
  ws_entry_bb = region->inner->entry;
  ws_exit_bb = region->inner->exit;

  if (single_succ (par_entry_bb) == ws_entry_bb
      && single_succ (ws_exit_bb) == par_exit_bb
      && workshare_safe_to_combine_p (ws_entry_bb)
      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
	  || (last_and_only_stmt (ws_entry_bb)
	      && last_and_only_stmt (par_exit_bb))))
    {
      gimple *par_stmt = last_stmt (par_entry_bb);
      gimple *ws_stmt = last_stmt (ws_entry_bb);

      if (region->inner->type == GIMPLE_OMP_FOR)
	{
	  /* If this is a combined parallel loop, we need to determine
	     whether or not to use the combined library calls.  There
	     are two cases where we do not apply the transformation:
	     static loops and any kind of ordered loop.  In the first
	     case, we already open code the loop so there is no need
	     to do anything else.  In the latter case, the combined
	     parallel loop call would still need extra synchronization
	     to implement ordered semantics, so there would not be any
	     gain in using the combined call.  */
	  tree clauses = gimple_omp_for_clauses (ws_stmt);
	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
	  if (c == NULL
	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
		  == OMP_CLAUSE_SCHEDULE_STATIC)
	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
	    {
	      region->is_combined_parallel = false;
	      region->inner->is_combined_parallel = false;
	      return;
	    }
	}

      region->is_combined_parallel = true;
      region->inner->is_combined_parallel = true;
      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
    }
}

/* Debugging dumps for parallel regions.  */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);

/* Dump the parallel region tree rooted at REGION.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
	   gimple_code_name[region->type]);

  if (region->inner)
    dump_omp_region (file, region->inner, indent + 4);

  if (region->cont)
    {
      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
	       region->cont->index);
    }

  if (region->exit)
    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
	     region->exit->index);
  else
    fprintf (file, "%*s[no exit marker]\n", indent, "");

  /* Siblings are dumped at the same indentation level.  */
  if (region->next)
    dump_omp_region (file, region->next, indent);
}

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  dump_omp_region (stderr, region, 0);
}

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  dump_omp_region (stderr, root_omp_region, 0);
}

/* Create a new parallel region starting at STMT inside region PARENT.
*/ 397 398static struct omp_region * 399new_omp_region (basic_block bb, enum gimple_code type, 400 struct omp_region *parent) 401{ 402 struct omp_region *region = XCNEW (struct omp_region); 403 404 region->outer = parent; 405 region->entry = bb; 406 region->type = type; 407 408 if (parent) 409 { 410 /* This is a nested region. Add it to the list of inner 411 regions in PARENT. */ 412 region->next = parent->inner; 413 parent->inner = region; 414 } 415 else 416 { 417 /* This is a toplevel region. Add it to the list of toplevel 418 regions in ROOT_OMP_REGION. */ 419 region->next = root_omp_region; 420 root_omp_region = region; 421 } 422 423 return region; 424} 425 426/* Release the memory associated with the region tree rooted at REGION. */ 427 428static void 429free_omp_region_1 (struct omp_region *region) 430{ 431 struct omp_region *i, *n; 432 433 for (i = region->inner; i ; i = n) 434 { 435 n = i->next; 436 free_omp_region_1 (i); 437 } 438 439 free (region); 440} 441 442/* Release the memory for the entire omp region tree. */ 443 444void 445omp_free_regions (void) 446{ 447 struct omp_region *r, *n; 448 for (r = root_omp_region; r ; r = n) 449 { 450 n = r->next; 451 free_omp_region_1 (r); 452 } 453 root_omp_region = NULL; 454} 455 456/* A convenience function to build an empty GIMPLE_COND with just the 457 condition. */ 458 459static gcond * 460gimple_build_cond_empty (tree cond) 461{ 462 enum tree_code pred_code; 463 tree lhs, rhs; 464 465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); 466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); 467} 468 469/* Return true if a parallel REGION is within a declare target function or 470 within a target region and is not a part of a gridified target. 
*/

static bool
parallel_needs_hsa_kernel_p (struct omp_region *region)
{
  bool indirect = false;
  for (region = region->outer; region; region = region->outer)
    {
      if (region->type == GIMPLE_OMP_PARALLEL)
	indirect = true;
      else if (region->type == GIMPLE_OMP_TARGET)
	{
	  gomp_target *tgt_stmt
	    = as_a <gomp_target *> (last_stmt (region->entry));

	  /* A gridified target carries a _griddim_ clause; in that
	     case only an indirectly nested parallel needs a kernel.  */
	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_))
	    return indirect;
	  else
	    return true;
	}
    }

  if (lookup_attribute ("omp declare target",
			DECL_ATTRIBUTES (current_function_decl)))
    return true;

  return false;
}

/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
   Add CHILD_FNDECL to decl chain of the supercontext of the block
   ENTRY_BLOCK - this is the block which originally contained the
   code from which CHILD_FNDECL was created.

   Together, these actions ensure that the debug info for the outlined
   function will be emitted with the correct lexical scope.  */

static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
			  tree child_fndecl)
{
  tree parent_fndecl = NULL_TREE;
  gimple *entry_stmt;
  /* OMP expansion expands inner regions before outer ones, so if
     we e.g. have explicit task region nested in parallel region, when
     expanding the task region current_function_decl will be the original
     source function, but we actually want to use as context the child
     function of the parallel.  */
  for (region = region->outer;
       region && parent_fndecl == NULL_TREE; region = region->outer)
    switch (region->type)
      {
      case GIMPLE_OMP_PARALLEL:
      case GIMPLE_OMP_TASK:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
	break;
      case GIMPLE_OMP_TARGET:
	entry_stmt = last_stmt (region->entry);
	parent_fndecl
	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
	break;
      default:
	break;
      }

  if (parent_fndecl == NULL_TREE)
    parent_fndecl = current_function_decl;
  DECL_CONTEXT (child_fndecl) = parent_fndecl;

  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
    {
      tree b = BLOCK_SUPERCONTEXT (entry_block);
      if (TREE_CODE (b) == BLOCK)
	{
	  /* Chain the child function into the vars of the enclosing
	     lexical block so debug info gets the right scope.  */
	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
	  BLOCK_VARS (b) = child_fndecl;
	}
    }
}

/* Build the function calls to GOMP_parallel_start etc to actually
   generate the parallel operation.  REGION is the parallel region
   being expanded.  BB is the block where to insert the code.  WS_ARGS
   will be set if this is a call to a combined parallel+workshare
   construct, it contains the list of additional arguments needed by
   the workshare construct.  */

static void
expand_parallel_call (struct omp_region *region, basic_block bb,
		      gomp_parallel *entry_stmt,
		      vec<tree, va_gc> *ws_args)
{
  tree t, t1, t2, val, cond, c, clauses, flags;
  gimple_stmt_iterator gsi;
  gimple *stmt;
  enum built_in_function start_ix;
  int start_ix2;
  location_t clause_loc;
  vec<tree, va_gc> *args;

  clauses = gimple_omp_parallel_clauses (entry_stmt);

  /* Determine what flavor of GOMP_parallel we will be
     emitting.  */
  start_ix = BUILT_IN_GOMP_PARALLEL;
  if (is_combined_parallel (region))
    {
      switch (region->inner->type)
	{
	case GIMPLE_OMP_FOR:
	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
	  /* Compute an offset from BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	     the nonmonotonic dynamic/guided variants live 3 entries
	     past their monotonic counterparts.  */
	  switch (region->inner->sched_kind)
	    {
	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
	      start_ix2 = 3;
	      break;
	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	    case OMP_CLAUSE_SCHEDULE_GUIDED:
	      if (region->inner->sched_modifiers
		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
		{
		  start_ix2 = 3 + region->inner->sched_kind;
		  break;
		}
	      /* FALLTHRU */
	    default:
	      start_ix2 = region->inner->sched_kind;
	      break;
	    }
	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
	  start_ix = (enum built_in_function) start_ix2;
	  break;
	case GIMPLE_OMP_SECTIONS:
	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* By default, the value of NUM_THREADS is zero (selected at run time)
     and there is no conditional.  */
  cond = NULL_TREE;
  val = build_int_cst (unsigned_type_node, 0);
  flags = build_int_cst (unsigned_type_node, 0);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
  if (c)
    {
      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
  if (c)
    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));

  /* Ensure 'val' is of the correct type.  */
  val = fold_convert_loc (clause_loc, unsigned_type_node, val);

  /* If we found the clause 'if (cond)', build either
     (cond != 0) or (cond ? val : 1u).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      if (integer_zerop (val))
	val = fold_build2_loc (clause_loc,
			       EQ_EXPR, unsigned_type_node, cond,
			       build_int_cst (TREE_TYPE (cond), 0));
      else
	{
	  /* Materialize (cond ? val : 1u) as a diamond in the CFG:
	     cond_bb branches to then_bb (val) or else_bb (1u), which
	     rejoin at BB, with a PHI when in SSA form.  */
	  basic_block cond_bb, then_bb, else_bb;
	  edge e, e_then, e_else;
	  tree tmp_then, tmp_else, tmp_join, tmp_var;

	  tmp_var = create_tmp_var (TREE_TYPE (val));
	  if (gimple_in_ssa_p (cfun))
	    {
	      tmp_then = make_ssa_name (tmp_var);
	      tmp_else = make_ssa_name (tmp_var);
	      tmp_join = make_ssa_name (tmp_var);
	    }
	  else
	    {
	      tmp_then = tmp_var;
	      tmp_else = tmp_var;
	      tmp_join = tmp_var;
	    }

	  e = split_block_after_labels (bb);
	  cond_bb = e->src;
	  bb = e->dest;
	  remove_edge (e);

	  then_bb = create_empty_bb (cond_bb);
	  else_bb = create_empty_bb (then_bb);
	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

	  stmt = gimple_build_cond_empty (cond);
	  gsi = gsi_start_bb (cond_bb);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

	  gsi = gsi_start_bb (then_bb);
	  expand_omp_build_assign (&gsi, tmp_then, val, true);

	  gsi = gsi_start_bb (else_bb);
	  expand_omp_build_assign (&gsi, tmp_else,
				   build_int_cst (unsigned_type_node, 1),
				   true);

	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
	  add_bb_to_loop (then_bb, cond_bb->loop_father);
	  add_bb_to_loop (else_bb, cond_bb->loop_father);
	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);

	  if (gimple_in_ssa_p (cfun))
	    {
	      gphi *phi = create_phi_node (tmp_join, bb);
	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
	    }

	  val = tmp_join;
	}

      gsi = gsi_start_bb (bb);
      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
				      false, GSI_CONTINUE_LINKING);
    }

  gsi = gsi_last_nondebug_bb (bb);
  t = gimple_omp_parallel_data_arg (entry_stmt);
  if (t == NULL)
    t1 = null_pointer_node;
  else
    t1 = build_fold_addr_expr (t);
  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
  t2 = build_fold_addr_expr (child_fndecl);

  /* Argument order expected by libgomp: child fn, data, num_threads,
     any combined-workshare extras, then flags.  */
  vec_alloc (args, 4 + vec_safe_length (ws_args));
  args->quick_push (t2);
  args->quick_push (t1);
  args->quick_push (val);
  if (ws_args)
    args->splice (*ws_args);
  args->quick_push (flags);

  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
			       builtin_decl_explicit (start_ix), args);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);

  if (hsa_gen_requested_p ()
      && parallel_needs_hsa_kernel_p (region))
    {
      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
      hsa_register_kernel (child_cnode);
    }
}

/* Build the function call to GOMP_task to actually
   generate the task operation.  BB is the block where to insert the code.  */

static void
expand_task_call (struct omp_region *region, basic_block bb,
		  gomp_task *entry_stmt)
{
  tree t1, t2, t3;
  gimple_stmt_iterator gsi;
  location_t loc = gimple_location (entry_stmt);

  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);

  unsigned int iflags
    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);

  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
  tree num_tasks = NULL_TREE;
  bool ull = false;
  if (taskloop_p)
    {
      gimple *g = last_stmt (region->outer->entry);
      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
      struct omp_for_data fd;
      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
      /* The first two _looptemp_ clauses carry the start and end
	 iteration variables for the taskloop.  */
      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
				OMP_CLAUSE__LOOPTEMP_);
      startvar = OMP_CLAUSE_DECL (startvar);
      endvar = OMP_CLAUSE_DECL (endvar);
      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
      if (fd.loop.cond_code == LT_EXPR)
	iflags |= GOMP_TASK_FLAG_UP;
      tree tclauses = gimple_omp_for_clauses (g);
      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
      if (num_tasks)
	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
      else
	{
	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
	  if (num_tasks)
	    {
	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
	    }
	  else
	    num_tasks = integer_zero_node;
	}
      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
      if (ifc == NULL_TREE)
	iflags |= GOMP_TASK_FLAG_IF;
      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
	iflags |= GOMP_TASK_FLAG_NOGROUP;
      ull = fd.iter_type == long_long_unsigned_type_node;
    }
  else if (priority)
    iflags |= GOMP_TASK_FLAG_PRIORITY;

  tree flags = build_int_cst (unsigned_type_node, iflags);

  tree cond = boolean_true_node;
  if (ifc)
    {
      if (taskloop_p)
	{
	  /* For taskloop the if clause is folded into the flags word
	     instead of being passed as a separate argument.  */
	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			       build_int_cst (unsigned_type_node,
					      GOMP_TASK_FLAG_IF),
			       build_int_cst (unsigned_type_node, 0));
	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
				   flags, t);
	}
      else
	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
    }

  if (finalc)
    {
      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
			   build_int_cst (unsigned_type_node,
					  GOMP_TASK_FLAG_FINAL),
			   build_int_cst (unsigned_type_node, 0));
      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
    }
  if (depend)
    depend = OMP_CLAUSE_DECL (depend);
  else
    depend = build_int_cst (ptr_type_node, 0);
  if (priority)
    priority = fold_convert (integer_type_node,
			     OMP_CLAUSE_PRIORITY_EXPR (priority));
  else
    priority = integer_zero_node;

  gsi = gsi_last_nondebug_bb (bb);
  tree t = gimple_omp_task_data_arg (entry_stmt);
  if (t == NULL)
    t2 = null_pointer_node;
  else
    t2 = build_fold_addr_expr_loc (loc, t);
  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
  t = gimple_omp_task_copy_fn (entry_stmt);
  if (t == NULL)
    t3 = null_pointer_node;
  else
    t3 = build_fold_addr_expr_loc (loc, t);

  if (taskloop_p)
    t = build_call_expr (ull
			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
			 11, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), flags,
			 num_tasks, priority, startvar, endvar, step);
  else
    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
			 9, t1, t2, t3,
			 gimple_omp_task_arg_size (entry_stmt),
			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
			 depend, priority);

  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
			    false, GSI_CONTINUE_LINKING);
}

/* Chain all the DECLs in LIST by their TREE_CHAIN fields.
*/

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree chain = NULL_TREE, t;
  unsigned ix;

  /* Walk backwards so the resulting chain preserves the vector's
     original order.  */
  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
    {
      DECL_CHAIN (t) = chain;
      chain = t;
    }

  return chain;
}

/* Remove barriers in REGION->EXIT's block.  Note that this is only
   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
   removed.  */

static void
remove_exit_barrier (struct omp_region *region)
{
  gimple_stmt_iterator gsi;
  basic_block exit_bb;
  edge_iterator ei;
  edge e;
  gimple *stmt;
  /* Lazily computed: -1 unknown, 0 no addressable vars, 1 found one.  */
  int any_addressable_vars = -1;

  exit_bb = region->exit;

  /* If the parallel region doesn't return, we don't have REGION->EXIT
     block at all.  */
  if (! exit_bb)
    return;

  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
     statements that can appear in between are extremely limited -- no
     memory operations at all.  Here, we allow nothing at all, so the
     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gsi_prev_nondebug (&gsi);
  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
    return;

  FOR_EACH_EDGE (e, ei, exit_bb->preds)
    {
      gsi = gsi_last_nondebug_bb (e->src);
      if (gsi_end_p (gsi))
	continue;
      stmt = gsi_stmt (gsi);
      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
	  && !gimple_omp_return_nowait_p (stmt))
	{
	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
	     in many cases.  If there could be tasks queued, the barrier
	     might be needed to let the tasks run before some local
	     variable of the parallel that the task uses as shared
	     runs out of scope.  The task can be spawned either
	     from within current function (this would be easy to check)
	     or from some function it calls and gets passed an address
	     of such a variable.  */
	  if (any_addressable_vars < 0)
	    {
	      gomp_parallel *parallel_stmt
		= as_a <gomp_parallel *> (last_stmt (region->entry));
	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
	      tree local_decls, block, decl;
	      unsigned ix;

	      any_addressable_vars = 0;
	      /* First scan the child function's local decls...  */
	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
		if (TREE_ADDRESSABLE (decl))
		  {
		    any_addressable_vars = 1;
		    break;
		  }
	      /* ...then walk the BLOCK tree outwards from the return
		 up to the parallel's own block.  */
	      for (block = gimple_block (stmt);
		   !any_addressable_vars
		   && block
		   && TREE_CODE (block) == BLOCK;
		   block = BLOCK_SUPERCONTEXT (block))
		{
		  for (local_decls = BLOCK_VARS (block);
		       local_decls;
		       local_decls = DECL_CHAIN (local_decls))
		    if (TREE_ADDRESSABLE (local_decls))
		      {
			any_addressable_vars = 1;
			break;
		      }
		  if (block == gimple_block (parallel_stmt))
		    break;
		}
	    }
	  if (!any_addressable_vars)
	    gimple_omp_return_set_nowait (stmt);
	}
    }
}

/* Recursively apply remove_exit_barrier to every parallel region in
   the tree rooted at REGION.  */

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  if (region->inner)
    {
      region = region->inner;
      remove_exit_barriers (region);
      while (region->next)
	{
	  region = region->next;
	  remove_exit_barriers (region);
	}
    }
}

/* Optimize omp_get_thread_num () and omp_get_num_threads ()
   calls.  These can't be declared as const functions, but
   within one parallel body they are constant, so they can be
   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
   which are declared const.
Similarly for task body, except 1012 that in untied task omp_get_thread_num () can change at any task 1013 scheduling point. */ 1014 1015static void 1016optimize_omp_library_calls (gimple *entry_stmt) 1017{ 1018 basic_block bb; 1019 gimple_stmt_iterator gsi; 1020 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1021 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); 1022 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1023 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); 1024 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK 1025 && omp_find_clause (gimple_omp_task_clauses (entry_stmt), 1026 OMP_CLAUSE_UNTIED) != NULL); 1027 1028 FOR_EACH_BB_FN (bb, cfun) 1029 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1030 { 1031 gimple *call = gsi_stmt (gsi); 1032 tree decl; 1033 1034 if (is_gimple_call (call) 1035 && (decl = gimple_call_fndecl (call)) 1036 && DECL_EXTERNAL (decl) 1037 && TREE_PUBLIC (decl) 1038 && DECL_INITIAL (decl) == NULL) 1039 { 1040 tree built_in; 1041 1042 if (DECL_NAME (decl) == thr_num_id) 1043 { 1044 /* In #pragma omp task untied omp_get_thread_num () can change 1045 during the execution of the task region. */ 1046 if (untied_task) 1047 continue; 1048 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 1049 } 1050 else if (DECL_NAME (decl) == num_thr_id) 1051 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 1052 else 1053 continue; 1054 1055 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) 1056 || gimple_call_num_args (call) != 0) 1057 continue; 1058 1059 if (flag_exceptions && !TREE_NOTHROW (decl)) 1060 continue; 1061 1062 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE 1063 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), 1064 TREE_TYPE (TREE_TYPE (built_in)))) 1065 continue; 1066 1067 gimple_call_set_fndecl (call, built_in); 1068 } 1069 } 1070} 1071 1072/* Callback for expand_omp_build_assign. 
   Return non-NULL if *tp needs to be
   regimplified.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree t = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
    return t;

  /* Address expressions may have had their invariant-ness changed by
     the caller's edits; recompute it as a side effect of the walk.  */
  if (TREE_CODE (t) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (t);

  /* Don't descend into types or declarations.  */
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* Prepend or append TO = FROM assignment before or after *GSI_P.
   AFTER selects the direction (defaults to false, i.e. insert before).
   FROM is gimplified first; if either operand then needs
   regimplification (see expand_omp_regimplify_p), the new statement's
   operands are regimplified in place.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
			 bool after)
{
  /* Force FROM to a simple value when TO is an addressable decl, so the
     resulting assignment is valid gimple.  */
  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
				   !after, after ? GSI_CONTINUE_LINKING
						 : GSI_SAME_STMT);
  gimple *stmt = gimple_build_assign (to, from);
  if (after)
    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
  else
    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple_regimplify_operands (stmt, &gsi);
    }
}

/* Expand the OpenMP parallel or task directive starting at REGION.
   */

static void
expand_omp_taskreg (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gimple *entry_stmt, *stmt;
  edge e;
  vec<tree, va_gc> *ws_args;

  entry_stmt = last_stmt (region->entry);
  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
  child_cfun = DECL_STRUCT_FUNCTION (child_fn);

  entry_bb = region->entry;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    exit_bb = region->cont;
  else
    exit_bb = region->exit;

  if (is_combined_parallel (region))
    ws_args = region->ws_args;
  else
    ws_args = NULL;

  if (child_cfun->cfg)
    {
      /* Due to inlining, it may happen that we have already outlined
	 the region, in which case all we need to do is make the
	 sub-graph unreachable and emit the parallel call.  */
      edge entry_succ_e, exit_succ_e;

      entry_succ_e = single_succ_edge (entry_bb);

      gsi = gsi_last_nondebug_bb (entry_bb);
      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
      gsi_remove (&gsi, true);

      new_bb = entry_bb;
      if (exit_bb)
	{
	  exit_succ_e = single_succ_edge (exit_bb);
	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
	}
      remove_edge_and_dominated_blocks (entry_succ_e);
    }
  else
    {
      unsigned srcidx, dstidx, num;

      /* If the parallel region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the parallel body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      if (gimple_omp_taskreg_data_arg (entry_stmt))
	{
	  basic_block entry_succ_bb
	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
				       : FALLTHRU_EDGE (entry_bb)->dest;
	  tree arg;
	  gimple *parcopy_stmt = NULL;

	  /* Find the .OMP_DATA_I = &.OMP_DATA_O copy assignment.  */
	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gimple *stmt;

	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0)
			 == gimple_omp_taskreg_data_arg (entry_stmt))
		    {
		      parcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (parcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  if (!gimple_in_ssa_p (cfun))
	    {
	      if (gimple_assign_lhs (parcopy_stmt) == arg)
		gsi_remove (&gsi, true);
	      else
		{
		  /* ?? Is setting the subcode really necessary ??  */
		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
		}
	    }
	  else
	    {
	      tree lhs = gimple_assign_lhs (parcopy_stmt);
	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
	      /* We'd like to set the rhs to the default def in the child_fn,
		 but it's too early to create ssa names in the child_fn.
		 Instead, we set the rhs to the parm.  In
		 move_sese_region_to_fn, we introduce a default def for the
		 parm, map the parm to its default def, and once we encounter
		 this stmt, replace the parm with the default def.  */
	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
	      update_stmt (parcopy_stmt);
	    }
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in parallel/task block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      /* E2 is only set for the GIMPLE_OMP_TASK case below.  */
      edge e2 = NULL;
      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
      else
	{
	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
	  gcc_assert (e2->dest == region->exit);
	  remove_edge (BRANCH_EDGE (entry_bb));
	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
	  gsi = gsi_last_nondebug_bb (region->exit);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  gsi_remove (&gsi, true);
	}

      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && (gimple_code (gsi_stmt (gsi))
			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the parallel region into CHILD_CFUN.  */

      if (gimple_in_ssa_p (cfun))
	{
	  init_tree_ssa (child_cfun);
	  init_ssa_operands (child_cfun);
	  child_cfun->gimple_df->in_ssa_p = true;
	  block = NULL_TREE;
	}
      else
	block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      if (e2)
	{
	  basic_block dest_bb = e2->dest;
	  if (!exit_bb)
	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
	  remove_edge (e2);
	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
	}
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);

      if (optimize)
	optimize_omp_library_calls (entry_stmt);
      update_max_bb_count ();
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (gimple_in_ssa_p (cfun))
	update_ssa (TODO_update_ssa);
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}
    }

  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);

  /* Emit the runtime library call that launches the outlined body.  */
  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
    expand_parallel_call (region, new_bb,
			  as_a <gomp_parallel *> (entry_stmt), ws_args);
  else
    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Information about members of an OpenACC collapsed loop nest.  */

struct oacc_collapse
{
  tree base;  /* Base value.  */
  tree iters; /* Number of steps.  */
  tree step;  /* Step size.  */
  tree tile;  /* Tile increment (if tiled).  */
  tree outer; /* Tile iterator var.  */
};

/* Helper for expand_oacc_for.  Determine collapsed loop information.
   Fill in COUNTS array.  Emit any initialization code before GSI.
   Return the calculated outer loop bound of BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
			   gimple_stmt_iterator *gsi,
			   oacc_collapse *counts, tree bound_type,
			   location_t loc)
{
  tree tiling = fd->tiling;
  /* Running product of the per-loop iteration counts.  */
  tree total = build_int_cst (bound_type, 1);
  int ix;

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  for (ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];

      tree iter_type = TREE_TYPE (loop->v);
      tree diff_type = iter_type;
      tree plus_type = iter_type;

      gcc_assert (loop->cond_code == fd->loop.cond_code);

      /* Pointer iteration uses sizetype offsets; differences must be
	 computed in a signed type wide enough to hold them.  */
      if (POINTER_TYPE_P (iter_type))
	plus_type = sizetype;
      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
	diff_type = signed_type_for (diff_type);
      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
	diff_type = integer_type_node;

      if (tiling)
	{
	  tree num = build_int_cst (integer_type_node, fd->collapse);
	  tree loop_no = build_int_cst (integer_type_node, ix);
	  tree tile = TREE_VALUE (tiling);
	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
					  /* gwv-outer=*/integer_zero_node,
					  /* gwv-inner=*/integer_zero_node);

	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
	  gimple_call_set_lhs (call, counts[ix].tile);
	  gimple_set_location (call, loc);
	  gsi_insert_before (gsi, call, GSI_SAME_STMT);

	  tiling = TREE_CHAIN (tiling);
	}
      else
	{
	  counts[ix].tile = NULL;
	  counts[ix].outer = loop->v;
	}

      tree b = loop->n1;
      tree e = loop->n2;
      tree s = loop->step;
      bool up = loop->cond_code == LT_EXPR;
      tree dir = build_int_cst (diff_type, up ? +1 : -1);
      bool negating;
      tree expr;

      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
      if (negating)
	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
      s = fold_convert (diff_type, s);
      if (negating)
	s = fold_build1 (NEGATE_EXPR, diff_type, s);
      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
				    true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      negating = !up && TYPE_UNSIGNED (iter_type);
      expr = fold_build2 (MINUS_EXPR, plus_type,
			  fold_convert (plus_type, negating ? b : e),
			  fold_convert (plus_type, negating ? e : b));
      expr = fold_convert (diff_type, expr);
      if (negating)
	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
      tree range = force_gimple_operand_gsi
	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);

      /* Determine number of iterations: (range - dir + step) / step.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);

      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
					     true, GSI_SAME_STMT);

      counts[ix].base = b;
      counts[ix].iters = iters;
      counts[ix].step = s;

      total = fold_build2 (MULT_EXPR, bound_type, total,
			   fold_convert (bound_type, iters));
    }

  return total;
}

/* Emit initializers for collapsed loop members.  INNER is true if
   this is for the element loop of a TILE.
   IVAR is the outer
   loop iteration variable, from which collapsed loop iteration values
   are calculated.  COUNTS array has been initialized by
   expand_oacc_collapse_init.  */

static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
			   gimple_stmt_iterator *gsi,
			   const oacc_collapse *counts, tree ivar)
{
  tree ivar_type = TREE_TYPE (ivar);

  /* The most rapidly changing iteration variable is the innermost
     one.  */
  for (int ix = fd->collapse; ix--;)
    {
      const omp_for_data_loop *loop = &fd->loops[ix];
      const oacc_collapse *collapse = &counts[ix];
      tree v = inner ? loop->v : collapse->outer;
      tree iter_type = TREE_TYPE (v);
      tree diff_type = TREE_TYPE (collapse->step);
      tree plus_type = iter_type;
      enum tree_code plus_code = PLUS_EXPR;
      tree expr;

      if (POINTER_TYPE_P (iter_type))
	{
	  plus_code = POINTER_PLUS_EXPR;
	  plus_type = sizetype;
	}

      /* Peel this loop's index off IVAR: expr = ivar % iters is this
	 loop's index, and ivar / iters is passed to the next-outer
	 iteration of the walk.  */
      expr = ivar;
      if (ix)
	{
	  tree mod = fold_convert (ivar_type, collapse->iters);
	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
					   true, GSI_SAME_STMT);
	}

      /* V = base + index * step.  */
      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
			  collapse->step);
      expr = fold_build2 (plus_code, iter_type,
			  inner ? collapse->outer : collapse->base,
			  fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      gassign *ass = gimple_build_assign (v, expr);
      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
    }
}

/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
   of the combined collapse > 1 loop constructs, generate code like:
	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
	if (cond3 is <)
	  adj = STEP3 - 1;
	else
	  adj = STEP3 + 1;
	count3 = (adj + N32 - N31) / STEP3;
	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
	if (cond2 is <)
	  adj = STEP2 - 1;
	else
	  adj = STEP2 + 1;
	count2 = (adj + N22 - N21) / STEP2;
	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
	if (cond1 is <)
	  adj = STEP1 - 1;
	else
	  adj = STEP1 + 1;
	count1 = (adj + N12 - N11) / STEP1;
	count = count1 * count2 * count3;
   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
	count = 0;
   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
   of the combined loop constructs, just initialize COUNTS array
   from the _looptemp_ clauses.  */

/* NOTE: It *could* be better to moosh all of the BBs together,
   creating one larger BB with all the computation and the unexpected
   jump at the end.  I.e.

   bool zero3, zero2, zero1, zero;

   zero3 = N32 c3 N31;
   count3 = (N32 - N31) /[cl] STEP3;
   zero2 = N22 c2 N21;
   count2 = (N22 - N21) /[cl] STEP2;
   zero1 = N12 c1 N11;
   count1 = (N12 - N11) /[cl] STEP1;
   zero = zero3 || zero2 || zero1;
   count = count1 * count2 * count3;
   if (__builtin_expect(zero, false)) goto zero_iter_bb;

   After all, we expect the zero=false, and thus we expect to have to
   evaluate all of the comparison expressions, so short-circuiting
   oughtn't be a win.  Since the condition isn't protecting a
   denominator, we're not concerned about divide-by-zero, so we can
   fully evaluate count even if a numerator turned out to be wrong.

   It seems like putting this all together would create much better
   scheduling opportunities, and less pressure on the chip's branch
   predictor.
   */

static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			    basic_block &entry_bb, tree *counts,
			    basic_block &zero_iter1_bb, int &first_zero_iter1,
			    basic_block &zero_iter2_bb, int &first_zero_iter2,
			    basic_block &l2_dom_bb)
{
  tree t, type = TREE_TYPE (fd->loop.v);
  edge e, ne;
  int i;

  /* Collapsed loops need work for expansion into SSA form.  */
  gcc_assert (!gimple_in_ssa_p (cfun));

  if (gimple_omp_for_combined_into_p (fd->for_stmt)
      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (fd->ordered == 0);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    counts[i] = OMP_CLAUSE_DECL (innerc);
	  else
	    counts[0] = NULL_TREE;
	}
      return;
    }

  /* For the non-collapsed loops of an ordered(N) nest, if any loop can
     be folded to zero iterations at compile time, all their counts are
     known to be zero.  */
  for (i = fd->collapse; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      counts[i] = NULL_TREE;
      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
		       fold_convert (itype, fd->loops[i].n1),
		       fold_convert (itype, fd->loops[i].n2));
      if (t && integer_zerop (t))
	{
	  for (i = fd->collapse; i < fd->ordered; i++)
	    counts[i] = build_int_cst (type, 0);
	  break;
	}
    }
  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);

      if (i >= fd->collapse && counts[i])
	continue;
      /* Unless the loop is known non-empty at compile time, emit a
	 runtime N1 cond N2 check branching to the zero-iterations BB.  */
      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
				fold_convert (itype, fd->loops[i].n1),
				fold_convert (itype, fd->loops[i].n2)))
	      == NULL_TREE || !integer_onep (t)))
	{
	  gcond *cond_stmt;
	  tree n1, n2;
	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
					 true, GSI_SAME_STMT);
	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
					 NULL_TREE, NULL_TREE);
	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			    expand_omp_regimplify_p, NULL, NULL))
	    {
	      *gsi = gsi_for_stmt (cond_stmt);
	      gimple_regimplify_operands (cond_stmt, gsi);
	    }
	  e = split_block (entry_bb, cond_stmt);
	  /* Collapsed loops share one zero-iterations BB; the extra
	     ordered loops share another.  */
	  basic_block &zero_iter_bb
	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
	  int &first_zero_iter
	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
	  if (zero_iter_bb == NULL)
	    {
	      gassign *assign_stmt;
	      first_zero_iter = i;
	      zero_iter_bb = create_empty_bb (entry_bb);
	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
	      *gsi = gsi_after_labels (zero_iter_bb);
	      if (i < fd->collapse)
		assign_stmt = gimple_build_assign (fd->loop.n2,
						   build_zero_cst (type));
	      else
		{
		  counts[i] = create_tmp_reg (type, ".count");
		  assign_stmt
		    = gimple_build_assign (counts[i], build_zero_cst (type));
		}
	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
				       entry_bb);
	    }
	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
	  ne->probability = profile_probability::very_unlikely ();
	  e->flags = EDGE_TRUE_VALUE;
	  e->probability = ne->probability.invert ();
	  if (l2_dom_bb == NULL)
	    l2_dom_bb = entry_bb;
	  entry_bb = e->dest;
	  *gsi = gsi_last_nondebug_bb (entry_bb);
	}

      if (POINTER_TYPE_P (itype))
	itype = signed_type_for (itype);
      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
				 ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype,
		       fold_convert (itype, fd->loops[i].step), t);
      t = fold_build2 (PLUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n2));
      t = fold_build2 (MINUS_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].n1));
      /* ?? We could probably use CEIL_DIV_EXPR instead of
	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
	 generate the same code in the end because generically we
	 don't know that the values involved must be negative for
	 GT??  */
      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype,
				      fold_convert (itype,
						    fd->loops[i].step)));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
			 fold_convert (itype, fd->loops[i].step));
      t = fold_convert (type, t);
      if (TREE_CODE (t) == INTEGER_CST)
	counts[i] = t;
      else
	{
	  /* counts[i] was already created above when this loop first
	     populated the second zero-iterations BB; don't recreate it.  */
	  if (i < fd->collapse || i != first_zero_iter2)
	    counts[i] = create_tmp_reg (type, ".count");
	  expand_omp_build_assign (gsi, counts[i], t);
	}
      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
	{
	  if (i == 0)
	    t = counts[0];
	  else
	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
	  expand_omp_build_assign (gsi, fd->loop.n2, t);
	}
    }
}

/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
	T = V;
	V3 = N31 + (T % count3) * STEP3;
	T = T / count3;
	V2 = N21 + (T % count2) * STEP2;
	T = T / count2;
	V1 = N11 + T * STEP1;
   if this loop doesn't have an inner loop construct combined with it.
   If it does have an inner loop construct combined with it and the
   iteration count isn't known constant, store values from counts array
   into its _looptemp_ temporaries instead.  */

static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
			  tree *counts, gimple *inner_stmt, tree startvar)
{
  int i;
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* If fd->loop.n2 is constant, then no propagation of the counts
	 is needed, they are constant.  */
      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
	return;

      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
		     ? gimple_omp_taskreg_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      /* First two _looptemp_ clauses are for istart/iend, counts[0]
	 isn't supposed to be handled, as the inner loop doesn't
	 use it.  */
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      for (i = 0; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	  if (i)
	    {
	      tree tem = OMP_CLAUSE_DECL (innerc);
	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					    false, GSI_CONTINUE_LINKING);
	      gassign *stmt = gimple_build_assign (tem, t);
	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	    }
	}
      return;
    }

  /* T holds the still-to-be-decomposed part of STARTVAR while walking
     from the innermost loop outwards.  */
  tree type = TREE_TYPE (fd->loop.v);
  tree tem = create_tmp_reg (type, ".tem");
  gassign *stmt = gimple_build_assign (tem, startvar);
  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);

  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
      itype = vtype;
      if (POINTER_TYPE_P (vtype))
	itype = signed_type_for (vtype);
      if (i != 0)
	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
      else
	t = tem;
      t = fold_convert (itype, t);
      t = fold_build2 (MULT_EXPR, itype, t,
		       fold_convert (itype, fd->loops[i].step));
      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].n1, t);
      else
	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
      t = force_gimple_operand_gsi (gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
      if (i != 0)
	{
	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (tem, t);
	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
}

/* Helper function for expand_omp_for_*.  Generate code like:
	L10:
	V3 += STEP3;
	if (V3 cond3 N32) goto BODY_BB; else goto L11;
	L11:
	V3 = N31;
	V2 += STEP2;
	if (V2 cond2 N22) goto BODY_BB; else goto L12;
	L12:
	V2 = N21;
	V1 += STEP1;
	goto BODY_BB;  */

static basic_block
extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
			     basic_block body_bb)
{
  basic_block last_bb, bb, collapse_bb = NULL;
  int i;
  gimple_stmt_iterator gsi;
  edge e;
  tree t;
  gimple *stmt;

  last_bb = cont_bb;
  /* Build the increment chain from the innermost loop outwards; each
     new BB handles wrap-around of the next-inner iteration variable.  */
  for (i = fd->collapse - 1; i >= 0; i--)
    {
      tree vtype = TREE_TYPE (fd->loops[i].v);

      bb = create_empty_bb (last_bb);
      add_bb_to_loop (bb, last_bb->loop_father);
      gsi = gsi_start_bb (bb);

      if (i < fd->collapse - 1)
	{
	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (1, 8);

	  /* Reset the next-inner iteration variable to its start.  */
	  t = fd->loops[i + 1].n1;
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (fd->loops[i + 1].v)
					&& TREE_ADDRESSABLE (fd->loops[i
								       + 1].v),
					NULL_TREE, false,
					GSI_CONTINUE_LINKING);
	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
      else
	collapse_bb = bb;

      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);

      if (POINTER_TYPE_P (vtype))
	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
      else
	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (fd->loops[i].v)
				    && TREE_ADDRESSABLE (fd->loops[i].v),
				    NULL_TREE, false, GSI_CONTINUE_LINKING);
      stmt = gimple_build_assign (fd->loops[i].v, t);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      if (i > 0)
	{
	  t = fd->loops[i].n2;
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  tree v = fd->loops[i].v;
	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
					  false, GSI_CONTINUE_LINKING);
	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
	  stmt = gimple_build_cond_empty (t);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
			 expand_omp_regimplify_p, NULL, NULL)
	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
			    expand_omp_regimplify_p, NULL, NULL))
	    gimple_regimplify_operands (stmt, &gsi);
	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
	  e->probability
	    = profile_probability::guessed_always ().apply_scale (7, 8);
	}
      else
	make_edge (bb, body_bb, EDGE_FALLTHRU);
      last_bb = bb;
    }

  return collapse_bb;
}

/* Expand #pragma omp ordered depend(source).  */

static void
expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			   tree *counts, location_t loc)
{
  /* Pick the long or unsigned-long-long libgomp entry point to match
     the iteration type.  */
  enum built_in_function source_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
  gimple *g
    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
			 build_fold_addr_expr (counts[fd->ordered]));
  gimple_set_location (g, loc);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
}

/* Expand a single depend from #pragma omp ordered depend(sink:...).
*/

/* GSI points at the GIMPLE_OMP_ORDERED statement being expanded, FD
   describes the enclosing doacross loop nest, COUNTS are the iteration
   counters set up by expand_omp_ordered_source_sink, C is the
   depend(sink) clause, and LOC the location of the ordered construct.
   Emits a call to the GOMP_doacross_wait runtime entry point (unsigned
   long long variant unless FD's iteration type is long), guarded by a
   condition that skips the wait when the sink refers to an iteration
   outside of the iteration space.  */

static void
expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
			 tree *counts, tree c, location_t loc)
{
  auto_vec<tree, 10> args;
  enum built_in_function sink_ix
    = fd->iter_type == long_integer_type_node
      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
  int i;
  gimple_stmt_iterator gsi2 = *gsi;
  bool warned_step = false;

  /* Scan for the outermost loop with a non-zero offset and warn if the
     sink waits on a lexically later iteration (forward dependence with a
     positive offset, or backward with a negative one).  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree step = NULL_TREE;
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	}
      if (!integer_zerop (off))
	{
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      || fd->loops[i].cond_code == GT_EXPR);
	  bool forward = fd->loops[i].cond_code == LT_EXPR;
	  if (step)
	    {
	      /* Non-simple Fortran DO loops.  If step is variable,
		 we don't know at compile even the direction, so can't
		 warn.  */
	      if (TREE_CODE (step) != INTEGER_CST)
		break;
	      forward = tree_int_cst_sgn (step) != -1;
	    }
	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
				"lexically later iteration");
	  break;
	}
      deps = TREE_CHAIN (deps);
    }
  /* If all offsets corresponding to the collapsed loops are zero,
     this depend clause can be ignored.  FIXME: but there is still a
     flush needed.  We need to emit one __sync_synchronize () for it
     though (perhaps conditionally)?  Solve this together with the
     conservative dependence folding optimization.
  if (i >= fd->collapse)
    return;  */

  /* Carve the CFG so that E1->src ends with the guarding condition and
     E1->dest receives the wait call; E2->dest is the join block.  */
  deps = OMP_CLAUSE_DECL (c);
  gsi_prev (&gsi2);
  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
  edge e2 = split_block_after_labels (e1->dest);

  gsi2 = gsi_after_labels (e1->dest);
  *gsi = gsi_last_bb (e1->src);
  /* For each loop in the nest, accumulate into COND a check that the
     sink iteration actually exists, and push its (biased, divided by
     step) counter value as an argument for the wait call.  */
  for (i = 0; i < fd->ordered; i++)
    {
      tree itype = TREE_TYPE (fd->loops[i].v);
      tree step = NULL_TREE;
      tree orig_off = NULL_TREE;
      if (POINTER_TYPE_P (itype))
	itype = sizetype;
      if (i)
	deps = TREE_CHAIN (deps);
      off = TREE_PURPOSE (deps);
      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
	{
	  step = TREE_OPERAND (off, 1);
	  off = TREE_OPERAND (off, 0);
	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
		      && integer_onep (fd->loops[i].step)
		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
	}
      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
      if (step)
	{
	  off = fold_convert_loc (loc, itype, off);
	  orig_off = off;
	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
	}

      if (integer_zerop (off))
	t = boolean_true_node;
      else
	{
	  /* A is the iteration the sink refers to: V plus or minus the
	     offset, depending on the sign of the sink dependence.  */
	  tree a;
	  tree co = fold_convert_loc (loc, itype, off);
	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
				   co);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  else
	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
				 fd->loops[i].v, co);
	  if (step)
	    {
	      /* Variable step: the direction is unknown at compile time,
		 so select between the two bound checks at run time based
		 on the sign of STEP.  */
	      tree t1, t2;
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      else
		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				      fd->loops[i].n2);
	      else
		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				      fd->loops[i].n1);
	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
				   step, build_int_cst (TREE_TYPE (step), 0));
	      if (TREE_CODE (step) != INTEGER_CST)
		{
		  t1 = unshare_expr (t1);
		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		  t2 = unshare_expr (t2);
		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
						 false, GSI_CONTINUE_LINKING);
		}
	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
				   t, t2, t1);
	    }
	  else if (fd->loops[i].cond_code == LT_EXPR)
	    {
	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
				     fd->loops[i].n1);
	      else
		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
				     fd->loops[i].n2);
	    }
	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
				 fd->loops[i].n2);
	  else
	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
				 fd->loops[i].n1);
	}
      if (cond)
	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
      else
	cond = t;

      off = fold_convert_loc (loc, itype, off);

      if (step
	  || (fd->loops[i].cond_code == LT_EXPR
	      ? !integer_onep (fd->loops[i].step)
	      : !integer_minus_onep (fd->loops[i].step)))
	{
	  /* A non-unit step: also require the offset to be a multiple of
	     the step, otherwise the sink names an iteration that never
	     happens.  */
	  if (step == NULL_TREE
	      && TYPE_UNSIGNED (itype)
	      && fd->loops[i].cond_code == GT_EXPR)
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
				 fold_build1_loc (loc, NEGATE_EXPR, itype,
						  s));
	  else
	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
				 orig_off ? orig_off : off, s);
	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
			       build_int_cst (itype, 0));
	  if (integer_zerop (t) && !warned_step)
	    {
	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
				  "in the iteration space");
	      warned_step = true;
	    }
	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
				  cond, t);
	}

      /* T is the current iteration count for this loop level.  */
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	t = fd->loop.v;
      else if (counts[i])
	t = counts[i];
      else
	{
	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
			       fd->loops[i].v, fd->loops[i].n1);
	  t = fold_convert_loc (loc, fd->iter_type, t);
	}
      if (step)
	/* We have divided off by step already earlier.  */;
      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
			       fold_build1_loc (loc, NEGATE_EXPR, itype,
						s));
      else
	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
      off = fold_convert_loc (loc, fd->iter_type, off);
      if (i <= fd->collapse - 1 && fd->collapse > 1)
	{
	  /* Collapsed loops share a single counter; accumulate their
	     offsets scaled by the inner counts into COFF and emit only
	     one argument for the whole collapsed set.  */
	  if (i)
	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
				   off);
	  if (i < fd->collapse - 1)
	    {
	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
				      counts[i]);
	      continue;
	    }
	}
      off = unshare_expr (off);
      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
				    true, GSI_SAME_STMT);
      args.safe_push (t);
    }
  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
  gimple_set_location (g, loc);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);

  cond = unshare_expr (cond);
  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
GSI_CONTINUE_LINKING); 2222 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); 2223 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); 2224 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2225 e1->probability = e3->probability.invert (); 2226 e1->flags = EDGE_TRUE_VALUE; 2227 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); 2228 2229 *gsi = gsi_after_labels (e2->dest); 2230} 2231 2232/* Expand all #pragma omp ordered depend(source) and 2233 #pragma omp ordered depend(sink:...) constructs in the current 2234 #pragma omp for ordered(n) region. */ 2235 2236static void 2237expand_omp_ordered_source_sink (struct omp_region *region, 2238 struct omp_for_data *fd, tree *counts, 2239 basic_block cont_bb) 2240{ 2241 struct omp_region *inner; 2242 int i; 2243 for (i = fd->collapse - 1; i < fd->ordered; i++) 2244 if (i == fd->collapse - 1 && fd->collapse > 1) 2245 counts[i] = NULL_TREE; 2246 else if (i >= fd->collapse && !cont_bb) 2247 counts[i] = build_zero_cst (fd->iter_type); 2248 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) 2249 && integer_onep (fd->loops[i].step)) 2250 counts[i] = NULL_TREE; 2251 else 2252 counts[i] = create_tmp_var (fd->iter_type, ".orditer"); 2253 tree atype 2254 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); 2255 counts[fd->ordered] = create_tmp_var (atype, ".orditera"); 2256 TREE_ADDRESSABLE (counts[fd->ordered]) = 1; 2257 2258 for (inner = region->inner; inner; inner = inner->next) 2259 if (inner->type == GIMPLE_OMP_ORDERED) 2260 { 2261 gomp_ordered *ord_stmt = inner->ord_stmt; 2262 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); 2263 location_t loc = gimple_location (ord_stmt); 2264 tree c; 2265 for (c = gimple_omp_ordered_clauses (ord_stmt); 2266 c; c = OMP_CLAUSE_CHAIN (c)) 2267 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) 2268 break; 2269 if (c) 2270 expand_omp_ordered_source (&gsi, fd, counts, loc); 2271 for (c = 
gimple_omp_ordered_clauses (ord_stmt); 2272 c; c = OMP_CLAUSE_CHAIN (c)) 2273 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) 2274 expand_omp_ordered_sink (&gsi, fd, counts, c, loc); 2275 gsi_remove (&gsi, true); 2276 } 2277} 2278 2279/* Wrap the body into fd->ordered - fd->collapse loops that aren't 2280 collapsed. */ 2281 2282static basic_block 2283expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, 2284 basic_block cont_bb, basic_block body_bb, 2285 bool ordered_lastprivate) 2286{ 2287 if (fd->ordered == fd->collapse) 2288 return cont_bb; 2289 2290 if (!cont_bb) 2291 { 2292 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2293 for (int i = fd->collapse; i < fd->ordered; i++) 2294 { 2295 tree type = TREE_TYPE (fd->loops[i].v); 2296 tree n1 = fold_convert (type, fd->loops[i].n1); 2297 expand_omp_build_assign (&gsi, fd->loops[i].v, n1); 2298 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2299 size_int (i - fd->collapse + 1), 2300 NULL_TREE, NULL_TREE); 2301 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2302 } 2303 return NULL; 2304 } 2305 2306 for (int i = fd->ordered - 1; i >= fd->collapse; i--) 2307 { 2308 tree t, type = TREE_TYPE (fd->loops[i].v); 2309 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2310 expand_omp_build_assign (&gsi, fd->loops[i].v, 2311 fold_convert (type, fd->loops[i].n1)); 2312 if (counts[i]) 2313 expand_omp_build_assign (&gsi, counts[i], 2314 build_zero_cst (fd->iter_type)); 2315 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2316 size_int (i - fd->collapse + 1), 2317 NULL_TREE, NULL_TREE); 2318 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); 2319 if (!gsi_end_p (gsi)) 2320 gsi_prev (&gsi); 2321 else 2322 gsi = gsi_last_bb (body_bb); 2323 edge e1 = split_block (body_bb, gsi_stmt (gsi)); 2324 basic_block new_body = e1->dest; 2325 if (body_bb == cont_bb) 2326 cont_bb = new_body; 2327 edge e2 = NULL; 2328 basic_block 
new_header; 2329 if (EDGE_COUNT (cont_bb->preds) > 0) 2330 { 2331 gsi = gsi_last_bb (cont_bb); 2332 if (POINTER_TYPE_P (type)) 2333 t = fold_build_pointer_plus (fd->loops[i].v, 2334 fold_convert (sizetype, 2335 fd->loops[i].step)); 2336 else 2337 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, 2338 fold_convert (type, fd->loops[i].step)); 2339 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 2340 if (counts[i]) 2341 { 2342 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], 2343 build_int_cst (fd->iter_type, 1)); 2344 expand_omp_build_assign (&gsi, counts[i], t); 2345 t = counts[i]; 2346 } 2347 else 2348 { 2349 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), 2350 fd->loops[i].v, fd->loops[i].n1); 2351 t = fold_convert (fd->iter_type, t); 2352 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2353 true, GSI_SAME_STMT); 2354 } 2355 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 2356 size_int (i - fd->collapse + 1), 2357 NULL_TREE, NULL_TREE); 2358 expand_omp_build_assign (&gsi, aref, t); 2359 gsi_prev (&gsi); 2360 e2 = split_block (cont_bb, gsi_stmt (gsi)); 2361 new_header = e2->dest; 2362 } 2363 else 2364 new_header = cont_bb; 2365 gsi = gsi_after_labels (new_header); 2366 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, 2367 true, GSI_SAME_STMT); 2368 tree n2 2369 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), 2370 true, NULL_TREE, true, GSI_SAME_STMT); 2371 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); 2372 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); 2373 edge e3 = split_block (new_header, gsi_stmt (gsi)); 2374 cont_bb = e3->dest; 2375 remove_edge (e1); 2376 make_edge (body_bb, new_header, EDGE_FALLTHRU); 2377 e3->flags = EDGE_FALSE_VALUE; 2378 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8); 2379 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); 2380 e1->probability = e3->probability.invert (); 2381 2382 
set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); 2383 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); 2384 2385 if (e2) 2386 { 2387 struct loop *loop = alloc_loop (); 2388 loop->header = new_header; 2389 loop->latch = e2->src; 2390 add_loop (loop, body_bb->loop_father); 2391 } 2392 } 2393 2394 /* If there are any lastprivate clauses and it is possible some loops 2395 might have zero iterations, ensure all the decls are initialized, 2396 otherwise we could crash evaluating C++ class iterators with lastprivate 2397 clauses. */ 2398 bool need_inits = false; 2399 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) 2400 if (need_inits) 2401 { 2402 tree type = TREE_TYPE (fd->loops[i].v); 2403 gimple_stmt_iterator gsi = gsi_after_labels (body_bb); 2404 expand_omp_build_assign (&gsi, fd->loops[i].v, 2405 fold_convert (type, fd->loops[i].n1)); 2406 } 2407 else 2408 { 2409 tree type = TREE_TYPE (fd->loops[i].v); 2410 tree this_cond = fold_build2 (fd->loops[i].cond_code, 2411 boolean_type_node, 2412 fold_convert (type, fd->loops[i].n1), 2413 fold_convert (type, fd->loops[i].n2)); 2414 if (!integer_onep (this_cond)) 2415 need_inits = true; 2416 } 2417 2418 return cont_bb; 2419} 2420 2421/* A subroutine of expand_omp_for. Generate code for a parallel 2422 loop with any schedule. Given parameters: 2423 2424 for (V = N1; V cond N2; V += STEP) BODY; 2425 2426 where COND is "<" or ">", we generate pseudocode 2427 2428 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); 2429 if (more) goto L0; else goto L3; 2430 L0: 2431 V = istart0; 2432 iend = iend0; 2433 L1: 2434 BODY; 2435 V += STEP; 2436 if (V cond iend) goto L1; else goto L2; 2437 L2: 2438 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2439 L3: 2440 2441 If this is a combined omp parallel loop, instead of the call to 2442 GOMP_loop_foo_start, we call GOMP_loop_foo_next. 
2443 If this is gimple_omp_for_combined_p loop, then instead of assigning 2444 V and iend in L0 we assign the first two _looptemp_ clause decls of the 2445 inner GIMPLE_OMP_FOR and V += STEP; and 2446 if (V cond iend) goto L1; else goto L2; are removed. 2447 2448 For collapsed loops, given parameters: 2449 collapse(3) 2450 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 2451 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 2452 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 2453 BODY; 2454 2455 we generate pseudocode 2456 2457 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; 2458 if (cond3 is <) 2459 adj = STEP3 - 1; 2460 else 2461 adj = STEP3 + 1; 2462 count3 = (adj + N32 - N31) / STEP3; 2463 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; 2464 if (cond2 is <) 2465 adj = STEP2 - 1; 2466 else 2467 adj = STEP2 + 1; 2468 count2 = (adj + N22 - N21) / STEP2; 2469 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; 2470 if (cond1 is <) 2471 adj = STEP1 - 1; 2472 else 2473 adj = STEP1 + 1; 2474 count1 = (adj + N12 - N11) / STEP1; 2475 count = count1 * count2 * count3; 2476 goto Z1; 2477 Z0: 2478 count = 0; 2479 Z1: 2480 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); 2481 if (more) goto L0; else goto L3; 2482 L0: 2483 V = istart0; 2484 T = V; 2485 V3 = N31 + (T % count3) * STEP3; 2486 T = T / count3; 2487 V2 = N21 + (T % count2) * STEP2; 2488 T = T / count2; 2489 V1 = N11 + T * STEP1; 2490 iend = iend0; 2491 L1: 2492 BODY; 2493 V += 1; 2494 if (V < iend) goto L10; else goto L2; 2495 L10: 2496 V3 += STEP3; 2497 if (V3 cond3 N32) goto L1; else goto L11; 2498 L11: 2499 V3 = N31; 2500 V2 += STEP2; 2501 if (V2 cond2 N22) goto L1; else goto L12; 2502 L12: 2503 V2 = N21; 2504 V1 += STEP1; 2505 goto L1; 2506 L2: 2507 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; 2508 L3: 2509 2510 */ 2511 2512static void 2513expand_omp_for_generic (struct omp_region *region, 2514 struct omp_for_data *fd, 2515 enum built_in_function start_fn, 2516 enum built_in_function 
next_fn,
			gimple *inner_stmt)
{
  tree type, istart0, iend0, iend;
  tree t, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
  basic_block l2_bb = NULL, l3_bb = NULL;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  bool in_combined_parallel = is_combined_parallel (region);
  bool broken_loop = region->cont == NULL;
  edge e, ne;
  tree *counts = NULL;
  int i;
  bool ordered_lastprivate = false;

  gcc_assert (!broken_loop || !in_combined_parallel);
  gcc_assert (fd->iter_type == long_integer_type_node
	      || !in_combined_parallel);

  entry_bb = region->entry;
  cont_bb = region->cont;
  collapse_bb = NULL;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (broken_loop
	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  l1_bb = single_succ (l0_bb);
  if (!broken_loop)
    {
      l2_bb = create_empty_bb (cont_bb);
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
		      == l1_bb));
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  else
    l2_bb = NULL;
  l3_bb = BRANCH_EDGE (entry_bb)->dest;
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);

  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
  if (fd->ordered
      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
			  OMP_CLAUSE_LASTPRIVATE))
    /* Set the flag when an ordered(n) loop carries a lastprivate clause;
       it is passed down to expand_omp_for_ordered_loops so that iteration
       variables of possibly-zero-trip loops get initialized.  The previous
       code assigned false here, which was a no-op — ORDERED_LASTPRIVATE is
       already initialized to false above — and defeated the check.  */
    ordered_lastprivate = true;
  if (fd->collapse > 1 || fd->ordered)
    {
      int first_zero_iter1 = -1, first_zero_iter2 = -1;
      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->ordered ?
fd->ordered + 1 : fd->collapse); 2570 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 2571 zero_iter1_bb, first_zero_iter1, 2572 zero_iter2_bb, first_zero_iter2, l2_dom_bb); 2573 2574 if (zero_iter1_bb) 2575 { 2576 /* Some counts[i] vars might be uninitialized if 2577 some loop has zero iterations. But the body shouldn't 2578 be executed in that case, so just avoid uninit warnings. */ 2579 for (i = first_zero_iter1; 2580 i < (fd->ordered ? fd->ordered : fd->collapse); i++) 2581 if (SSA_VAR_P (counts[i])) 2582 TREE_NO_WARNING (counts[i]) = 1; 2583 gsi_prev (&gsi); 2584 e = split_block (entry_bb, gsi_stmt (gsi)); 2585 entry_bb = e->dest; 2586 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); 2587 gsi = gsi_last_nondebug_bb (entry_bb); 2588 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2589 get_immediate_dominator (CDI_DOMINATORS, 2590 zero_iter1_bb)); 2591 } 2592 if (zero_iter2_bb) 2593 { 2594 /* Some counts[i] vars might be uninitialized if 2595 some loop has zero iterations. But the body shouldn't 2596 be executed in that case, so just avoid uninit warnings. 
*/ 2597 for (i = first_zero_iter2; i < fd->ordered; i++) 2598 if (SSA_VAR_P (counts[i])) 2599 TREE_NO_WARNING (counts[i]) = 1; 2600 if (zero_iter1_bb) 2601 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2602 else 2603 { 2604 gsi_prev (&gsi); 2605 e = split_block (entry_bb, gsi_stmt (gsi)); 2606 entry_bb = e->dest; 2607 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); 2608 gsi = gsi_last_nondebug_bb (entry_bb); 2609 set_immediate_dominator (CDI_DOMINATORS, entry_bb, 2610 get_immediate_dominator 2611 (CDI_DOMINATORS, zero_iter2_bb)); 2612 } 2613 } 2614 if (fd->collapse == 1) 2615 { 2616 counts[0] = fd->loop.n2; 2617 fd->loop = fd->loops[0]; 2618 } 2619 } 2620 2621 type = TREE_TYPE (fd->loop.v); 2622 istart0 = create_tmp_var (fd->iter_type, ".istart0"); 2623 iend0 = create_tmp_var (fd->iter_type, ".iend0"); 2624 TREE_ADDRESSABLE (istart0) = 1; 2625 TREE_ADDRESSABLE (iend0) = 1; 2626 2627 /* See if we need to bias by LLONG_MIN. */ 2628 if (fd->iter_type == long_long_unsigned_type_node 2629 && TREE_CODE (type) == INTEGER_TYPE 2630 && !TYPE_UNSIGNED (type) 2631 && fd->ordered == 0) 2632 { 2633 tree n1, n2; 2634 2635 if (fd->loop.cond_code == LT_EXPR) 2636 { 2637 n1 = fd->loop.n1; 2638 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); 2639 } 2640 else 2641 { 2642 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); 2643 n2 = fd->loop.n1; 2644 } 2645 if (TREE_CODE (n1) != INTEGER_CST 2646 || TREE_CODE (n2) != INTEGER_CST 2647 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) 2648 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); 2649 } 2650 2651 gimple_stmt_iterator gsif = gsi; 2652 gsi_prev (&gsif); 2653 2654 tree arr = NULL_TREE; 2655 if (in_combined_parallel) 2656 { 2657 gcc_assert (fd->ordered == 0); 2658 /* In a combined parallel loop, emit a call to 2659 GOMP_loop_foo_next. 
*/ 2660 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 2661 build_fold_addr_expr (istart0), 2662 build_fold_addr_expr (iend0)); 2663 } 2664 else 2665 { 2666 tree t0, t1, t2, t3, t4; 2667 /* If this is not a combined parallel loop, emit a call to 2668 GOMP_loop_foo_start in ENTRY_BB. */ 2669 t4 = build_fold_addr_expr (iend0); 2670 t3 = build_fold_addr_expr (istart0); 2671 if (fd->ordered) 2672 { 2673 t0 = build_int_cst (unsigned_type_node, 2674 fd->ordered - fd->collapse + 1); 2675 arr = create_tmp_var (build_array_type_nelts (fd->iter_type, 2676 fd->ordered 2677 - fd->collapse + 1), 2678 ".omp_counts"); 2679 DECL_NAMELESS (arr) = 1; 2680 TREE_ADDRESSABLE (arr) = 1; 2681 TREE_STATIC (arr) = 1; 2682 vec<constructor_elt, va_gc> *v; 2683 vec_alloc (v, fd->ordered - fd->collapse + 1); 2684 int idx; 2685 2686 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) 2687 { 2688 tree c; 2689 if (idx == 0 && fd->collapse > 1) 2690 c = fd->loop.n2; 2691 else 2692 c = counts[idx + fd->collapse - 1]; 2693 tree purpose = size_int (idx); 2694 CONSTRUCTOR_APPEND_ELT (v, purpose, c); 2695 if (TREE_CODE (c) != INTEGER_CST) 2696 TREE_STATIC (arr) = 0; 2697 } 2698 2699 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); 2700 if (!TREE_STATIC (arr)) 2701 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, 2702 void_type_node, arr), 2703 true, NULL_TREE, true, GSI_SAME_STMT); 2704 t1 = build_fold_addr_expr (arr); 2705 t2 = NULL_TREE; 2706 } 2707 else 2708 { 2709 t2 = fold_convert (fd->iter_type, fd->loop.step); 2710 t1 = fd->loop.n2; 2711 t0 = fd->loop.n1; 2712 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 2713 { 2714 tree innerc 2715 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 2716 OMP_CLAUSE__LOOPTEMP_); 2717 gcc_assert (innerc); 2718 t0 = OMP_CLAUSE_DECL (innerc); 2719 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2720 OMP_CLAUSE__LOOPTEMP_); 2721 gcc_assert (innerc); 2722 t1 = OMP_CLAUSE_DECL (innerc); 2723 } 2724 if (POINTER_TYPE_P 
(TREE_TYPE (t0)) 2725 && TYPE_PRECISION (TREE_TYPE (t0)) 2726 != TYPE_PRECISION (fd->iter_type)) 2727 { 2728 /* Avoid casting pointers to integer of a different size. */ 2729 tree itype = signed_type_for (type); 2730 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); 2731 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); 2732 } 2733 else 2734 { 2735 t1 = fold_convert (fd->iter_type, t1); 2736 t0 = fold_convert (fd->iter_type, t0); 2737 } 2738 if (bias) 2739 { 2740 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); 2741 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); 2742 } 2743 } 2744 if (fd->iter_type == long_integer_type_node || fd->ordered) 2745 { 2746 if (fd->chunk_size) 2747 { 2748 t = fold_convert (fd->iter_type, fd->chunk_size); 2749 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2750 if (fd->ordered) 2751 t = build_call_expr (builtin_decl_explicit (start_fn), 2752 5, t0, t1, t, t3, t4); 2753 else 2754 t = build_call_expr (builtin_decl_explicit (start_fn), 2755 6, t0, t1, t2, t, t3, t4); 2756 } 2757 else if (fd->ordered) 2758 t = build_call_expr (builtin_decl_explicit (start_fn), 2759 4, t0, t1, t3, t4); 2760 else 2761 t = build_call_expr (builtin_decl_explicit (start_fn), 2762 5, t0, t1, t2, t3, t4); 2763 } 2764 else 2765 { 2766 tree t5; 2767 tree c_bool_type; 2768 tree bfn_decl; 2769 2770 /* The GOMP_loop_ull_*start functions have additional boolean 2771 argument, true for < loops and false for > loops. 2772 In Fortran, the C bool type can be different from 2773 boolean_type_node. */ 2774 bfn_decl = builtin_decl_explicit (start_fn); 2775 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); 2776 t5 = build_int_cst (c_bool_type, 2777 fd->loop.cond_code == LT_EXPR ? 
1 : 0); 2778 if (fd->chunk_size) 2779 { 2780 tree bfn_decl = builtin_decl_explicit (start_fn); 2781 t = fold_convert (fd->iter_type, fd->chunk_size); 2782 t = omp_adjust_chunk_size (t, fd->simd_schedule); 2783 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); 2784 } 2785 else 2786 t = build_call_expr (builtin_decl_explicit (start_fn), 2787 6, t5, t0, t1, t2, t3, t4); 2788 } 2789 } 2790 if (TREE_TYPE (t) != boolean_type_node) 2791 t = fold_build2 (NE_EXPR, boolean_type_node, 2792 t, build_int_cst (TREE_TYPE (t), 0)); 2793 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2794 true, GSI_SAME_STMT); 2795 if (arr && !TREE_STATIC (arr)) 2796 { 2797 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 2798 TREE_THIS_VOLATILE (clobber) = 1; 2799 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), 2800 GSI_SAME_STMT); 2801 } 2802 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 2803 2804 /* Remove the GIMPLE_OMP_FOR statement. */ 2805 gsi_remove (&gsi, true); 2806 2807 if (gsi_end_p (gsif)) 2808 gsif = gsi_after_labels (gsi_bb (gsif)); 2809 gsi_next (&gsif); 2810 2811 /* Iteration setup for sequential loop goes in L0_BB. 
*/ 2812 tree startvar = fd->loop.v; 2813 tree endvar = NULL_TREE; 2814 2815 if (gimple_omp_for_combined_p (fd->for_stmt)) 2816 { 2817 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR 2818 && gimple_omp_for_kind (inner_stmt) 2819 == GF_OMP_FOR_KIND_SIMD); 2820 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), 2821 OMP_CLAUSE__LOOPTEMP_); 2822 gcc_assert (innerc); 2823 startvar = OMP_CLAUSE_DECL (innerc); 2824 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 2825 OMP_CLAUSE__LOOPTEMP_); 2826 gcc_assert (innerc); 2827 endvar = OMP_CLAUSE_DECL (innerc); 2828 } 2829 2830 gsi = gsi_start_bb (l0_bb); 2831 t = istart0; 2832 if (fd->ordered && fd->collapse == 1) 2833 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2834 fold_convert (fd->iter_type, fd->loop.step)); 2835 else if (bias) 2836 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2837 if (fd->ordered && fd->collapse == 1) 2838 { 2839 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2840 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2841 fd->loop.n1, fold_convert (sizetype, t)); 2842 else 2843 { 2844 t = fold_convert (TREE_TYPE (startvar), t); 2845 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2846 fd->loop.n1, t); 2847 } 2848 } 2849 else 2850 { 2851 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2852 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2853 t = fold_convert (TREE_TYPE (startvar), t); 2854 } 2855 t = force_gimple_operand_gsi (&gsi, t, 2856 DECL_P (startvar) 2857 && TREE_ADDRESSABLE (startvar), 2858 NULL_TREE, false, GSI_CONTINUE_LINKING); 2859 assign_stmt = gimple_build_assign (startvar, t); 2860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2861 2862 t = iend0; 2863 if (fd->ordered && fd->collapse == 1) 2864 t = fold_build2 (MULT_EXPR, fd->iter_type, t, 2865 fold_convert (fd->iter_type, fd->loop.step)); 2866 else if (bias) 2867 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); 2868 if (fd->ordered && fd->collapse == 1) 2869 { 2870 if 
(POINTER_TYPE_P (TREE_TYPE (startvar))) 2871 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), 2872 fd->loop.n1, fold_convert (sizetype, t)); 2873 else 2874 { 2875 t = fold_convert (TREE_TYPE (startvar), t); 2876 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), 2877 fd->loop.n1, t); 2878 } 2879 } 2880 else 2881 { 2882 if (POINTER_TYPE_P (TREE_TYPE (startvar))) 2883 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); 2884 t = fold_convert (TREE_TYPE (startvar), t); 2885 } 2886 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2887 false, GSI_CONTINUE_LINKING); 2888 if (endvar) 2889 { 2890 assign_stmt = gimple_build_assign (endvar, iend); 2891 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2892 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) 2893 assign_stmt = gimple_build_assign (fd->loop.v, iend); 2894 else 2895 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); 2896 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2897 } 2898 /* Handle linear clause adjustments. 
*/ 2899 tree itercnt = NULL_TREE; 2900 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 2901 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 2902 c; c = OMP_CLAUSE_CHAIN (c)) 2903 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 2904 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 2905 { 2906 tree d = OMP_CLAUSE_DECL (c); 2907 bool is_ref = omp_is_reference (d); 2908 tree t = d, a, dest; 2909 if (is_ref) 2910 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 2911 tree type = TREE_TYPE (t); 2912 if (POINTER_TYPE_P (type)) 2913 type = sizetype; 2914 dest = unshare_expr (t); 2915 tree v = create_tmp_var (TREE_TYPE (t), NULL); 2916 expand_omp_build_assign (&gsif, v, t); 2917 if (itercnt == NULL_TREE) 2918 { 2919 itercnt = startvar; 2920 tree n1 = fd->loop.n1; 2921 if (POINTER_TYPE_P (TREE_TYPE (itercnt))) 2922 { 2923 itercnt 2924 = fold_convert (signed_type_for (TREE_TYPE (itercnt)), 2925 itercnt); 2926 n1 = fold_convert (TREE_TYPE (itercnt), n1); 2927 } 2928 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), 2929 itercnt, n1); 2930 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), 2931 itercnt, fd->loop.step); 2932 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 2933 NULL_TREE, false, 2934 GSI_CONTINUE_LINKING); 2935 } 2936 a = fold_build2 (MULT_EXPR, type, 2937 fold_convert (type, itercnt), 2938 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 2939 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR 2940 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 2941 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 2942 false, GSI_CONTINUE_LINKING); 2943 assign_stmt = gimple_build_assign (dest, t); 2944 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 2945 } 2946 if (fd->collapse > 1) 2947 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 2948 2949 if (fd->ordered) 2950 { 2951 /* Until now, counts array contained number of iterations or 2952 variable containing it for ith loop. 
From now on, we need 2953 those counts only for collapsed loops, and only for the 2nd 2954 till the last collapsed one. Move those one element earlier, 2955 we'll use counts[fd->collapse - 1] for the first source/sink 2956 iteration counter and so on and counts[fd->ordered] 2957 as the array holding the current counter values for 2958 depend(source). */ 2959 if (fd->collapse > 1) 2960 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); 2961 if (broken_loop) 2962 { 2963 int i; 2964 for (i = fd->collapse; i < fd->ordered; i++) 2965 { 2966 tree type = TREE_TYPE (fd->loops[i].v); 2967 tree this_cond 2968 = fold_build2 (fd->loops[i].cond_code, boolean_type_node, 2969 fold_convert (type, fd->loops[i].n1), 2970 fold_convert (type, fd->loops[i].n2)); 2971 if (!integer_onep (this_cond)) 2972 break; 2973 } 2974 if (i < fd->ordered) 2975 { 2976 cont_bb 2977 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); 2978 add_bb_to_loop (cont_bb, l1_bb->loop_father); 2979 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); 2980 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); 2981 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 2982 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); 2983 make_edge (cont_bb, l1_bb, 0); 2984 l2_bb = create_empty_bb (cont_bb); 2985 broken_loop = false; 2986 } 2987 } 2988 expand_omp_ordered_source_sink (region, fd, counts, cont_bb); 2989 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, 2990 ordered_lastprivate); 2991 if (counts[fd->collapse - 1]) 2992 { 2993 gcc_assert (fd->collapse == 1); 2994 gsi = gsi_last_bb (l0_bb); 2995 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], 2996 istart0, true); 2997 gsi = gsi_last_bb (cont_bb); 2998 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], 2999 build_int_cst (fd->iter_type, 1)); 3000 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); 3001 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3002 size_zero_node, 
NULL_TREE, NULL_TREE); 3003 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); 3004 t = counts[fd->collapse - 1]; 3005 } 3006 else if (fd->collapse > 1) 3007 t = fd->loop.v; 3008 else 3009 { 3010 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3011 fd->loops[0].v, fd->loops[0].n1); 3012 t = fold_convert (fd->iter_type, t); 3013 } 3014 gsi = gsi_last_bb (l0_bb); 3015 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], 3016 size_zero_node, NULL_TREE, NULL_TREE); 3017 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3018 false, GSI_CONTINUE_LINKING); 3019 expand_omp_build_assign (&gsi, aref, t, true); 3020 } 3021 3022 if (!broken_loop) 3023 { 3024 /* Code to control the increment and predicate for the sequential 3025 loop goes in the CONT_BB. */ 3026 gsi = gsi_last_nondebug_bb (cont_bb); 3027 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 3028 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); 3029 vmain = gimple_omp_continue_control_use (cont_stmt); 3030 vback = gimple_omp_continue_control_def (cont_stmt); 3031 3032 if (!gimple_omp_for_combined_p (fd->for_stmt)) 3033 { 3034 if (POINTER_TYPE_P (type)) 3035 t = fold_build_pointer_plus (vmain, fd->loop.step); 3036 else 3037 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); 3038 t = force_gimple_operand_gsi (&gsi, t, 3039 DECL_P (vback) 3040 && TREE_ADDRESSABLE (vback), 3041 NULL_TREE, true, GSI_SAME_STMT); 3042 assign_stmt = gimple_build_assign (vback, t); 3043 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3044 3045 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) 3046 { 3047 tree tem; 3048 if (fd->collapse > 1) 3049 tem = fd->loop.v; 3050 else 3051 { 3052 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), 3053 fd->loops[0].v, fd->loops[0].n1); 3054 tem = fold_convert (fd->iter_type, tem); 3055 } 3056 tree aref = build4 (ARRAY_REF, fd->iter_type, 3057 counts[fd->ordered], size_zero_node, 3058 NULL_TREE, NULL_TREE); 
3059 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, 3060 true, GSI_SAME_STMT); 3061 expand_omp_build_assign (&gsi, aref, tem); 3062 } 3063 3064 t = build2 (fd->loop.cond_code, boolean_type_node, 3065 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, 3066 iend); 3067 gcond *cond_stmt = gimple_build_cond_empty (t); 3068 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3069 } 3070 3071 /* Remove GIMPLE_OMP_CONTINUE. */ 3072 gsi_remove (&gsi, true); 3073 3074 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 3075 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); 3076 3077 /* Emit code to get the next parallel iteration in L2_BB. */ 3078 gsi = gsi_start_bb (l2_bb); 3079 3080 t = build_call_expr (builtin_decl_explicit (next_fn), 2, 3081 build_fold_addr_expr (istart0), 3082 build_fold_addr_expr (iend0)); 3083 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3084 false, GSI_CONTINUE_LINKING); 3085 if (TREE_TYPE (t) != boolean_type_node) 3086 t = fold_build2 (NE_EXPR, boolean_type_node, 3087 t, build_int_cst (TREE_TYPE (t), 0)); 3088 gcond *cond_stmt = gimple_build_cond_empty (t); 3089 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 3090 } 3091 3092 /* Add the loop cleanup function. 
*/ 3093 gsi = gsi_last_nondebug_bb (exit_bb); 3094 if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) 3095 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); 3096 else if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3097 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); 3098 else 3099 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); 3100 gcall *call_stmt = gimple_build_call (t, 0); 3101 if (gimple_omp_return_lhs (gsi_stmt (gsi))) 3102 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); 3103 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); 3104 if (fd->ordered) 3105 { 3106 tree arr = counts[fd->ordered]; 3107 tree clobber = build_constructor (TREE_TYPE (arr), NULL); 3108 TREE_THIS_VOLATILE (clobber) = 1; 3109 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), 3110 GSI_SAME_STMT); 3111 } 3112 gsi_remove (&gsi, true); 3113 3114 /* Connect the new blocks. */ 3115 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; 3116 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; 3117 3118 if (!broken_loop) 3119 { 3120 gimple_seq phis; 3121 3122 e = find_edge (cont_bb, l3_bb); 3123 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); 3124 3125 phis = phi_nodes (l3_bb); 3126 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) 3127 { 3128 gimple *phi = gsi_stmt (gsi); 3129 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), 3130 PHI_ARG_DEF_FROM_EDGE (phi, e)); 3131 } 3132 remove_edge (e); 3133 3134 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); 3135 e = find_edge (cont_bb, l1_bb); 3136 if (e == NULL) 3137 { 3138 e = BRANCH_EDGE (cont_bb); 3139 gcc_assert (single_succ (e->dest) == l1_bb); 3140 } 3141 if (gimple_omp_for_combined_p (fd->for_stmt)) 3142 { 3143 remove_edge (e); 3144 e = NULL; 3145 } 3146 else if (fd->collapse > 1) 3147 { 3148 remove_edge (e); 3149 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 3150 } 3151 else 3152 e->flags = EDGE_TRUE_VALUE; 3153 if (e) 3154 { 3155 e->probability = 
profile_probability::guessed_always ().apply_scale (7, 8); 3156 find_edge (cont_bb, l2_bb)->probability = e->probability.invert (); 3157 } 3158 else 3159 { 3160 e = find_edge (cont_bb, l2_bb); 3161 e->flags = EDGE_FALLTHRU; 3162 } 3163 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); 3164 3165 if (gimple_in_ssa_p (cfun)) 3166 { 3167 /* Add phis to the outer loop that connect to the phis in the inner, 3168 original loop, and move the loop entry value of the inner phi to 3169 the loop entry value of the outer phi. */ 3170 gphi_iterator psi; 3171 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) 3172 { 3173 source_location locus; 3174 gphi *nphi; 3175 gphi *exit_phi = psi.phi (); 3176 3177 if (virtual_operand_p (gimple_phi_result (exit_phi))) 3178 continue; 3179 3180 edge l2_to_l3 = find_edge (l2_bb, l3_bb); 3181 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); 3182 3183 basic_block latch = BRANCH_EDGE (cont_bb)->dest; 3184 edge latch_to_l1 = find_edge (latch, l1_bb); 3185 gphi *inner_phi 3186 = find_phi_with_arg_on_edge (exit_res, latch_to_l1); 3187 3188 tree t = gimple_phi_result (exit_phi); 3189 tree new_res = copy_ssa_name (t, NULL); 3190 nphi = create_phi_node (new_res, l0_bb); 3191 3192 edge l0_to_l1 = find_edge (l0_bb, l1_bb); 3193 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); 3194 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); 3195 edge entry_to_l0 = find_edge (entry_bb, l0_bb); 3196 add_phi_arg (nphi, t, entry_to_l0, locus); 3197 3198 edge l2_to_l0 = find_edge (l2_bb, l0_bb); 3199 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); 3200 3201 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); 3202 } 3203 } 3204 3205 set_immediate_dominator (CDI_DOMINATORS, l2_bb, 3206 recompute_dominator (CDI_DOMINATORS, l2_bb)); 3207 set_immediate_dominator (CDI_DOMINATORS, l3_bb, 3208 recompute_dominator (CDI_DOMINATORS, l3_bb)); 3209 set_immediate_dominator (CDI_DOMINATORS, l0_bb, 3210 recompute_dominator 
(CDI_DOMINATORS, l0_bb)); 3211 set_immediate_dominator (CDI_DOMINATORS, l1_bb, 3212 recompute_dominator (CDI_DOMINATORS, l1_bb)); 3213 3214 /* We enter expand_omp_for_generic with a loop. This original loop may 3215 have its own loop struct, or it may be part of an outer loop struct 3216 (which may be the fake loop). */ 3217 struct loop *outer_loop = entry_bb->loop_father; 3218 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; 3219 3220 add_bb_to_loop (l2_bb, outer_loop); 3221 3222 /* We've added a new loop around the original loop. Allocate the 3223 corresponding loop struct. */ 3224 struct loop *new_loop = alloc_loop (); 3225 new_loop->header = l0_bb; 3226 new_loop->latch = l2_bb; 3227 add_loop (new_loop, outer_loop); 3228 3229 /* Allocate a loop structure for the original loop unless we already 3230 had one. */ 3231 if (!orig_loop_has_loop_struct 3232 && !gimple_omp_for_combined_p (fd->for_stmt)) 3233 { 3234 struct loop *orig_loop = alloc_loop (); 3235 orig_loop->header = l1_bb; 3236 /* The loop may have multiple latches. */ 3237 add_loop (orig_loop, new_loop); 3238 } 3239 } 3240} 3241 3242/* A subroutine of expand_omp_for. Generate code for a parallel 3243 loop with static schedule and no specified chunk size. 
Given
   parameters:

	for (V = N1; V cond N2; V += STEP) BODY;

   where COND is "<" or ">", we generate pseudocode

	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
	if (cond is <)
	  adj = STEP - 1;
	else
	  adj = STEP + 1;
	if ((__typeof (V)) -1 > 0 && cond is >)
	  n = -(adj + N2 - N1) / -STEP;
	else
	  n = (adj + N2 - N1) / STEP;
	q = n / nthreads;
	tt = n % nthreads;
	if (threadid < tt) goto L3; else goto L4;
    L3:
	tt = 0;
	q = q + 1;
    L4:
	s0 = q * threadid + tt;
	e0 = s0 + q;
	V = s0 * STEP + N1;
	if (s0 >= e0) goto L2; else goto L0;
    L0:
	e = e0 * STEP + N1;
    L1:
	BODY;
	V += STEP;
	if (V cond e) goto L1;
    L2:

   REGION describes the omp region being expanded, FD the analyzed
   GIMPLE_OMP_FOR data, and INNER_STMT the inner construct when this
   loop is part of a combined construct (e.g. distribute parallel for).  */

static void
expand_omp_for_static_nochunk (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
  tree type, itype, vmain, vback;
  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
  basic_block body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  /* No GIMPLE_OMP_CONTINUE means the loop body was removed (e.g. it
     contained only directives); skip the back-edge machinery then.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  /* Do index arithmetic in a signed integer type even for pointer
     iteration variables.  */
  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* Recover the basic blocks of the region laid out by omp-lowering.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
  body_bb = single_succ (seq_start_bb);
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
    /* The outer construct already guaranteed at least one iteration.  */
    t = integer_one_node;
  else
    t = fold_binary (fd->loop.cond_code, boolean_type_node,
		     fold_convert (type, fd->loop.n1),
		     fold_convert (type, fd->loop.n2));
  if (fd->collapse == 1
      && TYPE_UNSIGNED (type)
      && (t == NULL_TREE || !integer_onep (t)))
    {
      /* For an unsigned IV whose trip count may be zero, emit an explicit
	 N1 cond N2 check branching straight to FIN_BB — this is the
	 "if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2" in the
	 pseudocode above.  */
      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
				     true, GSI_SAME_STMT);
      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
					    NULL_TREE, NULL_TREE);
      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
		     expand_omp_regimplify_p, NULL, NULL)
	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
			expand_omp_regimplify_p, NULL, NULL))
	{
	  gsi = gsi_for_stmt (cond_stmt);
	  gimple_regimplify_operands (cond_stmt, &gsi);
	}
      ep = split_block (entry_bb, cond_stmt);
      ep->flags = EDGE_TRUE_VALUE;
      entry_bb = ep->dest;
      ep->probability = profile_probability::very_likely ();
      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
      ep->probability = profile_probability::very_unlikely ();
      if (gimple_in_ssa_p (cfun))
	{
	  /* The new zero-trip edge into FIN_BB needs PHI arguments;
	     reuse the values arriving on the existing entry->fin edge.  */
	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
	       !gsi_end_p (gpi); gsi_next (&gpi))
	    {
	      gphi *phi = gpi.phi ();
	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
			   ep, UNKNOWN_LOCATION);
	    }
	}
      gsi = gsi_last_bb (entry_bb);
    }

  /* Pick the team-size and id queries: threads for a worksharing loop,
     teams for a distribute loop.  */
  switch (gimple_omp_for_kind (fd->for_stmt))
    {
    case GF_OMP_FOR_KIND_FOR:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
      break;
    case GF_OMP_FOR_KIND_DISTRIBUTE:
      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
      break;
    default:
      gcc_unreachable ();
    }
  nthreads = build_call_expr (nthreads, 0);
  nthreads = fold_convert (itype, nthreads);
  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
				       true, GSI_SAME_STMT);
  threadid = build_call_expr (threadid, 0);
  threadid = fold_convert (itype, threadid);
  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
				       true, GSI_SAME_STMT);

  /* When this loop was combined into an outer construct, the real bounds
     arrive through _looptemp_ clauses created by the outer construct.  */
  n1 = fd->loop.n1;
  n2 = fd->loop.n2;
  step = fd->loop.step;
  if (gimple_omp_for_combined_into_p (fd->for_stmt))
    {
      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				     OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n1 = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      n2 = OMP_CLAUSE_DECL (innerc);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  /* Trip count: n = (STEP + adj + N2 - N1) / STEP, per the pseudocode;
     for an unsigned type counting down, divide the negated operands so
     the division stays well-defined.  */
  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
  t = fold_build2 (PLUS_EXPR, itype, step, t);
  t = fold_build2 (PLUS_EXPR, itype, t, n2);
  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
    t = fold_build2 (TRUNC_DIV_EXPR, itype,
		     fold_build1 (NEGATE_EXPR, itype, t),
		     fold_build1 (NEGATE_EXPR, itype, step));
  else
    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
  t = fold_convert (itype, t);
  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* q = n / nthreads; tt = n % nthreads (pseudocode above).  */
  q = create_tmp_reg (itype, "q");
  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);

  tt = create_tmp_reg (itype, "tt");
  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);

  /* if (threadid < tt) { tt = 0; q++; } — the first tt threads get one
     extra iteration each.  SECOND_BB holds the then-branch, THIRD_BB the
     join.  */
  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
  gcond *cond_stmt = gimple_build_cond_empty (t);
  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);

  second_bb = split_block (entry_bb, cond_stmt)->dest;
  gsi = gsi_last_nondebug_bb (second_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
		     GSI_SAME_STMT);
  gassign *assign_stmt
    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

  third_bb = split_block (second_bb, assign_stmt)->dest;
  gsi = gsi_last_nondebug_bb (third_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  /* s0 = q * threadid + tt; e0 = s0 + q; skip the body if s0 >= e0.  */
  t = build2 (MULT_EXPR, itype, q, threadid);
  t = build2 (PLUS_EXPR, itype, t, tt);
  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = fold_build2 (PLUS_EXPR, itype, s0, q);
  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);

  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi_remove (&gsi, true);

  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
  gsi = gsi_start_bb (seq_start_bb);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      /* The inner construct expects its bounds in _looptemp_ clauses.  */
      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
		     ? gimple_omp_parallel_clauses (inner_stmt)
		     : gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
	{
	  int i;
	  for (i = 1; i < fd->collapse; i++)
	    {
	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
					OMP_CLAUSE__LOOPTEMP_);
	      gcc_assert (innerc);
	    }
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  if (innerc)
	    {
	      /* If needed (distribute parallel for with lastprivate),
		 propagate down the total number of iterations.  */
	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
				     fd->loop.n2);
	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
					    GSI_CONTINUE_LINKING);
	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	    }
	}
    }
  /* V = s0 * STEP + N1 (pseudocode).  */
  t = fold_convert (itype, s0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  /* e = e0 * STEP + N1 (pseudocode).  */
  t = fold_convert (itype, e0);
  t = fold_build2 (MULT_EXPR, itype, t, step);
  if (POINTER_TYPE_P (type))
    t = fold_build_pointer_plus (n1, t);
  else
    t = fold_build2 (PLUS_EXPR, type, t, n1);
  t = fold_convert (TREE_TYPE (startvar), t);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  /* Handle linear clause adjustments.  */
  tree itercnt = NULL_TREE;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
	 c; c = OMP_CLAUSE_CHAIN (c))
      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
	{
	  tree d = OMP_CLAUSE_DECL (c);
	  bool is_ref = omp_is_reference (d);
	  tree t = d, a, dest;
	  if (is_ref)
	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
	  if (itercnt == NULL_TREE)
	    {
	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
		{
		  /* Recover the logical iteration number:
		     (n1 - fd->loop.n1) / step + s0.  */
		  itercnt = fold_build2 (MINUS_EXPR, itype,
					 fold_convert (itype, n1),
					 fold_convert (itype, fd->loop.n1));
		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
						      NULL_TREE, false,
						      GSI_CONTINUE_LINKING);
		}
	      else
		itercnt = s0;
	    }
	  tree type = TREE_TYPE (t);
	  if (POINTER_TYPE_P (type))
	    type = sizetype;
	  a = fold_build2 (MULT_EXPR, type,
			   fold_convert (type, itercnt),
			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
	  dest = unshare_expr (t);
	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
					false, GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (dest, t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* V += STEP; if (V cond e) goto body (pseudocode L1 tail).  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
    {
      t = gimple_omp_return_lhs (gsi_stmt (gsi));
      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&gsi, true);

  /* Connect all the blocks.  */
  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
  ep = find_edge (entry_bb, second_bb);
  ep->flags = EDGE_TRUE_VALUE;
  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;

  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (ep == NULL)
	{
	  ep = BRANCH_EDGE (cont_bb);
	  gcc_assert (single_succ (ep->dest) == body_bb);
	}
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			   recompute_dominator (CDI_DOMINATORS, fin_bb));

  /* If the body already has its own loop struct (e.g. set up by an
     earlier pass), nothing more to allocate.  */
  struct loop *loop = body_bb->loop_father;
  if (loop != entry_bb->loop_father)
    {
      gcc_assert (broken_loop || loop->header == body_bb);
      gcc_assert (broken_loop
		  || loop->latch == region->cont
		  || single_pred (loop->latch) == region->cont);
      return;
    }

  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* Return phi in
E->DEST with ARG on edge E. */ 3704 3705static gphi * 3706find_phi_with_arg_on_edge (tree arg, edge e) 3707{ 3708 basic_block bb = e->dest; 3709 3710 for (gphi_iterator gpi = gsi_start_phis (bb); 3711 !gsi_end_p (gpi); 3712 gsi_next (&gpi)) 3713 { 3714 gphi *phi = gpi.phi (); 3715 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) 3716 return phi; 3717 } 3718 3719 return NULL; 3720} 3721 3722/* A subroutine of expand_omp_for. Generate code for a parallel 3723 loop with static schedule and a specified chunk size. Given 3724 parameters: 3725 3726 for (V = N1; V cond N2; V += STEP) BODY; 3727 3728 where COND is "<" or ">", we generate pseudocode 3729 3730 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; 3731 if (cond is <) 3732 adj = STEP - 1; 3733 else 3734 adj = STEP + 1; 3735 if ((__typeof (V)) -1 > 0 && cond is >) 3736 n = -(adj + N2 - N1) / -STEP; 3737 else 3738 n = (adj + N2 - N1) / STEP; 3739 trip = 0; 3740 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is 3741 here so that V is defined 3742 if the loop is not entered 3743 L0: 3744 s0 = (trip * nthreads + threadid) * CHUNK; 3745 e0 = min (s0 + CHUNK, n); 3746 if (s0 < n) goto L1; else goto L4; 3747 L1: 3748 V = s0 * STEP + N1; 3749 e = e0 * STEP + N1; 3750 L2: 3751 BODY; 3752 V += STEP; 3753 if (V cond e) goto L2; else goto L3; 3754 L3: 3755 trip += 1; 3756 goto L0; 3757 L4: 3758*/ 3759 3760static void 3761expand_omp_for_static_chunk (struct omp_region *region, 3762 struct omp_for_data *fd, gimple *inner_stmt) 3763{ 3764 tree n, s0, e0, e, t; 3765 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; 3766 tree type, itype, vmain, vback, vextra; 3767 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; 3768 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; 3769 gimple_stmt_iterator gsi; 3770 edge se; 3771 bool broken_loop = region->cont == NULL; 3772 tree *counts = NULL; 3773 tree n1, n2, step; 3774 3775 itype = type = TREE_TYPE (fd->loop.v); 3776 if 
(POINTER_TYPE_P (type)) 3777 itype = signed_type_for (type); 3778 3779 entry_bb = region->entry; 3780 se = split_block (entry_bb, last_stmt (entry_bb)); 3781 entry_bb = se->src; 3782 iter_part_bb = se->dest; 3783 cont_bb = region->cont; 3784 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); 3785 fin_bb = BRANCH_EDGE (iter_part_bb)->dest; 3786 gcc_assert (broken_loop 3787 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); 3788 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); 3789 body_bb = single_succ (seq_start_bb); 3790 if (!broken_loop) 3791 { 3792 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb 3793 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); 3794 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 3795 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); 3796 } 3797 exit_bb = region->exit; 3798 3799 /* Trip and adjustment setup goes in ENTRY_BB. */ 3800 gsi = gsi_last_nondebug_bb (entry_bb); 3801 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 3802 3803 if (fd->collapse > 1) 3804 { 3805 int first_zero_iter = -1, dummy = -1; 3806 basic_block l2_dom_bb = NULL, dummy_bb = NULL; 3807 3808 counts = XALLOCAVEC (tree, fd->collapse); 3809 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 3810 fin_bb, first_zero_iter, 3811 dummy_bb, dummy, l2_dom_bb); 3812 t = NULL_TREE; 3813 } 3814 else if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3815 t = integer_one_node; 3816 else 3817 t = fold_binary (fd->loop.cond_code, boolean_type_node, 3818 fold_convert (type, fd->loop.n1), 3819 fold_convert (type, fd->loop.n2)); 3820 if (fd->collapse == 1 3821 && TYPE_UNSIGNED (type) 3822 && (t == NULL_TREE || !integer_onep (t))) 3823 { 3824 n1 = fold_convert (type, unshare_expr (fd->loop.n1)); 3825 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, 3826 true, GSI_SAME_STMT); 3827 n2 = fold_convert (type, unshare_expr (fd->loop.n2)); 3828 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, 3829 true, GSI_SAME_STMT); 3830 gcond *cond_stmt 
= gimple_build_cond (fd->loop.cond_code, n1, n2, 3831 NULL_TREE, NULL_TREE); 3832 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); 3833 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), 3834 expand_omp_regimplify_p, NULL, NULL) 3835 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), 3836 expand_omp_regimplify_p, NULL, NULL)) 3837 { 3838 gsi = gsi_for_stmt (cond_stmt); 3839 gimple_regimplify_operands (cond_stmt, &gsi); 3840 } 3841 se = split_block (entry_bb, cond_stmt); 3842 se->flags = EDGE_TRUE_VALUE; 3843 entry_bb = se->dest; 3844 se->probability = profile_probability::very_likely (); 3845 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); 3846 se->probability = profile_probability::very_unlikely (); 3847 if (gimple_in_ssa_p (cfun)) 3848 { 3849 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; 3850 for (gphi_iterator gpi = gsi_start_phis (fin_bb); 3851 !gsi_end_p (gpi); gsi_next (&gpi)) 3852 { 3853 gphi *phi = gpi.phi (); 3854 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), 3855 se, UNKNOWN_LOCATION); 3856 } 3857 } 3858 gsi = gsi_last_bb (entry_bb); 3859 } 3860 3861 switch (gimple_omp_for_kind (fd->for_stmt)) 3862 { 3863 case GF_OMP_FOR_KIND_FOR: 3864 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); 3865 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); 3866 break; 3867 case GF_OMP_FOR_KIND_DISTRIBUTE: 3868 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); 3869 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); 3870 break; 3871 default: 3872 gcc_unreachable (); 3873 } 3874 nthreads = build_call_expr (nthreads, 0); 3875 nthreads = fold_convert (itype, nthreads); 3876 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, 3877 true, GSI_SAME_STMT); 3878 threadid = build_call_expr (threadid, 0); 3879 threadid = fold_convert (itype, threadid); 3880 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, 3881 true, GSI_SAME_STMT); 3882 3883 n1 = fd->loop.n1; 3884 n2 = 
fd->loop.n2; 3885 step = fd->loop.step; 3886 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 3887 { 3888 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 3889 OMP_CLAUSE__LOOPTEMP_); 3890 gcc_assert (innerc); 3891 n1 = OMP_CLAUSE_DECL (innerc); 3892 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3893 OMP_CLAUSE__LOOPTEMP_); 3894 gcc_assert (innerc); 3895 n2 = OMP_CLAUSE_DECL (innerc); 3896 } 3897 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), 3898 true, NULL_TREE, true, GSI_SAME_STMT); 3899 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), 3900 true, NULL_TREE, true, GSI_SAME_STMT); 3901 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), 3902 true, NULL_TREE, true, GSI_SAME_STMT); 3903 tree chunk_size = fold_convert (itype, fd->chunk_size); 3904 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); 3905 chunk_size 3906 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, 3907 GSI_SAME_STMT); 3908 3909 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); 3910 t = fold_build2 (PLUS_EXPR, itype, step, t); 3911 t = fold_build2 (PLUS_EXPR, itype, t, n2); 3912 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); 3913 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) 3914 t = fold_build2 (TRUNC_DIV_EXPR, itype, 3915 fold_build1 (NEGATE_EXPR, itype, t), 3916 fold_build1 (NEGATE_EXPR, itype, step)); 3917 else 3918 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); 3919 t = fold_convert (itype, t); 3920 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3921 true, GSI_SAME_STMT); 3922 3923 trip_var = create_tmp_reg (itype, ".trip"); 3924 if (gimple_in_ssa_p (cfun)) 3925 { 3926 trip_init = make_ssa_name (trip_var); 3927 trip_main = make_ssa_name (trip_var); 3928 trip_back = make_ssa_name (trip_var); 3929 } 3930 else 3931 { 3932 trip_init = trip_var; 3933 trip_main = trip_var; 3934 trip_back = trip_var; 3935 } 3936 3937 gassign *assign_stmt 3938 = gimple_build_assign (trip_init, build_int_cst (itype, 0)); 3939 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 3940 3941 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); 3942 t = fold_build2 (MULT_EXPR, itype, t, step); 3943 if (POINTER_TYPE_P (type)) 3944 t = fold_build_pointer_plus (n1, t); 3945 else 3946 t = fold_build2 (PLUS_EXPR, type, t, n1); 3947 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3948 true, GSI_SAME_STMT); 3949 3950 /* Remove the GIMPLE_OMP_FOR. */ 3951 gsi_remove (&gsi, true); 3952 3953 gimple_stmt_iterator gsif = gsi; 3954 3955 /* Iteration space partitioning goes in ITER_PART_BB. 
*/ 3956 gsi = gsi_last_bb (iter_part_bb); 3957 3958 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); 3959 t = fold_build2 (PLUS_EXPR, itype, t, threadid); 3960 t = fold_build2 (MULT_EXPR, itype, t, chunk_size); 3961 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3962 false, GSI_CONTINUE_LINKING); 3963 3964 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); 3965 t = fold_build2 (MIN_EXPR, itype, t, n); 3966 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 3967 false, GSI_CONTINUE_LINKING); 3968 3969 t = build2 (LT_EXPR, boolean_type_node, s0, n); 3970 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); 3971 3972 /* Setup code for sequential iteration goes in SEQ_START_BB. */ 3973 gsi = gsi_start_bb (seq_start_bb); 3974 3975 tree startvar = fd->loop.v; 3976 tree endvar = NULL_TREE; 3977 3978 if (gimple_omp_for_combined_p (fd->for_stmt)) 3979 { 3980 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL 3981 ? gimple_omp_parallel_clauses (inner_stmt) 3982 : gimple_omp_for_clauses (inner_stmt); 3983 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); 3984 gcc_assert (innerc); 3985 startvar = OMP_CLAUSE_DECL (innerc); 3986 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3987 OMP_CLAUSE__LOOPTEMP_); 3988 gcc_assert (innerc); 3989 endvar = OMP_CLAUSE_DECL (innerc); 3990 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST 3991 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) 3992 { 3993 int i; 3994 for (i = 1; i < fd->collapse; i++) 3995 { 3996 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 3997 OMP_CLAUSE__LOOPTEMP_); 3998 gcc_assert (innerc); 3999 } 4000 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4001 OMP_CLAUSE__LOOPTEMP_); 4002 if (innerc) 4003 { 4004 /* If needed (distribute parallel for with lastprivate), 4005 propagate down the total number of iterations. 
*/ 4006 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), 4007 fd->loop.n2); 4008 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, 4009 GSI_CONTINUE_LINKING); 4010 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); 4011 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4012 } 4013 } 4014 } 4015 4016 t = fold_convert (itype, s0); 4017 t = fold_build2 (MULT_EXPR, itype, t, step); 4018 if (POINTER_TYPE_P (type)) 4019 t = fold_build_pointer_plus (n1, t); 4020 else 4021 t = fold_build2 (PLUS_EXPR, type, t, n1); 4022 t = fold_convert (TREE_TYPE (startvar), t); 4023 t = force_gimple_operand_gsi (&gsi, t, 4024 DECL_P (startvar) 4025 && TREE_ADDRESSABLE (startvar), 4026 NULL_TREE, false, GSI_CONTINUE_LINKING); 4027 assign_stmt = gimple_build_assign (startvar, t); 4028 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4029 4030 t = fold_convert (itype, e0); 4031 t = fold_build2 (MULT_EXPR, itype, t, step); 4032 if (POINTER_TYPE_P (type)) 4033 t = fold_build_pointer_plus (n1, t); 4034 else 4035 t = fold_build2 (PLUS_EXPR, type, t, n1); 4036 t = fold_convert (TREE_TYPE (startvar), t); 4037 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4038 false, GSI_CONTINUE_LINKING); 4039 if (endvar) 4040 { 4041 assign_stmt = gimple_build_assign (endvar, e); 4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4043 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) 4044 assign_stmt = gimple_build_assign (fd->loop.v, e); 4045 else 4046 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); 4047 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4048 } 4049 /* Handle linear clause adjustments. 
*/ 4050 tree itercnt = NULL_TREE, itercntbias = NULL_TREE; 4051 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) 4052 for (tree c = gimple_omp_for_clauses (fd->for_stmt); 4053 c; c = OMP_CLAUSE_CHAIN (c)) 4054 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR 4055 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) 4056 { 4057 tree d = OMP_CLAUSE_DECL (c); 4058 bool is_ref = omp_is_reference (d); 4059 tree t = d, a, dest; 4060 if (is_ref) 4061 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); 4062 tree type = TREE_TYPE (t); 4063 if (POINTER_TYPE_P (type)) 4064 type = sizetype; 4065 dest = unshare_expr (t); 4066 tree v = create_tmp_var (TREE_TYPE (t), NULL); 4067 expand_omp_build_assign (&gsif, v, t); 4068 if (itercnt == NULL_TREE) 4069 { 4070 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4071 { 4072 itercntbias 4073 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), 4074 fold_convert (itype, fd->loop.n1)); 4075 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, 4076 itercntbias, step); 4077 itercntbias 4078 = force_gimple_operand_gsi (&gsif, itercntbias, true, 4079 NULL_TREE, true, 4080 GSI_SAME_STMT); 4081 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); 4082 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, 4083 NULL_TREE, false, 4084 GSI_CONTINUE_LINKING); 4085 } 4086 else 4087 itercnt = s0; 4088 } 4089 a = fold_build2 (MULT_EXPR, type, 4090 fold_convert (type, itercnt), 4091 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); 4092 t = fold_build2 (type == TREE_TYPE (t) ? 
PLUS_EXPR 4093 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); 4094 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4095 false, GSI_CONTINUE_LINKING); 4096 assign_stmt = gimple_build_assign (dest, t); 4097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4098 } 4099 if (fd->collapse > 1) 4100 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); 4101 4102 if (!broken_loop) 4103 { 4104 /* The code controlling the sequential loop goes in CONT_BB, 4105 replacing the GIMPLE_OMP_CONTINUE. */ 4106 gsi = gsi_last_nondebug_bb (cont_bb); 4107 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); 4108 vmain = gimple_omp_continue_control_use (cont_stmt); 4109 vback = gimple_omp_continue_control_def (cont_stmt); 4110 4111 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4112 { 4113 if (POINTER_TYPE_P (type)) 4114 t = fold_build_pointer_plus (vmain, step); 4115 else 4116 t = fold_build2 (PLUS_EXPR, type, vmain, step); 4117 if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) 4118 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4119 true, GSI_SAME_STMT); 4120 assign_stmt = gimple_build_assign (vback, t); 4121 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); 4122 4123 if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) 4124 t = build2 (EQ_EXPR, boolean_type_node, 4125 build_int_cst (itype, 0), 4126 build_int_cst (itype, 1)); 4127 else 4128 t = build2 (fd->loop.cond_code, boolean_type_node, 4129 DECL_P (vback) && TREE_ADDRESSABLE (vback) 4130 ? t : vback, e); 4131 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); 4132 } 4133 4134 /* Remove GIMPLE_OMP_CONTINUE. */ 4135 gsi_remove (&gsi, true); 4136 4137 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) 4138 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); 4139 4140 /* Trip update code goes into TRIP_UPDATE_BB. 
*/ 4141 gsi = gsi_start_bb (trip_update_bb); 4142 4143 t = build_int_cst (itype, 1); 4144 t = build2 (PLUS_EXPR, itype, trip_main, t); 4145 assign_stmt = gimple_build_assign (trip_back, t); 4146 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); 4147 } 4148 4149 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ 4150 gsi = gsi_last_nondebug_bb (exit_bb); 4151 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) 4152 { 4153 t = gimple_omp_return_lhs (gsi_stmt (gsi)); 4154 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); 4155 } 4156 gsi_remove (&gsi, true); 4157 4158 /* Connect the new blocks. */ 4159 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; 4160 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; 4161 4162 if (!broken_loop) 4163 { 4164 se = find_edge (cont_bb, body_bb); 4165 if (se == NULL) 4166 { 4167 se = BRANCH_EDGE (cont_bb); 4168 gcc_assert (single_succ (se->dest) == body_bb); 4169 } 4170 if (gimple_omp_for_combined_p (fd->for_stmt)) 4171 { 4172 remove_edge (se); 4173 se = NULL; 4174 } 4175 else if (fd->collapse > 1) 4176 { 4177 remove_edge (se); 4178 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); 4179 } 4180 else 4181 se->flags = EDGE_TRUE_VALUE; 4182 find_edge (cont_bb, trip_update_bb)->flags 4183 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; 4184 4185 redirect_edge_and_branch (single_succ_edge (trip_update_bb), 4186 iter_part_bb); 4187 } 4188 4189 if (gimple_in_ssa_p (cfun)) 4190 { 4191 gphi_iterator psi; 4192 gphi *phi; 4193 edge re, ene; 4194 edge_var_map *vm; 4195 size_t i; 4196 4197 gcc_assert (fd->collapse == 1 && !broken_loop); 4198 4199 /* When we redirect the edge from trip_update_bb to iter_part_bb, we 4200 remove arguments of the phi nodes in fin_bb. We need to create 4201 appropriate phi nodes in iter_part_bb instead. 
*/ 4202 se = find_edge (iter_part_bb, fin_bb); 4203 re = single_succ_edge (trip_update_bb); 4204 vec<edge_var_map> *head = redirect_edge_var_map_vector (re); 4205 ene = single_succ_edge (entry_bb); 4206 4207 psi = gsi_start_phis (fin_bb); 4208 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); 4209 gsi_next (&psi), ++i) 4210 { 4211 gphi *nphi; 4212 source_location locus; 4213 4214 phi = psi.phi (); 4215 if (operand_equal_p (gimple_phi_arg_def (phi, 0), 4216 redirect_edge_var_map_def (vm), 0)) 4217 continue; 4218 4219 t = gimple_phi_result (phi); 4220 gcc_assert (t == redirect_edge_var_map_result (vm)); 4221 4222 if (!single_pred_p (fin_bb)) 4223 t = copy_ssa_name (t, phi); 4224 4225 nphi = create_phi_node (t, iter_part_bb); 4226 4227 t = PHI_ARG_DEF_FROM_EDGE (phi, se); 4228 locus = gimple_phi_arg_location_from_edge (phi, se); 4229 4230 /* A special case -- fd->loop.v is not yet computed in 4231 iter_part_bb, we need to use vextra instead. */ 4232 if (t == fd->loop.v) 4233 t = vextra; 4234 add_phi_arg (nphi, t, ene, locus); 4235 locus = redirect_edge_var_map_location (vm); 4236 tree back_arg = redirect_edge_var_map_def (vm); 4237 add_phi_arg (nphi, back_arg, re, locus); 4238 edge ce = find_edge (cont_bb, body_bb); 4239 if (ce == NULL) 4240 { 4241 ce = BRANCH_EDGE (cont_bb); 4242 gcc_assert (single_succ (ce->dest) == body_bb); 4243 ce = single_succ_edge (ce->dest); 4244 } 4245 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); 4246 gcc_assert (inner_loop_phi != NULL); 4247 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), 4248 find_edge (seq_start_bb, body_bb), locus); 4249 4250 if (!single_pred_p (fin_bb)) 4251 add_phi_arg (phi, gimple_phi_result (nphi), se, locus); 4252 } 4253 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); 4254 redirect_edge_var_map_clear (re); 4255 if (single_pred_p (fin_bb)) 4256 while (1) 4257 { 4258 psi = gsi_start_phis (fin_bb); 4259 if (gsi_end_p (psi)) 4260 break; 4261 remove_phi_node (&psi, 
false); 4262 } 4263 4264 /* Make phi node for trip. */ 4265 phi = create_phi_node (trip_main, iter_part_bb); 4266 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), 4267 UNKNOWN_LOCATION); 4268 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), 4269 UNKNOWN_LOCATION); 4270 } 4271 4272 if (!broken_loop) 4273 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); 4274 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, 4275 recompute_dominator (CDI_DOMINATORS, iter_part_bb)); 4276 set_immediate_dominator (CDI_DOMINATORS, fin_bb, 4277 recompute_dominator (CDI_DOMINATORS, fin_bb)); 4278 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, 4279 recompute_dominator (CDI_DOMINATORS, seq_start_bb)); 4280 set_immediate_dominator (CDI_DOMINATORS, body_bb, 4281 recompute_dominator (CDI_DOMINATORS, body_bb)); 4282 4283 if (!broken_loop) 4284 { 4285 struct loop *loop = body_bb->loop_father; 4286 struct loop *trip_loop = alloc_loop (); 4287 trip_loop->header = iter_part_bb; 4288 trip_loop->latch = trip_update_bb; 4289 add_loop (trip_loop, iter_part_bb->loop_father); 4290 4291 if (loop != entry_bb->loop_father) 4292 { 4293 gcc_assert (loop->header == body_bb); 4294 gcc_assert (loop->latch == region->cont 4295 || single_pred (loop->latch) == region->cont); 4296 trip_loop->inner = loop; 4297 return; 4298 } 4299 4300 if (!gimple_omp_for_combined_p (fd->for_stmt)) 4301 { 4302 loop = alloc_loop (); 4303 loop->header = body_bb; 4304 if (collapse_bb == NULL) 4305 loop->latch = cont_bb; 4306 add_loop (loop, trip_loop); 4307 } 4308 } 4309} 4310 4311/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing 4312 loop. 
Given parameters: 4313 4314 for (V = N1; V cond N2; V += STEP) BODY; 4315 4316 where COND is "<" or ">", we generate pseudocode 4317 4318 V = N1; 4319 goto L1; 4320 L0: 4321 BODY; 4322 V += STEP; 4323 L1: 4324 if (V cond N2) goto L0; else goto L2; 4325 L2: 4326 4327 For collapsed loops, given parameters: 4328 collapse(3) 4329 for (V1 = N11; V1 cond1 N12; V1 += STEP1) 4330 for (V2 = N21; V2 cond2 N22; V2 += STEP2) 4331 for (V3 = N31; V3 cond3 N32; V3 += STEP3) 4332 BODY; 4333 4334 we generate pseudocode 4335 4336 if (cond3 is <) 4337 adj = STEP3 - 1; 4338 else 4339 adj = STEP3 + 1; 4340 count3 = (adj + N32 - N31) / STEP3; 4341 if (cond2 is <) 4342 adj = STEP2 - 1; 4343 else 4344 adj = STEP2 + 1; 4345 count2 = (adj + N22 - N21) / STEP2; 4346 if (cond1 is <) 4347 adj = STEP1 - 1; 4348 else 4349 adj = STEP1 + 1; 4350 count1 = (adj + N12 - N11) / STEP1; 4351 count = count1 * count2 * count3; 4352 V = 0; 4353 V1 = N11; 4354 V2 = N21; 4355 V3 = N31; 4356 goto L1; 4357 L0: 4358 BODY; 4359 V += 1; 4360 V3 += STEP3; 4361 V2 += (V3 cond3 N32) ? 0 : STEP2; 4362 V3 = (V3 cond3 N32) ? V3 : N31; 4363 V1 += (V2 cond2 N22) ? 0 : STEP1; 4364 V2 = (V2 cond2 N22) ? 
V2 : N21; 4365 L1: 4366 if (V < count) goto L0; else goto L2; 4367 L2: 4368 4369 */ 4370 4371static void 4372expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) 4373{ 4374 tree type, t; 4375 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; 4376 gimple_stmt_iterator gsi; 4377 gimple *stmt; 4378 gcond *cond_stmt; 4379 bool broken_loop = region->cont == NULL; 4380 edge e, ne; 4381 tree *counts = NULL; 4382 int i; 4383 int safelen_int = INT_MAX; 4384 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4385 OMP_CLAUSE_SAFELEN); 4386 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4387 OMP_CLAUSE__SIMDUID_); 4388 tree n1, n2; 4389 4390 if (safelen) 4391 { 4392 poly_uint64 val; 4393 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); 4394 if (!poly_int_tree_p (safelen, &val)) 4395 safelen_int = 0; 4396 else 4397 safelen_int = MIN (constant_lower_bound (val), INT_MAX); 4398 if (safelen_int == 1) 4399 safelen_int = 0; 4400 } 4401 type = TREE_TYPE (fd->loop.v); 4402 entry_bb = region->entry; 4403 cont_bb = region->cont; 4404 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); 4405 gcc_assert (broken_loop 4406 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); 4407 l0_bb = FALLTHRU_EDGE (entry_bb)->dest; 4408 if (!broken_loop) 4409 { 4410 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); 4411 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); 4412 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; 4413 l2_bb = BRANCH_EDGE (entry_bb)->dest; 4414 } 4415 else 4416 { 4417 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; 4418 l1_bb = split_edge (BRANCH_EDGE (entry_bb)); 4419 l2_bb = single_succ (l1_bb); 4420 } 4421 exit_bb = region->exit; 4422 l2_dom_bb = NULL; 4423 4424 gsi = gsi_last_nondebug_bb (entry_bb); 4425 4426 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); 4427 /* Not needed in SSA form right now. 
*/ 4428 gcc_assert (!gimple_in_ssa_p (cfun)); 4429 if (fd->collapse > 1) 4430 { 4431 int first_zero_iter = -1, dummy = -1; 4432 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; 4433 4434 counts = XALLOCAVEC (tree, fd->collapse); 4435 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, 4436 zero_iter_bb, first_zero_iter, 4437 dummy_bb, dummy, l2_dom_bb); 4438 } 4439 if (l2_dom_bb == NULL) 4440 l2_dom_bb = l1_bb; 4441 4442 n1 = fd->loop.n1; 4443 n2 = fd->loop.n2; 4444 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4445 { 4446 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4447 OMP_CLAUSE__LOOPTEMP_); 4448 gcc_assert (innerc); 4449 n1 = OMP_CLAUSE_DECL (innerc); 4450 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), 4451 OMP_CLAUSE__LOOPTEMP_); 4452 gcc_assert (innerc); 4453 n2 = OMP_CLAUSE_DECL (innerc); 4454 } 4455 tree step = fd->loop.step; 4456 4457 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), 4458 OMP_CLAUSE__SIMT_); 4459 if (is_simt) 4460 { 4461 cfun->curr_properties &= ~PROP_gimple_lomp_dev; 4462 is_simt = safelen_int > 1; 4463 } 4464 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; 4465 if (is_simt) 4466 { 4467 simt_lane = create_tmp_var (unsigned_type_node); 4468 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); 4469 gimple_call_set_lhs (g, simt_lane); 4470 gsi_insert_before (&gsi, g, GSI_SAME_STMT); 4471 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, 4472 fold_convert (TREE_TYPE (step), simt_lane)); 4473 n1 = fold_convert (type, n1); 4474 if (POINTER_TYPE_P (type)) 4475 n1 = fold_build_pointer_plus (n1, offset); 4476 else 4477 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); 4478 4479 /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
*/ 4480 if (fd->collapse > 1) 4481 simt_maxlane = build_one_cst (unsigned_type_node); 4482 else if (safelen_int < omp_max_simt_vf ()) 4483 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); 4484 tree vf 4485 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, 4486 unsigned_type_node, 0); 4487 if (simt_maxlane) 4488 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); 4489 vf = fold_convert (TREE_TYPE (step), vf); 4490 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); 4491 } 4492 4493 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); 4494 if (fd->collapse > 1) 4495 { 4496 if (gimple_omp_for_combined_into_p (fd->for_stmt)) 4497 { 4498 gsi_prev (&gsi); 4499 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); 4500 gsi_next (&gsi); 4501 } 4502 else 4503 for (i = 0; i < fd->collapse; i++) 4504 { 4505 tree itype = TREE_TYPE (fd->loops[i].v); 4506 if (POINTER_TYPE_P (itype)) 4507 itype = signed_type_for (itype); 4508 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); 4509 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4510 } 4511 } 4512 4513 /* Remove the GIMPLE_OMP_FOR statement. */ 4514 gsi_remove (&gsi, true); 4515 4516 if (!broken_loop) 4517 { 4518 /* Code to control the increment goes in the CONT_BB. 
*/ 4519 gsi = gsi_last_nondebug_bb (cont_bb); 4520 stmt = gsi_stmt (gsi); 4521 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); 4522 4523 if (POINTER_TYPE_P (type)) 4524 t = fold_build_pointer_plus (fd->loop.v, step); 4525 else 4526 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4527 expand_omp_build_assign (&gsi, fd->loop.v, t); 4528 4529 if (fd->collapse > 1) 4530 { 4531 i = fd->collapse - 1; 4532 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) 4533 { 4534 t = fold_convert (sizetype, fd->loops[i].step); 4535 t = fold_build_pointer_plus (fd->loops[i].v, t); 4536 } 4537 else 4538 { 4539 t = fold_convert (TREE_TYPE (fd->loops[i].v), 4540 fd->loops[i].step); 4541 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), 4542 fd->loops[i].v, t); 4543 } 4544 expand_omp_build_assign (&gsi, fd->loops[i].v, t); 4545 4546 for (i = fd->collapse - 1; i > 0; i--) 4547 { 4548 tree itype = TREE_TYPE (fd->loops[i].v); 4549 tree itype2 = TREE_TYPE (fd->loops[i - 1].v); 4550 if (POINTER_TYPE_P (itype2)) 4551 itype2 = signed_type_for (itype2); 4552 t = fold_convert (itype2, fd->loops[i - 1].step); 4553 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4554 GSI_SAME_STMT); 4555 t = build3 (COND_EXPR, itype2, 4556 build2 (fd->loops[i].cond_code, boolean_type_node, 4557 fd->loops[i].v, 4558 fold_convert (itype, fd->loops[i].n2)), 4559 build_int_cst (itype2, 0), t); 4560 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) 4561 t = fold_build_pointer_plus (fd->loops[i - 1].v, t); 4562 else 4563 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); 4564 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); 4565 4566 t = fold_convert (itype, fd->loops[i].n1); 4567 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, 4568 GSI_SAME_STMT); 4569 t = build3 (COND_EXPR, itype, 4570 build2 (fd->loops[i].cond_code, boolean_type_node, 4571 fd->loops[i].v, 4572 fold_convert (itype, fd->loops[i].n2)), 4573 fd->loops[i].v, t); 4574 expand_omp_build_assign 
(&gsi, fd->loops[i].v, t); 4575 } 4576 } 4577 4578 /* Remove GIMPLE_OMP_CONTINUE. */ 4579 gsi_remove (&gsi, true); 4580 } 4581 4582 /* Emit the condition in L1_BB. */ 4583 gsi = gsi_start_bb (l1_bb); 4584 4585 t = fold_convert (type, n2); 4586 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, 4587 false, GSI_CONTINUE_LINKING); 4588 tree v = fd->loop.v; 4589 if (DECL_P (v) && TREE_ADDRESSABLE (v)) 4590 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, 4591 false, GSI_CONTINUE_LINKING); 4592 t = build2 (fd->loop.cond_code, boolean_type_node, v, t); 4593 cond_stmt = gimple_build_cond_empty (t); 4594 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); 4595 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, 4596 NULL, NULL) 4597 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, 4598 NULL, NULL)) 4599 { 4600 gsi = gsi_for_stmt (cond_stmt); 4601 gimple_regimplify_operands (cond_stmt, &gsi); 4602 } 4603 4604 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ 4605 if (is_simt) 4606 { 4607 gsi = gsi_start_bb (l2_bb); 4608 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); 4609 if (POINTER_TYPE_P (type)) 4610 t = fold_build_pointer_plus (fd->loop.v, step); 4611 else 4612 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); 4613 expand_omp_build_assign (&gsi, fd->loop.v, t); 4614 } 4615 4616 /* Remove GIMPLE_OMP_RETURN. */ 4617 gsi = gsi_last_nondebug_bb (exit_bb); 4618 gsi_remove (&gsi, true); 4619 4620 /* Connect the new blocks. 
*/ 4621 remove_edge (FALLTHRU_EDGE (entry_bb)); 4622 4623 if (!broken_loop) 4624 { 4625 remove_edge (BRANCH_EDGE (entry_bb)); 4626 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); 4627 4628 e = BRANCH_EDGE (l1_bb); 4629 ne = FALLTHRU_EDGE (l1_bb); 4630 e->flags = EDGE_TRUE_VALUE; 4631 } 4632 else 4633 { 4634 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; 4635 4636 ne = single_succ_edge (l1_bb); 4637 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); 4638 4639 } 4640 ne->flags = EDGE_FALSE_VALUE; 4641 e->probability = profile_probability::guessed_always ().apply_scale (7, 8); 4642 ne->probability = e->probability.invert (); 4643 4644 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); 4645 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); 4646 4647 if (simt_maxlane) 4648 { 4649 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, 4650 NULL_TREE, NULL_TREE); 4651 gsi = gsi_last_bb (entry_bb); 4652 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); 4653 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); 4654 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; 4655 FALLTHRU_EDGE (entry_bb)->probability 4656 = profile_probability::guessed_always ().apply_scale (7, 8); 4657 BRANCH_EDGE (entry_bb)->probability 4658 = FALLTHRU_EDGE (entry_bb)->probability.invert (); 4659 l2_dom_bb = entry_bb; 4660 } 4661 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); 4662 4663 if (!broken_loop) 4664 { 4665 struct loop *loop = alloc_loop (); 4666 loop->header = l1_bb; 4667 loop->latch = cont_bb; 4668 add_loop (loop, l1_bb->loop_father); 4669 loop->safelen = safelen_int; 4670 if (simduid) 4671 { 4672 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); 4673 cfun->has_simduid_loops = true; 4674 } 4675 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize 4676 the loop. 
*/
      if ((flag_tree_loop_vectorize
	   || !global_options_set.x_flag_tree_loop_vectorize)
	  && flag_tree_loop_optimize
	  && loop->safelen > 1)
	{
	  loop->force_vectorize = true;
	  cfun->has_force_vectorize_loops = true;
	}
    }
  else if (simduid)
    cfun->has_simduid_loops = true;
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
   which should just compute all the needed loop temporaries
   for GIMPLE_OMP_TASK.  */

static void
expand_omp_taskloop_for_outer (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree type, bias = NULL_TREE;
  basic_block entry_bb, cont_bb, exit_bb;
  gimple_stmt_iterator gsi;
  gassign *assign_stmt;
  tree *counts = NULL;
  int i;

  gcc_assert (inner_stmt);
  gcc_assert (region->cont);
  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
	      && gimple_omp_task_taskloop_p (inner_stmt));
  type = TREE_TYPE (fd->loop.v);

  /* See if we need to bias by LLONG_MIN.  The runtime iterates in
     long long unsigned; a signed iteration space whose bounds may
     straddle zero is shifted by TYPE_MIN_VALUE (added to both bounds
     below) so the biased range is non-negative.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
  exit_bb = region->exit;

  gsi = gsi_last_nondebug_bb (entry_bb);
  gimple *for_stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  zero_iter_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);

      if (zero_iter_bb)
	{
	  /* Some counts[i] vars might be uninitialized if
	     some loop has zero iterations.  But the body shouldn't
	     be executed in that case, so just avoid uninit warnings.  */
	  for (i = first_zero_iter; i < fd->collapse; i++)
	    if (SSA_VAR_P (counts[i]))
	      TREE_NO_WARNING (counts[i]) = 1;
	  gsi_prev (&gsi);
	  edge e = split_block (entry_bb, gsi_stmt (gsi));
	  entry_bb = e->dest;
	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
	  gsi = gsi_last_bb (entry_bb);
	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
				   get_immediate_dominator (CDI_DOMINATORS,
							    zero_iter_bb));
	}
    }

  /* Compute the start (T0) and end (T1) bounds in ITER_TYPE, applying
     the LLONG_MIN bias when one was chosen above.  */
  tree t0, t1;
  t1 = fd->loop.n2;
  t0 = fd->loop.n1;
  if (POINTER_TYPE_P (TREE_TYPE (t0))
      && TYPE_PRECISION (TREE_TYPE (t0))
	 != TYPE_PRECISION (fd->iter_type))
    {
      /* Avoid casting pointers to integer of a different size.  */
      tree itype = signed_type_for (type);
      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
    }
  else
    {
      t1 = fold_convert (fd->iter_type, t1);
      t0 = fold_convert (fd->iter_type, t0);
    }
  if (bias)
    {
      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
    }

  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree startvar = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  tree endvar = OMP_CLAUSE_DECL (innerc);
  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
    {
      gcc_assert (innerc);
      /* Skip over the per-loop _looptemp_ clauses of the collapsed
	 loops to find the optional extra one after them.  */
      for (i = 1; i < fd->collapse; i++)
	{
	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				    OMP_CLAUSE__LOOPTEMP_);
	  gcc_assert (innerc);
	}
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      if (innerc)
	{
	  /* If needed (inner taskloop has lastprivate clause), propagate
	     down the total number of iterations.  */
	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
					     NULL_TREE, false,
					     GSI_CONTINUE_LINKING);
	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
	}
    }

  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (startvar, t0);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
				 GSI_CONTINUE_LINKING);
  assign_stmt = gimple_build_assign (endvar, t1);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (cont_bb);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (entry_bb));
  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
  remove_edge (BRANCH_EDGE (cont_bb));
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
  set_immediate_dominator (CDI_DOMINATORS, region->entry,
			   recompute_dominator (CDI_DOMINATORS, region->entry));
}

/* Taskloop construct is represented after gimplification with
   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
   GOMP_taskloop{,_ull} function arranges for each task to be given just
   a single range of iterations.
*/

static void
expand_omp_taskloop_for_inner (struct omp_region *region,
			       struct omp_for_data *fd,
			       gimple *inner_stmt)
{
  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
  basic_block fin_bb;
  gimple_stmt_iterator gsi;
  edge ep;
  /* A "broken" loop has no GIMPLE_OMP_CONTINUE, i.e. no latch; the
     body is executed at most once per task.  */
  bool broken_loop = region->cont == NULL;
  tree *counts = NULL;
  tree n1, n2, step;

  itype = type = TREE_TYPE (fd->loop.v);
  if (POINTER_TYPE_P (type))
    itype = signed_type_for (type);

  /* See if we need to bias by LLONG_MIN.  When the iteration type is
     unsigned long long but the loop variable is a narrower signed type,
     iteration bounds that straddle zero must be shifted into the
     unsigned domain so GOMP_taskloop_ull can compare them.  */
  if (fd->iter_type == long_long_unsigned_type_node
      && TREE_CODE (type) == INTEGER_TYPE
      && !TYPE_UNSIGNED (type))
    {
      tree n1, n2;

      if (fd->loop.cond_code == LT_EXPR)
	{
	  n1 = fd->loop.n1;
	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
	}
      else
	{
	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
	  n2 = fd->loop.n1;
	}
      if (TREE_CODE (n1) != INTEGER_CST
	  || TREE_CODE (n2) != INTEGER_CST
	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
    }

  /* Region shape (checked below): ENTRY_BB has two successors, the
     branch edge to FIN_BB and the fallthrough edge to BODY_BB; unless
     the loop is broken, CONT_BB branches back to BODY_BB and falls
     through to FIN_BB.  */
  entry_bb = region->entry;
  cont_bb = region->cont;
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
  fin_bb = BRANCH_EDGE (entry_bb)->dest;
  gcc_assert (broken_loop
	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
  if (!broken_loop)
    {
      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
    }
  exit_bb = region->exit;

  /* Iteration space partitioning goes in ENTRY_BB.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);

  if (fd->collapse > 1)
    {
      int first_zero_iter = -1, dummy = -1;
      basic_block l2_dom_bb = NULL, dummy_bb = NULL;

      counts = XALLOCAVEC (tree, fd->collapse);
      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
				  fin_bb, first_zero_iter,
				  dummy_bb, dummy, l2_dom_bb);
      t = NULL_TREE;
    }
  else
    t = integer_one_node;

  /* The per-task iteration range arrives in the first two _looptemp_
     clauses on the inner GIMPLE_OMP_FOR, filled in by the
     GOMP_taskloop{,_ull} runtime call.  */
  step = fd->loop.step;
  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
				 OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n1 = OMP_CLAUSE_DECL (innerc);
  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
  gcc_assert (innerc);
  n2 = OMP_CLAUSE_DECL (innerc);
  if (bias)
    {
      /* Undo the LLONG_MIN bias applied by the outer construct.  */
      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
    }
  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
				 true, NULL_TREE, true, GSI_SAME_STMT);
  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				   true, NULL_TREE, true, GSI_SAME_STMT);

  tree startvar = fd->loop.v;
  tree endvar = NULL_TREE;

  /* If this GIMPLE_OMP_FOR is combined with a nested construct, the
     start/end values are instead communicated through the inner
     statement's _looptemp_ clauses.  */
  if (gimple_omp_for_combined_p (fd->for_stmt))
    {
      tree clauses = gimple_omp_for_clauses (inner_stmt);
      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      startvar = OMP_CLAUSE_DECL (innerc);
      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
				OMP_CLAUSE__LOOPTEMP_);
      gcc_assert (innerc);
      endvar = OMP_CLAUSE_DECL (innerc);
    }
  t = fold_convert (TREE_TYPE (startvar), n1);
  t = force_gimple_operand_gsi (&gsi, t,
				DECL_P (startvar)
				&& TREE_ADDRESSABLE (startvar),
				NULL_TREE, false, GSI_CONTINUE_LINKING);
  gimple *assign_stmt = gimple_build_assign (startvar, t);
  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);

  t = fold_convert (TREE_TYPE (startvar), n2);
  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
				false, GSI_CONTINUE_LINKING);
  if (endvar)
    {
      assign_stmt = gimple_build_assign (endvar, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
	assign_stmt = gimple_build_assign (fd->loop.v, e);
      else
	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
    }
  if (fd->collapse > 1)
    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);

  if (!broken_loop)
    {
      /* The code controlling the sequential loop replaces the
	 GIMPLE_OMP_CONTINUE.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont_stmt);
      vback = gimple_omp_continue_control_def (cont_stmt);

      if (!gimple_omp_for_combined_p (fd->for_stmt))
	{
	  /* Emit V += STEP and the back-edge test V cond E.  */
	  if (POINTER_TYPE_P (type))
	    t = fold_build_pointer_plus (vmain, step);
	  else
	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
	  t = force_gimple_operand_gsi (&gsi, t,
					DECL_P (vback)
					&& TREE_ADDRESSABLE (vback),
					NULL_TREE, true, GSI_SAME_STMT);
	  assign_stmt = gimple_build_assign (vback, t);
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  t = build2 (fd->loop.cond_code, boolean_type_node,
		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
		      ? t : vback, e);
	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
	}

      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
      gsi_remove (&gsi, true);

      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
    }

  /* Remove the GIMPLE_OMP_FOR statement.  */
  gsi = gsi_for_stmt (fd->for_stmt);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_RETURN statement.  */
  gsi = gsi_last_nondebug_bb (exit_bb);
  gsi_remove (&gsi, true);

  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
  if (!broken_loop)
    remove_edge (BRANCH_EDGE (entry_bb));
  else
    {
      /* For a broken loop the branch edge's dominated blocks go away
	 entirely, and the enclosing region no longer has a continue.  */
      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
      region->outer->cont = NULL;
    }

  /* Connect all the blocks.  */
  if (!broken_loop)
    {
      ep = find_edge (cont_bb, body_bb);
      if (gimple_omp_for_combined_p (fd->for_stmt))
	{
	  remove_edge (ep);
	  ep = NULL;
	}
      else if (fd->collapse > 1)
	{
	  /* The latch goes through the block updating the collapsed
	     iteration variables.  */
	  remove_edge (ep);
	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
	}
      else
	ep->flags = EDGE_TRUE_VALUE;
      find_edge (cont_bb, fin_bb)->flags
	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
    }

  set_immediate_dominator (CDI_DOMINATORS, body_bb,
			   recompute_dominator (CDI_DOMINATORS, body_bb));
  if (!broken_loop)
    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
			     recompute_dominator (CDI_DOMINATORS, fin_bb));

  /* Register the new natural loop with the loop tree, unless the
     combined case where the latch structure is handled elsewhere.  */
  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
    {
      struct loop *loop = alloc_loop ();
      loop->header = body_bb;
      if (collapse_bb == NULL)
	loop->latch = cont_bb;
      add_loop (loop, body_bb->loop_father);
    }
}

/* A subroutine of expand_omp_for.  Generate code for an OpenACC
   partitioned loop.  The lowering here is abstracted, in that the
   loop parameters are passed through internal functions, which are
   further lowered by oacc_device_lower, once we get to the target
   compiler.
The loop is of the form: 5090 5091 for (V = B; V LTGT E; V += S) {BODY} 5092 5093 where LTGT is < or >. We may have a specified chunking size, CHUNKING 5094 (constant 0 for no chunking) and we will have a GWV partitioning 5095 mask, specifying dimensions over which the loop is to be 5096 partitioned (see note below). We generate code that looks like 5097 (this ignores tiling): 5098 5099 <entry_bb> [incoming FALL->body, BRANCH->exit] 5100 typedef signedintify (typeof (V)) T; // underlying signed integral type 5101 T range = E - B; 5102 T chunk_no = 0; 5103 T DIR = LTGT == '<' ? +1 : -1; 5104 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); 5105 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); 5106 5107 <head_bb> [created by splitting end of entry_bb] 5108 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); 5109 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); 5110 if (!(offset LTGT bound)) goto bottom_bb; 5111 5112 <body_bb> [incoming] 5113 V = B + offset; 5114 {BODY} 5115 5116 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] 5117 offset += step; 5118 if (offset LTGT bound) goto body_bb; [*] 5119 5120 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb 5121 chunk_no++; 5122 if (chunk < chunk_max) goto head_bb; 5123 5124 <exit_bb> [incoming] 5125 V = B + ((range -/+ 1) / S +/- 1) * S [*] 5126 5127 [*] Needed if V live at end of loop. 
*/

static void
expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
{
  tree v = fd->loop.v;
  enum tree_code cond_code = fd->loop.cond_code;
  enum tree_code plus_code = PLUS_EXPR;

  tree chunk_size = integer_minus_one_node;
  tree gwv = integer_zero_node;
  tree iter_type = TREE_TYPE (v);
  /* DIFF_TYPE is the signed integral type used for ranges, offsets and
     steps; PLUS_TYPE is what gets added to V (sizetype for pointers).  */
  tree diff_type = iter_type;
  tree plus_type = iter_type;
  struct oacc_collapse *counts = NULL;

  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
		       == GF_OMP_FOR_KIND_OACC_LOOP);
  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);

  if (POINTER_TYPE_P (iter_type))
    {
      plus_code = POINTER_PLUS_EXPR;
      plus_type = sizetype;
    }
  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
    diff_type = signed_type_for (diff_type);
  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
    diff_type = integer_type_node;

  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
  basic_block bottom_bb = NULL;

  /* entry_bb has two sucessors; the branch edge is to the exit
     block, fallthrough edge to body.  */
  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);

  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
     body_bb, or to a block whose only successor is the body_bb.  Its
     fallthrough successor is the final block (same as the branch
     successor of the entry_bb).  */
  if (cont_bb)
    {
      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
      basic_block bed = BRANCH_EDGE (cont_bb)->dest;

      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
    }
  else
    gcc_assert (!gimple_in_ssa_p (cfun));

  /* The exit block only has entry_bb and cont_bb as predecessors.  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));

  tree chunk_no;
  tree chunk_max = NULL_TREE;
  tree bound, offset;
  tree step = create_tmp_var (diff_type, ".step");
  bool up = cond_code == LT_EXPR;
  tree dir = build_int_cst (diff_type, up ? +1 : -1);
  /* Chunking is only done outside SSA (i.e. not for the SSA
     parallelizer's gang-parallel lowering).  */
  bool chunking = !gimple_in_ssa_p (cfun);
  bool negating;

  /* Tiling vars.  */
  tree tile_size = NULL_TREE;
  tree element_s = NULL_TREE;
  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
  basic_block elem_body_bb = NULL;
  basic_block elem_cont_bb = NULL;

  /* SSA instances.  */
  tree offset_incr = NULL_TREE;
  tree offset_init = NULL_TREE;

  gimple_stmt_iterator gsi;
  gassign *ass;
  gcall *call;
  gimple *stmt;
  tree expr;
  location_t loc;
  edge split, be, fte;

  /* Split the end of entry_bb to create head_bb.  */
  split = split_block (entry_bb, last_stmt (entry_bb));
  basic_block head_bb = split->dest;
  entry_bb = split->src;

  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
  gsi = gsi_last_nondebug_bb (entry_bb);
  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
  loc = gimple_location (for_stmt);

  if (gimple_in_ssa_p (cfun))
    {
      offset_init = gimple_omp_for_index (for_stmt, 0);
      gcc_assert (integer_zerop (fd->loop.n1));
      /* The SSA parallelizer does gang parallelism.  */
      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
    }

  if (fd->collapse > 1 || fd->tiling)
    {
      /* Collapse the loop nest into a single iteration space; TOTAL is
	 the combined trip count.  */
      gcc_assert (!gimple_in_ssa_p (cfun) && up);
      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
					      TREE_TYPE (fd->loop.n2), loc);

      if (SSA_VAR_P (fd->loop.n2))
	{
	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
					    true, GSI_SAME_STMT);
	  ass = gimple_build_assign (fd->loop.n2, total);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	}
    }

  tree b = fd->loop.n1;
  tree e = fd->loop.n2;
  tree s = fd->loop.step;

  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Convert the step, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
  if (negating)
    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
  s = fold_convert (diff_type, s);
  if (negating)
    s = fold_build1 (NEGATE_EXPR, diff_type, s);
  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);

  if (!chunking)
    chunk_size = integer_zero_node;
  expr = fold_convert (diff_type, chunk_size);
  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  if (fd->tiling)
    {
      /* Determine the tile size and element step,
	 modify the outer loop step size.  */
      tile_size = create_tmp_var (diff_type, ".tile_size");
      expr = build_int_cst (diff_type, 1);
      for (int ix = 0; ix < fd->collapse; ix++)
	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
      expr = force_gimple_operand_gsi (&gsi, expr, true,
				       NULL_TREE, true, GSI_SAME_STMT);
      ass = gimple_build_assign (tile_size, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      element_s = create_tmp_var (diff_type, ".element_s");
      ass = gimple_build_assign (element_s, s);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      /* The outer loop now steps a whole tile at a time.  */
      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
      s = force_gimple_operand_gsi (&gsi, expr, true,
				    NULL_TREE, true, GSI_SAME_STMT);
    }

  /* Determine the range, avoiding possible unsigned->signed overflow.  */
  negating = !up && TYPE_UNSIGNED (iter_type);
  expr = fold_build2 (MINUS_EXPR, plus_type,
		      fold_convert (plus_type, negating ? b : e),
		      fold_convert (plus_type, negating ? e : b));
  expr = fold_convert (diff_type, expr);
  if (negating)
    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
  tree range = force_gimple_operand_gsi (&gsi, expr, true,
					 NULL_TREE, true, GSI_SAME_STMT);

  chunk_no = build_int_cst (diff_type, 0);
  if (chunking)
    {
      gcc_assert (!gimple_in_ssa_p (cfun));

      expr = chunk_no;
      chunk_max = create_tmp_var (diff_type, ".chunk_max");
      chunk_no = create_tmp_var (diff_type, ".chunk_no");

      ass = gimple_build_assign (chunk_no, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      /* chunk_max = GOACC_LOOP_CHUNKS (dir, range, step, size, gwv).  */
      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
					 build_int_cst (integer_type_node,
							IFN_GOACC_LOOP_CHUNKS),
					 dir, range, s, chunk_size, gwv);
      gimple_call_set_lhs (call, chunk_max);
      gimple_set_location (call, loc);
      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    }
  else
    chunk_size = chunk_no;

  /* step = GOACC_LOOP_STEP (dir, range, step, size, gwv).  */
  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_STEP),
				     dir, range, s, chunk_size, gwv);
  gimple_call_set_lhs (call, step);
  gimple_set_location (call, loc);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

  /* Remove the GIMPLE_OMP_FOR.  */
  gsi_remove (&gsi, true);

  /* Fixup edges from head_bb.  */
  be = BRANCH_EDGE (head_bb);
  fte = FALLTHRU_EDGE (head_bb);
  be->flags |= EDGE_FALSE_VALUE;
  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

  basic_block body_bb = fte->dest;

  if (gimple_in_ssa_p (cfun))
    {
      /* In SSA mode the offset use/def pair comes from the
	 GIMPLE_OMP_CONTINUE control operands.  */
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));

      offset = gimple_omp_continue_control_use (cont_stmt);
      offset_incr = gimple_omp_continue_control_def (cont_stmt);
    }
  else
    {
      offset = create_tmp_var (diff_type, ".offset");
      offset_init = offset_incr = offset;
    }
  bound = create_tmp_var (TREE_TYPE (offset), ".bound");

  /* Loop offset & bound go into head_bb.  */
  gsi = gsi_start_bb (head_bb);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_OFFSET),
				     dir, range, s,
				     chunk_size, gwv, chunk_no);
  gimple_call_set_lhs (call, offset_init);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
				     build_int_cst (integer_type_node,
						    IFN_GOACC_LOOP_BOUND),
				     dir, range, s,
				     chunk_size, gwv, offset_init);
  gimple_call_set_lhs (call, bound);
  gimple_set_location (call, loc);
  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);

  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
		    GSI_CONTINUE_LINKING);

  /* V assignment goes into body_bb.  */
  if (!gimple_in_ssa_p (cfun))
    {
      gsi = gsi_start_bb (body_bb);

      expr = build2 (plus_code, iter_type, b,
		     fold_convert (plus_type, offset));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

      if (fd->collapse > 1 || fd->tiling)
	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);

      if (fd->tiling)
	{
	  /* Determine the range of the element loop -- usually simply
	     the tile_size, but could be smaller if the final
	     iteration of the outer loop is a partial tile.  */
	  tree e_range = create_tmp_var (diff_type, ".e_range");

	  expr = build2 (MIN_EXPR, diff_type,
			 build2 (MINUS_EXPR, diff_type, bound, offset),
			 build2 (MULT_EXPR, diff_type, tile_size,
				 element_s));
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_range, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);

	  /* Determine bound, offset & step of inner loop.  */
	  e_bound = create_tmp_var (diff_type, ".e_bound");
	  e_offset = create_tmp_var (diff_type, ".e_offset");
	  e_step = create_tmp_var (diff_type, ".e_step");

	  /* Mark these as element loops.  */
	  tree t, e_gwv = integer_minus_one_node;
	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, chunk);
	  gimple_call_set_lhs (call, e_offset);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
					     element_s, chunk, e_gwv, e_offset);
	  gimple_call_set_lhs (call, e_bound);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
					     element_s, chunk, e_gwv);
	  gimple_call_set_lhs (call, e_step);
	  gimple_set_location (call, loc);
	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);

	  /* Add test and split block.  */
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (body_bb, stmt);
	  elem_body_bb = split->dest;
	  if (cont_bb == body_bb)
	    cont_bb = elem_body_bb;
	  body_bb = split->src;

	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;

	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
	  if (cont_bb == NULL)
	    {
	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
	      e->probability = profile_probability::even ();
	      split->probability = profile_probability::even ();
	    }

	  /* Initialize the user's loop vars.  */
	  gsi = gsi_start_bb (elem_body_bb);
	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
	}
    }

  /* Loop increment goes into cont_bb.  If this is not a loop, we
     will have spawned threads as if it was, and each one will
     execute one iteration.  The specification is not explicit about
     whether such constructs are ill-formed or not, and they can
     occur, especially when noreturn routines are involved.  */
  if (cont_bb)
    {
      gsi = gsi_last_nondebug_bb (cont_bb);
      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
      loc = gimple_location (cont_stmt);

      if (fd->tiling)
	{
	  /* Insert element loop increment and test.  */
	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
					   true, GSI_SAME_STMT);
	  ass = gimple_build_assign (e_offset, expr);
	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);

	  stmt = gimple_build_cond_empty (expr);
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (cont_bb, stmt);
	  elem_cont_bb = split->src;
	  cont_bb = split->dest;

	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  split->probability = profile_probability::unlikely ().guessed ();
	  edge latch_edge
	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
	  latch_edge->probability = profile_probability::likely ().guessed ();

	  /* Skip the element loop entirely when its trip count is zero.  */
	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
	  skip_edge->probability = profile_probability::unlikely ().guessed ();
	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
	  loop_entry_edge->probability
	    = profile_probability::likely ().guessed ();

	  gsi = gsi_for_stmt (cont_stmt);
	}

      /* Increment offset.  */
      if (gimple_in_ssa_p (cfun))
	expr = build2 (plus_code, iter_type, offset,
		       fold_convert (plus_type, step));
      else
	expr = build2 (PLUS_EXPR, diff_type, offset, step);
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (offset_incr, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);

      /* Remove the GIMPLE_OMP_CONTINUE.  */
      gsi_remove (&gsi, true);

      /* Fixup edges from cont_bb.  */
      be = BRANCH_EDGE (cont_bb);
      fte = FALLTHRU_EDGE (cont_bb);
      be->flags |= EDGE_TRUE_VALUE;
      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;

      if (chunking)
	{
	  /* Split the beginning of exit_bb to make bottom_bb.  We
	     need to insert a nop at the start, because splitting is
	     after a stmt, not before.  */
	  gsi = gsi_start_bb (exit_bb);
	  stmt = gimple_build_nop ();
	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  split = split_block (exit_bb, stmt);
	  bottom_bb = split->src;
	  exit_bb = split->dest;
	  gsi = gsi_last_bb (bottom_bb);

	  /* Chunk increment and test goes into bottom_bb.  */
	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
			 build_int_cst (diff_type, 1));
	  ass = gimple_build_assign (chunk_no, expr);
	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);

	  /* Chunk test at end of bottom_bb.  */
	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
			    GSI_CONTINUE_LINKING);

	  /* Fixup edges from bottom_bb.  */
	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
	  split->probability = profile_probability::unlikely ().guessed ();
	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
	  latch_edge->probability = profile_probability::likely ().guessed ();
	}
    }

  gsi = gsi_last_nondebug_bb (exit_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  loc = gimple_location (gsi_stmt (gsi));

  if (!gimple_in_ssa_p (cfun))
    {
      /* Insert the final value of V, in case it is live.  This is the
	 value for the only thread that survives past the join.  */
      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
				       true, GSI_SAME_STMT);
      ass = gimple_build_assign (v, expr);
      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
    }

  /* Remove the OMP_RETURN.  */
  gsi_remove (&gsi, true);

  if (cont_bb)
    {
      /* We now have one, two or three nested loops.  Update the loop
	 structures.  */
      struct loop *parent = entry_bb->loop_father;
      struct loop *body = body_bb->loop_father;

      if (chunking)
	{
	  struct loop *chunk_loop = alloc_loop ();
	  chunk_loop->header = head_bb;
	  chunk_loop->latch = bottom_bb;
	  add_loop (chunk_loop, parent);
	  parent = chunk_loop;
	}
      else if (parent != body)
	{
	  /* An existing loop structure already covers the body; don't
	     create a duplicate.  */
	  gcc_assert (body->header == body_bb);
	  gcc_assert (body->latch == cont_bb
		      || single_pred (body->latch) == cont_bb);
	  parent = NULL;
	}

      if (parent)
	{
	  struct loop *body_loop = alloc_loop ();
	  body_loop->header = body_bb;
	  body_loop->latch = cont_bb;
	  add_loop (body_loop, parent);

	  if (fd->tiling)
	    {
	      /* Insert tiling's element loop.  */
	      struct loop *inner_loop = alloc_loop ();
	      inner_loop->header = elem_body_bb;
	      inner_loop->latch = elem_cont_bb;
	      add_loop (inner_loop, body_loop);
	    }
	}
    }
}

/* Expand the OMP loop defined by REGION.
*/

/* INNER_STMT, if non-NULL, is the statement of a construct nested inside
   this loop and is forwarded to the individual expanders that need it
   (taskloop and static schedule variants).  */

static void
expand_omp_for (struct omp_region *region, gimple *inner_stmt)
{
  struct omp_for_data fd;
  struct omp_for_data_loop *loops;

  /* One omp_for_data_loop per collapsed dimension.  */
  loops
    = (struct omp_for_data_loop *)
      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
	      * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
			&fd, loops);
  region->sched_kind = fd.sched_kind;
  region->sched_modifiers = fd.sched_modifiers;

  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
  if (region->cont)
    {
      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
    }
  else
    /* If there isn't a continue then this is a degenerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);

  /* Dispatch on the loop kind / schedule to the matching expander.  */
  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
    expand_omp_simd (region, &fd);
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
    {
      gcc_assert (!inner_stmt);
      expand_oacc_for (region, &fd);
    }
  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
    {
      if (gimple_omp_for_combined_into_p (fd.for_stmt))
	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
      else
	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
    }
  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	   && !fd.have_ordered)
    {
      if (fd.chunk_size == NULL)
	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
      else
	expand_omp_for_static_chunk (region, &fd, inner_stmt);
    }
  else
    {
      /* Everything else goes through the generic GOMP_loop_*_start /
	 GOMP_loop_*_next runtime interface; compute which builtin pair
	 to use by indexing from the _STATIC_ variants.  */
      int fn_index, start_ix, next_ix;

      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
		  == GF_OMP_FOR_KIND_FOR);
      if (fd.chunk_size == NULL
	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
	fd.chunk_size = integer_zero_node;
      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
      switch (fd.sched_kind)
	{
	case OMP_CLAUSE_SCHEDULE_RUNTIME:
	  fn_index = 3;
	  break;
	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
	case OMP_CLAUSE_SCHEDULE_GUIDED:
	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
	      && !fd.ordered
	      && !fd.have_ordered)
	    {
	      /* Nonmonotonic dynamic/guided map to a separate set of
		 entry points, offset by 3 from the monotonic ones.  */
	      fn_index = 3 + fd.sched_kind;
	      break;
	    }
	  /* FALLTHRU */
	default:
	  fn_index = fd.sched_kind;
	  break;
	}
      if (!fd.ordered)
	fn_index += fd.have_ordered * 6;
      if (fd.ordered)
	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
      else
	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
      if (fd.iter_type == long_long_unsigned_type_node)
	{
	  /* Shift over to the unsigned long long (_ull_) builtins.  */
	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
		       - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
	}
      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
			      (enum built_in_function) next_ix, inner_stmt);
    }

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_only_virtuals);
}

/* Expand code for an OpenMP sections directive.  In pseudo code, we generate

   v = GOMP_sections_start (n);
   L0:
   switch (v)
     {
     case 0:
       goto L2;
     case 1:
       section 1;
       goto L1;
     case 2:
     ...
     case n:
     ...
     default:
       abort ();
     }
   L1:
   v = GOMP_sections_next ();
   goto L0;
   L2:
   reduction;

   If this is a combined parallel sections, replace the call to
   GOMP_sections_start with call to GOMP_sections_next.  */

static void
expand_omp_sections (struct omp_region *region)
{
  tree t, u, vin = NULL, vmain, vnext, l2;
  unsigned len;
  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
  gimple_stmt_iterator si, switch_si;
  gomp_sections *sections_stmt;
  gimple *stmt;
  gomp_continue *cont;
  edge_iterator ei;
  edge e;
  struct omp_region *inner;
  unsigned i, casei;
  bool exit_reachable = region->cont != NULL;

  gcc_assert (region->exit != NULL);
  entry_bb = region->entry;
  l0_bb = single_succ (entry_bb);
  l1_bb = region->cont;
  l2_bb = region->exit;
  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
    l2 = gimple_block_label (l2_bb);
  else
    {
      /* This can happen if there are reductions.
*/
      /* Pick as L2 the first successor of L0 that is not a section body
	 (i.e. the exit/reduction block).  */
      len = EDGE_COUNT (l0_bb->succs);
      gcc_assert (len > 0);
      e = EDGE_SUCC (l0_bb, len - 1);
      si = gsi_last_nondebug_bb (e->dest);
      l2 = NULL_TREE;
      if (gsi_end_p (si)
	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	l2 = gimple_block_label (e->dest);
      else
	FOR_EACH_EDGE (e, ei, l0_bb->succs)
	  {
	    si = gsi_last_nondebug_bb (e->dest);
	    if (gsi_end_p (si)
		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
	      {
		l2 = gimple_block_label (e->dest);
		break;
	      }
	  }
    }
  if (exit_reachable)
    default_bb = create_empty_bb (l1_bb->prev_bb);
  else
    default_bb = create_empty_bb (l0_bb);

  /* We will build a switch() with enough cases for all the
     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
     and a default case to abort if something goes wrong.  */
  len = EDGE_COUNT (l0_bb->succs);

  /* Use vec::quick_push on label_vec throughout, since we know the size
     in advance.  */
  auto_vec<tree> label_vec (len);

  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
     GIMPLE_OMP_SECTIONS statement.  */
  si = gsi_last_nondebug_bb (entry_bb);
  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
  vin = gimple_omp_sections_control (sections_stmt);
  if (!is_combined_parallel (region))
    {
      /* If we are not inside a combined parallel+sections region,
	 call GOMP_sections_start.  */
      t = build_int_cst (unsigned_type_node, len - 1);
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
      stmt = gimple_build_call (u, 1, t);
    }
  else
    {
      /* Otherwise, call GOMP_sections_next.  */
      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (u, 0);
    }
  gimple_call_set_lhs (stmt, vin);
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
     L0_BB.  */
  switch_si = gsi_last_nondebug_bb (l0_bb);
  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
  if (exit_reachable)
    {
      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
      vmain = gimple_omp_continue_control_use (cont);
      vnext = gimple_omp_continue_control_def (cont);
    }
  else
    {
      vmain = vin;
      vnext = NULL_TREE;
    }

  /* Case 0 is the "no more work" value and branches to L2.  */
  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
  label_vec.quick_push (t);
  i = 1;

  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
  for (inner = region->inner, casei = 1;
       inner;
       inner = inner->next, i++, casei++)
    {
      basic_block s_entry_bb, s_exit_bb;

      /* Skip optional reduction region.  */
      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
	{
	  --i;
	  --casei;
	  continue;
	}

      s_entry_bb = inner->entry;
      s_exit_bb = inner->exit;

      t = gimple_block_label (s_entry_bb);
      u = build_int_cst (unsigned_type_node, casei);
      u = build_case_label (u, NULL, t);
      label_vec.quick_push (u);

      si = gsi_last_nondebug_bb (s_entry_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
      gsi_remove (&si, true);
      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;

      if (s_exit_bb == NULL)
	continue;

      si = gsi_last_nondebug_bb (s_exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);

      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
    }

  /* Error handling code goes in DEFAULT_BB.  */
  t = gimple_block_label (default_bb);
  u = build_case_label (NULL, NULL, t);
  make_edge (l0_bb, default_bb, 0);
  add_bb_to_loop (default_bb, current_loops->tree_root);

  stmt = gimple_build_switch (vmain, u, label_vec);
  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
  gsi_remove (&switch_si, true);

  si = gsi_start_bb (default_bb);
  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);

  if (exit_reachable)
    {
      tree bfn_decl;

      /* Code to get the next section goes in L1_BB.  */
      si = gsi_last_nondebug_bb (l1_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);

      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
      stmt = gimple_build_call (bfn_decl, 0);
      gimple_call_set_lhs (stmt, vnext);
      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
      gsi_remove (&si, true);

      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
    }

  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
  si = gsi_last_nondebug_bb (l2_bb);
  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
  else if (gimple_omp_return_lhs (gsi_stmt (si)))
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
  else
    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
  stmt = gimple_build_call (t, 0);
  if (gimple_omp_return_lhs (gsi_stmt (si)))
    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  /* DEFAULT_BB is only reachable through the switch in L0_BB.  */
  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
}

/* Expand code for an OpenMP single directive.  We've already expanded
   much of the code, here we simply place the GOMP_barrier call.
*/

static void
expand_omp_single (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* Drop the GIMPLE_OMP_SINGLE entry marker; the body itself was already
     rewritten by earlier lowering.  */
  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  /* A nowait single needs no barrier; otherwise emit GOMP_barrier.
     NOTE(review): T, the GIMPLE_OMP_RETURN lhs, is passed through to
     omp_build_barrier -- presumably the cancellation result; confirm.  */
  si = gsi_last_nondebug_bb (exit_bb);
  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
    {
      tree t = gimple_omp_return_lhs (gsi_stmt (si));
      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
    }
  gsi_remove (&si, true);
  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
}

/* Generic expansion for OpenMP synchronization directives: master,
   ordered and critical.  All we need to do here is remove the entry
   and exit markers for REGION.  */

static void
expand_omp_synch (struct omp_region *region)
{
  basic_block entry_bb, exit_bb;
  gimple_stmt_iterator si;

  entry_bb = region->entry;
  exit_bb = region->exit;

  /* The entry marker may be any of the synchronization directives; they
     all expand to a simple fall-through here.  */
  si = gsi_last_nondebug_bb (entry_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
  gsi_remove (&si, true);
  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

  if (exit_bb)
    {
      si = gsi_last_nondebug_bb (exit_bb);
      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
      gsi_remove (&si, true);
      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
    }
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile load.
*/

static bool
expand_omp_atomic_load (basic_block load_bb, tree addr,
			tree loaded_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb;
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_load_optab[mode], and mode
     is smaller than word size, then expand_atomic_load assumes that the load
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* INDEX is log2 of the access size (computed in expand_omp_atomic); the
     "+ index + 1" selects the size-specific __atomic_load builtin.  */
  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (loaded_val);
  itype = TREE_TYPE (TREE_TYPE (decl));

  call = build_call_expr_loc (loc, decl, 2, addr,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (!useless_type_conversion_p (type, itype))
    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Also drop the matching GIMPLE_OMP_ATOMIC_STORE marker.  */
  store_bb = single_succ (load_bb);
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a normal volatile store.
*/

static bool
expand_omp_atomic_store (basic_block load_bb, tree addr,
			 tree loaded_val, tree stored_val, int index)
{
  enum built_in_function tmpbase;
  gimple_stmt_iterator gsi;
  basic_block store_bb = single_succ (load_bb);
  location_t loc;
  gimple *stmt;
  tree decl, call, type, itype;
  machine_mode imode;
  bool exchange;

  gsi = gsi_last_nondebug_bb (load_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);

  /* If the load value is needed, then this isn't a store but an exchange.  */
  exchange = gimple_omp_atomic_need_value_p (stmt);

  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
  loc = gimple_location (stmt);

  /* ??? If the target does not implement atomic_store_optab[mode], and mode
     is smaller than word size, then expand_atomic_store assumes that the store
     is atomic.  We could avoid the builtin entirely in this case.  */

  /* "+ index + 1" selects the size-specific builtin variant.  */
  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;

  type = TREE_TYPE (stored_val);

  /* Dig out the type of the function's second argument.  */
  itype = TREE_TYPE (decl);
  itype = TYPE_ARG_TYPES (itype);
  itype = TREE_CHAIN (itype);
  itype = TREE_VALUE (itype);
  imode = TYPE_MODE (itype);

  if (exchange && !can_atomic_exchange_p (imode, true))
    return false;

  if (!useless_type_conversion_p (itype, type))
    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
			      build_int_cst (NULL,
					     gimple_omp_atomic_seq_cst_p (stmt)
					     ? MEMMODEL_SEQ_CST
					     : MEMMODEL_RELAXED));
  if (exchange)
    {
      if (!useless_type_conversion_p (type, itype))
	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
    }

  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
  gsi = gsi_last_nondebug_bb (load_bb);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
   size of the data type, and thus usable to find the index of the builtin
   decl.  Returns false if the expression is not of the proper form.  */

static bool
expand_omp_atomic_fetch_op (basic_block load_bb,
			    tree addr, tree loaded_val,
			    tree stored_val, int index)
{
  enum built_in_function oldbase, newbase, tmpbase;
  tree decl, itype, call;
  tree lhs, rhs;
  basic_block store_bb = single_succ (load_bb);
  gimple_stmt_iterator gsi;
  gimple *stmt;
  location_t loc;
  enum tree_code code;
  bool need_old, need_new;
  machine_mode imode;
  bool seq_cst;

  /* We expect to find the following sequences:

   load_bb:
       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)

   store_bb:
       val = tmp OP something; (or: something OP tmp)
       GIMPLE_OMP_ATOMIC_STORE (val)

  ???FIXME: Allow a more flexible sequence.
  Perhaps use data flow to pick the statements.

  */

  gsi = gsi_after_labels (store_bb);
  stmt = gsi_stmt (gsi);
  if (is_gimple_debug (stmt))
    {
      gsi_next_nondebug (&gsi);
      if (gsi_end_p (gsi))
	return false;
      stmt = gsi_stmt (gsi);
    }
  loc = gimple_location (stmt);
  if (!is_gimple_assign (stmt))
    return false;
  gsi_next_nondebug (&gsi);
  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
    return false;
  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
  gcc_checking_assert (!need_old || !need_new);

  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
    return false;

  /* Check for one of the supported fetch-op operations.  */
  code = gimple_assign_rhs_code (stmt);
  switch (code)
    {
    case PLUS_EXPR:
    case POINTER_PLUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
      break;
    case MINUS_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
      break;
    case BIT_AND_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
      break;
    case BIT_IOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
      break;
    case BIT_XOR_EXPR:
      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
      break;
    default:
      return false;
    }

  /* Make sure the expression is of the proper form.  */
  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs2 (stmt);
  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
    rhs = gimple_assign_rhs1 (stmt);
  else
    return false;

  tmpbase = ((enum built_in_function)
	     ((need_new ? newbase : oldbase) + index + 1));
  decl = builtin_decl_explicit (tmpbase);
  if (decl == NULL_TREE)
    return false;
  itype = TREE_TYPE (TREE_TYPE (decl));
  imode = TYPE_MODE (itype);

  /* We could test all of the various optabs involved, but the fact of the
     matter is that (with the exception of i486 vs i586 and xadd) all targets
     that support any atomic operation optab also implements compare-and-swap.
     Let optabs.c take care of expanding any compare-and-swap loop.  */
  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
    return false;

  gsi = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
     It only requires that the operation happen atomically.  Thus we can
     use the RELAXED memory model.  */
  call = build_call_expr_loc (loc, decl, 3, addr,
			      fold_convert_loc (loc, itype, rhs),
			      build_int_cst (NULL,
					     seq_cst ? MEMMODEL_SEQ_CST
						     : MEMMODEL_RELAXED));

  if (need_old || need_new)
    {
      lhs = need_old ?
loaded_val : stored_val;
      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
    }
  else
    call = fold_convert_loc (loc, void_type_node, call);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Drop both the GIMPLE_OMP_ATOMIC_STORE marker and the now-redundant
     assignment that computed the stored value.  */
  gsi = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
  gsi_remove (&gsi, true);
  gsi = gsi_last_nondebug_bb (store_bb);
  stmt = gsi_stmt (gsi);
  gsi_remove (&gsi, true);

  if (gimple_in_ssa_p (cfun))
    {
      release_defs (stmt);
      update_ssa (TODO_update_ssa_no_phi);
    }

  return true;
}

/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:

      oldval = *addr;
      repeat:
	newval = rhs;	 // with oldval replacing *addr in rhs
	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
	if (oldval != newval)
	  goto repeat;

   INDEX is log2 of the size of the data type, and thus usable to find the
   index of the builtin decl.  */

static bool
expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
			    tree addr, tree loaded_val, tree stored_val,
			    int index)
{
  tree loadedi, storedi, initial, new_storedi, old_vali;
  tree type, itype, cmpxchg, iaddr, atype;
  gimple_stmt_iterator si;
  basic_block loop_header = single_succ (load_bb);
  gimple *phi, *stmt;
  edge e;
  enum built_in_function fncode;

  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
     order to use the RELAXED memory model effectively.  */
  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
				    + index + 1);
  cmpxchg = builtin_decl_explicit (fncode);
  if (cmpxchg == NULL_TREE)
    return false;
  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  atype = type;
  itype = TREE_TYPE (TREE_TYPE (cmpxchg));

  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
      || !can_atomic_load_p (TYPE_MODE (itype)))
    return false;

  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  /* For floating-point values, we'll need to view-convert them to integers
     so that we can perform the atomic compare and swap.  Simplify the
     following code by always setting up the "i"ntegral variables.  */
  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
    {
      tree iaddr_val;

      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
							   true));
      atype = itype;
      iaddr_val
	= force_gimple_operand_gsi (&si,
				    fold_convert (TREE_TYPE (iaddr), addr),
				    false, NULL_TREE, true, GSI_SAME_STMT);
      stmt = gimple_build_assign (iaddr, iaddr_val);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
      loadedi = create_tmp_var (itype);
      if (gimple_in_ssa_p (cfun))
	loadedi = make_ssa_name (loadedi);
    }
  else
    {
      iaddr = addr;
      loadedi = loaded_val;
    }

  /* Prefer an atomic builtin for the initial load; fall back to a plain
     MEM_REF when the target provides none.  */
  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
  tree loaddecl = builtin_decl_explicit (fncode);
  if (loaddecl)
    initial
      = fold_convert (atype,
		      build_call_expr (loaddecl, 2, iaddr,
				       build_int_cst (NULL_TREE,
						      MEMMODEL_RELAXED)));
  else
    {
      tree off
	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
						      true), 0);
      initial = build2 (MEM_REF, atype, iaddr, off);
    }

  initial
    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
				GSI_SAME_STMT);

  /* Move the value to the LOADEDI temporary.  */
  if (gimple_in_ssa_p (cfun))
    {
      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
      phi = create_phi_node (loadedi, loop_header);
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
	       initial);
    }
  else
    gsi_insert_before (&si,
		       gimple_build_assign (loadedi, initial),
		       GSI_SAME_STMT);
  if (loadedi != loaded_val)
    {
      gimple_stmt_iterator gsi2;
      tree x;

      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
      gsi2 = gsi_start_bb (loop_header);
      if (gimple_in_ssa_p (cfun))
	{
	  gassign *stmt;
	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
					true, GSI_SAME_STMT);
	  stmt = gimple_build_assign (loaded_val, x);
	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
	}
      else
	{
	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
				    true, GSI_SAME_STMT);
	}
    }
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  /* If we had to view-convert the loaded value to an integral type,
     convert the stored value the same way.  */
  if (iaddr == addr)
    storedi = stored_val;
  else
    storedi
      = force_gimple_operand_gsi (&si,
				  build1 (VIEW_CONVERT_EXPR, itype,
					  stored_val), true, NULL_TREE, true,
				  GSI_SAME_STMT);

  /* Build the compare&swap statement.  */
  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
  new_storedi = force_gimple_operand_gsi (&si,
					  fold_convert (TREE_TYPE (loadedi),
							new_storedi),
					  true, NULL_TREE,
					  true, GSI_SAME_STMT);

  if (gimple_in_ssa_p (cfun))
    old_vali = loadedi;
  else
    {
      old_vali = create_tmp_var (TREE_TYPE (loadedi));
      stmt = gimple_build_assign (old_vali, loadedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);

      stmt = gimple_build_assign (loadedi, new_storedi);
      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
    }

  /* Note that we always perform the comparison as an integer, even for
     floating point.  This allows the atomic operation to properly
     succeed even with NaNs and -0.0.  */
  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
  stmt = gimple_build_cond_empty (ne);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  /* Update cfg.  */
  e = single_succ_edge (store_bb);
  e->flags &= ~EDGE_FALLTHRU;
  e->flags |= EDGE_FALSE_VALUE;
  /* Expect no looping.  */
  e->probability = profile_probability::guessed_always ();

  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
  e->probability = profile_probability::guessed_never ();

  /* Copy the new value to loadedi (we already did that before the condition
     if we are not in SSA).  */
  if (gimple_in_ssa_p (cfun))
    {
      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
    }

  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
  gsi_remove (&si, true);

  /* Register the retry loop with the loop tree.  */
  struct loop *loop = alloc_loop ();
  loop->header = loop_header;
  loop->latch = store_bb;
  add_loop (loop, loop_header->loop_father);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);

  return true;
}

/* A subroutine of expand_omp_atomic.
Implement the atomic operation as:

      GOMP_atomic_start ();
      *addr = rhs;
      GOMP_atomic_end ();

   The result is not globally atomic, but works so long as all parallel
   references are within #pragma omp atomic directives.  According to
   responses received from omp@openmp.org, appears to be within spec.
   Which makes sense, since that's how several other compilers handle
   this situation as well.
   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
   expanding.  STORED_VAL is the operand of the matching
   GIMPLE_OMP_ATOMIC_STORE.

   We replace
   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
   loaded_val = *addr;

   and replace
   GIMPLE_OMP_ATOMIC_STORE (stored_val) with
   *addr = stored_val;  */

static bool
expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
			 tree addr, tree loaded_val, tree stored_val)
{
  gimple_stmt_iterator si;
  gassign *stmt;
  tree t;

  si = gsi_last_nondebug_bb (load_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);

  /* Build *addr with the type of the loaded value, adjusting the pointer
     operand's type to match.  */
  tree mem = build_simple_mem_ref (addr);
  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
  TREE_OPERAND (mem, 1)
    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
						 true),
		    TREE_OPERAND (mem, 1));
  stmt = gimple_build_assign (loaded_val, mem);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
  gsi_remove (&si, true);

  si = gsi_last_nondebug_bb (store_bb);
  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);

  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
  gsi_insert_before (&si, stmt, GSI_SAME_STMT);

  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
  t = build_call_expr (t, 0);
  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
  gsi_remove (&si, true);

  if (gimple_in_ssa_p (cfun))
    update_ssa (TODO_update_ssa_no_phi);
  return true;
}

/* Expand a GIMPLE_OMP_ATOMIC statement.  We try to expand
   using expand_omp_atomic_fetch_op.  If it failed, we try to
   call expand_omp_atomic_pipeline, and if it fails too, the
   ultimate fallback is wrapping the operation in a mutex
   (expand_omp_atomic_mutex).  REGION is the atomic region built
   by build_omp_regions_1().  */

static void
expand_omp_atomic (struct omp_region *region)
{
  basic_block load_bb = region->entry, store_bb = region->exit;
  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
  tree loaded_val = gimple_omp_atomic_load_lhs (load);
  tree addr = gimple_omp_atomic_load_rhs (load);
  tree stored_val = gimple_omp_atomic_store_val (store);
  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
  HOST_WIDE_INT index;

  /* Make sure the type is one of the supported sizes.  INDEX in [0, 4]
     covers power-of-two access sizes of 1 to 16 bytes.  */
  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
  index = exact_log2 (index);
  if (index >= 0 && index <= 4)
    {
      unsigned int align = TYPE_ALIGN_UNIT (type);

      /* __sync builtins require strict data alignment.  */
      if (exact_log2 (align) >= index)
	{
	  /* Atomic load.  */
	  scalar_mode smode;
	  if (loaded_val == stored_val
	      && (is_int_mode (TYPE_MODE (type), &smode)
		  || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
	    return;

	  /* Atomic store.  */
	  if ((is_int_mode (TYPE_MODE (type), &smode)
	       || is_float_mode (TYPE_MODE (type), &smode))
	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
	      && store_bb == single_succ (load_bb)
	      && first_stmt (store_bb) == store
	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
					  stored_val, index))
	    return;

	  /* When possible, use specialized atomic update functions.  */
	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
	      && store_bb == single_succ (load_bb)
	      && expand_omp_atomic_fetch_op (load_bb, addr,
					     loaded_val, stored_val, index))
	    return;

	  /* If we don't have specialized __sync builtins, try and implement
	     as a compare and swap loop.  */
	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
					  loaded_val, stored_val, index))
	    return;
	}
    }

  /* The ultimate fallback is wrapping the operation in a mutex.  */
  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
}

/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
   at REGION_EXIT.  */

static void
mark_loops_in_oacc_kernels_region (basic_block region_entry,
				   basic_block region_exit)
{
  struct loop *outer = region_entry->loop_father;
  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);

  /* Don't parallelize the kernels region if it contains more than one outer
     loop.
*/
  unsigned int nr_outer_loops = 0;
  struct loop *single_outer = NULL;
  /* Count the immediate child loops of OUTER that lie inside the region,
     i.e. are dominated by the entry but not by the exit.  */
  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
    {
      gcc_assert (loop_outer (loop) == outer);

      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
	continue;

      if (region_exit != NULL
	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
	continue;

      nr_outer_loops++;
      single_outer = loop;
    }
  if (nr_outer_loops != 1)
    return;

  /* Also give up if any nesting level contains sibling loops.  */
  for (struct loop *loop = single_outer->inner;
       loop != NULL;
       loop = loop->inner)
    if (loop->next)
      return;

  /* Mark the loops in the region.  */
  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
    loop->in_oacc_kernels_region = true;
}

/* Types used to pass grid and workgroup sizes to kernel invocation.  */

struct GTY(()) grid_launch_attributes_trees
{
  tree kernel_dim_array_type;
  tree kernel_lattrs_dimnum_decl;
  tree kernel_lattrs_grid_decl;
  tree kernel_lattrs_group_decl;
  tree kernel_launch_attributes_type;
};

static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;

/* Create types used to pass kernel launch attributes to target.  */

static void
grid_create_kernel_launch_attr_types (void)
{
  /* The trees are GC-rooted and built only once.  */
  if (grid_attr_trees)
    return;
  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();

  /* Index type 0..2, i.e. a three-element array of dimension sizes.  */
  tree dim_arr_index_type
    = build_index_type (build_int_cst (integer_type_node, 2));
  grid_attr_trees->kernel_dim_array_type
    = build_array_type (uint32_type_node, dim_arr_index_type);

  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
  grid_attr_trees->kernel_lattrs_dimnum_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
		  uint32_type_node);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;

  grid_attr_trees->kernel_lattrs_grid_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
    = grid_attr_trees->kernel_lattrs_dimnum_decl;
  grid_attr_trees->kernel_lattrs_group_decl
    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
		  grid_attr_trees->kernel_dim_array_type);
  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
    = grid_attr_trees->kernel_lattrs_grid_decl;
  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
			 "__gomp_kernel_launch_attributes",
			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
}

/* Insert before the current statement in GSI a store of VALUE to INDEX of
   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
   of type uint32_type_node.
*/ 6719 6720static void 6721grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, 6722 tree fld_decl, int index, tree value) 6723{ 6724 tree ref = build4 (ARRAY_REF, uint32_type_node, 6725 build3 (COMPONENT_REF, 6726 grid_attr_trees->kernel_dim_array_type, 6727 range_var, fld_decl, NULL_TREE), 6728 build_int_cst (integer_type_node, index), 6729 NULL_TREE, NULL_TREE); 6730 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); 6731} 6732 6733/* Return a tree representation of a pointer to a structure with grid and 6734 work-group size information. Statements filling that information will be 6735 inserted before GSI, TGT_STMT is the target statement which has the 6736 necessary information in it. */ 6737 6738static tree 6739grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, 6740 gomp_target *tgt_stmt) 6741{ 6742 grid_create_kernel_launch_attr_types (); 6743 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, 6744 "__kernel_launch_attrs"); 6745 6746 unsigned max_dim = 0; 6747 for (tree clause = gimple_omp_target_clauses (tgt_stmt); 6748 clause; 6749 clause = OMP_CLAUSE_CHAIN (clause)) 6750 { 6751 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) 6752 continue; 6753 6754 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); 6755 max_dim = MAX (dim, max_dim); 6756 6757 grid_insert_store_range_dim (gsi, lattrs, 6758 grid_attr_trees->kernel_lattrs_grid_decl, 6759 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); 6760 grid_insert_store_range_dim (gsi, lattrs, 6761 grid_attr_trees->kernel_lattrs_group_decl, 6762 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); 6763 } 6764 6765 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, 6766 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); 6767 gcc_checking_assert (max_dim <= 2); 6768 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); 6769 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), 6770 GSI_SAME_STMT); 6771 
TREE_ADDRESSABLE (lattrs) = 1; 6772 return build_fold_addr_expr (lattrs); 6773} 6774 6775/* Build target argument identifier from the DEVICE identifier, value 6776 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ 6777 6778static tree 6779get_target_argument_identifier_1 (int device, bool subseqent_param, int id) 6780{ 6781 tree t = build_int_cst (integer_type_node, device); 6782 if (subseqent_param) 6783 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6784 build_int_cst (integer_type_node, 6785 GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); 6786 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6787 build_int_cst (integer_type_node, id)); 6788 return t; 6789} 6790 6791/* Like above but return it in type that can be directly stored as an element 6792 of the argument array. */ 6793 6794static tree 6795get_target_argument_identifier (int device, bool subseqent_param, int id) 6796{ 6797 tree t = get_target_argument_identifier_1 (device, subseqent_param, id); 6798 return fold_convert (ptr_type_node, t); 6799} 6800 6801/* Return a target argument consisting of DEVICE identifier, value identifier 6802 ID, and the actual VALUE. */ 6803 6804static tree 6805get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, 6806 tree value) 6807{ 6808 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, 6809 fold_convert (integer_type_node, value), 6810 build_int_cst (unsigned_type_node, 6811 GOMP_TARGET_ARG_VALUE_SHIFT)); 6812 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, 6813 get_target_argument_identifier_1 (device, false, id)); 6814 t = fold_convert (ptr_type_node, t); 6815 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); 6816} 6817 6818/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, 6819 push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, 6820 otherwise push an identifier (with DEVICE and ID) and the VALUE in two 6821 arguments. 
 */

static void
push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
					 int id, tree value, vec <tree> *args)
{
  /* Small literal constants fit into the value bits of a single packed
     argument; anything else needs an identifier element followed by the
     value element.  */
  if (tree_fits_shwi_p (value)
      && tree_to_shwi (value) > -(1 << 15)
      && tree_to_shwi (value) < (1 << 15))
    args->quick_push (get_target_argument_value (gsi, device, id, value));
  else
    {
      args->quick_push (get_target_argument_identifier (device, true, id));
      value = fold_convert (ptr_type_node, value);
      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
					GSI_SAME_STMT);
      args->quick_push (value);
    }
}

/* Create an array of arguments that is then passed to GOMP_target.  */

static tree
get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
{
  auto_vec <tree, 6> args;
  tree clauses = gimple_omp_target_clauses (tgt_stmt);
  /* num_teams: value of the clause if present, -1 ("not set") otherwise.  */
  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  if (c)
    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);

  /* thread_limit: likewise, -1 when the clause is absent.  */
  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  if (c)
    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
  else
    t = integer_minus_one_node;
  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
					   &args);

  /* Add HSA-specific grid sizes, if available.  */
  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
		       OMP_CLAUSE__GRIDDIM_))
    {
      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
      args.quick_push (t);
      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
    }

  /* Produce more, perhaps device specific, arguments here.
 */

  /* Materialize the collected arguments into a NULL-terminated array of
     pointers whose address is passed to the runtime.  */
  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
							  args.length () + 1),
				  ".omp_target_args");
  for (unsigned i = 0; i < args.length (); i++)
    {
      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
			 build_int_cst (integer_type_node, i),
			 NULL_TREE, NULL_TREE);
      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
			 GSI_SAME_STMT);
    }
  /* Terminating NULL element.  */
  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
		     build_int_cst (integer_type_node, args.length ()),
		     NULL_TREE, NULL_TREE);
  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
		     GSI_SAME_STMT);
  TREE_ADDRESSABLE (argarray) = 1;
  return build_fold_addr_expr (argarray);
}

/* Expand the GIMPLE_OMP_TARGET starting at REGION.  For offloaded regions
   this outlines the body into the child function recorded on the statement
   and moves its CFG there; in all cases it replaces the directive with a
   call to the appropriate libgomp/libgoacc entry point.  */

static void
expand_omp_target (struct omp_region *region)
{
  basic_block entry_bb, exit_bb, new_bb;
  struct function *child_cfun;
  tree child_fn, block, t;
  gimple_stmt_iterator gsi;
  gomp_target *entry_stmt;
  gimple *stmt;
  edge e;
  bool offloaded, data_region;

  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
  new_bb = region->entry;

  /* Classify the target kind: only data regions keep a GIMPLE_OMP_RETURN
     that must be removed at the end, see below.  */
  offloaded = is_gimple_omp_offloaded (entry_stmt);
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
    case GF_OMP_TARGET_KIND_UPDATE:
    case GF_OMP_TARGET_KIND_ENTER_DATA:
    case GF_OMP_TARGET_KIND_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      data_region = false;
      break;
    case GF_OMP_TARGET_KIND_DATA:
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      data_region = true;
      break;
    default:
      gcc_unreachable ();
    }

  child_fn = NULL_TREE;
  child_cfun = NULL;
  if (offloaded)
    {
      child_fn = gimple_omp_target_child_fn (entry_stmt);
      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
    }

  /* Supported by expand_omp_taskreg, but not here.  */
  if (child_cfun != NULL)
    gcc_checking_assert (!child_cfun->cfg);
  gcc_checking_assert (!gimple_in_ssa_p (cfun));

  entry_bb = region->entry;
  exit_bb = region->exit;

  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
    {
      mark_loops_in_oacc_kernels_region (region->entry, region->exit);

      /* Further down, both OpenACC kernels and OpenACC parallel constructs
	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
      DECL_ATTRIBUTES (child_fn)
	= tree_cons (get_identifier ("oacc kernels"),
		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
    }

  if (offloaded)
    {
      unsigned srcidx, dstidx, num;

      /* If the offloading region needs data sent from the parent
	 function, then the very first statement (except possible
	 tree profile counter updates) of the offloading body
	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
	 &.OMP_DATA_O is passed as an argument to the child function,
	 we need to replace it with the argument as seen by the child
	 function.

	 In most cases, this will end up being the identity assignment
	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
	 a function call that has been inlined, the original PARM_DECL
	 .OMP_DATA_I may have been converted into a different local
	 variable.  In which case, we need to keep the assignment.  */
      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
      if (data_arg)
	{
	  basic_block entry_succ_bb = single_succ (entry_bb);
	  gimple_stmt_iterator gsi;
	  tree arg;
	  gimple *tgtcopy_stmt = NULL;
	  tree sender = TREE_VEC_ELT (data_arg, 0);

	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
	    {
	      gcc_assert (!gsi_end_p (gsi));
	      stmt = gsi_stmt (gsi);
	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
		continue;

	      if (gimple_num_ops (stmt) == 2)
		{
		  tree arg = gimple_assign_rhs1 (stmt);

		  /* We're ignoring the subcode because we're
		     effectively doing a STRIP_NOPS.  */

		  if (TREE_CODE (arg) == ADDR_EXPR
		      && TREE_OPERAND (arg, 0) == sender)
		    {
		      tgtcopy_stmt = stmt;
		      break;
		    }
		}
	    }

	  gcc_assert (tgtcopy_stmt != NULL);
	  arg = DECL_ARGUMENTS (child_fn);

	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
	  gsi_remove (&gsi, true);
	}

      /* Declare local variables needed in CHILD_CFUN.  */
      block = DECL_INITIAL (child_fn);
      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
      /* The gimplifier could record temporaries in the offloading block
	 rather than in containing function's local_decls chain,
	 which would mean cgraph missed finalizing them.  Do it now.  */
      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
	  varpool_node::finalize_decl (t);
      DECL_SAVED_TREE (child_fn) = NULL;
      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
      gimple_set_body (child_fn, NULL);
      TREE_USED (block) = 1;

      /* Reset DECL_CONTEXT on function arguments.  */
      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
	DECL_CONTEXT (t) = child_fn;

      /* Split ENTRY_BB at GIMPLE_*,
	 so that it can be moved to the child function.  */
      gsi = gsi_last_nondebug_bb (entry_bb);
      stmt = gsi_stmt (gsi);
      gcc_assert (stmt
		  && gimple_code (stmt) == gimple_code (entry_stmt));
      e = split_block (entry_bb, stmt);
      gsi_remove (&gsi, true);
      entry_bb = e->dest;
      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;

      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
      if (exit_bb)
	{
	  gsi = gsi_last_nondebug_bb (exit_bb);
	  gcc_assert (!gsi_end_p (gsi)
		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
	  stmt = gimple_build_return (NULL);
	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}

      /* Move the offloading region into CHILD_CFUN.  */

      block = gimple_block (entry_stmt);

      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
      if (exit_bb)
	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
      /* When the OMP expansion process cannot guarantee an up-to-date
	 loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;

      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
      num = vec_safe_length (child_cfun->local_decls);
      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
	{
	  t = (*child_cfun->local_decls)[srcidx];
	  if (DECL_CONTEXT (t) == cfun->decl)
	    continue;
	  if (srcidx != dstidx)
	    (*child_cfun->local_decls)[dstidx] = t;
	  dstidx++;
	}
      if (dstidx != num)
	vec_safe_truncate (child_cfun->local_decls, dstidx);

      /* Inform the callgraph about the new function.  */
      child_cfun->curr_properties = cfun->curr_properties;
      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
      cgraph_node *node = cgraph_node::get_create (child_fn);
      node->parallelized_function = 1;
      cgraph_node::add_new_function (child_fn, true);

      /* Add the new function to the offload table.  */
      if (ENABLE_OFFLOADING)
	{
	  if (in_lto_p)
	    DECL_PRESERVE_P (child_fn) = 1;
	  vec_safe_push (offload_funcs, child_fn);
	}

      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);

      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
	 fixed in a following pass.  */
      push_cfun (child_cfun);
      if (need_asm)
	assign_assembler_name_if_needed (child_fn);
      cgraph_edge::rebuild_edges ();

      /* Some EH regions might become dead, see PR34608.  If
	 pass_cleanup_cfg isn't the first pass to happen with the
	 new child, these dead EH edges might cause problems.
	 Clean them up now.  */
      if (flag_exceptions)
	{
	  basic_block bb;
	  bool changed = false;

	  FOR_EACH_BB_FN (bb, cfun)
	    changed |= gimple_purge_dead_eh_edges (bb);
	  if (changed)
	    cleanup_tree_cfg ();
	}
      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
	verify_loop_structure ();
      pop_cfun ();

      if (dump_file && !gimple_in_ssa_p (cfun))
	{
	  omp_any_child_fn_dumped = true;
	  dump_function_header (dump_file, child_fn, dump_flags);
	  dump_function_to_file (child_fn, dump_file, dump_flags);
	}

      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
    }

  /* Emit a library call to launch the offloading region, or do data
     transfers.  */
  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
  enum built_in_function start_ix;
  location_t clause_loc;
  unsigned int flags_i = 0;

  /* Select the runtime entry point according to the target kind.  */
  switch (gimple_omp_target_kind (entry_stmt))
    {
    case GF_OMP_TARGET_KIND_REGION:
      start_ix = BUILT_IN_GOMP_TARGET;
      break;
    case GF_OMP_TARGET_KIND_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_DATA;
      break;
    case GF_OMP_TARGET_KIND_UPDATE:
      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_ENTER_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_EXIT_DATA:
      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_KERNELS:
    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
      start_ix = BUILT_IN_GOACC_PARALLEL;
      break;
    case GF_OMP_TARGET_KIND_OACC_DATA:
    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
      start_ix = BUILT_IN_GOACC_DATA_START;
      break;
    case GF_OMP_TARGET_KIND_OACC_UPDATE:
      start_ix = BUILT_IN_GOACC_UPDATE;
      break;
    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
      break;
    case GF_OMP_TARGET_KIND_OACC_DECLARE:
      start_ix = BUILT_IN_GOACC_DECLARE;
      break;
    default:
      gcc_unreachable ();
    }

  clauses = gimple_omp_target_clauses (entry_stmt);

  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
     library choose) and there is no conditional.  */
  cond = NULL_TREE;
  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);

  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
  if (c)
    cond = OMP_CLAUSE_IF_EXPR (c);

  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
  if (c)
    {
      /* Even if we pass it to all library function calls, it is currently only
	 defined/used for the OpenMP target ones.  */
      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);

      device = OMP_CLAUSE_DEVICE_ID (c);
      clause_loc = OMP_CLAUSE_LOCATION (c);
    }
  else
    clause_loc = gimple_location (entry_stmt);

  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
  if (c)
    flags_i |= GOMP_TARGET_FLAG_NOWAIT;

  /* Ensure 'device' is of the correct type.  */
  device = fold_convert_loc (clause_loc, integer_type_node, device);

  /* If we found the clause 'if (cond)', build
     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
  if (cond)
    {
      cond = gimple_boolify (cond);

      basic_block cond_bb, then_bb, else_bb;
      edge e;
      tree tmp_var;

      /* Build a diamond: COND_BB branches to THEN_BB (device as computed
	 above) or ELSE_BB (host fallback), both falling through to NEW_BB
	 where the library call is emitted.  */
      tmp_var = create_tmp_var (TREE_TYPE (device));
      if (offloaded)
	e = split_block_after_labels (new_bb);
      else
	{
	  gsi = gsi_last_nondebug_bb (new_bb);
	  gsi_prev (&gsi);
	  e = split_block (new_bb, gsi_stmt (gsi));
	}
      cond_bb = e->src;
      new_bb = e->dest;
      remove_edge (e);

      then_bb = create_empty_bb (cond_bb);
      else_bb = create_empty_bb (then_bb);
      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);

      stmt = gimple_build_cond_empty (cond);
      gsi = gsi_last_bb (cond_bb);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (then_bb);
      stmt = gimple_build_assign (tmp_var, device);
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      gsi = gsi_start_bb (else_bb);
      stmt = gimple_build_assign (tmp_var,
				  build_int_cst (integer_type_node,
						 GOMP_DEVICE_HOST_FALLBACK));
      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
      add_bb_to_loop (then_bb, cond_bb->loop_father);
      add_bb_to_loop (else_bb, cond_bb->loop_father);
      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
      make_edge (else_bb, new_bb, EDGE_FALLTHRU);

      device = tmp_var;
      gsi = gsi_last_nondebug_bb (new_bb);
    }
  else
    {
      gsi = gsi_last_nondebug_bb (new_bb);
      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
					 true, GSI_SAME_STMT);
    }

  /* T1..T4 describe the data mapping: count, addresses, sizes and kinds;
     all zero/NULL when there is no data to map.  */
  t = gimple_omp_target_data_arg (entry_stmt);
  if (t == NULL)
    {
      t1 = size_zero_node;
      t2 = build_zero_cst (ptr_type_node);
      t3 = t2;
      t4 = t2;
    }
  else
    {
      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
    }

  gimple *g;
  bool tagging = false;
  /* The maximum number used by any start_ix, without varargs.  */
  auto_vec<tree, 11> args;
  args.quick_push (device);
  if (offloaded)
    args.quick_push (build_fold_addr_expr (child_fn));
  args.quick_push (t1);
  args.quick_push (t2);
  args.quick_push (t3);
  args.quick_push (t4);
  switch (start_ix)
    {
    case BUILT_IN_GOACC_DATA_START:
    case BUILT_IN_GOACC_DECLARE:
    case BUILT_IN_GOMP_TARGET_DATA:
      break;
    case BUILT_IN_GOMP_TARGET:
    case BUILT_IN_GOMP_TARGET_UPDATE:
    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
      if (c)
	depend = OMP_CLAUSE_DECL (c);
      else
	depend = build_int_cst (ptr_type_node, 0);
      args.quick_push (depend);
      if (start_ix == BUILT_IN_GOMP_TARGET)
	args.quick_push (get_target_arguments (&gsi, entry_stmt));
      break;
    case BUILT_IN_GOACC_PARALLEL:
      oacc_set_fn_attrib (child_fn, clauses, &args);
      tagging = true;
      /* FALLTHRU */
    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
    case BUILT_IN_GOACC_UPDATE:
      {
	tree t_async = NULL_TREE;

	/* If present, use the value specified by the respective
	   clause, making sure that is of the correct type.  */
	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
	if (c)
	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
				      integer_type_node,
				      OMP_CLAUSE_ASYNC_EXPR (c));
	else if (!tagging)
	  /* Default values for t_async.  */
	  t_async = fold_convert_loc (gimple_location (entry_stmt),
				      integer_type_node,
				      build_int_cst (integer_type_node,
						     GOMP_ASYNC_SYNC));
	if (tagging && t_async)
	  {
	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;

	    if (TREE_CODE (t_async) == INTEGER_CST)
	      {
		/* See if we can pack the async arg in to the tag's
		   operand.  */
		i_async = TREE_INT_CST_LOW (t_async);
		if (i_async < GOMP_LAUNCH_OP_MAX)
		  t_async = NULL_TREE;
		else
		  i_async = GOMP_LAUNCH_OP_MAX;
	      }
	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
					      i_async));
	  }
	if (t_async)
	  args.safe_push (t_async);

	/* Save the argument index, and ... */
	unsigned t_wait_idx = args.length ();
	unsigned num_waits = 0;
	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
	if (!tagging || c)
	  /* ... push a placeholder.  */
	  args.safe_push (integer_zero_node);

	for (; c; c = OMP_CLAUSE_CHAIN (c))
	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
	    {
	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
						integer_type_node,
						OMP_CLAUSE_WAIT_EXPR (c)));
	      num_waits++;
	    }

	if (!tagging || num_waits)
	  {
	    tree len;

	    /* Now that we know the number, update the placeholder.  */
	    if (tagging)
	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
	    else
	      len = build_int_cst (integer_type_node, num_waits);
	    len = fold_convert_loc (gimple_location (entry_stmt),
				    unsigned_type_node, len);
	    args[t_wait_idx] = len;
	  }
      }
      break;
    default:
      gcc_unreachable ();
    }
  if (tagging)
    /* Push terminal marker - zero.  */
    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));

  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
  gimple_set_location (g, gimple_location (entry_stmt));
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  /* For non-offloaded kinds the directive statement itself is still in
     place; drop it now that the call replaces it.  */
  if (!offloaded)
    {
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
      gsi_remove (&gsi, true);
    }
  if (data_region && region->exit)
    {
      gsi = gsi_last_nondebug_bb (region->exit);
      g = gsi_stmt (gsi);
      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
      gsi_remove (&gsi, true);
    }
}

/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with
   iteration variable derived from the thread number.  INTRA_GROUP means this
   is an expansion of a loop iterating over work-items within a separate
   iteration over groups.  */

static void
grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
{
  gimple_stmt_iterator gsi;
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
		       == GF_OMP_FOR_KIND_GRID_LOOP);
  size_t collapse = gimple_omp_for_collapse (for_stmt);
  struct omp_for_data_loop *loops
    = XALLOCAVEC (struct omp_for_data_loop,
		  gimple_omp_for_collapse (for_stmt));
  struct omp_for_data fd;

  remove_edge (BRANCH_EDGE (kfor->entry));
  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;

  gcc_assert (kfor->cont);
  omp_extract_for_data (for_stmt, &fd, loops);

  gsi = gsi_start_bb (body_bb);

  /* For each collapsed dimension, set the iteration variable to
     N1 + thread-id * STEP, where the thread id comes from the appropriate
     HSA builtin below.  */
  for (size_t dim = 0; dim < collapse; dim++)
    {
      tree type, itype;
      itype = type = TREE_TYPE (fd.loops[dim].v);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);

      tree n1 = fd.loops[dim].n1;
      tree step = fd.loops[dim].step;
      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
				     true, NULL_TREE,
				     true, GSI_SAME_STMT);
      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
				       true, NULL_TREE, true, GSI_SAME_STMT);
      /* Pick the HSA builtin supplying this dimension's thread id:
	 work-group id for the group-iteration loop, work-item id within the
	 group for INTRA_GROUP loops, absolute work-item id otherwise.  */
      tree threadid;
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  gcc_checking_assert (!intra_group);
	  threadid = build_call_expr (builtin_decl_explicit
				      (BUILT_IN_HSA_WORKGROUPID), 1,
				      build_int_cstu (unsigned_type_node, dim));
	}
      else if (intra_group)
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      else
	threadid = build_call_expr (builtin_decl_explicit
				    (BUILT_IN_HSA_WORKITEMABSID), 1,
				    build_int_cstu (unsigned_type_node, dim));
      threadid = fold_convert (itype, threadid);
      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
					   true, GSI_SAME_STMT);

      /* STARTVAR = N1 + THREADID * STEP (pointer-plus for pointer types).  */
      tree startvar = fd.loops[dim].v;
      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
      if (POINTER_TYPE_P (type))
	t = fold_build_pointer_plus (n1, t);
      else
	t = fold_build2 (PLUS_EXPR, type, t, n1);
      t = fold_convert (type, t);
      t = force_gimple_operand_gsi (&gsi, t,
				    DECL_P (startvar)
				    && TREE_ADDRESSABLE (startvar),
				    NULL_TREE, true, GSI_SAME_STMT);
      gassign *assign_stmt = gimple_build_assign (startvar, t);
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }
  /* Remove the omp for statement.  */
  gsi = gsi_last_nondebug_bb (kfor->entry);
  gsi_remove (&gsi, true);

  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
  gsi = gsi_last_nondebug_bb (kfor->cont);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
  gsi_remove (&gsi, true);

  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
  gsi = gsi_last_nondebug_bb (kfor->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  if (intra_group)
    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Fixup the much simpler CFG.  */
  remove_edge (find_edge (kfor->cont, body_bb));

  if (kfor->cont != body_bb)
    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
}

/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
   argument_decls.  */

struct grid_arg_decl_map
{
  /* PARM_DECL of the original child function.  */
  tree old_arg;
  /* PARM_DECL of the kernel copy that replaces it.  */
  tree new_arg;
};

/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
   pertaining to kernel function.  */

static tree
grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
  tree t = *tp;

  if (t == adm->old_arg)
    *tp = adm->new_arg;
  /* Do not descend into types or other decls; they cannot contain the
     PARM_DECL we are looking for.  */
  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
  return NULL_TREE;
}

/* If TARGET region contains a kernel body for loop, remove its region from the
   TARGET and expand it in HSA gridified kernel fashion.  */

static void
grid_expand_target_grid_body (struct omp_region *target)
{
  if (!hsa_gen_requested_p ())
    return;

  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
  struct omp_region **pp;

  /* Look for a GRID_BODY region directly nested in TARGET.  */
  for (pp = &target->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
      break;

  struct omp_region *gpukernel = *pp;

  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
  if (!gpukernel)
    {
      /* HSA cannot handle OACC stuff.
 */
      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
	return;
      /* No gridified kernel: register the ordinary child function with HSA
	 and leave the region alone.  */
      gcc_checking_assert (orig_child_fndecl);
      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
				    OMP_CLAUSE__GRIDDIM_));
      cgraph_node *n = cgraph_node::get (orig_child_fndecl);

      hsa_register_kernel (n);
      return;
    }

  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
			       OMP_CLAUSE__GRIDDIM_));
  tree inside_block
    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
  /* Unlink GPUKERNEL from TARGET's children and find the grid-loop region
     within it.  */
  *pp = gpukernel->next;
  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
    if ((*pp)->type == GIMPLE_OMP_FOR)
      break;

  struct omp_region *kfor = *pp;
  gcc_assert (kfor);
  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
  *pp = kfor->next;
  if (kfor->inner)
    {
      if (gimple_omp_for_grid_group_iter (for_stmt))
	{
	  /* KFOR iterates over groups; expand and unlink each inner
	     grid loop (iterating over work-items) first.  */
	  struct omp_region **next_pp;
	  for (pp = &kfor->inner; *pp; pp = next_pp)
	    {
	      next_pp = &(*pp)->next;
	      if ((*pp)->type != GIMPLE_OMP_FOR)
		continue;
	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
	      gcc_assert (gimple_omp_for_kind (inner)
			  == GF_OMP_FOR_KIND_GRID_LOOP);
	      grid_expand_omp_for_loop (*pp, true);
	      *pp = (*pp)->next;
	      next_pp = pp;
	    }
	}
      expand_omp (kfor->inner);
    }
  if (gpukernel->inner)
    expand_omp (gpukernel->inner);

  /* Create the kernel function as a clone of the original child function.  */
  tree kern_fndecl = copy_node (orig_child_fndecl);
  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
  tree tgtblock = gimple_block (tgt_stmt);
  tree fniniblock = make_node (BLOCK);
  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
  DECL_INITIAL (kern_fndecl) = fniniblock;
  push_struct_function (kern_fndecl);
  cfun->function_end_locus = gimple_location (tgt_stmt);
  init_tree_ssa (cfun);
  pop_cfun ();

  /* Give the kernel its own copies of the single argument and the (void)
     result so they do not share DECL_CONTEXT with the original.  */
  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
  gcc_assert (!DECL_CHAIN (old_parm_decl));
  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
  kern_cfun->curr_properties = cfun->curr_properties;

  grid_expand_omp_for_loop (kfor, false);

  /* Remove the omp for statement.  */
  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
  gsi_remove (&gsi, true);
  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
     return.  */
  gsi = gsi_last_nondebug_bb (gpukernel->exit);
  gcc_assert (!gsi_end_p (gsi)
	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
  gimple *ret_stmt = gimple_build_return (NULL);
  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  /* Statements in the first BB in the target construct have been produced by
     target lowering and must be copied inside the GPUKERNEL, with the two
     exceptions of the first OMP statement and the OMP_DATA assignment
     statement.  */
  gsi = gsi_start_bb (single_succ (gpukernel->entry));
  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
       !gsi_end_p (tsi); gsi_next (&tsi))
    {
      gimple *stmt = gsi_stmt (tsi);
      if (is_gimple_omp (stmt))
	break;
      if (sender
	  && is_gimple_assign (stmt)
	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
	continue;
      gimple *copy = gimple_copy (stmt);
      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
      gimple_set_block (copy, fniniblock);
    }

  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
			  gpukernel->exit, inside_block);

  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
  kcn->mark_force_output ();
  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);

  hsa_register_kernel (kcn, orig_child);

  cgraph_node::add_new_function (kern_fndecl, true);
  push_cfun (kern_cfun);
  cgraph_edge::rebuild_edges ();

  /* Re-map any mention of the PARM_DECL of the original function to the
     PARM_DECL of the new one.

     TODO: It would be great if lowering produced references into the GPU
     kernel decl straight away and we did not have to do this.  */
  struct grid_arg_decl_map adm;
  adm.old_arg = old_parm_decl;
  adm.new_arg = new_parm_decl;
  basic_block bb;
  FOR_EACH_BB_FN (bb, kern_cfun)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  struct walk_stmt_info wi;
	  memset (&wi, 0, sizeof (wi));
	  wi.info = &adm;
	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
	}
    }
  pop_cfun ();

  return;
}

/* Expand the parallel region tree rooted at REGION.  Expansion
   proceeds in depth-first order.  Innermost regions are expanded
   first.
This way, parallel regions that require a new function to
   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
   internal dependencies in their body.  */

static void
expand_omp (struct omp_region *region)
{
  /* Reset the flag that records whether a child function was dumped
     while expanding this tree; checked again after the walk so the
     current function's dump header can be re-emitted.  */
  omp_any_child_fn_dumped = false;
  while (region)
    {
      location_t saved_location;
      gimple *inner_stmt = NULL;

      /* First, determine whether this is a combined parallel+workshare
	 region.  */
      if (region->type == GIMPLE_OMP_PARALLEL)
	determine_parallel_type (region);
      else if (region->type == GIMPLE_OMP_TARGET)
	/* Outline a gridified target body, if any, before the inner
	   regions are expanded.  */
	grid_expand_target_grid_body (region);

      /* For a combined worksharing loop, remember the nested directive;
	 expand_omp_for needs it to expand the combination.  */
      if (region->type == GIMPLE_OMP_FOR
	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
	inner_stmt = last_stmt (region->inner->entry);

      /* Expand inner regions first (depth-first, see the comment
	 above).  */
      if (region->inner)
	expand_omp (region->inner);

      /* Point input_location at the directive being expanded for the
	 duration of the switch; restored afterwards.  */
      saved_location = input_location;
      if (gimple_has_location (last_stmt (region->entry)))
	input_location = gimple_location (last_stmt (region->entry));

      switch (region->type)
	{
	case GIMPLE_OMP_PARALLEL:
	case GIMPLE_OMP_TASK:
	  expand_omp_taskreg (region);
	  break;

	case GIMPLE_OMP_FOR:
	  expand_omp_for (region, inner_stmt);
	  break;

	case GIMPLE_OMP_SECTIONS:
	  expand_omp_sections (region);
	  break;

	case GIMPLE_OMP_SECTION:
	  /* Individual omp sections are handled together with their
	     parent GIMPLE_OMP_SECTIONS region.  */
	  break;

	case GIMPLE_OMP_SINGLE:
	  expand_omp_single (region);
	  break;

	case GIMPLE_OMP_ORDERED:
	  {
	    gomp_ordered *ord_stmt
	      = as_a <gomp_ordered *> (last_stmt (region->entry));
	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
				 OMP_CLAUSE_DEPEND))
	      {
		/* We'll expand these when expanding corresponding
		   worksharing region with ordered(n) clause.  */
		gcc_assert (region->outer
			    && region->outer->type == GIMPLE_OMP_FOR);
		region->ord_stmt = ord_stmt;
		break;
	      }
	  }
	  /* FALLTHRU */
	case GIMPLE_OMP_MASTER:
	case GIMPLE_OMP_TASKGROUP:
	case GIMPLE_OMP_CRITICAL:
	case GIMPLE_OMP_TEAMS:
	  expand_omp_synch (region);
	  break;

	case GIMPLE_OMP_ATOMIC_LOAD:
	  expand_omp_atomic (region);
	  break;

	case GIMPLE_OMP_TARGET:
	  expand_omp_target (region);
	  break;

	default:
	  gcc_unreachable ();
	}

      input_location = saved_location;
      region = region->next;
    }
  if (omp_any_child_fn_dumped)
    {
      /* A child function's dump was interleaved into dump_file while
	 expanding; re-print this function's header so later dump output
	 is attributed to the right function.  */
      if (dump_file)
	dump_function_header (dump_file, current_function_decl, dump_flags);
      omp_any_child_fn_dumped = false;
    }
}

/* Helper for build_omp_regions.  Scan the dominator tree starting at
   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
   true, the function ends once a single tree is built (otherwise, whole
   forest of OMP constructs may be built).  */

static void
build_omp_regions_1 (basic_block bb, struct omp_region *parent,
		     bool single_tree)
{
  gimple_stmt_iterator gsi;
  gimple *stmt;
  basic_block son;

  /* Only the last (non-debug) statement of a block can open or close an
     OMP region.  */
  gsi = gsi_last_nondebug_bb (bb);
  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
    {
      struct omp_region *region;
      enum gimple_code code;

      stmt = gsi_stmt (gsi);
      code = gimple_code (stmt);
      if (code == GIMPLE_OMP_RETURN)
	{
	  /* STMT is the return point out of region PARENT.  Mark it
	     as the exit point and make PARENT the immediately
	     enclosing region.  */
	  gcc_assert (parent);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_ATOMIC_STORE)
	{
	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
	     GIMPLE_OMP_RETURN, but matches with
	     GIMPLE_OMP_ATOMIC_LOAD.  */
	  gcc_assert (parent);
	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
	  region = parent;
	  region->exit = bb;
	  parent = parent->outer;
	}
      else if (code == GIMPLE_OMP_CONTINUE)
	{
	  /* Record the continue point of the current region; it is not
	     a region boundary itself.  */
	  gcc_assert (parent);
	  parent->cont = bb;
	}
      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
	{
	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
	}
      else
	{
	  region = new_omp_region (bb, code, parent);
	  /* Otherwise...  */
	  if (code == GIMPLE_OMP_TARGET)
	    {
	      switch (gimple_omp_target_kind (stmt))
		{
		case GF_OMP_TARGET_KIND_REGION:
		case GF_OMP_TARGET_KIND_DATA:
		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
		case GF_OMP_TARGET_KIND_OACC_KERNELS:
		case GF_OMP_TARGET_KIND_OACC_DATA:
		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
		  break;
		case GF_OMP_TARGET_KIND_UPDATE:
		case GF_OMP_TARGET_KIND_ENTER_DATA:
		case GF_OMP_TARGET_KIND_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_UPDATE:
		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
		case GF_OMP_TARGET_KIND_OACC_DECLARE:
		  /* ..., other than for those stand-alone directives...  */
		  region = NULL;
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  else if (code == GIMPLE_OMP_ORDERED
		   && omp_find_clause (gimple_omp_ordered_clauses
					 (as_a <gomp_ordered *> (stmt)),
				       OMP_CLAUSE_DEPEND))
	    /* #pragma omp ordered depend is also just a stand-alone
	       directive.  */
	    region = NULL;
	  /* ..., this directive becomes the parent for a new region.  */
	  if (region)
	    parent = region;
	}
    }

  /* When building a single tree, stop as soon as the initial region has
     been closed (PARENT has popped back to NULL).  */
  if (single_tree && !parent)
    return;

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    build_omp_regions_1 (son, parent, single_tree);
}

/* Builds the tree of OMP regions rooted at ROOT, storing it to
   root_omp_region.
*/

static void
build_omp_regions_root (basic_block root)
{
  /* The global root_omp_region must be free on entry; build_omp_regions_1
     records the single tree it finds under ROOT into it.  */
  gcc_assert (root_omp_region == NULL);
  build_omp_regions_1 (root, NULL, true);
  gcc_assert (root_omp_region != NULL);
}

/* Expands omp construct (and its subconstructs) starting in HEAD.  */

void
omp_expand_local (basic_block head)
{
  build_omp_regions_root (head);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  /* Barriers are removed before expansion, which consumes the region tree;
     the regions are freed afterwards.  */
  remove_exit_barriers (root_omp_region);
  expand_omp (root_omp_region);

  omp_free_regions ();
}

/* Scan the CFG and build a forest of OMP regions, storing it in the
   global root_omp_region.  */

static void
build_omp_regions (void)
{
  gcc_assert (root_omp_region == NULL);
  /* Region discovery walks the dominator tree, so make sure the
     dominance info is available.  */
  calculate_dominance_info (CDI_DOMINATORS);
  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
}

/* Main entry point for expanding OMP-GIMPLE into runtime calls.
   Returns 0 (no extra TODO flags).  */

static unsigned int
execute_expand_omp (void)
{
  build_omp_regions ();

  /* Nothing to do if the function contains no OMP directives.  */
  if (!root_omp_region)
    return 0;

  if (dump_file)
    {
      fprintf (dump_file, "\nOMP region tree\n\n");
      dump_omp_region (dump_file, root_omp_region, 0);
      fprintf (dump_file, "\n");
    }

  remove_exit_barriers (root_omp_region);

  expand_omp (root_omp_region);

  /* Expansion rewrites the CFG; verify loops when checking is enabled,
     unless they are already marked as needing fixup anyway.  */
  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
    verify_loop_structure ();
  cleanup_tree_cfg ();

  omp_free_regions ();

  return 0;
}

/* OMP expansion -- the default pass, run before creation of SSA form.
*/ 7998 7999namespace { 8000 8001const pass_data pass_data_expand_omp = 8002{ 8003 GIMPLE_PASS, /* type */ 8004 "ompexp", /* name */ 8005 OPTGROUP_OMP, /* optinfo_flags */ 8006 TV_NONE, /* tv_id */ 8007 PROP_gimple_any, /* properties_required */ 8008 PROP_gimple_eomp, /* properties_provided */ 8009 0, /* properties_destroyed */ 8010 0, /* todo_flags_start */ 8011 0, /* todo_flags_finish */ 8012}; 8013 8014class pass_expand_omp : public gimple_opt_pass 8015{ 8016public: 8017 pass_expand_omp (gcc::context *ctxt) 8018 : gimple_opt_pass (pass_data_expand_omp, ctxt) 8019 {} 8020 8021 /* opt_pass methods: */ 8022 virtual unsigned int execute (function *) 8023 { 8024 bool gate = ((flag_openacc != 0 || flag_openmp != 0 8025 || flag_openmp_simd != 0) 8026 && !seen_error ()); 8027 8028 /* This pass always runs, to provide PROP_gimple_eomp. 8029 But often, there is nothing to do. */ 8030 if (!gate) 8031 return 0; 8032 8033 return execute_expand_omp (); 8034 } 8035 8036}; // class pass_expand_omp 8037 8038} // anon namespace 8039 8040gimple_opt_pass * 8041make_pass_expand_omp (gcc::context *ctxt) 8042{ 8043 return new pass_expand_omp (ctxt); 8044} 8045 8046namespace { 8047 8048const pass_data pass_data_expand_omp_ssa = 8049{ 8050 GIMPLE_PASS, /* type */ 8051 "ompexpssa", /* name */ 8052 OPTGROUP_OMP, /* optinfo_flags */ 8053 TV_NONE, /* tv_id */ 8054 PROP_cfg | PROP_ssa, /* properties_required */ 8055 PROP_gimple_eomp, /* properties_provided */ 8056 0, /* properties_destroyed */ 8057 0, /* todo_flags_start */ 8058 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ 8059}; 8060 8061class pass_expand_omp_ssa : public gimple_opt_pass 8062{ 8063public: 8064 pass_expand_omp_ssa (gcc::context *ctxt) 8065 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) 8066 {} 8067 8068 /* opt_pass methods: */ 8069 virtual bool gate (function *fun) 8070 { 8071 return !(fun->curr_properties & PROP_gimple_eomp); 8072 } 8073 virtual unsigned int execute (function *) { return 
execute_expand_omp (); }
  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }

}; // class pass_expand_omp_ssa

} // anon namespace

gimple_opt_pass *
make_pass_expand_omp_ssa (gcc::context *ctxt)
{
  return new pass_expand_omp_ssa (ctxt);
}

/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
   GIMPLE_* codes.  BB is a block ending in an OMP statement; *REGION is
   the innermost open OMP region (pushed/popped here as directives open
   and close) and *REGION_IDX is updated to the index of the new current
   region's entry block (0 if none).  Returns true if the caller should
   still add the ordinary fallthru edge to the next block.  */

bool
omp_make_gimple_edges (basic_block bb, struct omp_region **region,
		       int *region_idx)
{
  gimple *last = last_stmt (bb);
  enum gimple_code code = gimple_code (last);
  struct omp_region *cur_region = *region;
  bool fallthru = false;

  switch (code)
    {
    case GIMPLE_OMP_PARALLEL:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_FOR:
    case GIMPLE_OMP_SINGLE:
    case GIMPLE_OMP_TEAMS:
    case GIMPLE_OMP_MASTER:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_GRID_BODY:
      /* A directive with a body: open a new region.  */
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_ORDERED:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      /* "#pragma omp ordered depend" is a stand-alone directive with no
	 body, so its region closes immediately.  */
      if (omp_find_clause (gimple_omp_ordered_clauses
			     (as_a <gomp_ordered *> (last)),
			   OMP_CLAUSE_DEPEND))
	cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_TARGET:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      switch (gimple_omp_target_kind (last))
	{
	case GF_OMP_TARGET_KIND_REGION:
	case GF_OMP_TARGET_KIND_DATA:
	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
	case GF_OMP_TARGET_KIND_OACC_KERNELS:
	case GF_OMP_TARGET_KIND_OACC_DATA:
	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
	  break;
	case GF_OMP_TARGET_KIND_UPDATE:
	case GF_OMP_TARGET_KIND_ENTER_DATA:
	case GF_OMP_TARGET_KIND_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_UPDATE:
	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
	case GF_OMP_TARGET_KIND_OACC_DECLARE:
	  /* Stand-alone target directives have no body; close the
	     region right away.  */
	  cur_region = cur_region->outer;
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case GIMPLE_OMP_SECTIONS:
      cur_region = new_omp_region (bb, code, cur_region);
      fallthru = true;
      break;

    case GIMPLE_OMP_SECTIONS_SWITCH:
      /* Edges out of the switch block are wired up when the matching
	 GIMPLE_OMP_CONTINUE is seen below.  */
      fallthru = false;
      break;

    case GIMPLE_OMP_ATOMIC_LOAD:
    case GIMPLE_OMP_ATOMIC_STORE:
      fallthru = true;
      break;

    case GIMPLE_OMP_RETURN:
      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
	 somewhere other than the next block.  This will be
	 created later.  */
      cur_region->exit = bb;
      if (cur_region->type == GIMPLE_OMP_TASK)
	/* Add an edge corresponding to not scheduling the task
	   immediately.  */
	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
      cur_region = cur_region->outer;
      break;

    case GIMPLE_OMP_CONTINUE:
      cur_region->cont = bb;
      switch (cur_region->type)
	{
	case GIMPLE_OMP_FOR:
	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
	     succs edges as abnormal to prevent splitting
	     them.  */
	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
	  /* Make the loopback edge.  */
	  make_edge (bb, single_succ (cur_region->entry),
		     EDGE_ABNORMAL);

	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
	     corresponds to the case that the body of the loop
	     is not executed at all.  */
	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
	  fallthru = false;
	  break;

	case GIMPLE_OMP_SECTIONS:
	  /* Wire up the edges into and out of the nested sections.  */
	  {
	    basic_block switch_bb = single_succ (cur_region->entry);

	    struct omp_region *i;
	    for (i = cur_region->inner; i ; i = i->next)
	      {
		gcc_assert (i->type == GIMPLE_OMP_SECTION);
		make_edge (switch_bb, i->entry, 0);
		make_edge (i->exit, bb, EDGE_FALLTHRU);
	      }

	    /* Make the loopback edge to the block with
	       GIMPLE_OMP_SECTIONS_SWITCH.  */
	    make_edge (bb, switch_bb, 0);

	    /* Make the edge from the switch to exit.  */
	    make_edge (switch_bb, bb->next_bb, 0);
	    fallthru = false;
	  }
	  break;

	case GIMPLE_OMP_TASK:
	  fallthru = true;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Report the new current region (if any) back to the caller.  */
  if (*region != cur_region)
    {
      *region = cur_region;
      if (cur_region)
	*region_idx = cur_region->entry->index;
      else
	*region_idx = 0;
    }

  return fallthru;
}

#include "gt-omp-expand.h"